def __getNewPage(self):
    """Return the page number the board currently marks as the current page.

    Fetches ``self.__Url`` with ``self.Get``, prints the fetched page, and
    looks for the ``current-comment-page`` span inside the ``cp-pagenavi``
    div.

    Returns:
        The text found between the square brackets of that span (a string),
        or the integer 1500 when the marker is not found.
    """
    # On Python 2 getfilesystemencoding() may return None; fall back to the
    # interpreter default so .encode() always receives a codec name.
    encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
    # "replace" keeps one undecodable or unencodable character from
    # aborting the whole run with a UnicodeError.
    text = self.Get(self.__Url).decode("UTF-8", "replace").encode(encoding, "replace")
    print(text)
    pattern = re.compile(r'<div .*?cp-pagenavi">.*?<span .*?current-comment-page">\[(.*?)\]</span>', re.S)
    match = pattern.search(text)
    if match is not None:
        return match.group(1)
    return 1500
def __getAllPicUrl(self, pageIndex):
    """Print the ``src`` of every image found on board page *pageIndex*.

    The page URL is built as ``self.__Url + "page-<pageIndex>#comments"`` and
    downloaded with ``self.Get``.

    Args:
        pageIndex: Page number; converted with ``str()`` when building the URL.
    """
    page_url = self.__Url + "page-" + str(pageIndex) + "#comments"
    # On Python 2 getfilesystemencoding() may return None; fall back to the
    # interpreter default so .encode() always receives a codec name.
    encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
    # "replace" keeps one undecodable or unencodable character from
    # aborting the whole run with a UnicodeError.
    text = self.Get(page_url).decode("UTF-8", "replace").encode(encoding, "replace")
    pattern = re.compile('<p>.*?<a .*?view_img_link">.*?</a>.*?<img src="(.*?)".*?</p>', re.S)
    for image_url in pattern.findall(text):
        print(image_url)
def __savePics(self, img_addr, folder):
    """Download every URL in *img_addr* into the current working directory.

    Each file is named after the last ``/``-separated component of its URL.

    Args:
        img_addr: Iterable of image URLs.
        folder: Accepted for interface compatibility but not used; files
            are written relative to the current working directory.
    """
    for image_url in img_addr:
        filename = image_url.split('/')[-1]
        print("正在保存图片:" + filename)
        # Download before opening the file so that a failed request (empty
        # body) does not leave a zero-byte file behind.
        data = self.Get(image_url)
        if not data:
            continue
        with open(filename, 'wb') as output:
            output.write(data)
# -*- coding: utf-8 -*-
import cookielib, urllib, urllib2, socket
import zlib,StringIO
class HttpClient:
    """Minimal HTTP GET helper built on a shared, proxied urllib2 opener.

    The cookie jar, proxy handler and opener are class attributes, so all
    instances share them. The opener is also installed process-wide with
    ``urllib2.install_opener``, so it is used by every ``urllib2.urlopen``
    call made after this class is defined.
    """

    __cookie = cookielib.CookieJar()
    # Proxy server and port used for plain-HTTP requests.
    __proxy_handler = urllib2.ProxyHandler({"http": '42.121.6.80:8080'})
    # Build the opener: cookie handling plus the proxy.
    __req = urllib2.build_opener(urllib2.HTTPCookieProcessor(__cookie), __proxy_handler)
    __req.addheaders = [
        ('Accept', 'application/javascript, */*;q=0.8'),
        ('User-Agent', 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)')
    ]
    urllib2.install_opener(__req)

    def Get(self, url, refer=None):
        """Fetch *url* and return the raw response body.

        No ``Accept-encoding`` header is sent and the body is returned
        exactly as read.

        Args:
            url: URL to request.
            refer: Optional value for the ``Referer`` request header.

        Returns:
            The response body. For an HTTP error status the body of the
            error response is returned; for any other URL or socket failure
            (including timeouts) an empty string is returned.
        """
        req = urllib2.Request(url)
        if refer is not None:
            req.add_header('Referer', refer)
        try:
            response = urllib2.urlopen(req, timeout=120)
            try:
                return response.read()
            finally:
                # Release the connection even if read() fails.
                response.close()
        except urllib2.HTTPError as e:
            return e.read()
        except (urllib2.URLError, socket.error):
            # urlopen reports connection-level failures as URLError;
            # socket.error (which includes socket.timeout) can still be
            # raised while reading the body.
            return ''
# -*- coding: utf-8 -*-
import cookielib, urllib, urllib2, socket
import zlib,StringIO
class HttpClient:
    """Small HTTP helper (GET, POST, download, cookies) on top of urllib2.

    The cookie jar, proxy handler and opener are class attributes, so all
    instances share them. The opener is also installed process-wide with
    ``urllib2.install_opener``, so it is used by every ``urllib2.urlopen``
    call made after this class is defined.
    """

    __cookie = cookielib.CookieJar()
    # Proxy server and port used for plain-HTTP requests.
    __proxy_handler = urllib2.ProxyHandler({"http": '42.121.6.80:8080'})
    # Build the opener: cookie handling plus the proxy.
    __req = urllib2.build_opener(urllib2.HTTPCookieProcessor(__cookie), __proxy_handler)
    __req.addheaders = [
        ('Accept', 'application/javascript, */*;q=0.8'),
        ('User-Agent', 'Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)')
    ]
    urllib2.install_opener(__req)

    def Get(self, url, refer=None):
        """Fetch *url*, asking for gzip, and return the decoded body.

        Args:
            url: URL to request.
            refer: Optional value for the ``Referer`` request header.

        Returns:
            The response body, gunzipped when the server labels it as gzip.
            For an HTTP error status the body of the error response is
            returned as read; for any other URL or socket failure
            (including timeouts) an empty string is returned.
        """
        req = urllib2.Request(url)
        req.add_header('Accept-encoding', 'gzip')
        if refer is not None:
            req.add_header('Referer', refer)
        try:
            response = urllib2.urlopen(req, timeout=120)
            try:
                html = response.read()
                encoding = response.headers.get('Content-Encoding') or ''
            finally:
                # Release the connection even if read() fails.
                response.close()
        except urllib2.HTTPError as e:
            return e.read()
        except (urllib2.URLError, socket.error):
            # urlopen reports connection-level failures as URLError;
            # socket.error (which includes socket.timeout) can still be
            # raised while reading the body.
            return ''
        # Only gunzip what is actually labelled gzip; any other
        # Content-Encoding value would make zlib raise on the gzip header.
        if 'gzip' in encoding.lower():
            # 16 + MAX_WBITS tells zlib to expect a gzip header and trailer.
            html = zlib.decompress(html, 16 + zlib.MAX_WBITS)
        return html

    def Post(self, url, data, refer=None):
        """Send *data* to *url* and return the raw response body.

        Args:
            url: URL to request.
            data: Request body, passed to ``urllib2.Request`` unchanged (it
                is not url-encoded here).
            refer: Optional value for the ``Referer`` request header.

        Returns:
            The response body. For an HTTP error status the body of the
            error response is returned; for any other URL or socket failure
            (including timeouts) an empty string is returned.
        """
        req = urllib2.Request(url, data)
        if refer is not None:
            req.add_header('Referer', refer)
        try:
            response = urllib2.urlopen(req, timeout=120)
            try:
                return response.read()
            finally:
                response.close()
        except urllib2.HTTPError as e:
            return e.read()
        except (urllib2.URLError, socket.error):
            return ''

    def Download(self, url, file):
        """Download *url* and write the body to the path *file*.

        Errors raised by ``urllib2.urlopen`` or by the file operations are
        not caught here.

        Args:
            url: URL to download.
            file: Destination path, opened in binary write mode.
        """
        # Fetch first so a failed request does not create or truncate the
        # destination file.
        response = urllib2.urlopen(url)
        try:
            content = response.read()
        finally:
            response.close()
        with open(file, 'wb') as output:
            output.write(content)

    def getCookie(self, key):
        """Return the value of the first stored cookie named *key*, or ''."""
        for c in self.__cookie:
            if c.name == key:
                return c.value
        return ''

    def setCookie(self, key, val, domain):
        """Store a cookie *key*=*val* for *domain* with path '/'.

        The cookie is added to the class-level jar shared by all instances.
        """
        ck = cookielib.Cookie(
            version=0,
            name=key,
            value=val,
            port=None,
            port_specified=False,
            domain=domain,
            domain_specified=False,
            domain_initial_dot=False,
            path='/',
            path_specified=True,
            secure=False,
            expires=None,
            discard=True,
            comment=None,
            comment_url=None,
            rest={'HttpOnly': None},
            rfc2109=False,
        )
        self.__cookie.set_cookie(ck)
# vim : tabstop=2 shiftwidth=2 softtabstop=2 expandtab
HttpClient
# -*- coding: utf-8 -*-
from __future__ import unicode_literals
from HttpClient import HttpClient
import sys,re,os
class JianDan(HttpClient):
    """Downloader for the images posted on the jandan.net "ooxx" board.

    Pages and images are fetched through the inherited ``Get`` method and
    the images are written into a local folder.
    """

    def __init__(self):
        # First page to crawl; the pictures on earlier pages were removed
        # by jandan.
        self.__pageIndex = 1500
        self.__Url = "http://jandan.net/ooxx/"
        self.__folder = "jiandan"

    def __fetchPage(self, url):
        """Download *url* and return it re-encoded in the file-system encoding."""
        # On Python 2 getfilesystemencoding() may return None; fall back to
        # the interpreter default so .encode() always gets a codec name.
        encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()
        # "replace" keeps one undecodable or unencodable character from
        # aborting the whole crawl with a UnicodeError.
        return self.Get(url).decode("UTF-8", "replace").encode(encoding, "replace")

    def __getAllPicUrl(self, pageIndex):
        """Print and then download every image found on page *pageIndex*."""
        page_url = self.__Url + "page-" + str(pageIndex) + "#comments"
        text = self.__fetchPage(page_url)
        pattern = re.compile('<p>.*?<a .*?view_img_link">.*?</a>.*?<img src="(.*?)".*?</p>', re.S)
        image_urls = pattern.findall(text)
        for image_url in image_urls:
            print(image_url)
        self.__savePics(image_urls, self.__folder)

    def __savePics(self, img_addr, folder):
        """Download every URL in *img_addr* into the current directory.

        Each file is named after the last ``/``-separated component of its
        URL. *folder* is accepted but not used: ``start`` changes into the
        target folder before any download happens.
        """
        for image_url in img_addr:
            filename = image_url.split('/')[-1]
            print("正在保存图片:" + filename)
            # Download before opening the file so that a failed request
            # (empty body) does not leave a zero-byte file behind.
            data = self.Get(image_url)
            if not data:
                continue
            with open(filename, 'wb') as output:
                output.write(data)

    def __getNewPage(self):
        """Return the page number the board marks as the current page.

        Prints the fetched front page. Returns the bracketed text of the
        ``current-comment-page`` span (a string), or the integer 1500 when
        that marker is not found.
        """
        text = self.__fetchPage(self.__Url)
        print(text)
        pattern = re.compile(r'<div .*?cp-pagenavi">.*?<span .*?current-comment-page">\[(.*?)\]</span>', re.S)
        match = pattern.search(text)
        if match is not None:
            return match.group(1)
        return 1500

    def start(self):
        """Create and enter the download folder, then crawl all pages.

        Pages from the configured first page up to and including the page
        reported by ``__getNewPage`` are processed in order.
        """
        # Check whether the target directory already exists.
        folder_exists = os.path.exists(self.__folder)
        print(folder_exists)
        if not folder_exists:
            os.mkdir(self.__folder)
        os.chdir(self.__folder)
        newest = int(self.__getNewPage())
        # The newest page is itself a page to download, so the upper bound
        # is inclusive.
        for index in range(self.__pageIndex, newest + 1):
            self.__getAllPicUrl(index)
# Script entry point: run the crawler only when executed directly.
if __name__ == '__main__':
    JianDan().start()
JianDan
机械节能产品生产企业官网模板...
大气智能家居家具装修装饰类企业通用网站模板...
礼品公司网站模板
宽屏简约大气婚纱摄影影楼模板...
蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有