# Excerpt: build the URL of each Sogou WeChat results page.
# `search` and `pagenum` are not defined in this excerpt; they must already
# exist when these lines run.
# Percent-encode the search term so it can be placed in the query string.
search = urllib.request.quote(search)
# Pages are numbered from 1 up to and including `pagenum`.
for page in range(1,pagenum+1): url = 'http://weixin.sogou.com/weixin?type=2&query='+search+'&page='+str(page)
# Excerpt: download one results page while sending a browser-like User-Agent.
import urllib.request
header = ('User-Agent','Mozilla/5.0')
opener = urllib.request.build_opener()
# Replace the opener's header list with the single User-Agent header.
opener.addheaders = [header]
# Make this opener the one used by later urllib.request.urlopen() calls.
urllib.request.install_opener(opener)
# `url` comes from outside this excerpt; decode() uses its default UTF-8 codec.
data = urllib.request.urlopen(url).read().decode()
# Excerpt: extract article links and titles from the page HTML held in `data`
# (`data` is defined outside this excerpt).
import re
# Group 1 captures the anchor's href value, group 2 the anchor's inner HTML.
# NOTE(review): the doubled rel="external nofollow" looks like an artifact of
# the page this listing was copied from rather than of the target HTML -
# confirm against a live results page before relying on this pattern.
finddata = re.compile('<a target="_blank" href="(.*?)" rel="external nofollow" rel="external nofollow" .*?uigs="article_title_.*?">(.*?)</a>').findall(data)
# findall() returns a list of 2-tuples, one (href, inner HTML) pair per match:
#finddata = [('',''),('','')]
# Excerpt: clean up one search hit and store it. `title`, `link` and
# `title_link` are defined outside this excerpt.
# Strip the <em> markers wrapped around part of the title (presumably the
# highlighted search term - confirm).
title = title.replace('<em><!--red_beg-->','')
title = title.replace('<!--red_end--></em>','')
# Removing 'amp;' turns an HTML-escaped '&amp;' in the href into a plain '&'.
link = link.replace('amp;','')
# Entries are stored flat: the link first, then its title.
title_link.append(link)
title_link.append(title)
# Excerpt: write the collected titles and links to an Excel workbook.
# `search` and `title_link` are defined outside this excerpt.
import xlsxwriter
workbook = xlsxwriter.Workbook(search+'.xlsx')
worksheet = workbook.add_worksheet('微信')
# title_link is a flat list of alternating link, title entries, so step by 2.
# NOTE(review): i takes the values 0, 2, 4, ..., so rows 1, 3, 5, ... are
# written and every other row stays empty - confirm that this is intended.
for i in range(0,len(title_link),2):
    worksheet.write('A'+str(i+1),title_link[i+1])
    worksheet.write('C'+str(i+1),title_link[i])
# The workbook is only saved to disk when it is closed.
workbook.close()
"""Search WeChat articles on Sogou and save their titles and links to Excel.

Written for Python 3.4 on Windows.
Author: Yufan (羽凡), 2017/7/11.

The script asks for a search term and a number of result pages, downloads
those pages, extracts every article's link and title, and writes them to
``<search term>.xlsx``.  It waits 10 seconds between pages to avoid being
rate-limited.

Dependencies: urllib.request, xlsxwriter, re, time (plus html and
urllib.parse from the standard library).
"""
import html
import re
import time
import urllib.parse
import urllib.request

SEARCH_URL = 'http://weixin.sogou.com/weixin?type=2&query='
USER_AGENT = 'Mozilla/5.0'
PAGE_DELAY_SECONDS = 10
SHEET_NAME = '微信'
# Row 1 holds the column headers, so article rows start on row 2.
FIRST_DATA_ROW = 2

# Group 1 is the article link (href), group 2 the title's inner HTML.
# NOTE(review): the doubled rel="external nofollow" looks like an artifact of
# the page this script was copied from rather than of Sogou's markup; if no
# results are found, check this pattern against a live results page first.
ARTICLE_PATTERN = re.compile(
    '<a target="_blank" href="(.*?)" rel="external nofollow" '
    'rel="external nofollow" .*?uigs="article_title_.*?">(.*?)</a>'
)


def build_search_url(term, page):
    """Return the results-page URL for the raw search *term* and *page*.

    *term* is percent-encoded here; *page* is the 1-based page number.
    """
    return SEARCH_URL + urllib.parse.quote(term) + '&page=' + str(page)


def clean_title(raw_title):
    """Return *raw_title* as plain text suitable for a spreadsheet cell."""
    # Strip the markers first, so that an escaped '&lt;em&gt;' inside the
    # title is not mistaken for one of them after unescaping.
    title = raw_title.replace('<em><!--red_beg-->', '')
    title = title.replace('<!--red_end--></em>', '')
    title = html.unescape(title)
    # Titles may contain curly quotes; normalise them to straight quotes.
    title = title.replace('“', '"')
    title = title.replace('”', '"')
    return title


def clean_link(raw_link):
    """Return the href value *raw_link* with HTML entities decoded.

    This turns '&amp;' into '&' without touching a literal 'amp;' that is
    not part of an entity.
    """
    return html.unescape(raw_link)


def extract_articles(page_html):
    """Return a list of (link, title) tuples found in *page_html*.

    The list is empty when the page contains no matching anchors.
    """
    return [
        (clean_link(raw_link), clean_title(raw_title))
        for raw_link, raw_title in ARTICLE_PATTERN.findall(page_html)
    ]


def fetch_page(url):
    """Download *url* with a browser-like User-Agent and return its text.

    The body is decoded as UTF-8.  Network errors raised by urllib are not
    caught here.
    """
    request = urllib.request.Request(url, headers={'User-Agent': USER_AGENT})
    with urllib.request.urlopen(request) as response:
        return response.read().decode()


def collect_articles(term, page_count):
    """Fetch *page_count* result pages for *term* and return all articles.

    Returns a list of (link, title) tuples in page order.  Progress is
    printed after each page.
    """
    articles = []
    for page in range(1, page_count + 1):
        articles.extend(extract_articles(fetch_page(build_search_url(term, page))))
        print('第' + str(page) + '页')
        # Pause between requests only; nothing follows the last page.
        if page < page_count:
            time.sleep(PAGE_DELAY_SECONDS)
    return articles


def data_cells(articles):
    """Return (cell, value) pairs placing *articles* below the header row.

    Titles go in column A and links in column C, one article per row,
    starting at FIRST_DATA_ROW so the header in row 1 is never overwritten.
    """
    cells = []
    for row, (link, title) in enumerate(articles, start=FIRST_DATA_ROW):
        cells.append(('A' + str(row), title))
        cells.append(('C' + str(row), link))
    return cells


def write_workbook(path, articles):
    """Write *articles* (a list of (link, title) tuples) to the file *path*."""
    # Imported here so the parsing helpers above stay usable on machines
    # that do not have the third-party xlsxwriter package installed.
    import xlsxwriter

    workbook = xlsxwriter.Workbook(path)
    try:
        worksheet = workbook.add_worksheet(SHEET_NAME)
        worksheet.set_column('A:A', 70)
        worksheet.set_column('C:C', 100)
        bold = workbook.add_format({'bold': True})
        worksheet.write('A1', '标题', bold)
        worksheet.write('C1', '链接', bold)
        for cell, value in data_cells(articles):
            worksheet.write(cell, value)
    finally:
        # xlsxwriter only saves the file when the workbook is closed.
        workbook.close()


def main():
    """Prompt for a search term and page count, then scrape and export."""
    term = input("搜索微信文章:")
    page_count = int(input('搜索页数:'))
    articles = collect_articles(term, page_count)
    # The file is named after the raw (unencoded) search term.
    write_workbook(term + '.xlsx', articles)
    print('导入Excel完毕!')


if __name__ == '__main__':
    main()
机械节能产品生产企业官网模板...
大气智能家居家具装修装饰类企业通用网站模板...
礼品公司网站模板
宽屏简约大气婚纱摄影影楼模板...
蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有