#!/usr/bin/python
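# -*- coding: cp936 -*-
# Approach: convert the byte strings to unicode so that the regular expression
# can be applied. This requires knowing the file's encoding (GBK in this example).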
import cPickle as mypickle
import re
import sys
# Matches one Han character (CJK Unified Ideographs U+4E00..U+9FA5).
# The pattern is a unicode literal built from \u escapes so it does not
# depend on the encoding of this source file.
HAN_CHAR = re.compile(u'[\u4e00-\u9fa5]')


def count_name_chars(lines, verbose=False):
    """Tally family-name and given-name characters of two-character names.

    Each line is stripped of surrounding whitespace and scanned for Han
    characters.  Only lines that contain exactly two of them are counted:
    the first character is tallied as a family name, the second as a
    given name.  All other lines are ignored.

    Args:
        lines: iterable of already-decoded (unicode) text lines.
        verbose: when true, print the stripped line, the number of Han
            characters found and, for counted lines, the family-name
            character.

    Returns:
        A ``(family_counts, given_counts)`` pair of ``collections.Counter``
        objects keyed by a single unicode character.
    """
    # Local import: the file's import header does not provide Counter.
    from collections import Counter

    family_counts = Counter()
    given_counts = Counter()
    for line in lines:
        line = line.strip()
        hanzi = HAN_CHAR.findall(line)
        if verbose:
            print(line)
            print(len(hanzi))
        if len(hanzi) != 2:
            continue
        family, given = hanzi
        if verbose:
            print(family)
        family_counts[family] += 1
        given_counts[given] += 1
    return family_counts, given_counts


def write_counts(path, counts, encoding='gbk'):
    """Write one ``character<TAB>count`` line per entry, most frequent first.

    Args:
        path: output file path; an existing file is overwritten.
        counts: ``collections.Counter`` mapping unicode characters to counts.
        encoding: text encoding of the output file.
    """
    import io  # local import: not part of the file's import header

    with io.open(path, 'w', encoding=encoding) as out:
        # most_common() sorts by count, descending; ties keep the Counter's
        # own iteration order.
        out.writelines(
            u'%s\t%d\n' % (char, total) for char, total in counts.most_common()
        )


def main(source='demo.txt', family_out='familyname.txt',
         given_out='firstname.txt', encoding='gbk'):
    """Count name characters in ``source`` and write both frequency tables.

    Text is decoded once when reading and encoded once when writing, so
    everything in between works on unicode strings.

    Args:
        source: input file with one name per line.
        family_out: output path for the family-name frequency table.
        given_out: output path for the given-name frequency table.
        encoding: encoding used for the input and both output files.

    Raises:
        IOError / OSError: if a file cannot be opened.
        UnicodeDecodeError: if ``source`` is not valid in ``encoding``.
    """
    import io  # local import: not part of the file's import header

    with io.open(source, 'r', encoding=encoding) as src:
        family_counts, given_counts = count_name_chars(src, verbose=True)

    print(family_counts.most_common())
    write_counts(family_out, family_counts, encoding)
    write_counts(given_out, given_counts, encoding)
    print('finish')


if __name__ == '__main__':
    main()
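# NOTE: the lines below are unrelated web-page text that was captured together
# with the script; they are kept verbatim as comments so the file still parses.
# 机械节能产品生产企业官网模板...
# 大气智能家居家具装修装饰类企业通用网站模板...
# 礼品公司网站模板
# 宽屏简约大气婚纱摄影影楼模板...
# 蓝白WAP手机综合医院类整站源码(独立后台)...苏ICP备2024110244号-2 苏公网安备32050702011978号 增值电信业务经营许可证编号:苏B2-20251499 | Copyright 2018 - 2025 源码网商城 (www.ymwmall.com) 版权所有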