# -*- coding:utf-8 -*-
# Author: archingB
# Interactive reader for the qiushibaike.com "textnew" section (Python 2).
import urllib2
import re

readme = "Press Enter to read the next item; q quits, w appends the current item to save.txt"


def getpage(page):
    """Fetch one listing page and return the text of every post on it."""
    url = 'http://www.qiushibaike.com/textnew/page/' + str(page)
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    try:
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        content = response.read().decode('utf-8')
        # Each post body sits between <div class="content"> and a 10-digit timestamp comment.
        pattern = re.compile(r'<div class="content">(.*?)<!--\d{10}-->', re.S)
        items = re.findall(pattern, content)
        return items
    except urllib2.URLError as e:
        if hasattr(e, "code"):
            print e.code
        if hasattr(e, "reason"):
            print e.reason
        return []


def writes(item):
    """Append one post, prefixed with its running number M, to save.txt."""
    item_swap = item.encode('utf-8')
    with open('save.txt', 'a') as f:
        f.write(str(M) + '\n' + item_swap + '\n')


def main():
    global M
    M = 1      # running number of saved items
    P = 1      # current page number
    N = 0      # index of the current item on the page
    print readme
    items = getpage(P)
    raw_input()
    while True:
        if N == len(items):
            # Finished this page: fetch the next one and start over.
            P += 1
            N = 0
            items = getpage(P)
        if not items:
            break
        items[N] = re.sub(r'<br/>', '\n', items[N])
        print N + 1, items[N]
        static = raw_input()
        if static in ('q', 'Q'):
            break
        if static in ('w', 'W'):
            writes(items[N])
            M += 1
        N += 1


if __name__ == '__main__':
    main()
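
The script above targets Python 2 (urllib2, print statements, raw_input). For readers on Python 3, a minimal sketch of the same fetch-and-extract step might look like the following; it assumes the page markup still carries the <!--timestamp--> marker and only swaps urllib2 for urllib.request, with getpage_py3 being a name chosen here for illustration:

    import re
    import urllib.request

    def getpage_py3(page):
        # Same request with a custom User-Agent, same regex extraction as above.
        url = 'http://www.qiushibaike.com/textnew/page/' + str(page)
        headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
        request = urllib.request.Request(url, headers=headers)
        content = urllib.request.urlopen(request).read().decode('utf-8')
        return re.findall(r'<div class="content">(.*?)<!--\d{10}-->', content, re.S)

The interactive loop would otherwise stay the same, with input() replacing raw_input() and no .encode('utf-8') needed before writing, since Python 3 strings are already Unicode.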