# -*- coding:utf-8 -*-
# Author: archingB
import re
import urllib2

readme = "Press Enter to start reading, q to quit, w to write the current item to save.txt"


def getpage(page):
    """Fetch one page of text jokes from qiushibaike.com and return the raw item strings."""
    url = 'http://www.qiushibaike.com/textnew/page/' + str(page)
    user_agent = 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'
    headers = {'User-Agent': user_agent}
    try:
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        content = response.read().decode('utf-8')
        # Each item sits between its content div and a 10-digit timestamp comment.
        pattern = re.compile(r'<div class="content">(.*?)<!--\d{10}-->', re.S)
        items = re.findall(pattern, content)
        return items
    except urllib2.URLError, e:
        # Report HTTP/network errors and return an empty page so the caller can stop cleanly.
        if hasattr(e, "code"):
            print e.code
        if hasattr(e, "reason"):
            print e.reason
        return []
def writes(item):
    """Append one item to save.txt, numbered by the global counter M."""
    item_swap = item.encode('utf-8')
    # 'with' guarantees the file is closed even if the write fails.
    with open('save.txt', 'a') as f:
        f.write(str(M) + '\n' + item_swap + '\n')
def main():
    global M
    P = 1  # current page number
    N = 0  # index of the current item on that page
    M = 1  # running number written in front of each saved item
    print readme
    items = getpage(P)
    raw_input()
    while True:
        if N >= len(items):
            # Current page exhausted (or fetch failed): move on to the next page.
            P += 1
            N = 0
            items = getpage(P)
            if not items:
                break
        items[N] = re.sub(r'<br/>', '\n', items[N])
        print N + 1, items[N]
        static = raw_input()
        if static == 'q' or static == 'Q':
            break
        if static == 'w' or static == 'W':
            writes(items[N])
            M += 1
        N += 1


main()
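
The script above targets Python 2 (urllib2, print statements, raw_input). If you are on Python 3, a minimal sketch of the same fetch-and-parse step could look like the following; it reuses the same URL, User-Agent and regex, and assumes qiushibaike.com still serves the markup that regex was written against, which may no longer be the case.

# Python 3 sketch of getpage(): same URL, headers and regex as above,
# but the site's markup may have changed since this was written.
import re
import urllib.error
import urllib.request


def getpage(page):
    url = 'http://www.qiushibaike.com/textnew/page/' + str(page)
    headers = {'User-Agent': 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)'}
    try:
        request = urllib.request.Request(url, headers=headers)
        with urllib.request.urlopen(request) as response:
            content = response.read().decode('utf-8')
        return re.findall(r'<div class="content">(.*?)<!--\d{10}-->', content, re.S)
    except urllib.error.URLError as e:
        # HTTPError carries .code, plain URLError carries .reason.
        print(getattr(e, 'code', ''), getattr(e, 'reason', ''))
        return []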