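# NOTE: promotional banner carried over from the page this script was copied
# from; it is not part of the program. Translation: "Several hundred GB of
# resources, painstakingly compiled over three months by a Tsinghua expert,
# shared for free!"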
import urllib.request
import re
import os
import sys
from collections import deque
# Forum thread pages are addressed as BASE_URL + <page number>.
BASE_URL = "http://www.tuigirl8.com/forum/view/"
# Seconds to wait for a forum page before giving up on it.
PAGE_TIMEOUT = 2
# Seconds to wait for a single image; images are larger than pages, so this
# is more generous than PAGE_TIMEOUT.
IMAGE_TIMEOUT = 10
# Absolute http/https URL ending in ".jpg", ".gif" or ".png". Quotes and angle
# brackets are excluded so a match cannot run past the end of an HTML
# attribute, and the literal dot keeps a path segment that merely contains
# "png"/"jpg"/"gif" from truncating the URL.
IMAGE_LINK_RE = re.compile(r"""https?:[^\s"'<>]*?\.(?:jpg|gif|png)""")


def extract_image_links(html):
    """Return the distinct image URLs found in *html*, in first-seen order.

    Args:
        html: Decoded page text (``str``), not raw bytes.

    Returns:
        A list of URL strings matching ``IMAGE_LINK_RE`` with duplicates
        removed, so each image is downloaded only once per page.
    """
    seen = set()
    links = []
    for link in IMAGE_LINK_RE.findall(html):
        if link not in seen:
            seen.add(link)
            links.append(link)
    return links


def fetch_page(url, timeout=PAGE_TIMEOUT):
    """Download *url* and return its body decoded to text.

    The charset announced in the Content-Type header is used when present,
    falling back to UTF-8; undecodable bytes are replaced rather than raising.

    Args:
        url: Page address to fetch.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        The response body as ``str``.

    Raises:
        Whatever ``urllib.request.urlopen`` / ``read`` raise on network or
        HTTP failure (for example ``urllib.error.URLError``).
    """
    # The context manager closes the connection even if read() fails.
    with urllib.request.urlopen(url, timeout=timeout) as response:
        charset = response.headers.get_content_charset() or "utf-8"
        raw = response.read()
    try:
        return raw.decode(charset, errors="replace")
    except LookupError:
        # The server announced a charset name Python does not know.
        return raw.decode("utf-8", errors="replace")


def download_image(link, target_dir, timeout=IMAGE_TIMEOUT):
    """Save the resource at *link* into *target_dir* and return its path.

    The file name is the part of *link* after its last ``/``.

    Args:
        link: Absolute URL of the image.
        target_dir: Existing directory to write into.
        timeout: Socket timeout in seconds passed to ``urlopen``.

    Returns:
        The path of the written file.

    Raises:
        Whatever ``urlopen`` / ``read`` raise on network failure, or
        ``OSError`` if the file cannot be written.
    """
    destination = os.path.join(target_dir, link.rsplit("/", 1)[-1])
    # Read the whole payload before opening the file so a failed transfer
    # does not leave a truncated image on disk.
    with urllib.request.urlopen(link, timeout=timeout) as response:
        payload = response.read()
    with open(destination, "wb") as out_file:
        out_file.write(payload)
    return destination


if __name__ == "__main__":
    for page_no in range(1, 2000):
        page_url = BASE_URL + str(page_no)
        # Progress line: zero-based counter immediately followed by the URL.
        print(str(page_no - 1) + page_url)

        # One output directory per page, named after the page number.
        target_dir = r"D:/%d" % page_no
        os.makedirs(target_dir, exist_ok=True)

        try:
            html = fetch_page(page_url)
        except Exception as exc:
            # Best-effort crawl: skip pages that cannot be fetched. Catching
            # Exception (not a bare except) lets Ctrl-C still stop the run.
            print("skipping %s: %s" % (page_url, exc), file=sys.stderr)
            continue

        for link in extract_image_links(html):
            print(link)
            try:
                download_image(link, target_dir)
            except Exception as exc:
                # A single broken image must not abort the whole crawl.
                print("failed %s: %s" % (link, exc), file=sys.stderr)