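# NOTE: non-code promotional banner found at the top of this file (translated): "A Tsinghua expert spent three months painstakingly compiling several hundred GB of resources, shared for free!....>>>"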
import re
import os
import urllib.request
# Base address of the site; the relative listing and detail-page paths used
# below are appended to it to form absolute URLs.
home = "http://sc.chinaz.com"
def downIco(tit, url):
    """Download the demo files linked from one detail page.

    Fetches ``url``, collects every ``href`` value that starts with
    ``http://demo.sc.chinaz.com`` and saves each target into the directory
    ``tit`` under the last path component of its link. One progress line
    is printed per link.

    Args:
        tit: Directory the files are written to. It is not created here,
            so it must already exist for downloads to succeed.
        url: Address of the page to scan for links.

    Returns:
        None.

    Raises:
        Any error raised while fetching or UTF-8 decoding the page itself.
        Failures of individual file downloads are reported by printing
        ``Error!`` and do not stop the loop.
    """
    # Use the response as a context manager so the connection is closed
    # even if reading or decoding fails.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode("utf-8")

    links = re.findall(r'href="(http://demo.sc.chinaz.com.+?)"', page)
    total = len(links)
    for index, link in enumerate(links, start=1):
        file = os.path.basename(link)
        print("({}/{}) {}".format(index, total, file))
        # Names containing 'ico' anywhere past the first character are
        # skipped (find() returns 0 for a leading match, which is kept).
        if file.find('ico') > 0:
            continue
        target = tit + "/" + file
        if os.path.exists(target):
            # Already downloaded on an earlier run.
            continue
        try:
            urllib.request.urlretrieve(link, target)
        except Exception:
            # Best effort: report and move on. Unlike a bare ``except``
            # this lets KeyboardInterrupt/SystemExit stop the crawl.
            print("Error!")
def getIndex(url):
    """Process one listing page and download each entry found on it.

    Fetches ``url`` and extracts every ``<p><a ...</p>`` fragment. For each
    fragment the 2nd, 3rd and 4th double-quoted values are taken as the
    relative detail-page path, the title and the preview image URL
    (presumably the anchor's href/alt/src attributes -- confirm against
    the live markup). A directory named after the title (with the words
    "图标" and "下载" removed) is created in the current working directory,
    the preview image is saved into it, and ``downIco`` is called for the
    detail page.

    Args:
        url: Address of the listing page.

    Returns:
        None.

    Raises:
        Any error raised while fetching or UTF-8 decoding the listing page,
        creating the directory, or propagated from ``downIco``. A failed
        preview download or a malformed entry only prints ``Error!``.
    """
    # Use the response as a context manager so the connection is closed
    # even if reading or decoding fails.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode("utf-8")

    entries = re.findall(r'\<p\>\<a(.+?)\</p\>', page)
    total = len(entries)
    for index, entry in enumerate(entries, start=1):
        fields = entry.split('"')
        if len(fields) < 8:
            # Fewer than four quoted values: indexing below would raise
            # IndexError and abort the crawl, so report and skip instead.
            print("Error!")
            continue
        detail_path, tit, img = fields[3], fields[5], fields[7]
        tit = tit.replace("图标", "").replace("下载", "")
        print("({}/{}) {}, {}, {}".format(index, total, detail_path, tit, img))
        if not os.path.exists(tit):
            os.makedirs(tit)
        target = tit + "/" + os.path.basename(img)
        if not os.path.exists(target):
            try:
                urllib.request.urlretrieve(img, target)
            except Exception:
                # Best effort: report and move on. Unlike a bare ``except``
                # this lets KeyboardInterrupt/SystemExit stop the crawl.
                print("Error!")
        downIco(tit, home + detail_path)
if __name__ == "__main__":
    # Walk the icon listing: the section's landing page first, then the
    # numbered pages index_2.html through index_9.html.
    tubiao = "/tubiao/"
    page_names = [""] + ["index_" + str(number) + ".html" for number in range(2, 10)]
    for page_name in page_names:
        url = home + tubiao + page_name
        getIndex(url)