# [Promotional banner captured with the source; not part of the script] 清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
"""Crawl the ``/tubiao/`` listing pages of sc.chinaz.com and download the files they link to.

Each listing page is scanned for ``<p><a ...</p>`` entries.  For every entry a
local directory named after the entry's title is created, the entry's preview
image is saved into it, and the entry's detail page is then scanned for
``http://demo.sc.chinaz.com...`` links, which are downloaded into the same
directory.  Files that already exist locally are not fetched again.
"""
import http.client
import os
import re
import urllib.request

home = "http://sc.chinaz.com"

# Compiled once at import time instead of on every call.
# Matches the absolute links to individual files on a detail page.
_ICON_LINK_RE = re.compile(r'href="(http://demo.sc.chinaz.com.+?)"')
# Matches one listing entry: everything between "<p><a" and "</p>".
_INDEX_ENTRY_RE = re.compile(r'\<p\>\<a(.+?)\</p\>')

# Failures that make a single download "best effort".  Deliberately narrower
# than a bare ``except`` so Ctrl-C (KeyboardInterrupt) still stops the crawl.
_DOWNLOAD_ERRORS = (OSError, ValueError, http.client.HTTPException)


def _fetch_page(url: str) -> str:
    """Return the body of *url* decoded as UTF-8, closing the connection afterwards."""
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8")


def _download_if_missing(src: str, folder: str, filename: str) -> None:
    """Save *src* as ``folder/filename`` unless that path already exists.

    A failed download prints ``Error!`` and is otherwise ignored.
    """
    target = folder + "/" + filename
    if os.path.exists(target):
        return
    try:
        # Create the folder here so the helper also works when the caller
        # has not prepared the directory beforehand.
        os.makedirs(folder, exist_ok=True)
        urllib.request.urlretrieve(src, target)
    except _DOWNLOAD_ERRORS:
        print("Error!")
        # The target did not exist before this attempt, so anything left
        # behind is a partial file; remove it so a later run retries the
        # download instead of treating it as complete.
        try:
            if os.path.isfile(target):
                os.remove(target)
        except OSError:
            pass


def downIco(tit: str, url: str) -> None:
    """Download every file linked from the detail page *url* into directory *tit*.

    Args:
        tit: Local directory the files are written to (created on demand).
        url: Address of the detail page to scan for
            ``http://demo.sc.chinaz.com...`` links.

    Raises:
        OSError: If the detail page itself cannot be fetched (this includes
            ``urllib.error.URLError``).
        UnicodeDecodeError: If the page is not valid UTF-8.

    Individual file download failures are reported with ``Error!`` and skipped.
    """
    links = _ICON_LINK_RE.findall(_fetch_page(url))
    total = len(links)
    for index, link in enumerate(links, start=1):
        file = os.path.basename(link)
        print(f"({index}/{total}) {file}")
        # Skip names that contain "ico" anywhere after the first character
        # (kept exactly as in the original; presumably filters out .ico files).
        if file.find("ico") > 0:
            continue
        _download_if_missing(link, tit, file)


def getIndex(url: str) -> None:
    """Process one listing page: create a directory per entry and download its files.

    Every ``<p><a ...</p>`` entry is split on double quotes; the 2nd, 3rd and
    4th quoted values are used as the detail-page path, the title and the
    preview-image address respectively.

    Args:
        url: Address of the listing page.

    Raises:
        OSError: If the listing page or one of its detail pages cannot be
            fetched, or an entry directory cannot be created.
        UnicodeDecodeError: If a fetched page is not valid UTF-8.

    Entries that do not carry enough quoted values, or whose title is empty
    after clean-up, are reported with ``Error!`` and skipped.
    """
    entries = _INDEX_ENTRY_RE.findall(_fetch_page(url))
    total = len(entries)
    for index, entry in enumerate(entries, start=1):
        fields = entry.split('"')
        # Indices 3, 5 and 7 are read below; a shorter entry is some other
        # paragraph that merely matched the pattern.
        if len(fields) < 8:
            print("Error!")
            continue
        set_url, tit, img = fields[3], fields[5], fields[7]
        # Strip the words "图标" (icon) and "下载" (download) from the title
        # before using it as the directory name.
        tit = tit.replace("图标", "").replace("下载", "")
        print(f"({index}/{total}) {set_url}, {tit}, {img}")
        if not tit:
            # An empty name cannot be used as a directory.
            print("Error!")
            continue
        os.makedirs(tit, exist_ok=True)
        _download_if_missing(img, tit, os.path.basename(img))
        # The detail-page path is appended to the site root as-is.
        downIco(tit, home + set_url)


def main() -> None:
    """Crawl the first listing page and then ``index_2.html`` .. ``index_9.html``."""
    tubiao = "/tubiao/"
    getIndex(home + tubiao)
    for page_no in range(2, 10):
        getIndex(home + tubiao + "index_" + str(page_no) + ".html")


if __name__ == "__main__":
    main()