清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
# Download every chapter page of one novel into a local directory.
#
# The script fetches the novel's index page, creates a directory named after
# the first <h1> heading on that page, saves the index there, and then
# downloads every chapter link found on the index. Chapter files that already
# exist locally are skipped, so the script can be re-run to resume.
import os
import re
import sys
import urllib.request

# Base URL of the novel's table of contents.
home = "http://www.23zw.com/olread/9/9068/"

# Captures the text of an <h1> heading; the first match names the output dir.
TITLE_RE = re.compile(r'h1>(.+?)<')
# Captures href values that are exactly 37 characters long.
# NOTE(review): 37 is presumably the length of this site's chapter file
# names -- confirm against the live index page.
LINK_RE = re.compile(r'href="(.{37}?)"')


def extract_title(page):
    """Return the text of the first <h1> heading in *page*, or None if absent."""
    match = TITLE_RE.search(page)
    return match.group(1) if match else None


def extract_chapter_links(page):
    """Return every 37-character href value in *page*, in document order."""
    return LINK_RE.findall(page)


def fetch_page(url, encoding="gbk"):
    """Fetch *url* and return its body decoded with *encoding*."""
    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode(encoding)


def download(url, dest):
    """Save *url* to the file *dest*.

    The data is written to ``dest + ".part"`` first and renamed only after
    the transfer succeeds, so an interrupted download never leaves a
    truncated file at *dest* that a later run would mistake for a finished
    one.
    """
    partial = dest + ".part"
    urllib.request.urlretrieve(url, partial)
    os.replace(partial, dest)


def main():
    """Download the index page and all chapter pages of the novel at `home`."""
    index_url = home + "index.html"
    page = fetch_page(index_url)
    print(len(page))

    title = extract_title(page)
    if title is None:
        # Without a title there is no directory to write into; stop with a
        # clear message instead of failing later on an undefined name.
        sys.exit("no <h1> title found on " + index_url)
    print(title)

    os.makedirs(title, exist_ok=True)
    download(index_url, title + "/index.html")

    links = extract_chapter_links(page)
    total = len(links)
    for index, link in enumerate(links, start=1):
        # Best effort: one failing chapter must not abort the whole run.
        # Catching Exception (not a bare except) still lets Ctrl+C and
        # SystemExit stop the script.
        try:
            print("({}/{}) {}".format(index, total, link))
            dest = title + "/" + link
            if os.path.exists(dest):
                continue  # already downloaded on a previous run
            download(home + link, dest)
        except Exception as exc:
            print("error!", exc)
    print("finish!")


if __name__ == '__main__':
    main()