清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# @Date    : 2014-12-22 14:46:40
# @Author  : Your Name (you@example.org)
# @Link    : http://example.org
# @Version : $Id$
"""Download the photos of one Douban online-event album.

The script walks the album page by page, collects the link of every photo
detail page, extracts the full-size image URL from each detail page and
saves the image into a local directory.
"""

import gzip
import os
import re
import time
import urllib.request
import zlib

# Browser-like request headers used by getHtml1().
header = {
    "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    "Accept-Encoding": "gzip, deflate, sdch",
    "Cache-Control": "max-age=0",
    "Accept-Language": "zh-cn,zh;q=0.8;",
    "Connection": "keep-alive",
    "Host": "www.douban.com",
    "Referer": "http://www.douban.com",
    "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
                  " Chrome/39.0.2171.95 Safari/537.36",
}

# Directory the pictures are written to unless picDownload() is told otherwise.
SAVE_DIR = "C:\\Users\\Lyle\\Desktop\\douPIC"

# Album listing page; %d is the offset of the first photo shown on the page.
ALBUM_URL = r'http://www.douban.com/online/11865076/album/137771083/?start=%d&sortby=time'

# Offset step between two consecutive album pages.
PAGE_SIZE = 90

# Link to the detail page of a single photo, as it appears in an album page.
_PHOTO_PAGE_RE = re.compile(
    r"http://www\.douban\.com/online/11865076/photo/\d+/\?sortby=time")

# <img> tag of the full-size picture inside a photo detail page; group 1 is
# the image URL.
_FULL_IMAGE_RE = re.compile(
    r'<img src="(http://img\d\.douban\.com/view/photo/photo/public/p\d{10}\.jpg)"')


def _read_text(response):
    """Read *response* completely and return its body decoded as UTF-8.

    The body is decompressed first when the response carries a
    ``Content-Encoding`` of ``gzip`` or ``deflate``.
    """
    raw = response.read()
    encoding = (response.headers.get("Content-Encoding") or "").lower()
    if encoding == "gzip":
        raw = gzip.decompress(raw)
    elif encoding == "deflate":
        # Servers send either a zlib-wrapped stream or a raw deflate stream.
        try:
            raw = zlib.decompress(raw)
        except zlib.error:
            raw = zlib.decompress(raw, -zlib.MAX_WBITS)
    return raw.decode('utf-8')


def getHtml1(url):
    """Fetch *url* with the full ``header`` set and return the page text.

    ``header`` advertises gzip/deflate support, so a compressed body is
    decompressed before it is decoded as UTF-8.
    """
    req = urllib.request.Request(url, headers=header)
    with urllib.request.urlopen(req) as response:
        return _read_text(response)


def getHtml(url):
    """Fetch *url* with a minimal browser-like header set.

    Returns the page body decoded as UTF-8. Errors raised by ``urlopen``
    or by decoding are propagated to the caller.
    """
    req = urllib.request.Request(url, headers={
        "User-Agent": "Mozilla/5.0 (Windows NT 6.3; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko)"
                      " Chrome/39.0.2171.95 Safari/537.36",
        "Accept-Language": "zh-cn,zh;q=0.8;",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8",
    })
    with urllib.request.urlopen(req) as response:
        return _read_text(response)


def getPicURL(html):
    """Return every photo detail-page URL found in *html*, in page order.

    Duplicates are kept; an empty list is returned when nothing matches.
    """
    return _PHOTO_PAGE_RE.findall(html)


def openPic(picURL):
    """Return the full-size image URL shown on the detail page *picURL*.

    Returns ``None`` (after printing a warning) when the page cannot be
    fetched or contains no matching ``<img>`` tag.
    """
    try:
        html = getHtml(picURL)
    except Exception:
        # Best-effort boundary: one broken page must not stop the run.
        # ``Exception`` (not a bare except) keeps Ctrl-C working.
        html = None
    match = _FULL_IMAGE_RE.search(html) if html is not None else None
    if match is None:
        print("!!!!!!!!!!!!!!!!!!!!!WARING:AN ERROR HAPPENED while openPic!!!!!!!!!!!!!!!!!!!!!")
        return None
    return match.group(1)


def picDownload(picURLs, page_num, save_dir=SAVE_DIR):
    """Download the picture behind every detail-page URL in *picURLs*.

    Args:
        picURLs: detail-page URLs as returned by getPicURL().
        page_num: zero-based album page number, used for progress output.
        save_dir: directory the files are written to; created if missing.

    A picture whose file name already exists in *save_dir* is stored under
    the same name prefixed with "副本" instead of overwriting the original.
    Pictures that fail to resolve or download are reported and skipped.

    Returns:
        The result of the last successful ``urlretrieve`` call, or ``''``
        when nothing was downloaded.
    """
    download_img = ''
    try:
        os.makedirs(save_dir, exist_ok=True)
        existing = set(os.listdir(save_dir))
    except OSError as exc:
        print("!!!!!!!!!!!!!!!!!!!!!WARING:AN ERROR happened wile picDownload!!!!!!!!!!!!!!!!!!!!!")
        print("    %s: %r" % (save_dir, exc))
        return download_img

    for position, picURL in enumerate(picURLs, start=1):
        picURL_new = openPic(picURL)
        if picURL_new is None:
            continue

        # The image URL always ends in "p<10 digits>.jpg".
        base_name = picURL_new.rsplit("/", 1)[-1]
        is_new = base_name not in existing
        file_name = base_name if is_new else "副本" + base_name

        try:
            download_img = urllib.request.urlretrieve(
                picURL_new, os.path.join(save_dir, file_name))
        except Exception as exc:
            # Best-effort boundary: report the failure and go on with the
            # remaining pictures of this page.
            print("!!!!!!!!!!!!!!!!!!!!!WARING:AN ERROR happened wile picDownload!!!!!!!!!!!!!!!!!!!!!")
            print("    %s: %r" % (picURL_new, exc))
            continue

        if is_new:
            existing.add(base_name)
        # Pause between downloads to keep the request rate low.
        time.sleep(1)
        print("第%d页 第%d张 ......%s......... downloaded" % (page_num+1, position, base_name))

    return download_img


def main():
    """Download the album page by page until a page lists no photos."""
    num = 0
    page_num = 0
    while True:
        html = getHtml(ALBUM_URL % (num + page_num * PAGE_SIZE))
        picURLs = getPicURL(html)
        if not picURLs:
            # No photo links on this page: stop instead of looping forever.
            break
        print("**************第%d页下载开始***************" % (page_num+1))
        picDownload(picURLs, page_num)
        print("**************第%d页下载完成***************" % (page_num+1))
        page_num += 1


if __name__ == '__main__':
    main()