清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
import calendar
import gzip
import http.cookiejar
import random
import re
import urllib.parse
import urllib.request
import zlib

try:
    # Nothing in this file uses Pillow; the import is kept but must not stop
    # the script from running when Pillow is not installed.
    from PIL import Image
except ImportError:
    Image = None


class Crawler_self:
    """Download a web page and pull six-digit area codes out of its HTML."""

    # A six-digit number that directly follows the opening tag of a
    # <p class="MsoNormal" ...> paragraph and is directly followed by <span>.
    _AREA_CODE_RE = re.compile(
        r'<p\s+class="MsoNormal".*?>(\d\d\d\d\d\d)<span>')

    def __init__(self):
        # Request headers sent with every page fetch.
        self.Cr_header_data = {
            'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding': 'gzip, deflate, sdch',
            'Accept-Language': 'zh-CN,zh;q=0.8',
            'Cache-Control': 'no-cache',
            'Connection': 'keep-alive',
            'Pragma': 'no-cache',
            'Upgrade-Insecure-Requests': '1',
            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'
        }

    def Cr_get_web_list(self, re_data):
        """Return every six-digit code matched by the area-code pattern.

        Args:
            re_data: Decoded HTML text of the page.

        Returns:
            List of six-character digit strings, in page order.
        """
        return self._AREA_CODE_RE.findall(re_data)

    def Cr_get_cookies_opener(self, url_1):
        """Open ``url_1`` with a cookie-aware opener and return the response.

        The opener carries the headers from ``self.Cr_header_data``.  The
        caller is responsible for closing the returned response.
        """
        cookie_jar = http.cookiejar.CookieJar()
        opener = urllib.request.build_opener(
            urllib.request.HTTPCookieProcessor(cookie_jar))
        opener.addheaders = list(self.Cr_header_data.items())
        return opener.open(url_1)

    def Cr_read_data(self, data, de_code='utf-8'):
        """Read a response-like object, gunzip if needed, and decode it.

        Args:
            data: Object with a ``read()`` method returning bytes.
            de_code: Text encoding used to decode the payload.
        """
        return self.Cr_ungzip(data.read()).decode(de_code)

    def Cr_ungzip(self, re_data):
        """Return ``re_data`` gunzipped, or unchanged if it is not gzip data.

        The previous implementation fell through to ``return data`` with
        ``data`` unbound whenever decompression failed, so every plain
        (uncompressed) response crashed with ``UnboundLocalError``.
        """
        try:
            return gzip.decompress(re_data)
        except (OSError, EOFError, zlib.error):
            # OSError covers gzip.BadGzipFile ("not a gzipped file");
            # EOFError / zlib.error cover truncated or corrupt streams.
            print('无需解压')
            return re_data

    def Cr_get_ares_list(self, url, de_code='utf-8'):
        """Fetch ``url`` and return the six-digit area codes found in it.

        These codes are used as the first six digits of an ID number.
        """
        # Close the HTTP response deterministically once it has been read.
        with self.Cr_get_cookies_opener(url) as response:
            page = self.Cr_read_data(response, de_code)
        return self._get_codes(page)

    def _get_codes(self, page):
        """Thin indirection so subclasses overriding Cr_get_web_list still work."""
        return self.Cr_get_web_list(page)


class Plan_Idcard:
    """Build 18-character ID numbers with a valid weighted mod-11 check digit."""

    # Days per month in a non-leap year (index 0 = January).
    _DAYS_IN_MONTH = (31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31)

    def __init__(self):
        # Weight applied to each of the first 17 digits.
        self.W = [7, 9, 10, 5, 8, 4, 2, 1, 6, 3, 7, 9, 10, 5, 8, 4, 2]
        # Not read anywhere in this file; kept for compatibility.
        self.ID_num = [18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2]
        # Check character indexed by (weighted sum % 11).
        self.D_CHECK = ['1', '0', 'X', '9', '8', '7', '6', '5', '4', '3', '2']
        # Weighted sum of the most recently generated number.
        self.ID_aXw = 0

    def Plan_id_card(self, ID_1, id_num=10):
        """Generate ``id_num`` ID numbers sharing the 14-digit prefix ``ID_1``.

        Each number is ``ID_1`` + a random three-digit sequence + the check
        character.  The list is printed (as before) and now also returned.

        Args:
            ID_1: 14 ASCII digits: six-digit area code followed by YYYYMMDD.
            id_num: How many numbers to generate.

        Returns:
            List of 18-character strings.

        Raises:
            ValueError: If ``ID_1`` is not exactly 14 ASCII digits.  A shorter
                prefix used to raise IndexError and a longer one silently
                produced a number with a meaningless check character.
        """
        if not re.fullmatch(r'[0-9]{14}', ID_1):
            raise ValueError(
                'ID_1 must be exactly 14 digits, got %r' % (ID_1,))

        data = []
        for _ in range(id_num):
            first17 = ID_1 + '%03d' % random.randint(0, 999)
            self.ID_aXw = sum(
                int(digit) * weight for digit, weight in zip(first17, self.W))
            data.append(first17 + self.D_CHECK[self.ID_aXw % 11])
        print(data)
        return data

    def get_year_moth_day(self, old=10):
        """Return a random birth date string ``YYYYMMDD`` for roughly ``old`` years.

        The year is 2015 minus the age, shifted by a random 1..10 years
        (added when the draw is even, subtracted when odd).  If that lands
        on 2015 or later, a notice is printed and the age falls back to 11.

        Args:
            old: Approximate age; anything accepted by ``int()``.

        Returns:
            Eight-character date string that is always a real calendar date.
        """
        GD_YEAR = 2015  # Ages are counted back from 2015.
        age = int(old)
        while True:
            shift = random.randint(1, 10)
            if shift % 2 == 0:
                year = GD_YEAR - age + shift
            else:
                year = GD_YEAR - age - shift
            if year < GD_YEAR:
                break
            print('...你输的年岁^_^过少.自动取随机年岁..')
            age = 11

        month = random.randint(1, 12)
        # The old test `year%4==0 and year%100==0` treated 1900 as a leap
        # year and 2004 as a common year; calendar.isleap applies the full
        # Gregorian rule.
        if month == 2 and calendar.isleap(year):
            last_day = 29
        else:
            last_day = self._DAYS_IN_MONTH[month - 1]
        day = random.randint(1, last_day)
        return '%04d%02d%02d' % (year, month, day)


def _ask_number(prompt):
    """Prompt until the user enters a non-negative decimal integer.

    ``str.isalnum`` (used previously) also accepts letters, which then made
    ``int()`` raise; ``isdecimal`` only accepts what ``int()`` can parse.
    """
    while True:
        answer = input(prompt)
        if answer.isdecimal():
            return int(answer)
        print('请输入合法的数字')


def main():
    """Scrape the area-code list, ask for count and age, print ID batches.

    Numbers are produced in batches of ten that share one area code and one
    birth date; the requested count is divided by ten (rounded down) with a
    minimum of one batch.
    """
    # County-level-and-above administrative division codes
    # (as of 2014-10-31).
    url_1 = 'http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201504/t20150415_712722.html'
    area_codes = Crawler_self().Cr_get_ares_list(url_1)
    if not area_codes:
        # Previously this surfaced later as an obscure randint() ValueError.
        raise SystemExit('no area codes could be parsed from ' + url_1)

    batches = max(1, _ask_number('输要获取多少个身份证:') // 10)
    year_old = _ask_number('请输入大约年龄:')

    generator = Plan_Idcard()
    for _ in range(batches):
        area_code = random.choice(area_codes)
        birth_date = generator.get_year_moth_day(year_old)
        generator.Plan_id_card(area_code + birth_date)


if __name__ == '__main__':
    main()