# [Scraped site banner, not part of the program] 清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
import http.cookiejar
import urllib.request
import urllib.parse
from PIL import Image
import gzip,re,random
class Crawler_self:
    """Fetch a web page with browser-like headers and extract six-digit codes.

    The instance only stores the request headers (``Cr_header_data``); the
    methods build a cookie-aware opener, read and decode the response body,
    and run a fixed regular expression over the resulting text.
    """

    # Six digits sitting between a ``>`` and a ``<span>``, after a
    # ``<p class="MsoNormal"`` opening on the same line.  Compiled once
    # instead of on every call.
    _CODE_PATTERN = re.compile(r'<p\s+class="MsoNormal".*?>(\d\d\d\d\d\d)<span>')

    def __init__(self):
        # Headers sent with every request made through this object.
        # NOTE(review): Accept-Encoding advertises deflate and sdch, but
        # Cr_ungzip only knows how to undo gzip.
        self.Cr_header_data = {
            'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
            'Accept-Encoding':'gzip, deflate, sdch',
            'Accept-Language':'zh-CN,zh;q=0.8',
            'Cache-Control':'no-cache',
            'Connection':'keep-alive',
            'Pragma':'no-cache',
            'Upgrade-Insecure-Requests':'1',
            'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/45.0.2454.101 Safari/537.36'
        }

    def Cr_get_web_list(self, re_data):
        """Return all six-digit codes matched in the page text ``re_data``.

        Args:
            re_data: Decoded page text (``str``).

        Returns:
            A list of six-character digit strings in order of appearance;
            empty when nothing matches.
        """
        return self._CODE_PATTERN.findall(re_data)

    def Cr_get_cookies_opener(self, url_1):
        """Open ``url_1`` through a cookie-aware opener and return the response.

        Despite the name, the return value is the response object produced
        by ``opener.open(url_1)``, not the opener itself.  The caller is
        responsible for closing it.
        """
        cookie_jar = http.cookiejar.CookieJar()
        cookie_handler = urllib.request.HTTPCookieProcessor(cookie_jar)
        opener = urllib.request.build_opener(cookie_handler)
        # addheaders expects a list of (name, value) tuples.
        opener.addheaders = list(self.Cr_header_data.items())
        return opener.open(url_1)

    def Cr_read_data(self, data, de_code='utf-8'):
        """Read a response, gunzip it when needed, and decode it to ``str``.

        Args:
            data: Object with a ``read()`` method returning ``bytes``.
            de_code: Text encoding used to decode the body.
        """
        return self.Cr_ungzip(data.read()).decode(de_code)

    def Cr_ungzip(self, re_data):
        """Return ``re_data`` gunzipped, or unchanged when it is not gzip.

        The previous implementation returned an unbound local when
        decompression failed, so every uncompressed response crashed with
        ``UnboundLocalError``.  Uncompressed data is now passed through.

        Args:
            re_data: Raw response body (``bytes``).

        Returns:
            The decompressed bytes, or ``re_data`` itself when it cannot be
            decompressed as gzip.
        """
        import zlib  # local import: only needed for the exception type

        try:
            return gzip.decompress(re_data)
        except (OSError, EOFError, zlib.error):
            # OSError covers "not a gzipped file"; EOFError a truncated
            # stream; zlib.error a corrupt deflate payload.
            print('无需解压')
            return re_data

    def Cr_get_ares_list(self, url, de_code='utf-8'):
        """Download ``url`` and return the six-digit codes found in it.

        Args:
            url: Page to fetch.
            de_code: Text encoding of the page.

        Returns:
            List of six-digit code strings (used by this file as the first
            six digits of an ID number).
        """
        # Close the HTTP response once the body has been read.
        with self.Cr_get_cookies_opener(url) as response:
            page_text = self.Cr_read_data(response, de_code)
        return self.Cr_get_web_list(page_text)
class Plan_Idcard:
    """Generate random 18-character ID-card style numbers.

    A generated number is a caller-supplied 14-digit prefix (in this file:
    6-digit area code + YYYYMMDD birth date), a random 3-digit sequence
    number, and one check character derived from the first 17 digits.
    """

    def __init__(self):
        # Per-position weights applied to the first 17 digits.
        self.W=[7,9,10,5,8,4,2,1,6,3,7,9,10,5,8,4,2]
        # Not read by any method of this class; kept for existing users.
        self.ID_num=[18,17,16,15,14,13,12,11,10,9,8,7,6,5,4,3,2]
        # Check character, indexed by (weighted digit sum % 11).
        self.D_CHECK=['1','0','X','9','8','7','6','5','4','3','2']
        # Weighted digit sum of the most recently generated number.
        self.ID_aXw = 0

    def _check_char(self, body):
        """Return the check character for the 17-digit string ``body``."""
        self.ID_aXw = sum(int(digit) * weight for digit, weight in zip(body, self.W))
        return self.D_CHECK[self.ID_aXw % 11]

    @staticmethod
    def _days_in_month(year, month):
        """Return the number of days in ``month`` of ``year`` (leap-year aware)."""
        import calendar  # local import: not part of the file's import block

        return calendar.monthrange(year, month)[1]

    def Plan_id_card(self, ID_1, id_num=10):
        """Build, print and return ``id_num`` numbers sharing the prefix ``ID_1``.

        Args:
            ID_1: 14-digit prefix (area code + birth date).
            id_num: How many numbers to generate.

        Returns:
            The list of generated 18-character strings (the same list that
            is printed).

        Raises:
            ValueError: ``ID_1`` is not exactly 14 ASCII digits.  Previously
                a short prefix raised ``IndexError`` and a long one silently
                produced a wrong-length number with a wrong check character.
        """
        prefix_len = len(self.W) - 3  # 17 weighted digits minus the 3-digit sequence
        if len(ID_1) != prefix_len or not set(ID_1) <= set('0123456789'):
            raise ValueError('ID_1 must be exactly %d digits, got %r' % (prefix_len, ID_1))
        data = []
        for _ in range(id_num):
            body = ID_1 + '%03d' % random.randint(0, 999)
            data.append(body + self._check_char(body))
        print(data)
        return data

    def get_year_moth_day(self, old = 10):
        """Return a random birth date as ``'YYYYMMDD'`` for an approximate age.

        The birth year is ``2015 - age`` shifted by a random 1..10 years
        (later for an even draw, earlier for an odd one).  When the result
        is not before 2015 the age is replaced by 11 and the year is drawn
        again.  Month and day are then drawn uniformly, the day being
        limited to the real length of that month.

        Args:
            old: Approximate age in years; anything ``int()`` accepts.

        Returns:
            Eight-character date string.
        """
        GD_YEAR = 2015  # ages are measured relative to 2015
        age = int(old)
        while True:
            offset = random.randint(1, 10)
            if offset % 2 == 0:
                year = GD_YEAR - age + offset
            else:
                year = GD_YEAR - age - offset
            if year < GD_YEAR:
                break
            print('...你输的年岁^_^过少.自动取随机年岁..')
            age = 11
        month = random.randint(1, 12)
        # Fix: the old leap test (year%4==0 and year%100==0) was true only
        # for century years, so 1900-02-29 was possible and Feb 29 of real
        # leap years never was.
        day = random.randint(1, self._days_in_month(year, month))
        return '%04d%02d%02d' % (year, month, day)
def main():
    """Interactive entry point: fetch area codes, then print generated numbers.

    Numbers are produced in batches of ten that share one random area code
    and one random birth date.  The requested count is rounded down to whole
    batches, with a minimum of one batch.
    """
    # Latest administrative division codes for county level and above
    # (as of 31 October 2014).
    url_1 = 'http://www.stats.gov.cn/tjsj/tjbz/xzqhdm/201504/t20150415_712722.html'
    area_codes = Crawler_self().Cr_get_ares_list(url_1)
    if not area_codes:
        # Without this guard the random pick below fails with an obscure
        # error when the page layout changes and nothing is matched.
        raise SystemExit('未能从页面解析出任何行政区划代码')

    generator = Plan_Idcard()
    while True:
        get_num = input('输要获取多少个身份证:')
        # isdecimal() instead of isalnum(): letters used to pass the check
        # and then crash int().
        if not get_num.isdecimal():
            print('请输入合法的数字')
            continue
        batches = max(1, int(get_num) // 10)
        year_old = input('请输入大约年龄:')
        if not year_old.isdecimal():
            print('请输入合法的数字')
            continue
        for _ in range(batches):
            area_code = random.choice(area_codes)
            birth_date = generator.get_year_moth_day(year_old)
            generator.Plan_id_card(area_code + birth_date)
        break


if __name__ == '__main__':
    main()