# 清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
import re
import os
import shutil
import urllib.request
# Base URL of the site being scraped; page paths are joined onto it with "/".
home = "http://jingwei.supfree.net"
def find_txt(file, s):
    """Report whether any line of a text file contains a substring.

    Args:
        file: Path of the text file to search.
        s: Substring to look for inside each line.

    Returns:
        True if at least one line contains ``s``. False if no line does,
        or if ``file`` does not exist.
    """
    if not os.path.exists(file):
        return False
    # No explicit encoding is passed, so the platform default applies.
    # The context manager closes the handle even when reading/decoding fails.
    with open(file, "r") as f:
        # Stream the lines and stop at the first hit instead of loading
        # the whole file into memory.
        return any(s in line for line in f)
def append_txt(file, s):
    """Append text to the end of a file.

    The file is opened in append mode, so it is created when missing.
    No newline is added; the caller supplies any line terminator in ``s``.

    Args:
        file: Path of the file to append to.
        s: Text to write.
    """
    # The context manager closes the handle even if the write raises
    # (e.g. an encoding error under the platform default encoding).
    with open(file, "a") as f:
        f.write(s)
def getjw(p2):
    """Fetch a detail page and extract the two values it displays.

    The page at ``home + "/" + p2`` is downloaded and decoded by trying
    gb2312, then gbk, then utf-8. Every text run that follows the marker
    ``botitle18">`` up to the next ``<`` is captured.

    Args:
        p2: Page path relative to ``home``.

    Returns:
        A 2-tuple of the captured strings with leading/trailing spaces
        removed (presumably longitude and latitude, judging by the function
        name -- confirm against the site), or None when the page does not
        contain exactly two matches.

    Raises:
        UnicodeDecodeError: If the page decodes under none of the encodings.
    """
    url = home + "/" + p2
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        raw = response.read()

    # Only decoding failures trigger the fallback; a bare ``except`` would
    # also swallow KeyboardInterrupt and unrelated errors.
    for encoding in ("gb2312", "gbk"):
        try:
            page = raw.decode(encoding)
            break
        except UnicodeDecodeError:
            continue
    else:
        page = raw.decode("utf-8")

    values = re.findall(r'botitle18">(.+?)<', page)
    if len(values) != 2:
        # Unexpected page layout: signal it explicitly to the caller.
        return None
    return values[0].strip(" "), values[1].strip(" ")
def dwon_city(s_I, s_II, p):
    """Scrape one listing page and append its new entries to jingwei.txt.

    The page at ``home + "/" + p`` is downloaded and decoded (gb2312, then
    gbk, then utf-8). Links starting with ``mengzi.asp`` are paired in page
    order with the names found after the ``经纬度">`` marker. For each pair
    not already recorded in ``jingwei.txt``, the linked page is fetched via
    ``getjw`` and a line ``"<s_I> <s_II> <name> <j> <w>"`` is appended.

    Args:
        s_I: First-level region name (presumably a province -- confirm).
        s_II: Second-level region name (presumably a city -- confirm).
        p: Listing page path relative to ``home``.

    Raises:
        UnicodeDecodeError: If the page decodes under none of the encodings.
    """
    file = "jingwei.txt"
    url = home + "/" + p
    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(url) as response:
        raw = response.read()

    # Only decoding failures trigger the fallback; a bare ``except`` would
    # also swallow KeyboardInterrupt and unrelated errors.
    for encoding in ("gb2312", "gbk"):
        try:
            page = raw.decode(encoding)
            break
        except UnicodeDecodeError:
            continue
    else:
        page = raw.decode("utf-8")

    # Raw strings: "\." in a normal string literal is an invalid escape
    # sequence and triggers a warning on modern Python.
    links = re.findall(r'href="(mengzi\.asp.+?)"', page)
    names = re.findall(r'经纬度">(.+?)</a', page)

    # zip() pairs links with names and stops at the shorter list, so a
    # length mismatch cannot raise IndexError.
    for link, name in zip(links, names):
        record_key = s_I + " " + s_II + " " + name
        if find_txt(file, record_key):
            print(" " + name + " 已存在")
            continue

        coords = getjw(link)
        if coords is None:
            # getjw returns None when the detail page does not hold exactly
            # two values; skip the entry (nothing is written for it) rather
            # than crash while unpacking None.
            print(" " + name + " 未取到经纬度,已跳过")
            continue

        j, w = coords
        print(" " + name + " " + j + " " + w)
        append_txt(file, record_key + " " + j + " " + w + "\n")
if __name__ == "__main__":
    # Entry point: walk the index page, and for every first-level region not
    # yet listed in log.txt, scrape each of its second-level pages via
    # dwon_city(), then record the region in log.txt so a rerun skips it.

    # Close the HTTP response deterministically instead of leaking it.
    with urllib.request.urlopen(home) as response:
        raw = response.read()

    # Same encoding fallback chain as the other page fetchers in this file.
    for encoding in ("gb2312", "gbk"):
        try:
            page = raw.decode(encoding)
            break
        except UnicodeDecodeError:
            continue
    else:
        page = raw.decode("utf-8")

    # Strip line breaks and ALL spaces; the patterns below are written
    # against this space-free markup (e.g. 'class="bredbotitle14"').
    page = page.replace("\r\n", "").replace(" ", "")

    groups = re.findall(r'class="bredbotitle14">(.+?)</a><', page)
    print(len(groups))

    file = "log.txt"
    for group in groups:
        # Text before the first "<" is the region name; the remainder holds
        # its links. partition() keeps the full name when there is no "<",
        # whereas find() returning -1 would silently drop the last character.
        s_I, sep, rest = group.partition("<")
        print(s_I)
        if find_txt(file, s_I):
            continue

        # A trailing "<" terminates the last link so the pattern matches it.
        s2 = sep + rest + "<"
        for item in re.findall(r'href="(.+?)<', s2):
            # Each item looks like 'path">name'; turn it into [path, name].
            parts = item.replace('">', " ").split(" ")
            if len(parts) != 2:
                continue
            link, s_II = parts
            print(" " + s_II + " " + link)
            if "?" in s_II:
                continue
            dwon_city(s_I, s_II, link)

        # Mark this region as processed.
        append_txt(file, s_I + "\n")

    print("finished!")