# 下载全国城市经纬度 (download the longitude/latitude of cities across China)
#
# 清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>


import re
import os
import shutil
import urllib.request

home = "http://jingwei.supfree.net"

def find_txt(file, s):
    """Return True if any single line of the text file `file` contains `s`.

    The search is a plain substring test performed line by line, so a
    match can never span a line break.

    Args:
        file: Path of the text file to scan (opened with the platform's
            default text encoding).
        s: Substring to look for.

    Returns:
        True when some line contains `s`; False when no line does or when
        the file does not exist.
    """
    # EAFP: a missing file simply means "not found"; this avoids the
    # exists()/open() race of checking first.
    try:
        f = open(file, "r")
    except FileNotFoundError:
        return False
    # `with` guarantees the handle is closed even if reading/decoding fails.
    with f:
        # Iterating the file streams it line by line and stops at the
        # first hit instead of loading every line up front.
        return any(s in line for line in f)

def append_txt(file, s):
    """Append the string `s` to the text file `file`.

    The file is opened in append mode ("a"), so existing content is kept
    and the file is created if it does not exist yet.

    Args:
        file: Path of the file to append to.
        s: Text to write; no newline is added automatically.
    """
    # `with` closes (and flushes) the handle even if write() raises.
    with open(file, "a") as f:
        f.write(s)

def getjw(p2):
    """Fetch the page ``home + "/" + p2`` and extract its two marked values.

    The page is decoded as gb2312, falling back to gbk and then utf-8.
    Every text run following the marker ``botitle18">`` (up to the next
    ``<``) is collected; presumably these are the longitude and latitude
    of the place the page describes -- verify against the site markup.

    Args:
        p2: Path of the detail page, relative to `home`.

    Returns:
        A ``(first, second)`` tuple of the two captured strings with
        surrounding spaces stripped, or None when the page does not
        contain exactly two such values.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the page is valid in none of the encodings.
    """
    url = home + "/" + p2
    # Context manager closes the HTTP response instead of leaking it.
    with urllib.request.urlopen(url) as response:
        raw = response.read()

    # Try the encodings in order; only decoding failures trigger a
    # fallback (a bare `except:` would also swallow KeyboardInterrupt).
    for encoding in ("gb2312", "gbk"):
        try:
            page = raw.decode(encoding)
            break
        except UnicodeDecodeError:
            continue
    else:
        page = raw.decode("utf-8")

    values = re.findall(r'botitle18">(.+?)<', page)
    if len(values) != 2:
        # Unexpected page layout: signal "no result" explicitly.
        return None
    return values[0].strip(" "), values[1].strip(" ")


def dwon_city(s_I, s_II, p):
    """Fetch one listing page and record the values of every place on it.

    The page ``home + "/" + p`` is downloaded and decoded (gb2312, then
    gbk, then utf-8). Each ``mengzi.asp...`` link on it is paired, in
    document order, with the link text captured by the second pattern.
    For every pair not yet present in ``jingwei.txt`` the detail page is
    resolved through `getjw` and a line
    ``"<s_I> <s_II> <name> <j> <w>"`` is appended to ``jingwei.txt``.

    Args:
        s_I: First-level name written at the start of each record
            (presumably the province -- confirm with the caller).
        s_II: Second-level name written after `s_I` (presumably the city).
        p: Path of the listing page, relative to `home`.

    Raises:
        urllib.error.URLError: If the listing page cannot be fetched.
        UnicodeDecodeError: If the page is valid in none of the encodings.
    """
    file = "jingwei.txt"
    url = home + "/" + p
    # Context manager closes the HTTP response instead of leaking it.
    with urllib.request.urlopen(url) as response:
        raw = response.read()

    # Only decoding failures trigger a fallback to the next encoding.
    for encoding in ("gb2312", "gbk"):
        try:
            page = raw.decode(encoding)
            break
        except UnicodeDecodeError:
            continue
    else:
        page = raw.decode("utf-8")

    # Raw strings: "\." in a normal literal is an invalid escape sequence.
    links = re.findall(r'href="(mengzi\.asp.+?)"', page)
    names = re.findall(r'经纬度">(.+?)</a', page)

    # zip() pairs link and name positionally and stops at the shorter
    # list, so a count mismatch can no longer raise IndexError.
    for link, name in zip(links, names):
        key = s_I + " " + s_II + " " + name
        # Records are written as key + " " + values, so searching for the
        # key plus the trailing space avoids a false "already exists" when
        # this name is merely a prefix of a longer recorded name.
        if find_txt(file, key + " "):
            print("    " + name + " 已存在")
            continue

        coords = getjw(link)
        if coords is None:
            # getjw could not find exactly two values; skip this place
            # rather than crash on unpacking. Nothing is written, so the
            # entry is still treated as missing on a later pass.
            print("    " + name + " coordinates not found, skipped")
            continue

        j, w = coords
        print("    " + name + " " + j + " " + w)
        append_txt(file, key + " " + j + " " + w + "\n")


def main():
    """Crawl the index page at `home` and download every listed sub-page.

    Each top-level entry on the index (matched by the
    ``class="bredbotitle14">`` marker once spaces are removed) consists of
    a name followed by a run of links. For every entry whose name is not
    yet recorded in ``log.txt``, `dwon_city` is called for each of its
    links, and the entry name is then appended to ``log.txt`` so that a
    later run skips it.
    """
    log_file = "log.txt"

    # Context manager closes the HTTP response instead of leaking it.
    with urllib.request.urlopen(home) as response:
        raw = response.read()

    # Same decoding fallback chain as the other page fetchers in this
    # file, instead of failing outright on bytes outside gb2312.
    for encoding in ("gb2312", "gbk"):
        try:
            page = raw.decode(encoding)
            break
        except UnicodeDecodeError:
            continue
    else:
        page = raw.decode("utf-8")

    # Drop CRLFs and ALL spaces: entries then sit on one line, and the
    # split(" ") below only ever sees the separator inserted on purpose.
    page = page.replace("\r\n", "").replace(" ", "")
    entries = re.findall(r'class="bredbotitle14">(.+?)</a><', page)
    print(len(entries))

    link_pattern = re.compile(r'href="(.+?)<')
    for entry in entries:
        # Text before the first "<" is the entry name; the rest holds the
        # links. partition() keeps the full name when no "<" is present
        # (slicing with find() == -1 would drop the last character).
        s_I, sep, rest = entry.partition("<")
        print(s_I)
        if find_txt(log_file, s_I):
            # Already completed in an earlier run.
            continue

        # A trailing "<" lets the pattern also capture the final link.
        for link in link_pattern.findall(sep + rest + "<"):
            # 'href">name' -> "href name"; anything that does not split
            # into exactly two fields is not a plain link and is ignored.
            parts = link.replace('">', " ").split(" ")
            if len(parts) != 2:
                continue
            href, s_II = parts
            print("  " + s_II + " " + href)
            # NOTE(review): names containing "?" are skipped; presumably
            # these are undecodable/placeholder names -- confirm.
            if "?" in s_II:
                continue

            dwon_city(s_I, s_II, href)
        append_txt(log_file, s_I + "\n")
    print("finished!")


if __name__ == "__main__":
    main()