下载png图标素材

2015/10/21 38791

import re
import os
import urllib.request

home = "http://sc.chinaz.com"

def downIco(tit, url):
    """Download every demo asset linked from one icon-set detail page.

    Fetches ``url``, collects each ``href`` value that starts with
    ``http://demo.sc.chinaz.com`` and saves it into the directory ``tit``
    under the last path component of the link. Files that already exist are
    not fetched again, so an interrupted run can be resumed.

    Args:
        tit: Directory that receives the files. It is created when it is
            missing and at least one link was found.
        url: Address of the detail page; its body is decoded as UTF-8.

    Returns:
        None. Progress and download failures are printed to stdout.

    Raises:
        Whatever ``urllib.request.urlopen`` or the UTF-8 decode raises for
        the page itself; only the per-file downloads are best-effort.
    """
    # Close the response deterministically instead of leaking the connection.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode("utf-8")

    links = re.findall(r'href="(http://demo.sc.chinaz.com.+?)"', page)

    # Without the directory every urlretrieve below would fail.
    if links and tit and not os.path.exists(tit):
        os.makedirs(tit)

    total = len(links)
    for i, link in enumerate(links, start=1):
        file = os.path.split(link)[1]
        print("(" + str(i) + "/" + str(total) + ") " + file)

        # NOTE(review): this skips names containing "ico" anywhere after the
        # first character; presumably meant to drop .ico files -- confirm.
        if file.find('ico') > 0:
            continue

        target = tit + "/" + file
        if os.path.exists(target):
            continue
        try:
            urllib.request.urlretrieve(link, target)
        except Exception as exc:
            # Best-effort: report the cause and move on to the next file.
            # Ctrl-C / SystemExit are no longer swallowed.
            print("Error!", exc)
def getIndex(url):
    """Process one listing page: save each entry's image and its assets.

    Fetches ``url`` and finds every ``<p><a ... </p>`` fragment. Each
    fragment is split on double quotes; the values at positions 3, 5 and 7
    are used as the detail-page path, the title and the image URL
    (presumably the 2nd to 4th quoted attribute values of the markup --
    verify against the live page). The title, with "图标" and "下载"
    removed, becomes the directory name. The image is stored there unless it
    already exists, and ``downIco`` is then called for the detail page.

    Args:
        url: Address of the listing page; its body is decoded as UTF-8.

    Returns:
        None. Progress and download failures are printed to stdout.
    """
    # Close the response deterministically instead of leaking the connection.
    with urllib.request.urlopen(url) as response:
        page = response.read().decode("utf-8")

    entries = re.findall(r'\<p\>\<a(.+?)\</p\>', page)
    total = len(entries)
    for i, entry in enumerate(entries, start=1):
        parts = entry.split('"')
        if len(parts) < 8:
            # A fragment without the expected quoted values used to raise
            # IndexError and abort the whole crawl; skip it instead.
            print("(" + str(i) + "/" + str(total) + ") skipped: unexpected markup")
            continue

        link, tit, img = parts[3], parts[5], parts[7]
        tit = tit.replace("图标", "").replace("下载", "")
        print("(" + str(i) + "/" + str(total) + ") " + link + ", " + tit + ", " + img)

        if not os.path.exists(tit):
            os.makedirs(tit)

        file = os.path.split(img)[1]
        target = tit + "/" + file
        if not os.path.exists(target):
            try:
                urllib.request.urlretrieve(img, target)
            except Exception as exc:
                # Best-effort: report the cause and continue.
                # Ctrl-C / SystemExit are no longer swallowed.
                print("Error!", exc)

        downIco(tit, home + link)
        
if __name__ == "__main__":
    # Crawl the category landing page first, then listing pages 2 through 9.
    tubiao = "/tubiao/"
    listing_urls = [home + tubiao]
    listing_urls.extend(
        home + tubiao + "index_" + str(page_no) + ".html"
        for page_no in range(2, 10)
    )

    for listing_url in listing_urls:
        getIndex(listing_url)
代码片段