#coding:utf-8
import requests, xlrd, datetime, threading, sys
from bs4 import BeautifulSoup

reload(sys)
sys.setdefaultencoding("utf-8")

def beian(url, excel):
    # Read the domain list from the first sheet of the Excel workbook
    f = xlrd.open_workbook(excel)
    sheet1 = f.sheet_by_name('Sheet1')
    num_cols = sheet1.ncols
    for curr_col in range(num_cols):
        rows = sheet1.col_values(curr_col)
        for each in rows:
            # Query the chinaz ICP-record lookup page for each domain
            data = {'s': each, 'guid': '1e4b4b3f-310f-4aaa-90f7-a552db48758d'}
            r = requests.post(url, data=data)
            soup = BeautifulSoup(r.content, 'html.parser')
            tags = soup.find_all('div', id='contenthtml')
            try:
                for tag in tags:
                    # A 'tdright' cell is only present when a record exists;
                    # find() returns None otherwise and get_text() raises AttributeError
                    d_name = tag.find('td', class_='tdright').get_text()
                    #print d_name
                    print '%s has an ICP record' % each
            except AttributeError:
                print '%s has no ICP record' % each

if __name__ == "__main__":
    url = 'http://tool.chinaz.com/beian.aspx'
    excel = r'C:\1.xlsx'
    threads = []
    print "Program started at %s" % datetime.datetime.now()
    # Run the lookup in a single daemon worker thread
    t1 = threading.Thread(target=beian, args=(url, excel))
    threads.append(t1)
    for th in threads:
        th.setDaemon(True)
        th.start()
        th.join()
    print "Program finished at %s" % datetime.datetime.now()
    #beian(url,excel)
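
For a quick sanity check without preparing the Excel file, the same request and parsing logic can be exercised against a single domain. The following is a minimal sketch that reuses the endpoint, form fields, guid value, and response structure from the script above; the check_one name and the example.com domain are placeholders, not part of the original script.

# Minimal single-domain lookup, assuming the same tool.chinaz.com endpoint,
# form fields, and response structure as the script above (guid copied from it;
# 'example.com' is only a placeholder domain).
import requests
from bs4 import BeautifulSoup

def check_one(domain):
    data = {'s': domain, 'guid': '1e4b4b3f-310f-4aaa-90f7-a552db48758d'}
    r = requests.post('http://tool.chinaz.com/beian.aspx', data=data)
    soup = BeautifulSoup(r.content, 'html.parser')
    result = soup.find('div', id='contenthtml')
    # A 'tdright' cell only appears in the result block when an ICP record exists
    return result is not None and result.find('td', class_='tdright') is not None

if __name__ == '__main__':
    print(check_one('example.com'))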