清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
def URLtoUTF8(string):
    """Percent-decode *string* and return it as unicode text.

    The value is converted with ``str()``, URL-unquoted, and the resulting
    bytes are decoded with the encoding guessed by ``chardet`` when that
    guess is one of the trusted encodings; otherwise UTF-8 is tried first
    and GB18030 second.  If any of those steps fails, the function falls
    back to the JSON representation of the original value with every
    double quote removed.  Finally, non-breaking spaces (U+00A0) are
    replaced by plain spaces.

    This is Python 2 code: it relies on ``urllib.unquote`` and on
    ``str.decode``.

    Args:
        string: Value to decode; anything accepted by ``str()``.

    Returns:
        The decoded text with every U+00A0 replaced by ``' '``.

    Raises:
        TypeError: Propagated from ``json.dumps`` when the fallback path is
            taken and the value is not JSON serializable.
    """
    # Detected encodings that are trusted as-is; the detection result is
    # lower-cased before the comparison.
    # NOTE(review): 'ISO-8859-2' can never equal a lower-cased name, so text
    # detected as ISO-8859-2 always goes through the UTF-8/GB18030 attempts.
    # Kept exactly as in the original -- confirm whether that is intended.
    trusted_encodings = (
        'utf-8', 'utf8', 'gb18030', 'gb2312', 'gbk', 'ISO-8859-2',
    )

    try:
        raw = urllib.unquote(str(string))
        detected = chardet.detect(raw)['encoding']

        text = None
        # The detected encoding may be None, hence the explicit check.
        if detected is not None and detected.lower() in trusted_encodings:
            try:
                text = raw.decode(detected)
            except (UnicodeError, LookupError):
                text = None  # fall through to the fixed-order attempts

        if text is None:
            try:
                text = raw.decode('utf8')
            except (UnicodeError, LookupError):
                # A failure here is handled by the outer best-effort fallback.
                text = raw.decode('gb18030')
    except Exception:
        # Deliberate best-effort fallback (e.g. str() failed on the input):
        # use the JSON text of the original value, minus its double quotes.
        # KeyboardInterrupt/SystemExit are no longer swallowed here.
        text = json.dumps(string)
        text = text.replace(u'"', '')

    return text.replace(u'\xa0', ' ')