清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
def URLtoUTF8(string):
    """Percent-decode ``string`` and return it as decoded text.

    The value is converted with ``str()``, URL-unquoted, and the resulting
    bytes are decoded using the encoding guessed by ``chardet`` when that
    guess is one of the accepted encodings. Otherwise UTF-8 is tried first
    and GB18030 second. If any step fails, the function falls back to the
    JSON serialisation of the original value with every double-quote
    character removed. In all cases non-breaking spaces (U+00A0) are
    replaced by ordinary spaces before returning.

    NOTE: written against the Python 2 API (``urllib.unquote`` and
    byte-string ``.decode``).

    :param string: value to decode; anything accepted by ``str()``.
    :returns: the decoded text, or the quote-stripped JSON dump of
        ``string`` when decoding is not possible.
    :raises TypeError: from ``json.dumps`` when the fallback path is taken
        and ``string`` is not JSON-serialisable.
    """
    # Kept lower-case on purpose: the detected name is lower-cased before
    # the lookup, so a mixed-case entry such as 'ISO-8859-2' never matched.
    accepted_encodings = (
        'utf-8', 'utf8', 'gb18030', 'gb2312', 'gbk', 'iso-8859-2',
    )
    try:
        raw = urllib.unquote(str(string))
        detected = chardet.detect(raw)['encoding']  # may be None

        text = None
        if detected and detected.lower() in accepted_encodings:
            try:
                text = raw.decode(detected)
            except (UnicodeError, LookupError):
                # Wrong guess or unknown codec: use the generic fallbacks.
                text = None

        if text is None:
            try:
                text = raw.decode('utf8')
            except UnicodeError:
                text = raw.decode('gb18030')
    except Exception:
        # Best-effort fallback. ``Exception`` (not a bare ``except``) so that
        # KeyboardInterrupt / SystemExit are no longer swallowed.
        # Every '"' is removed, including escaped ones inside the value.
        text = json.dumps(string).replace(u'"', '')

    return text.replace(u'\xa0', ' ')