清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import hashlib
import json
import re

try:
    import urllib2
except ImportError:
    # Python 3 merged urllib2 into urllib.request; keep the old module-level
    # name so existing references to ``urllib2.urlopen`` continue to work.
    import urllib.request as urllib2


# --------------------------------------------------- utilities start
def md5(str):
    '''
    Return the hexadecimal MD5 digest of ``str``.

    Byte strings are hashed as-is. Text (unicode) input is encoded as UTF-8
    first, because hashlib only accepts bytes-like data.

    NOTE: the parameter name shadows the ``str`` builtin; it is kept so that
    callers passing it by keyword are not broken.
    '''
    data = str
    if isinstance(data, type(u'')):
        data = data.encode('utf-8')
    m = hashlib.md5()
    m.update(data)
    return m.hexdigest()


def search(regex, content, group=1):
    '''
    Return capture group ``group`` of the first match of ``regex`` in
    ``content``, or '' when there is no match.

    The pattern is applied with re.DOTALL, so '.' also matches newlines.
    '''
    match = re.search(regex, content, re.DOTALL)
    if match is not None:
        return match.group(group)
    return ''


def findall(regex, content):
    '''
    Return every match of ``regex`` in ``content`` (re.findall semantics),
    with re.DOTALL enabled.
    '''
    return re.findall(regex, content, re.DOTALL)


def cleanHtmlTag(content):
    '''
    Strip HTML tags from ``content`` and trim surrounding whitespace.

    Falsy input ('' or None) is returned unchanged so that an empty search
    result can be passed straight through.
    '''
    if not content:
        return content
    # The previous ``content or re.sub(...)`` short-circuited on every
    # non-empty string and therefore never removed any tag.
    return re.sub(r'<[^>]*?>', '', content).strip()


def cleanedSearch(regex, content, group=1):
    '''
    Search ``content`` for ``regex`` and return the requested group with
    HTML tags removed.
    '''
    return cleanHtmlTag(search(regex, content, group))


def httpGet(url, encoding='gbk'):
    '''
    Send an HTTP GET request to ``url`` and return the response body.

    The body is decoded using ``encoding`` (undecodable bytes are ignored)
    and re-encoded, so the return value is a UTF-8 encoded byte string.
    Errors raised by ``urlopen`` propagate to the caller.
    '''
    response = urllib2.urlopen(url)
    try:
        raw = response.read()
    finally:
        # Always release the underlying connection, even if read() fails.
        response.close()
    return raw.decode(encoding, 'ignore').encode('utf-8')


def toJson(dict):
    '''
    Serialize ``dict`` to a JSON string indented by 4 spaces, leaving
    non-ASCII characters unescaped.

    NOTE: the parameter name shadows the ``dict`` builtin; it is kept for
    backward compatibility with keyword callers.
    '''
    return json.dumps(dict, ensure_ascii=False, indent=4)
# --------------------------------------------------- utilities end