#!/usr/bin/python
# -*- coding: utf-8 -*-
import os
import re
import shutil
import httplib
import traceback
import threading
# Captures the text between <title> and </title>; DOTALL lets the title span
# several lines.
title_re = re.compile('<title>(.+?)</title>', re.DOTALL)
# Captures, as named group "album", the title attribute that follows the table
# cell labelled "所属专辑" ("album"). Not referenced by the code visible here.
album_re = re.compile('<td class="item" valign="top">所属专辑:</td>.*?title="(?P<album>.+?)">', re.DOTALL)
# Intended size limit for a batch of worker threads; threaded processing is not
# active in the visible code.
max_threads = 20
# File holding the persisted cache, one "<key> <value>" entry per line.
cache_filename = 'cache.txt'
# In-memory cache: source file basename -> name of the .mp3 copy made for it.
cache = {}
def listFiles(rootDir, ext = None):
    """Collect the paths of all files found beneath rootDir.

    rootDir is walked recursively with os.walk.  When ext is not None, only
    file names for which ``name.endswith(ext)`` holds are kept.

    Returns a list of paths, each built by joining the containing directory
    (as reported by os.walk) with the file name.
    """
    found = []
    for folder, _subdirs, names in os.walk(rootDir):
        if ext is not None:
            names = [name for name in names if name.endswith(ext)]
        found.extend(os.path.join(folder, name) for name in names)
    return found
def filterFiles(lst):
    """Return the paths from lst that still need to be processed.

    A path is kept when its basename
      * contains no '.',
      * is accepted by int(), and
      * is either absent from the module-level ``cache`` or cached under a
        target path that no longer exists on disk.

    Returns a new list; lst itself is not modified.
    """
    def _isPending(path):
        base = os.path.basename(path)
        if '.' in base:
            return False
        try:
            int(base)
        except ValueError:
            # Only "not a number" is an expected failure here; anything else
            # (e.g. KeyboardInterrupt) must not be swallowed.
            return False
        # An entry whose target file has disappeared is processed again.
        return not (base in cache and os.path.exists(cache[base]))

    return [path for path in lst if _isPending(path)]
def readHTTPInfo(uri):
    """Fetch ``/song/<uri>`` from www.xiami.com and split the page title in two.

    The response body is searched with ``title_re``.  The title text is cut at
    the first ',' found after its first '-', and what remains is split at its
    last '-'.

    Returns:
        A 2-tuple of stripped strings: (text before the last '-', text after
        it).  The caller in this file unpacks it as (name, author).
        ('', '') is returned when the HTTP status is not 200, when no usable
        title is found, or when any exception occurs (its traceback is
        printed).
    """
    conn = None
    try:
        conn = httplib.HTTPConnection("www.xiami.com")
        conn.request("GET", '/song/%s' % uri)
        r = conn.getresponse()
        if r.status != 200:
            print('%s %s' % (r.status, r.reason))
            return '', ''
        data = r.read()

        match = title_re.search(data)
        if not match:
            print('no title')
            return '', ''

        result = match.group(1)
        # Drop everything from the first ',' that follows the first '-'.
        pos = result.find(',', result.find('-'))
        if pos > 0:
            result = result[:pos]
        pos = result.rfind('-')
        if pos > 0:
            return result[:pos].strip(), result[pos+1:].strip()
        # Title has no usable '-' separator: show it and report failure.
        print(result.strip())
        return '', ''
    except Exception:
        traceback.print_exc()
        return '', ''
    finally:
        # Close on every path, including the non-200 return and exceptions.
        if conn is not None:
            conn.close()
def utf2gbk(s):
    """Re-encode the UTF-8 byte string s as GBK and return the result."""
    text = s.decode('utf8')
    return text.encode('gbk')
def copy2RenameMp3(filename, idx = 0):
    """Copy filename to '<author> - <name>.mp3' using looked-up song info.

    The basename of filename is passed to readHTTPInfo; the resulting target
    name is converted with utf2gbk and printed together with filename and idx.
    Only when both name and author are non-empty is the target recorded in
    ``cache`` (keyed by the basename) and the file copied.
    """
    song_id = os.path.basename(filename)
    title, artist = readHTTPInfo(song_id)
    target = utf2gbk('%s - %s.mp3' % (artist, title))
    print(' '.join([filename, str(idx), '->', target]))
    if not (len(title) > 0 and len(artist)):
        # Lookup failed: nothing to record or copy.
        return
    cache[song_id] = target
    shutil.copy(filename, target)
def readCache():
    """Load entries from ``cache_filename`` into the module-level ``cache``.

    Each line is split at its first space: the part before it becomes the key
    and the stripped remainder the value.  Lines without a space are ignored.
    Existing entries in ``cache`` are kept (and overwritten on key clash).

    Loading is best-effort: if the file is missing or cannot be read, the
    cache is left as it is and no error is raised.
    """
    try:
        with open(cache_filename, 'rb') as fp:
            for line in fp:
                key, sep, value = line.partition(b' ')
                if sep:
                    cache[key] = value.strip()
    except IOError:
        # No cache file yet (first run) or it is unreadable: start without it.
        pass
def writeCache():
    """Write the module-level ``cache`` to ``cache_filename``.

    Every entry becomes one '<key> <value>' line; lines are joined with '\\n'
    and no trailing newline is written.  Nothing is written (and an existing
    file is left untouched) when the cache is empty.
    """
    if not cache:
        return
    # Build the content before opening, so a formatting error cannot leave a
    # truncated cache file behind.
    lines = ['%s %s' % (k, v) for k, v in cache.items()]
    with open(cache_filename, 'wb') as fp:
        fp.write('\n'.join(lines))
def main():
    """Rename-copy every pending file under the current directory.

    Loads the cache, lists all files below '.', keeps those filterFiles
    selects, and calls copy2RenameMp3 on each one in turn.  The cache is
    written back afterwards, also when processing stops with an exception.
    """
    readCache()
    pending = filterFiles(listFiles('.'))
    # NOTE: earlier scaffolding for running the copies in thread batches was
    # removed; thread creation was commented out, so it never executed.
    try:
        for filename in pending:
            copy2RenameMp3(filename)
    finally:
        # Keep the results gathered so far even if a lookup or copy failed
        # or the run was interrupted.
        writeCache()
# Run the renaming pass only when executed as a script, not when imported.
if __name__ == '__main__':
    main()