清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
# coding:utf-8
# python 2.7
import csv
import math
import sys
import os
from sys import exit
# Split a CSV file evenly into several smaller CSV files
def _open_csv_for_write(path):
    """Open *path* for ``csv.writer`` in the mode the running Python expects."""
    if sys.version_info[0] < 3:
        # Python 2's csv module emits bytes and needs a binary-mode file.
        return open(path, 'wb')
    # Python 3's csv module emits text; newline='' stops the file object
    # from translating the line terminators csv.writer produces.
    return open(path, 'w', newline='')


def deal_csv(head, data, n, filepath, max_lines=1000000):
    """Split *data* into numbered CSV files that each start with *head*.

    The rows are divided into consecutive chunks of ``ceil(len(data) / n)``
    rows, capped at *max_lines* rows per file (more than *n* files are
    produced when the cap applies). Chunk ``k`` (1-based) is written to
    ``<filepath without extension>_<k>.csv``. Rows keep their source order.
    When there are fewer rows than *n*, only as many files as there are
    chunks are written; with no rows at all no file is written.

    Args:
        head: Header row written as the first line of every output file.
        data: Sequence of rows (each a sequence of fields) to distribute.
        n: Requested number of output files; must be at least 1.
        filepath: Path of the source file; output names are derived from it.
        max_lines: Maximum number of data rows per output file.

    Raises:
        ValueError: If *n* or *max_lines* is smaller than 1.
    """
    if n < 1:
        raise ValueError("n must be a positive integer, got %r" % (n,))
    if max_lines < 1:
        raise ValueError(
            "max_lines must be a positive integer, got %r" % (max_lines,))

    # splitext only removes the final extension of the last path component,
    # so dots in directory names (or a leading './') are left alone.
    name = os.path.splitext(filepath)[0]

    total = len(data)
    line_number = min(int(math.ceil(total / float(n))), max_lines)

    if total:
        starts = range(0, total, line_number)
        for index, start in enumerate(starts, 1):
            # The with-block closes (and flushes) each output file.
            with _open_csv_for_write("%s_%d.csv" % (name, index)) as out:
                writer = csv.writer(out)
                writer.writerow(head)
                writer.writerows(data[start:start + line_number])

    # Single-argument parenthesised form prints the same on Python 2 and 3.
    print('SourceFile -> %s' % (name))
    print('FilePath -> %s' % (os.getcwd()))
    print('Status -> Success')
# Read the file contents and convert them, line by line, into a list of rows
def _open_csv_for_read(path):
    """Open *path* for ``csv.reader`` in the mode the running Python expects."""
    if sys.version_info[0] < 3:
        # Python 2's csv module consumes bytes and needs a binary-mode file.
        return open(path, 'rb')
    # Python 3's csv module consumes text; newline='' lets it see the raw
    # line terminators, including newlines embedded in quoted fields.
    return open(path, 'r', newline='')


def read_csv(filepath, sign):
    """Read *filepath* and return its rows as a list of field lists.

    Args:
        filepath: Path of the delimited text file to read.
        sign: Field separator. A single character is handed to the csv
            module as its delimiter, so quoted fields are honoured; a longer
            separator falls back to a plain ``str.split`` on each line.

    Returns:
        A list with one list of string fields per line. With a
        single-character separator a blank line yields an empty list.
    """
    with _open_csv_for_read(filepath) as source:
        if len(sign) == 1:
            # Parsing with the real delimiter keeps fields that contain
            # commas intact when the separator is something else.
            return list(csv.reader(source, delimiter=sign))
        # csv only supports one-character delimiters, so split manually.
        return [raw.rstrip('\r\n').split(sign) for raw in source]
def read_file(filepath, sign):
    """Read *filepath* and split every line on *sign*.

    No quoting rules are applied; each line is split with ``str.split``.

    Args:
        filepath: Path of the text file to read.
        sign: Separator string passed to ``str.split`` for every line.

    Returns:
        A list with one list of string fields per line. A blank line
        yields ``['']``.
    """
    # The with-block closes the file even if splitting raises.
    with open(filepath) as source:
        # Strip '\r' as well as '\n' so CRLF files do not leave a stray
        # carriage return on the last field of each row.
        return [line.rstrip('\r\n').split(sign) for line in source]
if __name__ == "__main__":
    # Command line: <script> <file> <parts> [separator]
    # The separator defaults to ',' when it is not given.
    if len(sys.argv) == 3:
        sign = ','
    elif len(sys.argv) == 4:
        sign = sys.argv[3]
    else:
        # sys.exit with a string prints it to stderr and exits with status 1,
        # instead of quitting silently with a success status.
        sys.exit("Usage: %s <file> <parts> [separator]" % sys.argv[0])

    # Get the file name.
    filepath = sys.argv[1]

    # Get the number of parts; validate it before doing any file I/O.
    try:
        n = int(sys.argv[2])
    except ValueError:
        sys.exit("<parts> must be an integer, got %r" % sys.argv[2])
    if n < 1:
        sys.exit("<parts> must be at least 1, got %d" % n)

    # Get the data; the first row is the header repeated in every part.
    data = read_file(filepath, sign)
    if not data:
        sys.exit("%s is empty, nothing to split" % filepath)

    deal_csv(data[0], data[1:], n, filepath)