清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
# -*- coding:utf-8 -*-
import csv
import os
class csvread():
def __init__(self,filename,lstname):
self.lstname = lstname#lstname 用于比对csv中相同列
self.filename = filename
self.header = self.fields()
self.data = self.getdata()
self.dataset = self.getset()
#test update
#以字典方式获取csv数据
def getdata(self):
data = []
with open(self.filename,newline = "") as csvfile:
reader = csv.DictReader(csvfile)
for row in reader:
data.append(row)
return data
#获取序列
def getset(self):
lst = set()
for row in self.data:
lst.add(row.get(self.lstname))
return lst
#获取表头
def fields(self):
with open(self.filename,newline = "") as csvfile:
reader = csv.reader(csvfile)
fields = reader.__next__()
return fields
def __sub__(self,other):
#diff表示公共序列的差集,即表1中存在而表2不存在的列
diff = self.dataset - other.dataset
rdata = []
#根据diff生产基于表1的记录
for row in self.data:
if row[self.lstname] in diff:
rdata.append(row)
f1 = os.path.basename(self.filename)
f2 = os.path.basename(other.filename)
f1 = f1.split('.')[0]
f2 = f2.split('.')[0]
filename = f1+'_'+f2+'.csv'
filename = os.path.join(os.path.dirname(self.filename),filename)
#写入csv文件
with open(filename,'w',newline="") as csvfile:
writer = csv.DictWriter(csvfile,fieldnames=self.header)
writer.writeheader()
writer.writerows(rdata)
return filename
if __name__ == '__main__':
import sys
#print(sys.argv[0])
#for i in sys.argv:
filename1 = sys.argv[1]
print(filename1)
filename2 = sys.argv[2]
print(filename1,filename2)
file1 = csvread(filename1)
file2 = csvread(filename2)
file1-file2
file2-file1