清华大佬耗费三个月吐血整理的几百G的资源,免费分享!....>>>
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Report per-IP request counts and traffic from an nginx access log.

Only log lines whose timestamp lies strictly between two bounds are counted.
When run as a script the bounds come from the command line::

    script.py START END

START and END must use the access-log timestamp layout described by
``str_time``, for example ``01/Jan/2020:00:00:00``.
"""

import re
import sys
import time

try:
    import ip_location
except ImportError:
    # Keep the time-window helpers importable without the lookup module;
    # ParseLog.show() still raises ImportError when it is actually needed.
    ip_location = None

if sys.version_info[0] == 2:
    # Python 2 only: site.py removes sys.setdefaultencoding, reload() brings
    # it back so the default str/unicode codec can be switched to UTF-8.
    reload(sys)  # noqa: F821
    sys.setdefaultencoding('utf-8')

# Running totals keyed by the IP field of each counted line.
ipflow = {}
ipnum = {}

# nginx access log to analyse.
log_file = "/data/logs/lolo.log"

# Regex that locates a timestamp in a log line, and the matching strptime
# format used to parse it.
re_time = r'\d{2}\/\w{3}\/\d{4}:\d{2}:\d{2}:\d{2}'
str_time = '%d/%b/%Y:%H:%M:%S'

# Window bounds (strings in ``str_time`` layout). They are filled in by the
# ``__main__`` section and used as the fallback by TimeParser.
start_time = None
end_time = None


class TimeParser(object):
    """Extract timestamps from log lines and test them against a window.

    Args:
        re_time: Pattern (string or compiled) that matches the timestamp.
        str_time: ``time.strptime`` format for the matched text.
        start_time: Optional lower bound in ``str_time`` layout. When omitted
            the module-level ``start_time`` is used.
        end_time: Optional upper bound in ``str_time`` layout. When omitted
            the module-level ``end_time`` is used.
    """

    def __init__(self, re_time, str_time, start_time=None, end_time=None):
        self.re_time = re.compile(re_time)
        self.str_time = str_time
        self.start_time = start_time
        self.end_time = end_time
        self._bounds = None

    def _to_epoch(self, text):
        """Convert a timestamp string to local-time epoch seconds."""
        return time.mktime(time.strptime(text, self.str_time))

    def get(self, line):
        """Return the epoch seconds of the first timestamp found in *line*.

        Raises:
            AttributeError: If *line* contains no timestamp.
            ValueError: If the matched text is not a valid date.
        """
        stamp = self.re_time.search(line).group(0)
        return self._to_epoch(stamp)

    def bounds(self):
        """Return ``(start, end)`` as epoch seconds.

        The bounds are parsed on the first call and cached, so later changes
        to the module-level ``start_time``/``end_time`` do not affect an
        instance that has already been used.

        Raises:
            ValueError: If a bound is missing or does not match ``str_time``.
        """
        if self._bounds is None:
            begin = self.start_time if self.start_time is not None else start_time
            finish = self.end_time if self.end_time is not None else end_time
            if begin is None or finish is None:
                raise ValueError("start_time and end_time must both be set")
            self._bounds = (self._to_epoch(begin), self._to_epoch(finish))
        return self._bounds

    def inPeriod(self, line):
        """Return True when *line*'s timestamp is strictly inside the window.

        Lines without a timestamp, or whose matched text is not a real date,
        are reported as outside the window instead of raising.
        """
        match = self.re_time.search(line)
        if match is None:
            return False
        try:
            stamp = self._to_epoch(match.group(0))
        except (ValueError, OverflowError):
            # The regex only checks the shape, e.g. "99/Foo/2020:..." matches.
            return False
        lower, upper = self.bounds()
        return lower < stamp < upper


class ParseLog(object):
    """Accumulate and print per-IP statistics for one log file.

    Args:
        file_name: Path of the log file to read.
        start_time: Optional lower bound passed on to TimeParser.
        end_time: Optional upper bound passed on to TimeParser.
    """

    # Whitespace-separated column positions of the IP and the flow value.
    IP_FIELD = 1
    FLOW_FIELD = 10

    def __init__(self, file_name, start_time=None, end_time=None):
        self.file_name = file_name
        self.re_time = re.compile(re_time)
        # Attribute name (sic) kept for backward compatibility.
        self.srt_time = str_time
        self.start_time = start_time
        self.end_time = end_time

    def collect(self):
        """Read the log and add matching lines to ``ipnum`` and ``ipflow``.

        Lines outside the window, lines with too few columns and lines whose
        flow column is not an integer are skipped.

        Raises:
            ValueError: If the window bounds are missing or malformed.
        """
        window = TimeParser(self.re_time, self.srt_time,
                            self.start_time, self.end_time)
        # Fail fast on bad bounds, before touching the file.
        window.bounds()
        with open(self.file_name, "r") as fd:
            # Stream the file; access logs can be far larger than memory.
            for line in fd:
                if not window.inPeriod(line):
                    continue
                fields = line.split()
                try:
                    ip = fields[self.IP_FIELD]
                    flow = int(fields[self.FLOW_FIELD])
                except (IndexError, ValueError):
                    continue
                ipnum[ip] = ipnum.get(ip, 0) + 1
                ipflow[ip] = int(ipflow.get(ip, 0)) + flow

    def show(self):
        """Collect the statistics and print one summary line per IP.

        Raises:
            ImportError: If the ``ip_location`` module is not available.
        """
        if ip_location is None:
            raise ImportError("ip_location module is required by ParseLog.show()")
        self.collect()
        for k in ipnum:
            # Presumably returns a human-readable location for the address;
            # verify against the ip_location module.
            name = ip_location.ip_location(k)
            # NOTE(review): the label says "K" but the summed value is printed
            # unscaled - confirm whether a division by 1024 was intended.
            print("访问IP:%s 访问次数:%d 访问流量%.3fK 归属地:%s" % (k, int(ipnum[k]), ipflow[k], name))


if __name__ == "__main__":
    if len(sys.argv) != 3:
        print("输入的参数错误")
        sys.exit(1)
    start_time = sys.argv[1]
    end_time = sys.argv[2]
    try:
        # Reject bounds that do not match str_time before any work is done.
        TimeParser(re_time, str_time, start_time, end_time).bounds()
    except ValueError:
        print("输入的参数错误")
        sys.exit(1)
    p = ParseLog(log_file, start_time, end_time)
    p.show()