123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354 |
- # coding: utf-8
- import gzip
- import sys
- import os
- import re
# ---- Filter settings -------------------------------------------------------
# Time window (HH:MM:SS, compared as strings) and day of month to extract.
starttime = '21:00:01'
endtime = '23:59:59'
date = '23'
# Only lines containing this substring are kept; an empty value keeps all
# lines that fall inside the time window.
keywords = 'api/v1/get_index_ad'
file_type = 'gz'  # 'gz' or ''
log_type = 'nginx'  # 'fpm' or 'nginx'
filename = 'api.test.com.access.log-20160523.gz'

# ---- Input / output --------------------------------------------------------
# Gzip-compressed logs are read through gzip.GzipFile; anything else is
# opened as a regular file.
logfile = gzip.GzipFile(filename) if file_type == 'gz' else open(filename, 'r')

# Matching lines are written to "<cwd>/<starttime>-<endtime>.log".
result_log = '%s/%s-%s.log' % (os.getcwd(), starttime, endtime)
# Timestamp patterns per log format, compiled once instead of on every line.
# In both patterns group 1 is the day of the month and group 2 is HH:MM:SS.
_TIMESTAMP_PATTERNS = {
    'fpm': re.compile(r'\s(\d+)/\w{3}/\d{4}:(\d{2}:\d{2}:\d{2})\s\+0800\s(.*?)\s'),
    'nginx': re.compile(r'\[(\d+)/\w{3}/\d{4}:(\d{2}:\d{2}:\d{2})'),
}


def time_interval(starttime, endtime, line):
    """Decide whether a log line belongs in the extracted result.

    Reads the module-level settings ``log_type`` (selects the timestamp
    pattern), ``date`` (day of month to keep) and ``keywords`` (optional
    substring filter).

    Args:
        starttime: Inclusive lower bound of the window, as 'HH:MM:SS'.
        endtime: Inclusive upper bound of the window, as 'HH:MM:SS'.
        line: One raw line from the log file.

    Returns:
        A ``(result, line_time)`` tuple.  ``result`` is ``line`` when the
        line is on the configured day, inside the window and contains
        ``keywords`` (if any); otherwise it is ``None``.  ``line_time`` is
        the 'HH:MM:SS' string parsed from the line, or ``None`` when the
        line is too short or has no recognisable timestamp.

    Raises:
        ValueError: If ``log_type`` is neither 'fpm' nor 'nginx'.
        SystemExit: Via ``sys.exit(0)`` once a line on the configured day
            is later than ``endtime``; this stops the scan early.
    """
    # Blank or truncated lines cannot carry a timestamp.  Always return a
    # 2-tuple so the caller's unpacking never fails.
    if len(line) <= 5:
        return None, None

    try:
        pattern = _TIMESTAMP_PATTERNS[log_type]
    except KeyError:
        raise ValueError("unsupported log_type: %r" % (log_type,))

    line_group = pattern.search(line)
    if line_group is None:
        # Line does not follow the expected format; skip it instead of
        # crashing on ``None.group``.
        return None, None

    line_date = line_group.group(1)
    line_time = line_group.group(2)

    if line_date != date:
        return None, line_time

    # Times are fixed-width 'HH:MM:SS', so plain string comparison orders
    # them chronologically.
    if line_time > endtime:
        # Past the end of the window on the wanted day: stop the whole run.
        sys.exit(0)
    if line_time < starttime:
        return None, line_time

    if keywords and keywords not in line:
        return None, line_time
    return line, line_time
# Scan the log line by line, append every line accepted by time_interval()
# to result_log, and report progress every 5,000,000 lines.
n = 0  # number of log lines examined so far
out = None  # output file; opened on the first match so that a run with no
            # matches does not create an empty result file
try:
    for line in logfile:
        result, line_time = time_interval(starttime, endtime, line)
        if result:
            if out is None:
                # Open once and keep it open; reopening the file for every
                # matching line is needlessly slow on large logs.
                out = open(result_log, 'a+')
            out.write(line)
        n += 1
        if not n % 5000000:
            # Progress report: lines processed and the last timestamp seen.
            print(n)
            print(line_time)
    print(n)
finally:
    # time_interval() can end the run through sys.exit(); SystemExit still
    # passes through this clause, so both files are always closed.
    if out is not None:
        out.close()
    logfile.close()
|