# coding: utf-8
"""Extract the lines of an access log that fall inside a time window.

The script scans ``filename`` line by line, keeps the lines whose day equals
``date``, whose time lies in ``[starttime, endtime]`` (inclusive) and which
contain ``keywords`` (when non-empty), and appends them to ``result_log``.
"""
import gzip
import sys
import os
import re

# --- configuration ---------------------------------------------------------
starttime = '21:00:01'   # inclusive lower bound, HH:MM:SS
endtime = '23:59:59'     # inclusive upper bound, HH:MM:SS
date = '23'              # day-of-month exactly as it is written in the log
keywords = 'api/v1/get_index_ad'   # substring a line must contain; '' disables
file_type = 'gz'         # 'gz' or ''
log_type = 'nginx'       # 'fpm' or 'nginx'
filename = 'api.test.com.access.log-20160523.gz'

result_log = os.getcwd() + '/' + starttime + '-' + endtime + '.log'

# Print a progress report every this many input lines.
PROGRESS_EVERY = 5000000

# One timestamp pattern per supported log_type, compiled once.
# Group 1 is the day-of-month, group 2 the HH:MM:SS time.
# The 'fpm' pattern only matches timestamps carrying a literal "+0800" offset.
_LINE_PATTERNS = {
    'fpm': re.compile(
        r'\s(\d+)/\w{3}/\d{4}:(\d{2}:\d{2}:\d{2})\s\+0800\s(.*?)\s'),
    'nginx': re.compile(r'\[(\d+)/\w{3}/\d{4}:(\d{2}:\d{2}:\d{2})'),
}


def time_interval(starttime, endtime, line):
    """Classify one log line against the configured date/time window.

    Reads the module-level ``log_type``, ``date`` and ``keywords`` settings.

    Args:
        starttime: inclusive lower bound as an ``HH:MM:SS`` string.
        endtime: inclusive upper bound as an ``HH:MM:SS`` string.
        line: one raw log line.

    Returns:
        ``(line, line_time)`` when the line is on ``date``, inside the window
        and contains ``keywords`` (or ``keywords`` is empty);
        ``(None, line_time)`` when a timestamp was found but the line is not
        selected; ``(None, None)`` when the line is too short or carries no
        recognisable timestamp.

    Raises:
        ValueError: if ``log_type`` is not one of the supported types.
        SystemExit: (exit code 0) when a line on ``date`` is later than
            ``endtime``. Stopping here is only correct if the log is in
            chronological order.
    """
    pattern = _LINE_PATTERNS.get(log_type)
    if pattern is None:
        raise ValueError('unsupported log_type: %r' % (log_type,))

    # Blank or truncated lines cannot hold a timestamp.
    if len(line) <= 5:
        return None, None

    match = pattern.search(line)
    if match is None:
        # Malformed line: skip it instead of crashing the whole run.
        return None, None

    line_date = match.group(1)
    line_time = match.group(2)

    if line_date != date:
        return None, line_time

    # Times are fixed-width HH:MM:SS, so string comparison orders them.
    if starttime <= line_time <= endtime:
        if not keywords or keywords in line:
            return line, line_time
        return None, line_time

    if line_time > endtime:
        sys.exit(0)
    return None, line_time


def _open_text(path, mode, compressed=False):
    """Open ``path`` so that it yields/accepts native ``str`` lines.

    ``mode`` is ``'r'`` or ``'a'``. On Python 3 the file is opened in text
    mode with ``surrogateescape`` and untranslated newlines so that lines are
    written back byte-for-byte; on Python 2 ``str`` already is a byte string.
    """
    opener = gzip.open if compressed else open
    if sys.version_info[0] >= 3:
        return opener(path, mode + 't', encoding='utf-8',
                      errors='surrogateescape', newline='\n')
    return opener(path, mode + 'b' if compressed else mode)


def main():
    """Filter ``filename`` into ``result_log`` and print progress counters."""
    n = 0
    out = None  # opened on the first hit so a run without hits creates no file
    logfile = _open_text(filename, 'r', compressed=(file_type == 'gz'))
    try:
        for line in logfile:
            result, line_time = time_interval(starttime, endtime, line)
            if result:
                if out is None:
                    out = _open_text(result_log, 'a')
                out.write(line)
            n += 1
            if not n % PROGRESS_EVERY:
                print(n)
                print(line_time)
        print(n)
    finally:
        # Also runs when time_interval() stops the scan via sys.exit().
        if out is not None:
            out.close()
        logfile.close()


if __name__ == '__main__':
    main()