nginx_or_php_log_tool.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354
  1. # coding: utf-8
  2. import gzip
  3. import sys
  4. import os
  5. import re
  6. starttime= '21:00:01'
  7. endtime = '23:59:59'
  8. date = '23'
  9. keywords = 'api/v1/get_index_ad'
  10. file_type = 'gz' #'gz' or ''
  11. log_type = 'nginx' #'fpm' or 'nginx'
  12. filename = 'api.test.com.access.log-20160523.gz'
  13. if file_type == 'gz':
  14. logfile = gzip.GzipFile(filename)
  15. else:
  16. logfile = open(filename,'r')
  17. result_log = os.getcwd() + '/' + starttime + '-' + endtime + '.log'
  18. def time_interval(starttime,endtime,line):
  19. if len(line) > 5:
  20. if log_type == 'fpm':
  21. line_group = re.search('\s(\d+)/\w{3}/\d{4}:(\d{2}:\d{2}:\d{2})\s\+0800\s(.*?)\s',line)
  22. if log_type == 'nginx':
  23. line_group = re.search('\[(\d+)/\w{3}/\d{4}:(\d{2}:\d{2}:\d{2})',line)
  24. line_date = line_group.group(1)
  25. line_time = line_group.group(2)
  26. if line_date == date:
  27. if starttime <= line_time and line_time <= endtime:
  28. if keywords:
  29. if keywords in line:
  30. return line,line_time
  31. else:
  32. return None,line_time
  33. return line,line_time
  34. if line_time > endtime:
  35. sys.exit(0)
  36. return None,line_time
  37. line = logfile.readline()
  38. n = 0
  39. while line:
  40. result,line_time = time_interval(starttime,endtime,line)
  41. if result:
  42. with open(result_log,'a+') as f:
  43. f.write(line)
  44. n += 1
  45. if not n % 5000000:
  46. print n
  47. print line_time
  48. line = logfile.readline()
  49. print n
  50. logfile.close()
  51. f.close()