apache-fake-log-gen.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697
  1. #!/usr/bin/python
  2. import time
  3. import datetime
  4. import pytz
  5. import numpy
  6. import random
  7. import gzip
  8. import zipfile
  9. import sys
  10. import argparse
  11. from faker import Faker
  12. from random import randrange
  13. #todo:
  14. # allow writing different patterns (Common Log, Apache Error log etc)
  15. # log rotation
  class switch(object):
      """Emulate a C-style ``switch`` statement with a one-pass ``for`` loop.

      Typical use::

          for case in switch(value):
              if case('a'):
                  ...
                  break
              if case('b'): pass   # falls through to the next suite
              if case():           # default: matches anything
                  ...

      Iterating the object yields its ``match`` method exactly once, so the
      loop body runs a single time whether or not a suite ends in ``break``.
      """

      def __init__(self, value):
          self.value = value  # the value each case is compared against
          self.fall = False   # set once a case matched; later cases fall through

      def __iter__(self):
          """Yield the match method once, then stop."""
          yield self.match
          # The generator simply ends here. Raising StopIteration explicitly
          # inside a generator is turned into RuntimeError by PEP 479
          # (Python 3.7+), which broke every path that did not ``break``.

      def match(self, *args):
          """Indicate whether or not to enter a case suite.

          Returns True when called with no arguments (default case), when a
          previous case already matched (fall-through), or when the switch
          value equals one of ``args``; otherwise returns False.
          """
          if self.fall or not args:
              return True
          if self.value in args:
              self.fall = True
              return True
          return False
  # ---- Command-line interface -------------------------------------------
  parser = argparse.ArgumentParser(__file__, description="Fake Apache Log Generator")
  parser.add_argument("--output", "-o", dest='output_type', help="Write to a Log file, a gzip file or to STDOUT", choices=['LOG','GZ','CONSOLE'] )
  parser.add_argument("--num", "-n", dest='num_lines', help="Number of lines to generate (0 for infinite)", type=int, default=1)
  parser.add_argument("--prefix", "-p", dest='file_prefix', help="Prefix the output file name", type=str)

  args = parser.parse_args()

  log_lines = args.num_lines
  file_prefix = args.file_prefix
  output_type = args.output_type

  faker = Faker()

  # The timestamp in the file name is taken once, at start-up.
  timestr = time.strftime("%Y%m%d-%H%M%S")
  # Simulated clock for the log entries; advanced by a random step per line.
  otime = datetime.datetime.now()

  outFileName = 'access_log_'+timestr+'.log' if not file_prefix else file_prefix+'_access_log_'+timestr+'.log'

  # ---- Output target ------------------------------------------------------
  # 'CONSOLE' and "no --output given" both write to stdout.
  if output_type == 'LOG':
      f = open(outFileName, 'w')
  elif output_type == 'GZ':
      # Text mode: the loop writes str, and gzip's plain 'w' is binary.
      f = gzip.open(outFileName + '.gz', 'wt')
  else:
      f = sys.stdout

  # ---- Value pools for the generated fields -------------------------------
  response=["200","404","500","301"]
  verb=["GET","POST","DELETE","PUT"]
  resources=["/list","/wp-content","/wp-admin","/explore","/search/tag/list","/app/main/posts","/posts/posts/explore","/apps/cart.jsp?appID="]
  ualist = [faker.firefox, faker.chrome, faker.safari, faker.internet_explorer, faker.opera]

  # ---- Generation loop ----------------------------------------------------
  try:
      while True:
          # Move the simulated clock forward by 30-300 seconds.
          increment = datetime.timedelta(seconds=random.randint(30,300))
          otime += increment

          ip = faker.ipv4()
          dt = otime.strftime('%d/%b/%Y:%H:%M:%S')
          # UTC offset string of the current US/Pacific time.
          tz = datetime.datetime.now(pytz.timezone('US/Pacific')).strftime('%z')
          vrb = numpy.random.choice(verb,p=[0.6,0.1,0.1,0.2])

          uri = random.choice(resources)
          if "apps" in uri:
              # Complete the "?appID=" query with a random id. str() replaces
              # the backtick repr, which is a syntax error on Python 3.
              uri += str(random.randint(1000,10000))

          resp = numpy.random.choice(response,p=[0.9,0.04,0.02,0.04])
          byt = int(random.gauss(5000,50))
          referer = faker.uri()
          # Pick a faker user-agent factory by weight, then call it.
          useragent = numpy.random.choice(ualist,p=[0.5,0.3,0.1,0.05,0.05] )()

          f.write('%s - - [%s %s] "%s %s HTTP/1.0" %s %s "%s" "%s"\n' % (ip,dt,tz,vrb,uri,resp,byt,referer,useragent))

          # Starting from 0 the counter goes negative and never equals 0,
          # which is what makes "--num 0" run forever.
          log_lines = log_lines - 1
          if log_lines == 0:
              break
  finally:
      # Close real files even when interrupted (e.g. Ctrl-C in infinite
      # mode) so buffered data and the gzip trailer are written.
      if f is not sys.stdout:
          f.close()