apache-fake-log-gen.py 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105
  1. #!/usr/bin/python
  2. import time
  3. import datetime
  4. import pytz
  5. import numpy
  6. import random
  7. import gzip
  8. import zipfile
  9. import sys
  10. import argparse
  11. from faker import Faker
  12. from random import randrange
  13. from tzlocal import get_localzone
  14. local = get_localzone()
# TODO:
#   - allow writing different patterns (Common Log, Apache Error log, etc.)
#   - log rotation
  18. class switch(object):
  19. def __init__(self, value):
  20. self.value = value
  21. self.fall = False
  22. def __iter__(self):
  23. """Return the match method once, then stop"""
  24. yield self.match
  25. raise StopIteration
  26. def match(self, *args):
  27. """Indicate whether or not to enter a case suite"""
  28. if self.fall or not args:
  29. return True
  30. elif self.value in args: # changed for v1.5, see below
  31. self.fall = True
  32. return True
  33. else:
  34. return False
  35. parser = argparse.ArgumentParser(__file__, description="Fake Apache Log Generator")
  36. parser.add_argument("--output", "-o", dest='output_type', help="Write to a Log file, a gzip file or to STDOUT", choices=['LOG','GZ','CONSOLE'] )
  37. parser.add_argument("--num", "-n", dest='num_lines', help="Number of lines to generate (0 for infinite)", type=int, default=1)
  38. parser.add_argument("--prefix", "-p", dest='file_prefix', help="Prefix the output file name", type=str)
  39. parser.add_argument("--sleep", "-s", help="Sleep this long between lines (in seconds)", default=0.0, type=float)
  40. args = parser.parse_args()
  41. log_lines = args.num_lines
  42. file_prefix = args.file_prefix
  43. output_type = args.output_type
  44. faker = Faker()
  45. timestr = time.strftime("%Y%m%d-%H%M%S")
  46. otime = datetime.datetime.now()
  47. outFileName = 'access_log_'+timestr+'.log' if not file_prefix else file_prefix+'_access_log_'+timestr+'.log'
  48. for case in switch(output_type):
  49. if case('LOG'):
  50. f = open(outFileName,'w')
  51. break
  52. if case('GZ'):
  53. f = gzip.open(outFileName+'.gz','w')
  54. break
  55. if case('CONSOLE'): pass
  56. if case():
  57. f = sys.stdout
  58. response=["200","404","500","301"]
  59. verb=["GET","POST","DELETE","PUT"]
  60. resources=["/list","/wp-content","/wp-admin","/explore","/search/tag/list","/app/main/posts","/posts/posts/explore","/apps/cart.jsp?appID="]
  61. ualist = [faker.firefox, faker.chrome, faker.safari, faker.internet_explorer, faker.opera]
  62. flag = True
  63. while (flag):
  64. if args.sleep:
  65. increment = datetime.timedelta(seconds=args.sleep)
  66. else:
  67. increment = datetime.timedelta(seconds=random.randint(30, 300))
  68. otime += increment
  69. ip = faker.ipv4()
  70. dt = otime.strftime('%d/%b/%Y:%H:%M:%S')
  71. tz = datetime.datetime.now(local).strftime('%z')
  72. vrb = numpy.random.choice(verb,p=[0.6,0.1,0.1,0.2])
  73. uri = random.choice(resources)
  74. if uri.find("apps")>0:
  75. uri += `random.randint(1000,10000)`
  76. resp = numpy.random.choice(response,p=[0.9,0.04,0.02,0.04])
  77. byt = int(random.gauss(5000,50))
  78. referer = faker.uri()
  79. useragent = numpy.random.choice(ualist,p=[0.5,0.3,0.1,0.05,0.05] )()
  80. f.write('%s - - [%s %s] "%s %s HTTP/1.0" %s %s "%s" "%s"\n' % (ip,dt,tz,vrb,uri,resp,byt,referer,useragent))
  81. log_lines = log_lines - 1
  82. flag = False if log_lines == 0 else True
  83. if args.sleep:
  84. time.sleep(args.sleep)