python分析apache访问日志脚本分享


Posted in Python onFebruary 26, 2015
#!/usr/bin/env python
# coding=utf-8
 
#------------------------------------------------------
# Name:     Apache 日志分析脚本
# Purpose:   此脚本只用来分析Apache的访问日志
# Version:   2.0
# Author:    LEO
# Created:   2013-4-26
# Modified:   2013-5-4
# Copyright:  (c) LEO 2013
#------------------------------------------------------
 
import sys
import time
 
#该类是用来打印格式
class displayFormat(object):
 
  def format_size(self,size):
    '''格式化流量单位'''
    KB = 1024     
    MB = 1048576    
    GB = 1073741824  
    TB = 1099511627776
    if size >= TB :
      size = str(size / TB) + 'T'
    elif size < KB :
      size = str(size) + 'B'
    elif size >= GB and size < TB:
      size = str(size / GB) + 'G'
    elif size >= MB and size < GB :
      size = str(size / MB) + 'M'
    else :
      size = str(size / KB) + 'K'
    return size
 
  formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'
 
  def transverse_line(self) :
    '''输出横线'''
    print self.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10)
 
  def head(self):
    '''输出头部信息'''
    print self.formatstring % ('IP','Traffic','Times','Times%','200','404','500','403','302','304','503')
 
  def error_print(self) :
    '''输出错误信息'''
    print
    print 'Usage : ' + sys.argv[0] + ' ApacheLogFilePath [Number]'
    print
    sys.exit(1)
 
  def execut_time(self):
    '''输出脚本执行的时间'''
    print
    print "Script Execution Time: %.3f second" % time.clock()
    print
 
#该类是用来生成主机信息的字典
class hostInfo(object):
   
  host_info = ['200','404','500','302','304','503','403','times','size']
 
  def __init__(self,host):
    self.host = host = {}.fromkeys(self.host_info,0)
 
  def increment(self,status_times_size,is_size):
    '''该方法是用来给host_info中的各个值加1'''
    if status_times_size == 'times':
      self.host['times'] += 1
    elif is_size:
      self.host['size'] = self.host['size'] + status_times_size
    else:
      self.host[status_times_size] += 1
 
  def get_value(self,value):
    '''该方法是取到各个主机信息中对应的值'''
    return self.host[value]
 
#该类是用来分析文件
class fileAnalysis(object):
  def __init__(self):
    '''初始化一个空字典'''
    self.report_dict = {}
    self.total_request_times,self.total_traffic,self.total_200, 
    self.total_404,self.total_500,self.total_403,self.total_302, 
    self.total_304,self.total_503 = 0,0,0,0,0,0,0,0,0
 
  def split_eachline_todict(self,line):
    '''分割文件中的每一行,并返回一个字典'''
    split_line = line.split()
    split_dict = {'remote_host':split_line[0],'status':split_line[-2],'bytes_sent':split_line[-1],}
    return split_dict
 
  def generate_log_report(self,logfile):
    '''读取文件,分析split_eachline_todict方法生成的字典'''
    for line in logfile:
      try:
        line_dict = self.split_eachline_todict(line)
        host = line_dict['remote_host']
        status = line_dict['status']
      except ValueError :
        continue
      except IndexError :
        continue
 
      if host not in self.report_dict :
        host_info_obj = hostInfo(host)
        self.report_dict[host] = host_info_obj
      else :
        host_info_obj = self.report_dict[host]
 
      host_info_obj.increment('times',False)  
      if status in host_info_obj.host_info : 
        host_info_obj.increment(status,False) 
      try:
        bytes_sent = int(line_dict['bytes_sent']) 
      except ValueError:
        bytes_sent = 0
      host_info_obj.increment(bytes_sent,True)
    return self.report_dict
 
  def return_sorted_list(self,true_dict):
    '''计算各个状态次数、流量总量,请求的总次数,并且计算各个状态的总量 并生成一个正真的字典,方便排序'''
    for host_key in true_dict :
      host_value = true_dict[host_key]
      times = host_value.get_value('times') 
      self.total_request_times = self.total_request_times + times 
      size = host_value.get_value('size') 
      self.total_traffic = self.total_traffic + size 
 
      o200 = host_value.get_value('200')
      o404 = host_value.get_value('404')
      o500 = host_value.get_value('500')
      o403 = host_value.get_value('403')
      o302 = host_value.get_value('302')
      o304 = host_value.get_value('304')
      o503 = host_value.get_value('503')
 
      true_dict[host_key] = {'200':o200,'404':o404,'500':o500,'403':o403,'302':o302,'304':o304, 
                  '503':o503,'times':times,'size':size}
 
      self.total_200 = self.total_200 + o200
      self.total_404 = self.total_404 + o404
      self.total_500 = self.total_500 + o500
      self.total_302 = self.total_302 + o302
      self.total_304 = self.total_304 + o304
      self.total_503 = self.total_503 + o503
 
    sorted_list = sorted(true_dict.items(),key=lambda t:(t[1]['times'],t[1]['size']),reverse=True)
    return sorted_list
 
class Main(object):
  def main(self) :
    '''主调函数'''
    display_format = displayFormat()
    arg_length = len(sys.argv)
    if arg_length == 1 :
      display_format.error_print()
    elif arg_length == 2 or arg_length == 3:
      infile_name = sys.argv[1]
      try :
        infile = open(infile_name,'r')
        if arg_length == 3 :
          lines = int(sys.argv[2])
        else :
          lines = 0
      except IOError,e :
        print
        print e
        display_format.error_print()
      except ValueError :
        print
        print "Please Enter A Volid Number !!"
        display_format.error_print()
    else :
      display_format.error_print()
 
    fileAnalysis_obj = fileAnalysis()
    not_true_dict = fileAnalysis_obj.generate_log_report(infile)
    log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)
    total_ip = len(log_report)
    if lines :
      log_report = log_report[0:lines]
    infile.close()
 
    print
    total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic)
    total_request_times = fileAnalysis_obj.total_request_times
    print 'Total IP: %s  Total Traffic: %s  Total Request Times: %d' 
       % (total_ip,total_traffic,total_request_times)
    print
    display_format.head()
    display_format.transverse_line()
 
    for host in log_report :
      times = host[1]['times']
      times_percent = (float(times) / float(fileAnalysis_obj.total_request_times)) * 100
      print display_format.formatstring % (host[0],
                         display_format.format_size(host[1]['size']),
                         times,str(times_percent)[0:5],
                         host[1]['200'],host[1]['404'],
                         host[1]['500'],host[1]['403'],
                         host[1]['302'],host[1]['304'],host[1]['503'])
                         
    if (not lines) or total_ip == lines :
      display_format.transverse_line()
      print display_format.formatstring % (total_ip,total_traffic, 
                         total_request_times,'100%',
                         fileAnalysis_obj.total_200,
                         fileAnalysis_obj.total_404,
                         fileAnalysis_obj.total_500, 
                         fileAnalysis_obj.total_403,
                         fileAnalysis_obj.total_302, 
                         fileAnalysis_obj.total_304,
                         fileAnalysis_obj.total_503)
    display_format.execut_time()
 
if __name__ == '__main__':
  main_obj = Main()
  main_obj.main()
Python 相关文章推荐
Python中for循环详解
Jan 17 Python
python简单文本处理的方法
Jul 10 Python
Python批量创建迅雷任务及创建多个文件
Feb 13 Python
速记Python布尔值
Nov 09 Python
Pandas 合并多个Dataframe(merge,concat)的方法
Jun 08 Python
python使用PyQt5的简单方法
Feb 27 Python
python中类的输出或类的实例输出为这种形式的原因
Aug 12 Python
pygame实现五子棋游戏
Oct 29 Python
tensorboard实现同时显示训练曲线和测试曲线
Jan 21 Python
Django实现celery定时任务过程解析
Apr 21 Python
如何利用pycharm进行代码更新比较
Nov 04 Python
python 爬虫爬取京东ps4售卖情况
Dec 18 Python
Python构造函数及解构函数介绍
Feb 26 #Python
python中的__slots__使用示例
Feb 26 #Python
Python map和reduce函数用法示例
Feb 26 #Python
Python中运行并行任务技巧
Feb 26 #Python
Python通过递归遍历出集合中所有元素的方法
Feb 25 #Python
Python THREADING模块中的JOIN()方法深入理解
Feb 18 #Python
python持久性管理pickle模块详细介绍
Feb 18 #Python
You might like
鸡肋的PHP单例模式应用详解
2013/06/03 PHP
PHP IE中下载附件问题解决方法
2014/01/07 PHP
PHP基于接口技术实现简单的多态应用完整实例
2017/04/26 PHP
jQuery '行 4954 错误: 不支持该属性或方法' 的问题解决方法
2011/01/19 Javascript
Jquery中的CheckBox、RadioButton、DropDownList的取值赋值实现代码
2011/10/12 Javascript
JavaScript 操作table,可以新增行和列并且隔一行换背景色代码分享
2013/07/05 Javascript
使用js简单实现了tree树菜单
2013/11/20 Javascript
javascript模拟map输出与去除重复项的方法
2015/02/09 Javascript
总结Javascript中的隐式类型转换
2016/08/24 Javascript
jQuery实现手机上输入后隐藏键盘功能
2017/01/04 Javascript
原生js实现放大镜特效
2017/03/08 Javascript
jquery图片放大镜效果
2017/06/23 jQuery
JS实现给json数组动态赋值的方法示例
2020/03/19 Javascript
JavaScript 用fetch 实现异步下载文件功能
2017/07/21 Javascript
Vue组件库发布到npm详解
2018/02/17 Javascript
编写React组件项目实践分析
2018/03/04 Javascript
LayerClose弹窗关闭刷新方法
2018/08/17 Javascript
vue实现pdf导出解决生成canvas模糊等问题(推荐)
2018/10/18 Javascript
extract-text-webpack-plugin用法详解
2019/02/14 Javascript
Vue+Express实现登录注销功能的实例代码
2019/05/05 Javascript
[03:40]DOTA2英雄梦之声_第01期_炼金术士
2014/06/23 DOTA
python验证码识别教程之利用投影法、连通域法分割图片
2018/06/04 Python
python和opencv实现抠图
2018/07/18 Python
python对视频画框标记后保存的方法
2018/12/07 Python
python3实现钉钉消息推送的方法示例
2019/03/14 Python
Pythony运维入门之Socket网络编程详解
2019/04/15 Python
使用Pandas对数据进行筛选和排序的实现
2019/07/29 Python
python web框架 django wsgi原理解析
2019/08/20 Python
Python爬虫爬取博客实现可视化过程解析
2020/06/29 Python
Python 整行读取文本方法并去掉readlines换行\n操作
2020/09/03 Python
英国领先的露营和露营车品牌之一:OLPRO
2019/08/06 全球购物
综合测评自我鉴定
2013/10/08 职场文书
2014年教师节座谈会发言稿
2014/09/10 职场文书
山东省召开党的群众路线教育实践活动总结大会新闻稿
2014/10/21 职场文书
2014年保洁工作总结
2014/11/24 职场文书
如何书写读后感?(附范文)
2019/07/26 职场文书