python分析apache访问日志脚本分享


Posted in Python on February 26, 2015
#!/usr/bin/env python
# coding=utf-8
 
#------------------------------------------------------
# Name:     Apache 日志分析脚本
# Purpose:   此脚本只用来分析Apache的访问日志
# Version:   2.0
# Author:    LEO
# Created:   2013-4-26
# Modified:   2013-5-4
# Copyright:  (c) LEO 2013
#------------------------------------------------------
 
import sys
import time
 
#该类是用来打印格式
class displayFormat(object):
  """Formatting helpers for the report this script prints to standard output.

  Every line is emitted with ``print(...)`` called on a single argument, a
  form that produces the same output under Python 2 and Python 3.
  """

  # Row template shared by the header, the rule line and every data row:
  # IP, traffic, request count, request share, then one column per status.
  formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'

  # Field widths of formatstring, in column order. The rule line is drawn
  # from these so it always has the same length as the header.
  _column_widths = (15, 10, 12, 8, 10, 10, 10, 10, 10, 10, 10)

  def format_size(self, size):
    """Return a byte count as a short string with a unit suffix.

    The value is divided by the largest unit (K, M, G, T; powers of 1024)
    that does not exceed it and the fractional part is dropped, e.g.
    1536 -> '1K'. Anything below 1024 is returned unchanged with a 'B'
    suffix.

    size -- number of bytes; the callers in this script pass integers.
    """
    KB = 1024
    MB = 1048576
    GB = 1073741824
    TB = 1099511627776
    if size < KB:
      return str(size) + 'B'
    # Floor division keeps the whole-number result that integer '/' gives on
    # Python 2, instead of a float on Python 3.
    for unit, suffix in ((TB, 'T'), (GB, 'G'), (MB, 'M')):
      if size >= unit:
        return str(size // unit) + suffix
    return str(size // KB) + 'K'

  def transverse_line(self):
    """Print a rule line of dashes, one run per column of formatstring."""
    print(self.formatstring % tuple('-' * width for width in self._column_widths))

  def head(self):
    """Print the column titles of the report table."""
    print(self.formatstring % ('IP', 'Traffic', 'Times', 'Times%', '200', '404', '500', '403', '302', '304', '503'))

  def error_print(self):
    """Print the usage message, then terminate the process with status 1."""
    print('')
    print('Usage : ' + sys.argv[0] + ' ApacheLogFilePath [Number]')
    print('')
    sys.exit(1)

  def execut_time(self):
    """Print the processor time consumed by the script so far."""
    # time.clock() was removed in Python 3.8; time.process_time() is its
    # replacement for CPU time. Fall back to clock() on interpreters that
    # predate process_time().
    cpu_clock = getattr(time, 'process_time', None) or time.clock
    print('')
    print("Script Execution Time: %.3f second" % cpu_clock())
    print('')
 
#该类是用来生成主机信息的字典
class hostInfo(object):
  """Counters kept for one remote host.

  Each instance owns a dict (``self.host``) with one entry per name in
  ``host_info``: the tracked status codes, 'times' (request count) and
  'size' (accumulated bytes). Every entry starts at 0.
  """

  # Names of all counters held per host.
  host_info = ['200','404','500','302','304','503','403','times','size']

  def __init__(self, host):
    """Create the zeroed counter dict; the ``host`` argument is not stored."""
    self.host = dict.fromkeys(self.host_info, 0)

  def increment(self, status_times_size, is_size):
    """Update one counter.

    status_times_size -- 'times' to count one request, a counter name to
                         bump that counter by one, or (when is_size is
                         true) an amount to add to 'size'.
    is_size           -- true when status_times_size is an amount to add to
                         the 'size' counter.
    """
    counters = self.host
    # 'times' is checked first, regardless of is_size.
    if status_times_size == 'times':
      counters['times'] += 1
      return
    if is_size:
      counters['size'] = counters['size'] + status_times_size
      return
    counters[status_times_size] += 1

  def get_value(self, value):
    """Return the current value of the counter named ``value``."""
    return self.host[value]
 
#该类是用来分析文件
class fileAnalysis(object):
  """Aggregate an access log into per-host counters and overall totals."""

  def __init__(self):
    """Start with an empty per-host report and every overall total at zero."""
    self.report_dict = {}
    # One assignment per total: the original tuple assignment depended on
    # line continuations and is easy to break.
    self.total_request_times = 0
    self.total_traffic = 0
    self.total_200 = 0
    self.total_404 = 0
    self.total_500 = 0
    self.total_403 = 0
    self.total_302 = 0
    self.total_304 = 0
    self.total_503 = 0

  def split_eachline_todict(self, line):
    """Split one log line on whitespace and pick out the fields used here.

    Returns a dict with 'remote_host' (first field), 'status' (second to
    last field) and 'bytes_sent' (last field), all as strings.
    Raises IndexError when the line has fewer than two fields.
    """
    fields = line.split()
    return {
      'remote_host': fields[0],
      'status': fields[-2],
      'bytes_sent': fields[-1],
    }

  def generate_log_report(self, logfile):
    """Read every line of ``logfile`` and update the per-host counters.

    logfile -- an iterable of log lines, e.g. an open file.

    Lines that cannot be split into the needed fields are skipped. For every
    other line the host's request count goes up by one, the status counter
    goes up by one when the status is one the host object tracks, and the
    byte count is added to the host's traffic (a non-numeric byte field
    counts as 0).

    Returns ``self.report_dict``, mapping each remote host to its hostInfo
    object.
    """
    for line in logfile:
      try:
        line_dict = self.split_eachline_todict(line)
      except (ValueError, IndexError):
        # Blank or truncated line: nothing to count.
        continue
      host = line_dict['remote_host']
      status = line_dict['status']

      host_info_obj = self.report_dict.get(host)
      if host_info_obj is None:
        host_info_obj = hostInfo(host)
        self.report_dict[host] = host_info_obj

      host_info_obj.increment('times', False)
      if status in host_info_obj.host_info:
        host_info_obj.increment(status, False)
      try:
        bytes_sent = int(line_dict['bytes_sent'])
      except ValueError:
        bytes_sent = 0
      host_info_obj.increment(bytes_sent, True)
    return self.report_dict

  def return_sorted_list(self, true_dict):
    """Add up the overall totals and return the hosts sorted by activity.

    true_dict -- mapping of host to an object exposing ``get_value(name)``
                 for each status code, 'times' and 'size'.

    Side effects: every value in ``true_dict`` is replaced by a plain dict of
    its counters, and the ``total_*`` attributes of this object are
    increased by the counters of every host.

    Returns a list of ``(host, counters_dict)`` pairs ordered by request
    count, then traffic, both descending.
    """
    # Only existing keys are reassigned, so the dict does not change size
    # while it is being iterated.
    for host_key in true_dict:
      host_value = true_dict[host_key]
      times = host_value.get_value('times')
      size = host_value.get_value('size')
      o200 = host_value.get_value('200')
      o404 = host_value.get_value('404')
      o500 = host_value.get_value('500')
      o403 = host_value.get_value('403')
      o302 = host_value.get_value('302')
      o304 = host_value.get_value('304')
      o503 = host_value.get_value('503')

      true_dict[host_key] = {'200': o200, '404': o404, '500': o500,
                             '403': o403, '302': o302, '304': o304,
                             '503': o503, 'times': times, 'size': size}

      self.total_request_times += times
      self.total_traffic += size
      self.total_200 += o200
      self.total_404 += o404
      self.total_500 += o500
      # The 403 total was never accumulated before, so the summary row
      # always reported 0 for it.
      self.total_403 += o403
      self.total_302 += o302
      self.total_304 += o304
      self.total_503 += o503

    return sorted(true_dict.items(),
                  key=lambda item: (item[1]['times'], item[1]['size']),
                  reverse=True)
 
class Main(object):
  """Command-line entry point: parse arguments, analyse the log, print the report."""

  def main(self):
    """Run the analysis for the log file named on the command line.

    Command line: ``script ApacheLogFilePath [Number]``. ``Number`` limits
    the report to that many hosts; 0 or no value lists every host. Any
    usage error (wrong argument count, unreadable file, invalid number)
    prints a message and ends the process through
    ``displayFormat.error_print``.
    """
    display_format = displayFormat()
    arg_length = len(sys.argv)
    if arg_length not in (2, 3):
      display_format.error_print()

    infile_name = sys.argv[1]
    try:
      infile = open(infile_name, 'r')
    except IOError as e:
      print('')
      print(e)
      display_format.error_print()

    fileAnalysis_obj = fileAnalysis()
    # The with-block closes the file on every path, including the exit
    # triggered by an invalid number.
    with infile:
      lines = 0
      if arg_length == 3:
        try:
          lines = int(sys.argv[2])
          # A negative limit would silently slice hosts off the end of the
          # report; treat it as invalid instead.
          if lines < 0:
            raise ValueError(sys.argv[2])
        except ValueError:
          print('')
          print("Please Enter A Valid Number !!")
          display_format.error_print()
      not_true_dict = fileAnalysis_obj.generate_log_report(infile)

    log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)
    total_ip = len(log_report)
    if lines:
      log_report = log_report[0:lines]

    print('')
    total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic)
    total_request_times = fileAnalysis_obj.total_request_times
    print('Total IP: %s  Total Traffic: %s  Total Request Times: %d'
          % (total_ip, total_traffic, total_request_times))
    print('')
    display_format.head()
    display_format.transverse_line()

    for host, counters in log_report:
      times = counters['times']
      times_percent = (float(times) / float(total_request_times)) * 100
      print(display_format.formatstring % (host,
                                           display_format.format_size(counters['size']),
                                           times, str(times_percent)[0:5],
                                           counters['200'], counters['404'],
                                           counters['500'], counters['403'],
                                           counters['302'], counters['304'],
                                           counters['503']))

    # The totals row covers every host, so print it only when the listing
    # was not truncated, which includes a limit larger than the host count.
    if (not lines) or lines >= total_ip:
      display_format.transverse_line()
      print(display_format.formatstring % (total_ip, total_traffic,
                                           total_request_times, '100%',
                                           fileAnalysis_obj.total_200,
                                           fileAnalysis_obj.total_404,
                                           fileAnalysis_obj.total_500,
                                           fileAnalysis_obj.total_403,
                                           fileAnalysis_obj.total_302,
                                           fileAnalysis_obj.total_304,
                                           fileAnalysis_obj.total_503))
    display_format.execut_time()
 
if __name__ == '__main__':
  # Run the report only when executed as a script, not when imported.
  Main().main()
Python 相关文章推荐
python计数排序和基数排序算法实例
Apr 25 Python
Python简单计算给定某一年的某一天是星期几示例
Jun 27 Python
python组合无重复三位数的实例
Nov 13 Python
Python完成哈夫曼树编码过程及原理详解
Jul 29 Python
python如何获取apk的packagename和activity
Jan 10 Python
python实现单张图像拼接与批量图片拼接
Mar 23 Python
使用Python将Exception异常错误堆栈信息写入日志文件
Apr 08 Python
Python pandas 列转行操作详解(类似hive中explode方法)
May 18 Python
在keras里面实现计算f1-score的代码
Jun 15 Python
python和JavaScript哪个容易上手
Jun 23 Python
django 实现后台从富文本提取纯文本
Jul 02 Python
python如何运行js语句
Sep 09 Python
Python构造函数及解构函数介绍
Feb 26 #Python
python中的__slots__使用示例
Feb 26 #Python
Python map和reduce函数用法示例
Feb 26 #Python
Python中运行并行任务技巧
Feb 26 #Python
Python通过递归遍历出集合中所有元素的方法
Feb 25 #Python
Python THREADING模块中的JOIN()方法深入理解
Feb 18 #Python
python持久性管理pickle模块详细介绍
Feb 18 #Python
You might like
解决CodeIgniter伪静态失效
2014/06/09 PHP
PHP使用Pear发送邮件(Windows环境)
2016/01/05 PHP
js实现动态添加、删除行、onkeyup表格求和示例
2013/08/18 Javascript
raphael.js绘制中国地图 地图绘制方法
2014/02/12 Javascript
jQuery实现自动与手动切换的滚动新闻特效代码分享
2015/08/27 Javascript
AngularJS中实现显示或隐藏动画效果的方式总结
2015/12/31 Javascript
全面总结Javascript对数组对象的各种操作
2017/01/22 Javascript
jQuery实现简单漂亮的Nav导航菜单效果
2017/03/29 jQuery
Bootstrap.css与layDate日期选择样式起冲突的解决办法
2017/04/07 Javascript
Angular 4.x中表单Reactive Forms详解
2017/04/25 Javascript
使用ES6语法重构React代码详解
2017/05/09 Javascript
浅谈JavaScript的innerWidth与innerHeight
2017/10/12 Javascript
打通前后端构建一个Vue+Express的开发环境
2018/07/17 Javascript
浅谈针对Vue相同路由不同参数的刷新问题
2018/09/29 Javascript
原生JS forEach()和map()遍历的区别、兼容写法及jQuery $.each、$.map遍历操作
2019/02/27 jQuery
vue通过video.js解决m3u8视频播放格式的方法
2019/07/30 Javascript
JavaScript数值类型知识汇总
2019/11/17 Javascript
使用vue cli4.x搭建vue项目的过程详解
2020/05/08 Javascript
[02:33]2014DOTA2 TI每日综述 LGD涉险晋级DK闯入胜者组
2014/07/14 DOTA
python访问mysql数据库的实现方法(2则示例)
2016/01/06 Python
Python迭代器和生成器定义与用法示例
2018/02/10 Python
使用django-crontab实现定时任务的示例
2018/02/26 Python
最新自我评价范文
2013/11/16 职场文书
应届生自荐信范文
2014/02/21 职场文书
2014年公司迎新年活动方案
2014/02/24 职场文书
行政人事专员岗位职责
2014/03/05 职场文书
新教师培训方案
2014/06/08 职场文书
小学生感恩老师演讲稿
2014/08/28 职场文书
开展批评与自我批评发言稿
2014/10/16 职场文书
大学生实习推荐信
2015/03/27 职场文书
治理商业贿赂工作总结
2015/08/10 职场文书
PyQt5爬取12306车票信息程序的实现
2021/05/14 Python
解读Vue组件注册方式
2021/05/15 Vue.js
JS实现扫雷项目总结
2021/05/19 Javascript
Win11如何启用启动修复 ? Win11执行启动修复的三种方法
2022/04/08 数码科技
MyBatis在注解上使用动态SQL方式(@select使用if)
2022/07/07 Java/Android