python分析apache访问日志脚本分享


Posted in Python on February 26, 2015
#!/usr/bin/env python
# coding=utf-8
 
#------------------------------------------------------
# Name:     Apache 日志分析脚本
# Purpose:   此脚本只用来分析Apache的访问日志
# Version:   2.0
# Author:    LEO
# Created:   2013-4-26
# Modified:   2013-5-4
# Copyright:  (c) LEO 2013
#------------------------------------------------------
 
import sys
import time
 
# Console output helpers for the report: size formatting, table rows, usage and timing.
class displayFormat(object):
    """Formatting and printing helpers for the per-host report table.

    Every ``print`` call passes exactly one parenthesised argument so the
    class behaves the same under Python 2 and Python 3.
    """

    # Row layout shared by the header, the separator and every data row:
    # IP, Traffic, Times, Times%, then one column per tracked status code.
    formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'

    # Field widths of ``formatstring``; the separator row is built from these
    # so that it can never be wider than the columns it underlines.
    column_widths = (15, 10, 12, 8, 10, 10, 10, 10, 10, 10, 10)

    def format_size(self, size):
        """Return ``size`` (a byte count) as a string with a B/K/M/G/T suffix.

        Values are scaled by powers of 1024 using floor division, so the
        fractional part is dropped (1536 -> '1K'). Anything below 1024,
        including negative numbers, is reported unscaled with a 'B' suffix.
        """
        KB = 1024
        MB = 1048576
        GB = 1073741824
        TB = 1099511627776
        if size < KB:
            return str(size) + 'B'
        # Largest unit first, so the first match is the right one.
        for unit, suffix in ((TB, 'T'), (GB, 'G'), (MB, 'M')):
            if size >= unit:
                return str(size // unit) + suffix
        return str(size // KB) + 'K'

    def transverse_line(self):
        """Print a row of dashes that underlines every column of the table."""
        print(self.formatstring % tuple('-' * width for width in self.column_widths))

    def head(self):
        """Print the column titles of the table."""
        print(self.formatstring % ('IP', 'Traffic', 'Times', 'Times%', '200', '404', '500', '403', '302', '304', '503'))

    def error_print(self):
        """Print the usage message and terminate the process with exit status 1."""
        print('')
        print('Usage : ' + sys.argv[0] + ' ApacheLogFilePath [Number]')
        print('')
        sys.exit(1)

    def execut_time(self):
        """Print the process clock reading as the script execution time."""
        # time.clock() was removed in Python 3.8; fall back to process_time()
        # only when clock is missing so Python 2 keeps its original behaviour.
        clock = getattr(time, 'clock', None) or time.process_time
        print('')
        print("Script Execution Time: %.3f second" % clock())
        print('')
 
# Holds the counters collected for a single remote host.
class hostInfo(object):
    """Counter record for one host: per-status hits, request count and bytes."""

    # Keys of the counter dict: tracked status codes plus 'times' and 'size'.
    host_info = ['200','404','500','302','304','503','403','times','size']

    def __init__(self, host):
        # The host argument is not stored; each instance only keeps its own
        # counter dict with every key starting at zero.
        self.host = dict.fromkeys(self.host_info, 0)

    def increment(self, status_times_size, is_size):
        """Update one counter.

        'times' bumps the request count by one. Otherwise, when ``is_size``
        is true, ``status_times_size`` is added to the byte total; when it
        is false, ``status_times_size`` is a counter key that is bumped by
        one.
        """
        counters = self.host
        if status_times_size == 'times':
            counters['times'] += 1
            return
        if is_size:
            counters['size'] += status_times_size
            return
        counters[status_times_size] += 1

    def get_value(self, value):
        """Return the counter stored under the key ``value``."""
        return self.host[value]
 
# Parses an access log and aggregates per-host and overall statistics.
class fileAnalysis(object):
    """Aggregate access-log lines into per-host counters and overall totals."""

    # Status codes that have a matching total_<code> attribute.
    _STATUS_CODES = ('200', '404', '500', '403', '302', '304', '503')

    def __init__(self):
        """Start with an empty per-host report and all totals at zero."""
        self.report_dict = {}
        self.total_request_times = 0
        self.total_traffic = 0
        self.total_200 = 0
        self.total_404 = 0
        self.total_500 = 0
        self.total_403 = 0
        self.total_302 = 0
        self.total_304 = 0
        self.total_503 = 0

    def split_eachline_todict(self, line):
        """Split one log line on whitespace and return the fields of interest.

        The first token is taken as the remote host and the last two tokens
        as the status code and the byte count.
        NOTE(review): this matches lines that end with "<status> <bytes>";
        a log format with extra trailing fields would be misread - confirm
        the configured LogFormat.

        Raises IndexError when the line has fewer than two tokens.
        """
        fields = line.split()
        return {
            'remote_host': fields[0],
            'status': fields[-2],
            'bytes_sent': fields[-1],
        }

    def generate_log_report(self, logfile):
        """Fold every line of ``logfile`` into ``self.report_dict``.

        ``logfile`` is any iterable of lines. Lines that cannot be split
        into the expected fields are skipped. Returns ``self.report_dict``,
        which maps each remote host to its ``hostInfo`` counters.
        """
        for line in logfile:
            try:
                record = self.split_eachline_todict(line)
            except (ValueError, IndexError):
                continue
            host = record['remote_host']
            status = record['status']

            if host not in self.report_dict:
                self.report_dict[host] = hostInfo(host)
            counters = self.report_dict[host]

            counters.increment('times', False)
            if status in counters.host_info:
                counters.increment(status, False)
            try:
                bytes_sent = int(record['bytes_sent'])
            except ValueError:
                # A non-numeric size field (e.g. '-') counts as zero bytes.
                bytes_sent = 0
            counters.increment(bytes_sent, True)
        return self.report_dict

    def return_sorted_list(self, true_dict):
        """Accumulate the overall totals and return the hosts sorted by activity.

        ``true_dict`` maps host -> object exposing ``get_value(key)``. Each
        value is replaced in place by a plain dict of its counters, and the
        ``total_*`` attributes of this instance are increased by those
        counters. Returns a list of ``(host, counters_dict)`` pairs ordered
        by request count, then traffic, both descending.
        """
        counter_keys = self._STATUS_CODES + ('times', 'size')
        for host_key in true_dict:
            record = true_dict[host_key]
            counts = dict((key, record.get_value(key)) for key in counter_keys)
            true_dict[host_key] = counts

            self.total_request_times += counts['times']
            self.total_traffic += counts['size']
            # Loop over every tracked code so none is left out of the totals
            # (403 was previously never accumulated).
            for code in self._STATUS_CODES:
                total_attr = 'total_' + code
                setattr(self, total_attr, getattr(self, total_attr) + counts[code])

        return sorted(
            true_dict.items(),
            key=lambda item: (item[1]['times'], item[1]['size']),
            reverse=True,
        )
 
class Main(object):
    """Command-line driver: parse arguments, analyse the log, print the report."""

    def main(self):
        """Run the analysis for ``sys.argv`` and print the report table.

        Usage: ``script ApacheLogFilePath [Number]``. The optional number
        limits the output to that many of the most active hosts; 0 or no
        number prints every host. On a wrong argument count, an unreadable
        file or an invalid number, the usage message is printed and the
        process exits with status 1.
        """
        display_format = displayFormat()
        arg_length = len(sys.argv)
        if arg_length not in (2, 3):
            display_format.error_print()
            return

        try:
            infile = open(sys.argv[1], 'r')
        except IOError as e:
            print('')
            print(e)
            display_format.error_print()
            return

        fileAnalysis_obj = fileAnalysis()
        # try/finally closes the file on every path, including the
        # SystemExit raised by error_print().
        try:
            try:
                lines = int(sys.argv[2]) if arg_length == 3 else 0
                if lines < 0:
                    # A negative limit would silently slice from the end of
                    # the list; treat it like any other invalid number.
                    raise ValueError(lines)
            except ValueError:
                print('')
                print("Please Enter A Volid Number !!")
                display_format.error_print()
                return
            not_true_dict = fileAnalysis_obj.generate_log_report(infile)
        finally:
            infile.close()

        log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)
        total_ip = len(log_report)
        if lines:
            log_report = log_report[0:lines]

        print('')
        total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic)
        total_request_times = fileAnalysis_obj.total_request_times
        print('Total IP: %s  Total Traffic: %s  Total Request Times: %d'
              % (total_ip, total_traffic, total_request_times))
        print('')
        display_format.head()
        display_format.transverse_line()

        for host, counters in log_report:
            times = counters['times']
            times_percent = (float(times) / float(total_request_times)) * 100
            print(display_format.formatstring % (
                host,
                display_format.format_size(counters['size']),
                times, str(times_percent)[0:5],
                counters['200'], counters['404'],
                counters['500'], counters['403'],
                counters['302'], counters['304'], counters['503']))

        # Print the totals row whenever every host was listed, which is also
        # the case when the requested limit exceeds the number of hosts.
        if (not lines) or lines >= total_ip:
            display_format.transverse_line()
            print(display_format.formatstring % (
                total_ip, total_traffic,
                total_request_times, '100%',
                fileAnalysis_obj.total_200,
                fileAnalysis_obj.total_404,
                fileAnalysis_obj.total_500,
                fileAnalysis_obj.total_403,
                fileAnalysis_obj.total_302,
                fileAnalysis_obj.total_304,
                fileAnalysis_obj.total_503))
        display_format.execut_time()
 
if __name__ == '__main__':
    # Run the command-line report only when executed as a script.
    Main().main()
Python 相关文章推荐
python设置windows桌面壁纸的实现代码
Jan 28 Python
Python设计模式之享元模式原理与用法实例分析
Jan 11 Python
Python 多维List创建的问题小结
Jan 18 Python
Python numpy中矩阵的基本用法汇总
Feb 12 Python
PyQt QListWidget修改列表项item的行高方法
Jun 20 Python
Python实现二叉搜索树BST的方法示例
Jul 30 Python
Python调用钉钉自定义机器人的实现
Jan 03 Python
python批量处理txt文件的实例代码
Jan 13 Python
Python xlrd模块导入过程及常用操作
Jun 10 Python
Python pip安装第三方库实现过程解析
Jul 09 Python
pdf论文中python画的图Type 3 fonts字体不兼容的解决方案
Apr 24 Python
python模块与C和C++动态库相互调用实现过程示例
Nov 02 Python
Python构造函数及解构函数介绍
Feb 26 #Python
python中的__slots__使用示例
Feb 26 #Python
Python map和reduce函数用法示例
Feb 26 #Python
Python中运行并行任务技巧
Feb 26 #Python
Python通过递归遍历出集合中所有元素的方法
Feb 25 #Python
Python THREADING模块中的JOIN()方法深入理解
Feb 18 #Python
python持久性管理pickle模块详细介绍
Feb 18 #Python
You might like
PHP Ajax实现无刷新附件上传
2016/08/17 PHP
微信推送功能实现方式图文详解
2019/07/12 PHP
jQuery技巧大放送 学习jquery的朋友可以看下
2009/10/14 Javascript
JQuery 选择器 xpath 语法应用
2010/05/13 Javascript
node.js中Socket.IO的进阶使用技巧
2014/11/04 Javascript
JS实现很酷的EMAIL地址添加功能实例
2015/02/28 Javascript
javascript比较两个日期相差天数的方法
2015/07/24 Javascript
JS组件Bootstrap按钮组与下拉按钮详解
2016/05/10 Javascript
H5用户注册表单页 注册模态框!
2016/09/17 Javascript
JS控制TreeView的结点选择
2016/11/11 Javascript
Vue.js结合bootstrap前端实现分页和排序效果
2018/12/29 Javascript
JS实现数组深拷贝的方法分析
2019/03/06 Javascript
JavaScript相等运算符的九条规则示例详解
2019/10/20 Javascript
利用React高阶组件实现一个面包屑导航的示例
2020/08/23 Javascript
wxpython中利用线程防止假死的实现方法
2014/08/11 Python
浅谈Python浅拷贝、深拷贝及引用机制
2016/12/15 Python
解决python读取几千万行的大表内存问题
2018/06/26 Python
Python使用pyserial进行串口通信的实例
2019/07/02 Python
Python实现微信小程序支付功能
2019/07/25 Python
Python脚本操作Excel实现批量替换功能
2019/11/20 Python
利用Python如何制作贪吃蛇及AI版贪吃蛇详解
2020/08/24 Python
全方位了解CSS3的Regions扩展
2015/08/07 HTML / CSS
Nike荷兰官方网站:Nike.com (NL)
2018/04/19 全球购物
prAna官网:瑜伽、旅行和冒险服装
2019/03/10 全球购物
定制别致的瑜伽垫:Sugarmat
2019/06/21 全球购物
献爱心大型公益活动策划方案
2014/09/15 职场文书
科长个人四风问题整改措施思想汇报
2014/10/13 职场文书
2015年七夕爱情寄语
2015/03/24 职场文书
2015夏季作息时间调整通知
2015/04/24 职场文书
防汛通知
2015/04/25 职场文书
仰望星空观后感
2015/06/10 职场文书
体育委员竞选稿
2015/11/21 职场文书
优质服务标语口号
2015/12/26 职场文书
2016年小学教师师德承诺书
2016/03/25 职场文书
Vue2项目中对百度地图的封装使用详解
2022/06/16 Vue.js
SpringBoot详解整合Redis缓存方法
2022/07/15 Java/Android