python分析apache访问日志脚本分享


Posted in Python onFebruary 26, 2015
#!/usr/bin/env python
# coding=utf-8
 
#------------------------------------------------------
# Name:     Apache 日志分析脚本
# Purpose:   此脚本只用来分析Apache的访问日志
# Version:   2.0
# Author:    LEO
# Created:   2013-4-26
# Modified:   2013-5-4
# Copyright:  (c) LEO 2013
#------------------------------------------------------
 
import sys
import time
 
#该类是用来打印格式
class displayFormat(object):
 
  def format_size(self,size):
    '''格式化流量单位'''
    KB = 1024     
    MB = 1048576    
    GB = 1073741824  
    TB = 1099511627776
    if size >= TB :
      size = str(size / TB) + 'T'
    elif size < KB :
      size = str(size) + 'B'
    elif size >= GB and size < TB:
      size = str(size / GB) + 'G'
    elif size >= MB and size < GB :
      size = str(size / MB) + 'M'
    else :
      size = str(size / KB) + 'K'
    return size
 
  formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'
 
  def transverse_line(self) :
    '''输出横线'''
    print self.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10)
 
  def head(self):
    '''输出头部信息'''
    print self.formatstring % ('IP','Traffic','Times','Times%','200','404','500','403','302','304','503')
 
  def error_print(self) :
    '''输出错误信息'''
    print
    print 'Usage : ' + sys.argv[0] + ' ApacheLogFilePath [Number]'
    print
    sys.exit(1)
 
  def execut_time(self):
    '''输出脚本执行的时间'''
    print
    print "Script Execution Time: %.3f second" % time.clock()
    print
 
#该类是用来生成主机信息的字典
class hostInfo(object):
   
  host_info = ['200','404','500','302','304','503','403','times','size']
 
  def __init__(self,host):
    self.host = host = {}.fromkeys(self.host_info,0)
 
  def increment(self,status_times_size,is_size):
    '''该方法是用来给host_info中的各个值加1'''
    if status_times_size == 'times':
      self.host['times'] += 1
    elif is_size:
      self.host['size'] = self.host['size'] + status_times_size
    else:
      self.host[status_times_size] += 1
 
  def get_value(self,value):
    '''该方法是取到各个主机信息中对应的值'''
    return self.host[value]
 
#该类是用来分析文件
class fileAnalysis(object):
  def __init__(self):
    '''初始化一个空字典'''
    self.report_dict = {}
    self.total_request_times,self.total_traffic,self.total_200, 
    self.total_404,self.total_500,self.total_403,self.total_302, 
    self.total_304,self.total_503 = 0,0,0,0,0,0,0,0,0
 
  def split_eachline_todict(self,line):
    '''分割文件中的每一行,并返回一个字典'''
    split_line = line.split()
    split_dict = {'remote_host':split_line[0],'status':split_line[-2],'bytes_sent':split_line[-1],}
    return split_dict
 
  def generate_log_report(self,logfile):
    '''读取文件,分析split_eachline_todict方法生成的字典'''
    for line in logfile:
      try:
        line_dict = self.split_eachline_todict(line)
        host = line_dict['remote_host']
        status = line_dict['status']
      except ValueError :
        continue
      except IndexError :
        continue
 
      if host not in self.report_dict :
        host_info_obj = hostInfo(host)
        self.report_dict[host] = host_info_obj
      else :
        host_info_obj = self.report_dict[host]
 
      host_info_obj.increment('times',False)  
      if status in host_info_obj.host_info : 
        host_info_obj.increment(status,False) 
      try:
        bytes_sent = int(line_dict['bytes_sent']) 
      except ValueError:
        bytes_sent = 0
      host_info_obj.increment(bytes_sent,True)
    return self.report_dict
 
  def return_sorted_list(self,true_dict):
    '''计算各个状态次数、流量总量,请求的总次数,并且计算各个状态的总量 并生成一个正真的字典,方便排序'''
    for host_key in true_dict :
      host_value = true_dict[host_key]
      times = host_value.get_value('times') 
      self.total_request_times = self.total_request_times + times 
      size = host_value.get_value('size') 
      self.total_traffic = self.total_traffic + size 
 
      o200 = host_value.get_value('200')
      o404 = host_value.get_value('404')
      o500 = host_value.get_value('500')
      o403 = host_value.get_value('403')
      o302 = host_value.get_value('302')
      o304 = host_value.get_value('304')
      o503 = host_value.get_value('503')
 
      true_dict[host_key] = {'200':o200,'404':o404,'500':o500,'403':o403,'302':o302,'304':o304, 
                  '503':o503,'times':times,'size':size}
 
      self.total_200 = self.total_200 + o200
      self.total_404 = self.total_404 + o404
      self.total_500 = self.total_500 + o500
      self.total_302 = self.total_302 + o302
      self.total_304 = self.total_304 + o304
      self.total_503 = self.total_503 + o503
 
    sorted_list = sorted(true_dict.items(),key=lambda t:(t[1]['times'],t[1]['size']),reverse=True)
    return sorted_list
 
class Main(object):
  def main(self) :
    '''主调函数'''
    display_format = displayFormat()
    arg_length = len(sys.argv)
    if arg_length == 1 :
      display_format.error_print()
    elif arg_length == 2 or arg_length == 3:
      infile_name = sys.argv[1]
      try :
        infile = open(infile_name,'r')
        if arg_length == 3 :
          lines = int(sys.argv[2])
        else :
          lines = 0
      except IOError,e :
        print
        print e
        display_format.error_print()
      except ValueError :
        print
        print "Please Enter A Volid Number !!"
        display_format.error_print()
    else :
      display_format.error_print()
 
    fileAnalysis_obj = fileAnalysis()
    not_true_dict = fileAnalysis_obj.generate_log_report(infile)
    log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)
    total_ip = len(log_report)
    if lines :
      log_report = log_report[0:lines]
    infile.close()
 
    print
    total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic)
    total_request_times = fileAnalysis_obj.total_request_times
    print 'Total IP: %s  Total Traffic: %s  Total Request Times: %d' 
       % (total_ip,total_traffic,total_request_times)
    print
    display_format.head()
    display_format.transverse_line()
 
    for host in log_report :
      times = host[1]['times']
      times_percent = (float(times) / float(fileAnalysis_obj.total_request_times)) * 100
      print display_format.formatstring % (host[0],
                         display_format.format_size(host[1]['size']),
                         times,str(times_percent)[0:5],
                         host[1]['200'],host[1]['404'],
                         host[1]['500'],host[1]['403'],
                         host[1]['302'],host[1]['304'],host[1]['503'])
                         
    if (not lines) or total_ip == lines :
      display_format.transverse_line()
      print display_format.formatstring % (total_ip,total_traffic, 
                         total_request_times,'100%',
                         fileAnalysis_obj.total_200,
                         fileAnalysis_obj.total_404,
                         fileAnalysis_obj.total_500, 
                         fileAnalysis_obj.total_403,
                         fileAnalysis_obj.total_302, 
                         fileAnalysis_obj.total_304,
                         fileAnalysis_obj.total_503)
    display_format.execut_time()
 
if __name__ == '__main__':
  main_obj = Main()
  main_obj.main()
Python 相关文章推荐
Python基于pygame实现的font游戏字体(附源码)
Nov 11 Python
Python编程中实现迭代器的一些技巧小结
Jun 21 Python
Python通过matplotlib绘制动画简单实例
Dec 13 Python
Python logging管理不同级别log打印和存储实例
Jan 19 Python
VSCode下好用的Python插件及配置
Apr 06 Python
django中模板的html自动转意方法
May 27 Python
python 定时器,轮询定时器的实例
Feb 20 Python
5款Python程序员高频使用开发工具推荐
Apr 10 Python
Python单例模式的四种创建方式实例解析
Mar 04 Python
python爬虫使用requests发送post请求示例详解
Aug 05 Python
如何利用python正则表达式匹配版本信息
Dec 09 Python
5个pandas调用函数的方法让数据处理更加灵活自如
Apr 24 Python
Python构造函数及解构函数介绍
Feb 26 #Python
python中的__slots__使用示例
Feb 26 #Python
Python map和reduce函数用法示例
Feb 26 #Python
Python中运行并行任务技巧
Feb 26 #Python
Python通过递归遍历出集合中所有元素的方法
Feb 25 #Python
Python THREADING模块中的JOIN()方法深入理解
Feb 18 #Python
python持久性管理pickle模块详细介绍
Feb 18 #Python
You might like
也谈php网站在线人数统计
2008/04/09 PHP
php foreach、while性能比较
2009/10/15 PHP
探讨多键值cookie(php中cookie存取数组)的详解
2013/06/06 PHP
PHP基于MySQL数据库实现对象持久层的方法
2015/06/17 PHP
php实现购物车功能(上)
2020/07/23 PHP
深入解析PHP的Laravel框架中的event事件操作
2016/03/21 PHP
In Javascript Class, how to call the prototype method.(three method)
2007/01/09 Javascript
Flash对联广告的关闭按钮讨论
2007/01/30 Javascript
js 绑定带参数的事件以及手动触发事件
2010/04/27 Javascript
jQuery中removeAttr()方法用法实例
2015/01/05 Javascript
纯javascript响应式树形菜单效果
2015/11/10 Javascript
深入理解JavaScript中的对象复制(Object Clone)
2016/05/18 Javascript
jQuery的图片轮播插件PgwSlideshow使用详解
2016/08/11 Javascript
jQuery可见性过滤选择器用法示例
2016/09/09 Javascript
js实现表格筛选功能
2017/01/18 Javascript
vue项目tween方法实现返回顶部的示例代码
2018/03/02 Javascript
使用jquery模拟a标签的click事件无法实现跳转的解决
2018/12/04 jQuery
vue路由教程之静态路由
2019/09/03 Javascript
Vue记住滚动条和实现下拉加载的完美方法
2020/07/31 Javascript
[00:56]PWL开团时刻DAY8——追追追追追!
2020/11/09 DOTA
IntelliJ 中配置 Anaconda的过程图解
2020/06/01 Python
Python读取xlsx数据生成图标代码实例
2020/08/12 Python
爱尔兰领先的在线体育用品零售商:theGAAstore
2018/04/16 全球购物
服装设计专业自荐书范文
2013/12/30 职场文书
员工工作表现评语
2014/04/26 职场文书
产品包装策划方案
2014/05/18 职场文书
护士求职信
2014/07/05 职场文书
“四风”问题自我剖析材料思想汇报
2014/09/23 职场文书
党员个人自我剖析材料
2014/10/08 职场文书
农民工工资承诺书大全
2015/05/04 职场文书
2016大学生就业指导课心得体会
2016/01/15 职场文书
vue backtop组件的实现完整代码
2021/04/07 Vue.js
Innodb存储引擎中的后台线程详解
2022/04/03 MySQL
《王国之心》迎来了发售的20周年, 野村哲发布贺图
2022/04/11 其他游戏
使用MybatisPlus打印sql语句
2022/04/22 SQL Server
jdbc中自带MySQL 连接池实践示例
2022/07/23 MySQL