python分析apache访问日志脚本分享


Posted in Python onFebruary 26, 2015
#!/usr/bin/env python
# coding=utf-8
 
#------------------------------------------------------
# Name:     Apache 日志分析脚本
# Purpose:   此脚本只用来分析Apache的访问日志
# Version:   2.0
# Author:    LEO
# Created:   2013-4-26
# Modified:   2013-5-4
# Copyright:  (c) LEO 2013
#------------------------------------------------------
 
import sys
import time
 
#该类是用来打印格式
class displayFormat(object):
 
  def format_size(self,size):
    '''格式化流量单位'''
    KB = 1024     
    MB = 1048576    
    GB = 1073741824  
    TB = 1099511627776
    if size >= TB :
      size = str(size / TB) + 'T'
    elif size < KB :
      size = str(size) + 'B'
    elif size >= GB and size < TB:
      size = str(size / GB) + 'G'
    elif size >= MB and size < GB :
      size = str(size / MB) + 'M'
    else :
      size = str(size / KB) + 'K'
    return size
 
  formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s'
 
  def transverse_line(self) :
    '''输出横线'''
    print self.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10)
 
  def head(self):
    '''输出头部信息'''
    print self.formatstring % ('IP','Traffic','Times','Times%','200','404','500','403','302','304','503')
 
  def error_print(self) :
    '''输出错误信息'''
    print
    print 'Usage : ' + sys.argv[0] + ' ApacheLogFilePath [Number]'
    print
    sys.exit(1)
 
  def execut_time(self):
    '''输出脚本执行的时间'''
    print
    print "Script Execution Time: %.3f second" % time.clock()
    print
 
#该类是用来生成主机信息的字典
class hostInfo(object):
   
  host_info = ['200','404','500','302','304','503','403','times','size']
 
  def __init__(self,host):
    self.host = host = {}.fromkeys(self.host_info,0)
 
  def increment(self,status_times_size,is_size):
    '''该方法是用来给host_info中的各个值加1'''
    if status_times_size == 'times':
      self.host['times'] += 1
    elif is_size:
      self.host['size'] = self.host['size'] + status_times_size
    else:
      self.host[status_times_size] += 1
 
  def get_value(self,value):
    '''该方法是取到各个主机信息中对应的值'''
    return self.host[value]
 
#该类是用来分析文件
class fileAnalysis(object):
  def __init__(self):
    '''初始化一个空字典'''
    self.report_dict = {}
    self.total_request_times,self.total_traffic,self.total_200, 
    self.total_404,self.total_500,self.total_403,self.total_302, 
    self.total_304,self.total_503 = 0,0,0,0,0,0,0,0,0
 
  def split_eachline_todict(self,line):
    '''分割文件中的每一行,并返回一个字典'''
    split_line = line.split()
    split_dict = {'remote_host':split_line[0],'status':split_line[-2],'bytes_sent':split_line[-1],}
    return split_dict
 
  def generate_log_report(self,logfile):
    '''读取文件,分析split_eachline_todict方法生成的字典'''
    for line in logfile:
      try:
        line_dict = self.split_eachline_todict(line)
        host = line_dict['remote_host']
        status = line_dict['status']
      except ValueError :
        continue
      except IndexError :
        continue
 
      if host not in self.report_dict :
        host_info_obj = hostInfo(host)
        self.report_dict[host] = host_info_obj
      else :
        host_info_obj = self.report_dict[host]
 
      host_info_obj.increment('times',False)  
      if status in host_info_obj.host_info : 
        host_info_obj.increment(status,False) 
      try:
        bytes_sent = int(line_dict['bytes_sent']) 
      except ValueError:
        bytes_sent = 0
      host_info_obj.increment(bytes_sent,True)
    return self.report_dict
 
  def return_sorted_list(self,true_dict):
    '''计算各个状态次数、流量总量,请求的总次数,并且计算各个状态的总量 并生成一个正真的字典,方便排序'''
    for host_key in true_dict :
      host_value = true_dict[host_key]
      times = host_value.get_value('times') 
      self.total_request_times = self.total_request_times + times 
      size = host_value.get_value('size') 
      self.total_traffic = self.total_traffic + size 
 
      o200 = host_value.get_value('200')
      o404 = host_value.get_value('404')
      o500 = host_value.get_value('500')
      o403 = host_value.get_value('403')
      o302 = host_value.get_value('302')
      o304 = host_value.get_value('304')
      o503 = host_value.get_value('503')
 
      true_dict[host_key] = {'200':o200,'404':o404,'500':o500,'403':o403,'302':o302,'304':o304, 
                  '503':o503,'times':times,'size':size}
 
      self.total_200 = self.total_200 + o200
      self.total_404 = self.total_404 + o404
      self.total_500 = self.total_500 + o500
      self.total_302 = self.total_302 + o302
      self.total_304 = self.total_304 + o304
      self.total_503 = self.total_503 + o503
 
    sorted_list = sorted(true_dict.items(),key=lambda t:(t[1]['times'],t[1]['size']),reverse=True)
    return sorted_list
 
class Main(object):
  def main(self) :
    '''主调函数'''
    display_format = displayFormat()
    arg_length = len(sys.argv)
    if arg_length == 1 :
      display_format.error_print()
    elif arg_length == 2 or arg_length == 3:
      infile_name = sys.argv[1]
      try :
        infile = open(infile_name,'r')
        if arg_length == 3 :
          lines = int(sys.argv[2])
        else :
          lines = 0
      except IOError,e :
        print
        print e
        display_format.error_print()
      except ValueError :
        print
        print "Please Enter A Volid Number !!"
        display_format.error_print()
    else :
      display_format.error_print()
 
    fileAnalysis_obj = fileAnalysis()
    not_true_dict = fileAnalysis_obj.generate_log_report(infile)
    log_report = fileAnalysis_obj.return_sorted_list(not_true_dict)
    total_ip = len(log_report)
    if lines :
      log_report = log_report[0:lines]
    infile.close()
 
    print
    total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic)
    total_request_times = fileAnalysis_obj.total_request_times
    print 'Total IP: %s  Total Traffic: %s  Total Request Times: %d' 
       % (total_ip,total_traffic,total_request_times)
    print
    display_format.head()
    display_format.transverse_line()
 
    for host in log_report :
      times = host[1]['times']
      times_percent = (float(times) / float(fileAnalysis_obj.total_request_times)) * 100
      print display_format.formatstring % (host[0],
                         display_format.format_size(host[1]['size']),
                         times,str(times_percent)[0:5],
                         host[1]['200'],host[1]['404'],
                         host[1]['500'],host[1]['403'],
                         host[1]['302'],host[1]['304'],host[1]['503'])
                         
    if (not lines) or total_ip == lines :
      display_format.transverse_line()
      print display_format.formatstring % (total_ip,total_traffic, 
                         total_request_times,'100%',
                         fileAnalysis_obj.total_200,
                         fileAnalysis_obj.total_404,
                         fileAnalysis_obj.total_500, 
                         fileAnalysis_obj.total_403,
                         fileAnalysis_obj.total_302, 
                         fileAnalysis_obj.total_304,
                         fileAnalysis_obj.total_503)
    display_format.execut_time()
 
if __name__ == '__main__':
  main_obj = Main()
  main_obj.main()
Python 相关文章推荐
Python中处理unchecked未捕获异常实例
Jan 17 Python
Python+django实现简单的文件上传
Aug 17 Python
Django 连接sql server数据库的方法
Jun 30 Python
Django中使用 Closure Table 储存无限分级数据
Jun 06 Python
python中while和for的区别总结
Jun 28 Python
Python 一键获取百度网盘提取码的方法
Aug 01 Python
20行Python代码实现视频字符化功能
Apr 13 Python
Python稀疏矩阵及参数保存代码实现
Apr 18 Python
Python基于network模块制作电影人物关系图
Jun 19 Python
详解用python -m http.server搭一个简易的本地局域网
Sep 24 Python
python中把元组转换为namedtuple方法
Dec 09 Python
Python如何加载模型并查看网络
Jul 15 Python
Python构造函数及解构函数介绍
Feb 26 #Python
python中的__slots__使用示例
Feb 26 #Python
Python map和reduce函数用法示例
Feb 26 #Python
Python中运行并行任务技巧
Feb 26 #Python
Python通过递归遍历出集合中所有元素的方法
Feb 25 #Python
Python THREADING模块中的JOIN()方法深入理解
Feb 18 #Python
python持久性管理pickle模块详细介绍
Feb 18 #Python
You might like
一道求$b相对于$a的相对路径的php代码
2010/08/08 PHP
PHP编程最快明白(第一讲 软件环境和准备工作)
2010/10/25 PHP
PHP的Socket网络编程入门指引
2015/08/11 PHP
PHP 二维数组和三维数组的过滤
2016/03/16 PHP
php7安装mongoDB扩展的方法分析
2017/08/02 PHP
模仿JQuery sortable效果 代码有错但值得看看
2009/11/05 Javascript
从阶乘函数对比Javascript和C#的异同
2012/05/31 Javascript
jquery悬浮提示框完整实例
2016/01/13 Javascript
bootstrap datetimepicker日期插件使用方法
2017/01/13 Javascript
JS+HTML5实现上传图片预览效果完整实例【测试可用】
2017/04/20 Javascript
微信小程序 input表单与redio及下拉列表的使用实例
2017/09/20 Javascript
利用pm2部署多个node.js项目的配置教程
2017/10/22 Javascript
微信小程序时间轴实现方法示例
2019/01/14 Javascript
一秒学会微信小程序制作table表格
2019/02/14 Javascript
[47:31]完美世界DOTA2联赛PWL S3 INK ICE vs DLG 第一场 12.12
2020/12/16 DOTA
python简单猜数游戏实例
2015/07/09 Python
pycharm 将django中多个app放到同个文件夹apps的处理方法
2018/05/30 Python
基于python实现的百度新歌榜、热歌榜下载器(附代码)
2019/08/05 Python
Python使用matplotlib绘制三维参数曲线操作示例
2019/09/10 Python
Python谱减法语音降噪实例
2019/12/18 Python
使用python实现希尔、计数、基数基础排序的代码
2019/12/25 Python
python同义词替换的实现(jieba分词)
2020/01/21 Python
python实现将range()函数生成的数字存储在一个列表中
2020/04/02 Python
PyTorch-GPU加速实例
2020/06/23 Python
pycharm2020.2 配置使用的方法详解
2020/09/16 Python
requests在python中发送请求的实例讲解
2021/02/17 Python
CSS3 特效范例整理
2011/08/22 HTML / CSS
法国美发器材和产品购物网站:Beauty Coiffure
2016/12/05 全球购物
玩具反斗城西班牙网上商城:ToysRUs西班牙
2017/01/19 全球购物
澳大利亚电商Catch新西兰站:Catch.co.nz
2020/05/30 全球购物
市场营销专业个人求职信范文
2013/12/14 职场文书
企业道德讲堂实施方案
2014/03/19 职场文书
2015个人年度工作总结范文
2015/05/28 职场文书
初中同学会致辞
2015/08/01 职场文书
银行服务理念口号
2015/12/25 职场文书
Vue.js 带下拉选项的输入框(Textbox with Dropdown)组件
2021/04/17 Vue.js