python分析nignx访问日志脚本分享


Posted in Python onFebruary 26, 2015
#!/usr/bin/env python 
# coding=utf-8 
 
#------------------------------------------------------ 
# Name:     nginx 日志分析脚本 
# Purpose:   此脚本只用来分析nginx的访问日志 
# Version:   1.0 
# Author:    LEO 
# Created:   2013-05-07 
# Modified:   2013-05-07 
# Copyright:  (c) LEO 2013 
#------------------------------------------------------ 
 
import sys 
import time 
 
#该类是用来打印格式 
class displayFormat(object): 
 
  def format_size(self,size): 
    '''''格式化流量单位''' 
    KB = 1024      #KB -> B B是字节 
    MB = 1048576    #MB -> B 
    GB = 1073741824   #GB -> B 
    TB = 1099511627776 #TB -> B 
    if size >= TB : 
      size = str(size / TB) + 'T' 
    elif size < KB : 
      size = str(size) + 'B' 
    elif size >= GB and size < TB: 
      size = str(size / GB) + 'G' 
    elif size >= MB and size < GB : 
      size = str(size / MB) + 'M' 
    else : 
      size = str(size / KB) + 'K' 
    return size 
 
  #定义字符串格式化 
  formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s' 
 
  def transverse_line(self) : 
    '''''输出横线''' 
    print self.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10) 
 
  def head(self): 
    '''''输出头部信息''' 
    print self.formatstring % ('IP','Traffic','Times','Times%','200','404','500','403','302','304','503') 
 
  def error_print(self) : 
    '''''输出错误信息''' 
    print 
    print 'Usage : ' + sys.argv[0] + ' NginxLogFilePath [Number]' 
    print 
    sys.exit(1) 
 
  def execut_time(self): 
    '''''输出脚本执行的时间''' 
    print 
    print "Script Execution Time: %.3f second" % time.clock() 
    print 
 
#该类是用来生成主机信息的字典 
class hostInfo(object): 
  host_info = ['200','404','500','302','304','503','403','times','size'] 
 
  def __init__(self,host): 
    self.host = host = {}.fromkeys(self.host_info,0) 
 
  def increment(self,status_times_size,is_size): 
    '''''该方法是用来给host_info中的各个值加1''' 
    if status_times_size == 'times': 
      self.host['times'] += 1 
    elif is_size: 
      self.host['size'] = self.host['size'] + status_times_size 
    else: 
      self.host[status_times_size] += 1 
 
  def get_value(self,value): 
    '''''该方法是取到各个主机信息中对应的值''' 
    return self.host[value] 
 
#该类是用来分析文件 
class fileAnalysis(object): 
  def __init__(self): 
    '''''初始化一个空字典''' 
    self.report_dict = {} 
    self.total_request_times,self.total_traffic,self.total_200, 
    self.total_404,self.total_500,self.total_403,self.total_302, 
    self.total_304,self.total_503 = 0,0,0,0,0,0,0,0,0 
 
  def split_eachline_todict(self,line): 
    '''''分割文件中的每一行,并返回一个字典''' 
    split_line = line.split() 
    split_dict = {'remote_host':split_line[0],'status':split_line[8], 
           'bytes_sent':split_line[9],} 
    return split_dict 
 
  def generate_log_report(self,logfile): 
    '''''读取文件,分析split_eachline_todict方法生成的字典''' 
    for line in logfile: 
      try: 
        line_dict = self.split_eachline_todict(line) 
        host = line_dict['remote_host'] 
        status = line_dict['status'] 
      except ValueError : 
        continue 
      except IndexError : 
        continue 
 
      if host not in self.report_dict : 
        host_info_obj = hostInfo(host) 
        self.report_dict[host] = host_info_obj 
      else : 
        host_info_obj = self.report_dict[host] 
 
      host_info_obj.increment('times',False) 
      if status in host_info_obj.host_info : 
        host_info_obj.increment(status,False) 
      try: 
        bytes_sent = int(line_dict['bytes_sent']) 
      except ValueError: 
        bytes_sent = 0 
      host_info_obj.increment(bytes_sent,True) 
    return self.report_dict 
 
  def return_sorted_list(self,true_dict): 
    '''''计算各个状态次数、流量总量,请求的总次数,并且计算各个状态的总量 并生成一个正真的字典,方便排序''' 
    for host_key in true_dict : 
      host_value = true_dict[host_key] 
      times = host_value.get_value('times')            
      self.total_request_times = self.total_request_times + times 
      size = host_value.get_value('size')            
      self.total_traffic = self.total_traffic + size  
 
      o200 = host_value.get_value('200') 
      o404 = host_value.get_value('404') 
      o500 = host_value.get_value('500') 
      o403 = host_value.get_value('403') 
      o302 = host_value.get_value('302') 
      o304 = host_value.get_value('304') 
      o503 = host_value.get_value('503') 
 
      true_dict[host_key] = {'200':o200,'404':o404,'500':o500, 
                  '403':o403,'302':o302,'304':o304, 
                  '503':o503,'times':times,'size':size} 
 
      self.total_200 = self.total_200 + o200 
      self.total_404 = self.total_404 + o404 
      self.total_500 = self.total_500 + o500 
      self.total_302 = self.total_302 + o302 
      self.total_304 = self.total_304 + o304 
      self.total_503 = self.total_503 + o503 
 
    sorted_list = sorted(true_dict.items(),key=lambda t:(t[1]['times'],
                               t[1]['size']),reverse=True) 
 
    return sorted_list 
 
class Main(object): 
  def main(self) : 
    '''''主调函数''' 
    display_format = displayFormat() 
    arg_length = len(sys.argv) 
    if arg_length == 1 : 
      display_format.error_print() 
    elif arg_length == 2 or arg_length == 3: 
      infile_name = sys.argv[1] 
      try : 
        infile = open(infile_name,'r') 
        if arg_length == 3 : 
          lines = int(sys.argv[2]) 
        else : 
          lines = 0 
      except IOError,e : 
        print 
        print e 
        display_format.error_print() 
      except ValueError : 
        print 
        print "Please Enter A Volid Number !!" 
        display_format.error_print() 
    else : 
      display_format.error_print() 
 
    fileAnalysis_obj = fileAnalysis() 
    not_true_dict = fileAnalysis_obj.generate_log_report(infile) 
    log_report = fileAnalysis_obj.return_sorted_list(not_true_dict) 
    total_ip = len(log_report) 
    if lines : 
      log_report = log_report[0:lines] 
    infile.close() 
 
    print 
    total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic) 
    total_request_times = fileAnalysis_obj.total_request_times 
    print 'Total IP: %s  Total Traffic: %s  Total Request Times: %d' 
       % (total_ip,total_traffic,total_request_times) 
    print 
    display_format.head() 
    display_format.transverse_line() 
 
    for host in log_report : 
      times = host[1]['times'] 
      times_percent = (float(times) / float(fileAnalysis_obj.total_request_times)) * 100 
      print display_format.formatstring % (host[0],
                         display_format.format_size(host[1]['size']),
                         times,str(times_percent)[0:5],
                         host[1]['200'],host[1]['404'],
                         host[1]['500'],host[1]['403'],
                         host[1]['302'],host[1]['304'],host[1]['503']) 
                         
    if (not lines) or total_ip == lines : 
      display_format.transverse_line() 
      print display_format.formatstring % (total_ip,total_traffic, 
                         total_request_times,'100%',
                         fileAnalysis_obj.total_200,
                         fileAnalysis_obj.total_404,
                         fileAnalysis_obj.total_500, 
                         fileAnalysis_obj.total_403,
                         fileAnalysis_obj.total_302, 
                         fileAnalysis_obj.total_304,
                         fileAnalysis_obj.total_503) 
 
    display_format.execut_time() 
 
if __name__ == '__main__': 
  main_obj = Main() 
  main_obj.main()
Python 相关文章推荐
Python 爬虫爬取指定博客的所有文章
Feb 17 Python
使用Python判断质数(素数)的简单方法讲解
May 05 Python
基于Python实现的微信好友数据分析
Feb 26 Python
Python中XlsxWriter模块简介与用法分析
Apr 24 Python
使用python读取csv文件快速插入数据库的实例
Jun 21 Python
用xpath获取指定标签下的所有text的实例
Jan 02 Python
python中的colorlog库使用详解
Jul 05 Python
tensorflow tf.train.batch之数据批量读取方式
Jan 20 Python
基于Numba提高python运行效率过程解析
Mar 02 Python
OpenCV绘制圆端矩形的示例代码
Aug 30 Python
Python字符串常规操作小结
Apr 03 Python
python 单机五子棋对战游戏
Apr 28 Python
python分析apache访问日志脚本分享
Feb 26 #Python
Python构造函数及解构函数介绍
Feb 26 #Python
python中的__slots__使用示例
Feb 26 #Python
Python map和reduce函数用法示例
Feb 26 #Python
Python中运行并行任务技巧
Feb 26 #Python
Python通过递归遍历出集合中所有元素的方法
Feb 25 #Python
Python THREADING模块中的JOIN()方法深入理解
Feb 18 #Python
You might like
php入门学习知识点五 关于php数组的几个基本操作
2011/07/14 PHP
php实现redis数据库指定库号迁移的方法
2015/01/14 PHP
PHP大神的十大优良习惯
2016/09/14 PHP
PHP用FTP类上传文件视频等的简单实现方法
2016/09/23 PHP
快速解决PHP调用Word组件DCOM权限的问题
2017/12/27 PHP
HTML5之lang属性与dir属性的详解
2013/06/19 Javascript
js delete 用法(删除对象属性及变量)
2014/08/24 Javascript
使用jQuery简单实现模拟浏览器搜索功能
2014/12/21 Javascript
jQuery实现图片预加载效果
2015/11/27 Javascript
Bootstrap每天必学之标签页(Tab)插件
2020/08/09 Javascript
jQuery.form插件的使用及跨域异步上传文件
2016/04/27 Javascript
基于vue的下拉刷新指令和滚动刷新指令
2016/12/23 Javascript
Angular动态添加、删除输入框并计算值实例代码
2017/03/29 Javascript
深入探究node之Transform
2017/07/20 Javascript
Vue 幸运大转盘实现思路详解
2019/05/06 Javascript
简要讲解Python编程中线程的创建与锁的使用
2016/02/28 Python
CentOS 6.X系统下升级Python2.6到Python2.7 的方法
2016/10/12 Python
Python使用re模块实现信息筛选的方法
2018/04/29 Python
tensorflow 输出权重到csv或txt的实例
2018/06/14 Python
python opencv将表格图片按照表格框线分割和识别
2019/10/30 Python
在Django下创建项目以及设置settings.py教程
2019/12/03 Python
Pytorch evaluation每次运行结果不同的解决
2020/01/02 Python
python实现打砖块游戏
2020/02/25 Python
pandas统计重复值次数的方法实现
2021/02/20 Python
西班牙汉普顿小姐:购买帆布鞋和太阳镜
2016/10/23 全球购物
奥地利体育网上商店:Gigasport
2019/10/09 全球购物
财务会计专业个人求职信范本
2014/01/08 职场文书
护士岗位求职应聘自荐书范文
2014/02/12 职场文书
医德医风自我评价
2014/09/19 职场文书
民主评议教师党员自我评价
2015/03/04 职场文书
内勤岗位职责范本
2015/04/13 职场文书
写给女朋友的检讨书
2015/05/06 职场文书
2016年推广普通话宣传周活动总结
2016/04/06 职场文书
2019消防宣传标语!
2019/07/10 职场文书
Django使用channels + websocket打造在线聊天室
2021/05/20 Python
一文了解JavaScript用Element Traversal新属性遍历子元素
2021/11/27 Javascript