python分析nignx访问日志脚本分享


Posted in Python onFebruary 26, 2015
#!/usr/bin/env python 
# coding=utf-8 
 
#------------------------------------------------------ 
# Name:     nginx 日志分析脚本 
# Purpose:   此脚本只用来分析nginx的访问日志 
# Version:   1.0 
# Author:    LEO 
# Created:   2013-05-07 
# Modified:   2013-05-07 
# Copyright:  (c) LEO 2013 
#------------------------------------------------------ 
 
import sys 
import time 
 
#该类是用来打印格式 
class displayFormat(object): 
 
  def format_size(self,size): 
    '''''格式化流量单位''' 
    KB = 1024      #KB -> B B是字节 
    MB = 1048576    #MB -> B 
    GB = 1073741824   #GB -> B 
    TB = 1099511627776 #TB -> B 
    if size >= TB : 
      size = str(size / TB) + 'T' 
    elif size < KB : 
      size = str(size) + 'B' 
    elif size >= GB and size < TB: 
      size = str(size / GB) + 'G' 
    elif size >= MB and size < GB : 
      size = str(size / MB) + 'M' 
    else : 
      size = str(size / KB) + 'K' 
    return size 
 
  #定义字符串格式化 
  formatstring = '%-15s %-10s %-12s %8s %10s %10s %10s %10s %10s %10s %10s' 
 
  def transverse_line(self) : 
    '''''输出横线''' 
    print self.formatstring % ('-'*15,'-'*10,'-'*12,'-'*12,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10,'-'*10) 
 
  def head(self): 
    '''''输出头部信息''' 
    print self.formatstring % ('IP','Traffic','Times','Times%','200','404','500','403','302','304','503') 
 
  def error_print(self) : 
    '''''输出错误信息''' 
    print 
    print 'Usage : ' + sys.argv[0] + ' NginxLogFilePath [Number]' 
    print 
    sys.exit(1) 
 
  def execut_time(self): 
    '''''输出脚本执行的时间''' 
    print 
    print "Script Execution Time: %.3f second" % time.clock() 
    print 
 
#该类是用来生成主机信息的字典 
class hostInfo(object): 
  host_info = ['200','404','500','302','304','503','403','times','size'] 
 
  def __init__(self,host): 
    self.host = host = {}.fromkeys(self.host_info,0) 
 
  def increment(self,status_times_size,is_size): 
    '''''该方法是用来给host_info中的各个值加1''' 
    if status_times_size == 'times': 
      self.host['times'] += 1 
    elif is_size: 
      self.host['size'] = self.host['size'] + status_times_size 
    else: 
      self.host[status_times_size] += 1 
 
  def get_value(self,value): 
    '''''该方法是取到各个主机信息中对应的值''' 
    return self.host[value] 
 
#该类是用来分析文件 
class fileAnalysis(object): 
  def __init__(self): 
    '''''初始化一个空字典''' 
    self.report_dict = {} 
    self.total_request_times,self.total_traffic,self.total_200, 
    self.total_404,self.total_500,self.total_403,self.total_302, 
    self.total_304,self.total_503 = 0,0,0,0,0,0,0,0,0 
 
  def split_eachline_todict(self,line): 
    '''''分割文件中的每一行,并返回一个字典''' 
    split_line = line.split() 
    split_dict = {'remote_host':split_line[0],'status':split_line[8], 
           'bytes_sent':split_line[9],} 
    return split_dict 
 
  def generate_log_report(self,logfile): 
    '''''读取文件,分析split_eachline_todict方法生成的字典''' 
    for line in logfile: 
      try: 
        line_dict = self.split_eachline_todict(line) 
        host = line_dict['remote_host'] 
        status = line_dict['status'] 
      except ValueError : 
        continue 
      except IndexError : 
        continue 
 
      if host not in self.report_dict : 
        host_info_obj = hostInfo(host) 
        self.report_dict[host] = host_info_obj 
      else : 
        host_info_obj = self.report_dict[host] 
 
      host_info_obj.increment('times',False) 
      if status in host_info_obj.host_info : 
        host_info_obj.increment(status,False) 
      try: 
        bytes_sent = int(line_dict['bytes_sent']) 
      except ValueError: 
        bytes_sent = 0 
      host_info_obj.increment(bytes_sent,True) 
    return self.report_dict 
 
  def return_sorted_list(self,true_dict): 
    '''''计算各个状态次数、流量总量,请求的总次数,并且计算各个状态的总量 并生成一个正真的字典,方便排序''' 
    for host_key in true_dict : 
      host_value = true_dict[host_key] 
      times = host_value.get_value('times')            
      self.total_request_times = self.total_request_times + times 
      size = host_value.get_value('size')            
      self.total_traffic = self.total_traffic + size  
 
      o200 = host_value.get_value('200') 
      o404 = host_value.get_value('404') 
      o500 = host_value.get_value('500') 
      o403 = host_value.get_value('403') 
      o302 = host_value.get_value('302') 
      o304 = host_value.get_value('304') 
      o503 = host_value.get_value('503') 
 
      true_dict[host_key] = {'200':o200,'404':o404,'500':o500, 
                  '403':o403,'302':o302,'304':o304, 
                  '503':o503,'times':times,'size':size} 
 
      self.total_200 = self.total_200 + o200 
      self.total_404 = self.total_404 + o404 
      self.total_500 = self.total_500 + o500 
      self.total_302 = self.total_302 + o302 
      self.total_304 = self.total_304 + o304 
      self.total_503 = self.total_503 + o503 
 
    sorted_list = sorted(true_dict.items(),key=lambda t:(t[1]['times'],
                               t[1]['size']),reverse=True) 
 
    return sorted_list 
 
class Main(object): 
  def main(self) : 
    '''''主调函数''' 
    display_format = displayFormat() 
    arg_length = len(sys.argv) 
    if arg_length == 1 : 
      display_format.error_print() 
    elif arg_length == 2 or arg_length == 3: 
      infile_name = sys.argv[1] 
      try : 
        infile = open(infile_name,'r') 
        if arg_length == 3 : 
          lines = int(sys.argv[2]) 
        else : 
          lines = 0 
      except IOError,e : 
        print 
        print e 
        display_format.error_print() 
      except ValueError : 
        print 
        print "Please Enter A Volid Number !!" 
        display_format.error_print() 
    else : 
      display_format.error_print() 
 
    fileAnalysis_obj = fileAnalysis() 
    not_true_dict = fileAnalysis_obj.generate_log_report(infile) 
    log_report = fileAnalysis_obj.return_sorted_list(not_true_dict) 
    total_ip = len(log_report) 
    if lines : 
      log_report = log_report[0:lines] 
    infile.close() 
 
    print 
    total_traffic = display_format.format_size(fileAnalysis_obj.total_traffic) 
    total_request_times = fileAnalysis_obj.total_request_times 
    print 'Total IP: %s  Total Traffic: %s  Total Request Times: %d' 
       % (total_ip,total_traffic,total_request_times) 
    print 
    display_format.head() 
    display_format.transverse_line() 
 
    for host in log_report : 
      times = host[1]['times'] 
      times_percent = (float(times) / float(fileAnalysis_obj.total_request_times)) * 100 
      print display_format.formatstring % (host[0],
                         display_format.format_size(host[1]['size']),
                         times,str(times_percent)[0:5],
                         host[1]['200'],host[1]['404'],
                         host[1]['500'],host[1]['403'],
                         host[1]['302'],host[1]['304'],host[1]['503']) 
                         
    if (not lines) or total_ip == lines : 
      display_format.transverse_line() 
      print display_format.formatstring % (total_ip,total_traffic, 
                         total_request_times,'100%',
                         fileAnalysis_obj.total_200,
                         fileAnalysis_obj.total_404,
                         fileAnalysis_obj.total_500, 
                         fileAnalysis_obj.total_403,
                         fileAnalysis_obj.total_302, 
                         fileAnalysis_obj.total_304,
                         fileAnalysis_obj.total_503) 
 
    display_format.execut_time() 
 
if __name__ == '__main__': 
  main_obj = Main() 
  main_obj.main()
Python 相关文章推荐
pip安装Python库时遇到的问题及解决方法
Nov 23 Python
python+selenium识别验证码并登录的示例代码
Dec 21 Python
Python实现结构体代码实例
Feb 10 Python
python计算波峰波谷值的方法(极值点)
Feb 18 Python
jupyter notebook参数化运行python方式
Apr 10 Python
spyder 在控制台(console)执行python文件,debug python程序方式
Apr 20 Python
python读取xml文件方法解析
Aug 04 Python
详解Python中的路径问题
Sep 02 Python
python 获取字典特定值对应的键的实现
Sep 29 Python
Python基础之教你怎么在M1系统上使用pandas
May 08 Python
微信小程序调用python模型
Apr 21 Python
使用scrapy实现增量式爬取方式
Jun 21 Python
python分析apache访问日志脚本分享
Feb 26 #Python
Python构造函数及解构函数介绍
Feb 26 #Python
python中的__slots__使用示例
Feb 26 #Python
Python map和reduce函数用法示例
Feb 26 #Python
Python中运行并行任务技巧
Feb 26 #Python
Python通过递归遍历出集合中所有元素的方法
Feb 25 #Python
Python THREADING模块中的JOIN()方法深入理解
Feb 18 #Python
You might like
基于empty函数的输出详解
2013/06/17 PHP
Yii调试查看执行SQL语句的方法
2016/07/15 PHP
php模拟实现斗地主发牌
2020/04/22 PHP
PHP实现页面静态化深入讲解
2021/03/04 PHP
Aster vs KG BO3 第一场2.19
2021/03/10 DOTA
javascript 仿QQ滑动菜单效果代码
2010/09/03 Javascript
js中设置元素class的三种方法小结
2011/08/28 Javascript
contains和compareDocumentPosition 方法来确定是否HTML节点间的关系
2011/09/13 Javascript
一个JavaScript处理textarea中的字符成每一行实例
2014/09/22 Javascript
JS在浏览器中解析Base64编码图像
2017/02/09 Javascript
详解nodejs操作mongodb数据库封装DB类
2017/04/10 NodeJs
基于react组件之间的参数传递(详解)
2017/09/05 Javascript
微信小程序中换行空格(多个空格)写法详解
2018/07/10 Javascript
vue动态路由:路由参数改变,视图不更新问题的解决
2019/11/05 Javascript
Vue-CLI与Vuex使用方法实例分析
2020/01/06 Javascript
关于javascript中的promise的用法和注意事项(推荐)
2021/01/15 Javascript
Python 执行字符串表达式函数(eval exec execfile)
2014/08/11 Python
numpy 进行数组拼接,分别在行和列上合并的实例
2018/05/08 Python
pandas带有重复索引操作方法
2018/06/08 Python
numpy使用fromstring创建矩阵的实例
2018/06/15 Python
Python爬虫使用脚本登录Github并查看信息
2018/07/16 Python
Python学习笔记之Break和Continue用法分析
2019/08/14 Python
python 实现仿微信聊天时间格式化显示的代码
2020/04/17 Python
pycharm远程连接vagrant虚拟机中mariadb数据库
2020/06/05 Python
Python3.7安装pyaudio教程解析
2020/07/24 Python
美国汽车性能部件和赛车零件网站:Vivid Racing
2018/03/27 全球购物
需求分析说明书
2014/05/09 职场文书
六查六看六改心得体会
2014/10/14 职场文书
单位租车协议书
2015/01/29 职场文书
个人承诺书格式范文
2015/04/29 职场文书
学校青年志愿者活动总结
2015/05/06 职场文书
遗嘱格式范本
2015/08/07 职场文书
opencv用VS2013调试时用Image Watch插件查看图片
2021/07/26 Python
MySQL里面的子查询的基本使用
2021/08/02 MySQL
Shell脚本一键安装Nginx服务自定义Nginx版本
2022/03/20 Servers
windows11选中自动复制怎么开启? Win11自动复制所选内容的方法
2022/07/23 数码科技