當前位置：首頁 > 编程资源 > 编程问答 >内容正文

编程问答

java统计日志qps_【原创】基于日志增量，统计qps，并基于ip排序

發布時間：2025/4/5 编程问答 21 豆豆

生活随笔收集整理的這篇文章主要介紹了 java统计日志qps_【原创】基于日志增量，统计qps，并基于ip排序，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

增量統計日志行數(只統計上一秒)

dns_qps.py

#!/usr/bin/env python

#_*_coding:utf-8_*_

import datetime

import re

import os

# Path of the source log to analyse.
log_files = './dns_logs'

# Scratch file that stores only the offset reached by the previous run.
# Keeping it under /tmp is recommended.
seek_files = './seek_log.tmp'

# Work on the previous second: the current second may still be receiving lines.
last_second = datetime.datetime.now() - datetime.timedelta(seconds=1)

# Timestamp text that a log line must start with; adjust the format to the log
# being parsed. %H:%M:%S is spelled out because %T is a platform-specific
# strftime extension that is not available everywhere.
current_time = last_second.strftime('%d-%b-%Y %H:%M:%S')
# Example value: current_time = '17-Jun-2016 14:17:20'

# First run: seed the offset file with 0 so later reads find an integer.
if not os.path.exists(seek_files):
    with open(seek_files, 'w+') as s:
        s.write(str(0))

def write_file_end(log_files, seek_files):
    """Store the current end-of-file offset of ``log_files`` in ``seek_files``.

    Args:
        log_files: Path of the log whose end position is measured.
        seek_files: Path of the file that is overwritten with that position.
    """
    # Jump to the end of the log and note where that is.
    with open(log_files, 'r') as log_handle:
        log_handle.seek(0, os.SEEK_END)
        end_offset = log_handle.tell()

    # Replace the previously saved offset with the new one.
    with open(seek_files, 'w+') as offset_handle:
        offset_handle.write('%d' % end_offset)

def get_count(log_files, begin, timestamp=None):
    """Count and print the query lines logged during one second.

    Reads ``log_files`` from offset ``begin`` to the end and counts the lines
    that start with the timestamp, followed by a dot, a fractional part,
    whitespace and the text ``queries: info: client``.

    Args:
        log_files: Path of the log file to scan.
        begin: Offset to seek to before reading.
        timestamp: Timestamp text a line must start with. Defaults to the
            module-level ``current_time``.

    Returns:
        The number of matching lines. The same number is printed to stdout.
    """
    if timestamp is None:
        timestamp = current_time
    # This pattern has to follow the log format in use. The timestamp is
    # escaped so that it is always matched literally.
    dns_pattern = re.compile(
        re.escape(timestamp) + r'\.(\w+)(\s+)' + "queries: info: client")

    count = 0
    with open(log_files, 'r') as f:
        # Skip what earlier runs already covered.
        f.seek(begin)
        # Iterating the file object works on Python 2 and 3 alike, whereas
        # xreadlines() exists on Python 2 only.
        for line in f:
            if dns_pattern.match(line):
                count += 1
    print(count)
    return count

def _read_begin(path):
    """Return the offset saved in ``path``, or 0 if it is empty or not an integer."""
    with open(path, 'r') as t:
        saved = t.read().strip()
    try:
        return int(saved)
    except ValueError:
        # An empty or damaged offset file must not block every later run,
        # so start again from the beginning of the log.
        return 0


if __name__ == '__main__':
    try:
        begin = _read_begin(seek_files)

        # The log is rotated periodically. After a rotation the file is
        # shorter than the saved offset, so restart from 0; otherwise nothing
        # would be read and the count would stay at 0.
        if os.path.getsize(log_files) < begin:
            begin = 0

        get_count(log_files, begin)  # prints the count for the previous second
        # Save where the log ends now, as the starting point of the next run.
        # NOTE(review): lines appended between the read above and this call
        # are skipped by the next run.
        write_file_end(log_files, seek_files)
    except Exception:
        # Any failure is reported as a count of 0 on stdout.
        print(0)

注意事項

把這個腳本放在zabbix之類的監控里，一秒執行一次，就可以算出每秒(其實是上一秒)增量部分的行數了，然后zabbix拿著這個讀數繪圖。

# 運行腳本

/usr/bin/python2.6 dns_qps.py

下邊更進一步，拿著這個增量，基于源ip，我們再做一個排序，把排序結果記錄到一個log里

增量統計日志(tcpdump出來的數據)并排序，記錄日志

dns_request_sort.py

#!/usr/bin/env python

#_*_coding:utf-8_*_

import datetime

import re

import os

import logging

import sys

# Directory that receives the raw tcpdump output.
master_dir = "/Data/logs/dns_qps/tcpdump_53"

today = datetime.datetime.now().strftime('%Y%m%d')

# The capture file is named after the current day. The path is built as a
# plain string; no trailing comma, which would turn it into a tuple.
log_files = os.path.join(master_dir, today + '.log')

if not os.path.exists(log_files):
    sys.exit('Can not find logfile')

# for tcpdump
# Scratch file that stores the offset reached by the previous run.
seek_files = '/tmp/seek_log_4_tcpdump_53.tmp'

# Work on the previous second: the current second may still be receiving lines.
last_second = datetime.datetime.now() - datetime.timedelta(seconds=1)

# %H:%M:%S is spelled out because %T is a platform-specific strftime
# extension that is not available everywhere.
current_time = last_second.strftime('%H:%M:%S')
# Example value: current_time = '16:54:17'

# An IP seen at least this many times in the second is reported.
warning_num = 4

# Log file that receives the per-IP warnings.
warning_log = "/Data/logs/dns_qps/dns_warning_sort.log"

def Mylogger(msg, logfile, level='info'):
    """Write ``msg`` to ``logfile`` through the root logger.

    Args:
        msg: Text to log.
        logfile: Path passed to ``logging.basicConfig`` as the output file,
            opened in append mode.
        level: 'critical', 'error', 'warn' or 'info'. Any other value is
            logged at DEBUG level, below the INFO threshold requested here.
    """
    # basicConfig only configures the root logger when it has no handlers yet,
    # so repeated calls are harmless and the first logfile stays in effect.
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s [%(levelname)s] %(message)s',
        filename=logfile,
        filemode='a+')

    emitters = {
        'critical': logging.critical,
        'error': logging.error,
        # logging.warning is the supported name; logging.warn is a deprecated alias.
        'warn': logging.warning,
        'info': logging.info,
    }
    emitters.get(level, logging.debug)(msg)

# First run: create the offset file with offset 0 in it.
if not os.path.exists(seek_files):
    with open(seek_files, 'w+') as offset_handle:
        offset_handle.write('0')

def write_file_end(log_files, seek_files):
    """Store the current end-of-file offset of ``log_files`` in ``seek_files``.

    Args:
        log_files: Path of the log whose end position is measured.
        seek_files: Path of the file that is overwritten with that position.
    """
    # Move to the last position of the log and read it back.
    with open(log_files, 'r') as log_handle:
        log_handle.seek(0, os.SEEK_END)
        end_offset = log_handle.tell()

    # Overwrite the saved offset with the new one.
    with open(seek_files, 'w+') as offset_handle:
        offset_handle.write('%d' % end_offset)

def get_count(log_files, begin):
    """Log every source IP seen at least ``warning_num`` times in one second.

    Reads ``log_files`` from offset ``begin`` to the end, keeps the lines that
    start with ``current_time`` and tallies the IP captured from each one.
    Each IP whose tally reaches ``warning_num`` is written to ``warning_log``
    through ``Mylogger`` at level 'warn'.

    Args:
        log_files: Path of the capture log to scan.
        begin: Offset to seek to before reading.

    Returns:
        A dict mapping each matched IP to the number of lines it appeared on.
    """
    # Expected line start: "<current_time>.<digits> IP <a.b.c.d>.<digits>".
    # Group 1 is the IP. The timestamp is escaped so it is matched literally.
    dns_pattern = re.compile(
        re.escape(current_time) + r'\.\d+ IP (\d+\.\d+\.\d+\.\d+)\.\d+[\s\S]*')

    # Tally per IP while reading, instead of collecting a list and calling
    # list.count() once per distinct IP afterwards.
    hits = {}
    with open(log_files, 'r') as f:
        # Skip what earlier runs already covered.
        f.seek(begin)
        # Iterating the file object works on Python 2 and 3 alike, whereas
        # xreadlines() exists on Python 2 only.
        for line in f:
            found = dns_pattern.match(line)
            if found:
                ip = found.group(1)
                hits[ip] = hits.get(ip, 0) + 1

    for ip, seen in hits.items():
        if seen >= warning_num:
            Mylogger("%s %s 此IP: %s 訪問達到 %d 次" % (today, current_time, ip, seen),
                     warning_log, level='warn')
    return hits

def _read_begin(path):
    """Return the offset saved in ``path``, or 0 if it is empty or not an integer."""
    with open(path, 'r') as t:
        saved = t.read().strip()
    try:
        return int(saved)
    except ValueError:
        # An empty or damaged offset file must not block every later run,
        # so start again from the beginning of the log.
        return 0


if __name__ == '__main__':
    try:
        begin = _read_begin(seek_files)

        # The log is rotated periodically. After a rotation the file is
        # shorter than the saved offset, so restart from 0; otherwise nothing
        # would be read.
        if os.path.getsize(log_files) < begin:
            begin = 0

        get_count(log_files, begin)
        # Save where the log ends now, as the starting point of the next run.
        write_file_end(log_files, seek_files)
    except Exception as e:
        # Still best-effort (the script does not crash), but the failure is
        # reported on stderr instead of being dropped silently.
        sys.stderr.write('dns_request_sort failed: %s\n' % e)

最后附上跑tcpdump的那個腳本

本來想用python的pypcap來抓取數據包，然后用dpkt來分析的，但測了一下，抓出來的東西都是亂碼，就放棄了，直接用shell調用tcpdump腳本了

while_53.sh

#!/usr/bin/env bash
# Restart the tcpdump capture of UDP port 53 traffic, appending the filtered
# output to a log file named after the current day.

master_dir="/Data/logs/dns_qps/tcpdump_53"
device="em1"

mkdir -p "${master_dir}"
# user:group is the standard separator; the dotted form is a legacy spelling.
chown -R zabbix:zabbix "${master_dir}"

# Stop the capture that is still running. pgrep -x matches the process name
# exactly, so unrelated commands that only mention "tcpdump" on their command
# line are left alone, and kill is not invoked when nothing is running.
pids=$(pgrep -x tcpdump)
if [ -n "${pids}" ]; then
    # Left unquoted on purpose: one argument per PID.
    sudo /bin/kill -9 ${pids} 2>/dev/null
fi

today=$(date +%Y%m%d)

# tcpdump -l and grep --line-buffered flush each line as it is produced.
# Without them both programs buffer their output when writing to a pipe, and
# lines reach the log late.
sudo /usr/sbin/tcpdump -l -i "${device}" -nn udp port 53 | grep --line-buffered "> 10.*\.53:" >> "${master_dir}/${today}.log" &

把2個腳本放到crontab跑起來

00 00 * * * cd /Data/shell && sh while_53.sh # restarts tcpdump once a day so that the capture log is split per day

* * * * * cd /Data/shell && /usr/bin/python dns_request_sort.py # requirements are not that time-sensitive; running once a minute is enough

# 看下生成的日志

2016-08-26 16:20:01,568 [WARNING] 20160826 16:20:00 此IP: 10.1.0.110 訪問達到 70 次

2016-08-26 16:21:01,616 [WARNING] 20160826 16:21:00 此IP: 10.1.0.110 訪問達到 67 次

2016-08-26 16:22:01,665 [WARNING] 20160826 16:22:00 此IP: 10.1.0.110 訪問達到 68 次

2016-08-26 16:23:01,714 [WARNING] 20160826 16:23:00 此IP: 10.1.0.110 訪問達到 65 次

2016-08-26 16:24:01,766 [WARNING] 20160826 16:24:00 此IP: 10.1.20.253 訪問達到 100 次

2016-08-26 16:24:01,766 [WARNING] 20160826 16:24:00 此IP: 10.1.0.110 訪問達到 72 次

2016-08-26 16:25:01,815 [WARNING] 20160826 16:25:00 此IP: 10.1.0.110 訪問達到 59 次

總結

以上是生活随笔為你收集整理的java统计日志qps_【原创】基于日志增量，统计qps，并基于ip排序的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： linux赋予文件夹所有权限_linux
下一篇： java xsl生成word文件_导出生