python爬取cnnvd,粘贴可用
生活随笔
收集整理的這篇文章主要介紹了
python爬取cnnvd,粘贴可用
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
粘貼即可使用。目前因為沒有性能要求,所以未使用多線程。
# @Time : 2022/9/28 21:56
# @Author : pzh
# @File : ali_cvd_detail.py
# -*- coding: utf-8 -*-
"""Crawl the CNNVD vulnerability list and dump entry details to a dated text file.

Each run compares the total entry count shown on the list page with the count
stored in ``memory_cache`` under ``"TotalCNNVD"``.  When they differ, the list
page is fetched, every entry's detail page is downloaded, and one
comma-separated line per entry is appended to ``path + YYYY-MM-DD + '.txt'``.
"""
import re
import requests
from random import randint
import time
from lxml import etree
from cache import memory_cache
import datetime
import threading
from LoggingUtils import logger
import math
import os

# Browser User-Agent strings; one is picked at random for every request.
_USER_AGENTS = [
    'Mozilla/5.0 (Windows NT 10.0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.7113.93 Safari/537.36',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:91.0) Gecko/20100101 Firefox/91.0',
    'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/92.0.4482.0 Safari/537.36 Edg/92.0.874.0',
]

# Placeholder written for a detail field whose node is missing from the page.
_MISSING_FIELD = '暫無'


def get_onepage_content(url):
    """Download ``url`` and return the response body as text.

    Returns ``None`` when the request fails or the status code is not 200,
    so callers must check the result before parsing it.
    """
    headers = {'User-Agent': _USER_AGENTS[randint(0, len(_USER_AGENTS) - 1)]}
    try:
        response = requests.get(url, headers=headers, timeout=60)
    except requests.RequestException as exc:
        logger.info("request failed for " + url + ": " + str(exc))
        return None
    if response.status_code == 200:
        return response.text
    return None


def show_cve_content(res):
    """Return the link texts of all entries found in list-page HTML ``res``.

    ``main`` uses each returned text as the ``CNNVD`` query parameter of the
    detail page.  An empty list is returned for empty or unparsable input.
    """
    if not res:
        return []
    tree = etree.HTML(res)
    if tree is None:
        return []
    return tree.xpath("//div[@class='list_list']/ul/li/div/p/a/text()")


path = "E://Asiainfo//webappss//pythonDemo//Test//ali_cnnvd_"


def save_content_to_text(content, now_time):
    """Append ``content`` plus a newline to the output file for ``now_time``.

    All pages of one day go into the same file, ``path + now_time + '.txt'``.
    """
    # Explicit UTF-8: the records are Chinese text and the platform default
    # encoding may not be able to represent every character.
    with open(path + now_time + '.txt', 'a+', encoding='utf-8') as f:
        f.write(content + '\n')


pageSize = 30


def _parse_total(html):
    """Extract the total entry count (as a string) from list-page HTML, or None."""
    if not html:
        return None
    tree = etree.HTML(html)
    if tree is None:
        return None
    total_texts = tree.xpath("//div[@class='page']/a[1]/text()")
    # Thousands separators are stripped before the first number is picked out.
    numbers = re.findall(r"[1-9]+\.?[0-9]*", str(total_texts).replace(",", ""))
    return numbers[0] if numbers else None


def _first_text(tree, xpath, default=_MISSING_FIELD):
    """Return the stripped first text node matched by ``xpath``, or ``default``."""
    matches = tree.xpath(xpath)
    return matches[0].strip() if matches else default


def _build_record(tree):
    """Build the comma-separated output line for one parsed detail page."""
    title = _first_text(tree, "//div[@class='detail_xq w770']/h2/text()")
    description = _first_text(tree, "//div[@class='d_ldjj']/p[2]/text()")
    affected = _first_text(tree, "//div[@class='vulnerability_list']/ul/p/text()")
    patch = _first_text(tree, "//div[@class='vulnerability_list']/ul/li/div/a/text()")
    cnnvd_id = _first_text(tree, "//div[@class='detail_xq w770']/ul/li[1]/span/text()")
    severity = _first_text(tree, "//div[@class='detail_xq w770']/ul/li[2]/a[1]/text()")
    cve_id = _first_text(tree, "//div[@class='detail_xq w770']/ul/li[3]/a/text()")
    vuln_type = _first_text(tree, "//div[@class='detail_xq w770']/ul/li[4]/a/text()")
    threat_type = _first_text(tree, "//div[@class='detail_xq w770']/ul/li[6]/a/text()")
    return (
        f"{description},{title},{affected},補丁:{patch},{cnnvd_id}"
        f",危害等級:{severity},CVE編號:{cve_id}"
        f",漏洞類型:{vuln_type},威脅類型:{threat_type}"
    )


def main():
    """Run one crawl pass and re-arm a timer that calls ``main`` again in 120 s.

    The pass is skipped when the total count on the site equals the cached
    count, or when the total cannot be read from the list page.
    """
    # NOTE(review): the original comment here said "run main every day at
    # 03:00", but the timer below fires every 120 seconds -- confirm which
    # interval is intended.
    logger.info("當前任務為:"+'120'+"秒執行一次")
    timer = threading.Timer(120, main)
    timer.start()

    startTime = time.time()
    value = memory_cache.get_value("TotalCNNVD")
    logger.info("當前任務獲取上次總數為:"+str(value))

    # A one-entry page is enough to read the current total.
    first_page = get_onepage_content('http://www.cnnvd.org.cn/web/vulnerability/queryLds.tag?pageno=1&pageSize=1&repairLd=')
    total = _parse_total(first_page)
    if total is None:
        logger.info("could not read the current CNNVD total; skipping this run")
        return
    logger.info("當前最新任務獲取總數為:"+total)

    if str(value) == str(total):
        logger.info("無更新")
        return

    # Remember the new total for the next run (cached for 86400 seconds).
    memory_cache.set_value("TotalCNNVD", total, 86400)
    # No cached value means this is the first run: take everything.
    if str(value) == 'None':
        Num = int(total)
    else:
        Num = int(total) - int(value)
    pageNum = math.ceil(Num / pageSize)
    logger.info("需要循環的頁數為:"+str(pageNum))

    now_time = datetime.datetime.now().strftime('%Y-%m-%d')
    logger.info("當前任務文件路徑及名稱:"+path+now_time+'.txt')

    htmls = ""
    # TODO(review): only page 1 is crawled; the original author's note says to
    # replace the upper bound 2 with pageNum for production use.
    for pagenum in range(1, 2):
        url = f'http://www.cnnvd.org.cn/web/vulnerability/queryLds.tag?pageno={pagenum}&pageSize={pageSize}&repairLd='
        html = get_onepage_content(url)
        if html is None:
            logger.info("list page " + str(pagenum) + " could not be fetched; skipped")
            continue
        htmls = html + htmls

    # Start from a clean file when today's file already exists.
    if os.path.exists(path + now_time + ".txt"):
        os.remove(path + now_time + ".txt")

    for content in show_cve_content(htmls):
        htmldetail = get_onepage_content(f'http://www.cnnvd.org.cn/web/xxk/ldxqById.tag?CNNVD={content}')
        detail_tree = etree.HTML(htmldetail) if htmldetail else None
        if detail_tree is None:
            logger.info("detail page for " + str(content) + " could not be fetched; skipped")
            continue
        save_content_to_text(_build_record(detail_tree), now_time)

    endTime = time.time()
    logger.info("cnnvd循環" + str(pagenum) + "完成,耗時:" + str(endTime - startTime))


# Current time.
now_time = datetime.datetime.now()
# 03:00:00 on the following day.
next_time = (now_time + datetime.timedelta(days=+1)).replace(
    hour=3, minute=0, second=0, microsecond=0)

# Seconds from now until 03:00 tomorrow.
timer_start_time = (next_time - now_time).total_seconds()
logger.info("獲取距離明天3點時間(秒):"+str(timer_start_time))

# Timer arguments: (delay in seconds, function to call).
# NOTE(review): main() re-arms its own 120-second timer on every call and the
# __main__ guard below calls main() immediately, so this timer starts a second,
# overlapping chain of runs at 03:00 -- confirm which schedule is intended.
timer = threading.Timer(timer_start_time, main)
timer.start()

if __name__ == "__main__":
    main()

總結
以上是生活随笔為你收集整理的python爬取cnnvd,粘贴可用的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: SharePoint 入门介绍
- 下一篇: Python智能对话机器人实现