改进 网站资源探测工具(添加代理)
生活随笔
收集整理的這篇文章主要介紹了
改进 网站资源探测工具(添加代理)
小編覺得挺不錯的，現在分享給大家，幫大家做個參考。
對網站資源探測工具進行調整，並且添加代理，防止多次訪問 IP 被封的情況。
#獲取代理,并寫入agents列 def agent_list(url):global agent_listsagent_lists = []header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'}r = requests.get(url,headers = header)agent_info = BeautifulSoup(r.content,'html.parser').find(id = "ip_list").find_all('tr')[1:]for i in range(len(agent_info)):info = agent_info[i].find_all('td')agents = {info[5].string : 'http://' + info[1].string}agent_lists.append(agents)修改后的 網站資源掃描工具:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# by 默不知然 2018-03-15
"""Directory brute-force scanner that sends every probe through a random
proxy (scraped by :func:`agent_list`) so the scanning IP is not banned.

Usage: scan.py -u <base_url> -f <wordlist_file> -n <max_concurrent_threads>
"""

import getopt
import random
import sys
import threading
import time

import requests
from bs4 import BeautifulSoup


class scan_thread(threading.Thread):
    """Worker thread: request one candidate URL through a random proxy and
    record it in the shared ``real_url_list`` when the server answers 200."""

    # Shared result list, collected by main() after the scan.
    global real_url_list
    real_url_list = []

    def __init__(self, url):
        threading.Thread.__init__(self)
        self.url = url  # full candidate URL to probe

    def run(self):
        try:
            # Pick one proxy dict at random from the scraped pool.
            proxy = random.sample(agent_lists, 1)[0]
            r = requests.get(self.url, proxies=proxy)
            print(self.url, '------->>', str(r.status_code))
            if int(r.status_code) == 200:
                real_url_list.append(self.url)
        except Exception as e:
            print(e)
        finally:
            # BUG FIX: the original decremented the in-flight counter only on
            # success, so every failed request permanently consumed a slot and
            # the dispatcher could stall forever once ``threshold`` requests
            # had errored.  Decrement unconditionally.
            # NOTE(review): ``l[0] -= 1`` is not atomic across threads; under
            # contention the count can drift — consider a threading.Lock.
            l[0] = l[0] - 1


def url_makeup(dicts, url, threshold):
    """Build candidate URLs from the wordlist file *dicts* appended to base
    *url*, and launch scanner threads while keeping at most *threshold*
    requests in flight."""
    global url_list
    global l
    url_list = []
    l = [0]  # in-flight request counter, shared with the worker threads
    with open(str(dicts), 'r') as f:
        code_list = f.readlines()
    for line in code_list:
        # Strip both CR and LF so DOS- and Unix-format wordlists work.
        url_list.append(url + line.replace('\r', '').replace('\n', ''))
    while len(url_list):
        try:
            if l[0] < threshold:
                n = url_list.pop(0)
                l[0] = l[0] + 1
                scan_thread(n).start()
            else:
                # BUG FIX: the original spun at 100% CPU while every slot was
                # occupied; yield briefly instead.
                time.sleep(0.05)
        except KeyboardInterrupt:
            print('用戶停止了程序,完成目錄掃描。')
            sys.exit()


def get_args():
    """Parse ``-u`` (base URL), ``-f`` (wordlist path) and ``-n`` (thread
    threshold) from ``sys.argv`` into the corresponding globals."""
    global get_url
    global get_dicts
    global get_threshold
    try:
        options, args = getopt.getopt(sys.argv[1:], "u:f:n:")
    except getopt.GetoptError:
        print("錯誤參數")
        sys.exit()
    for option, arg in options:
        if option == '-u':
            get_url = arg
        if option == '-f':
            get_dicts = arg
        if option == '-n':
            get_threshold = int(arg)


def agent_list(url):
    """Scrape the proxy listing at *url* and fill the global ``agent_lists``
    with {protocol: "http://ip:port"} dicts usable as ``proxies=``."""
    global agent_lists
    agent_lists = []
    header = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:57.0) Gecko/20100101 Firefox/57.0'}
    r = requests.get(url, headers=header)
    # Rows of the proxy table; [1:] skips the header row.
    agent_info = BeautifulSoup(r.content, 'html.parser').find(id="ip_list").find_all('tr')[1:]
    for row in agent_info:
        info = row.find_all('td')
        # Columns: info[1] = IP, info[2] = port, info[5] = protocol
        # (assumes the xicidaili table layout — TODO confirm).
        # BUG FIX: the original omitted the port, producing unusable
        # "http://ip" proxy URLs.
        agents = {info[5].string.lower(): 'http://' + info[1].string + ':' + info[2].string}
        agent_lists.append(agents)


def main():
    """Entry point: fetch proxies, parse CLI args, run the scan and write
    the discovered directories to a result file."""
    agent_url = 'http://www.xicidaili.com/nn/1'
    agent_list(agent_url)
    get_args()
    url = get_url
    dicts = get_dicts
    threshold = get_threshold
    url_makeup(dicts, url, threshold)
    # Give the last in-flight worker threads a moment to finish before
    # reporting (NOTE(review): joining the threads would be more reliable).
    time.sleep(0.5)
    print('目標網站存在目錄: ', '\n', real_url_list)
    with open(r'網站目錄掃描結果.txt', 'w') as f:
        for i in real_url_list:
            f.write(i)
            f.write('\n')


if __name__ == '__main__':
    main()
對某網站掃描結果:
?
轉載于:https://www.cnblogs.com/kaiho/p/8578848.html
總結
以上是生活随笔為你收集整理的改进 网站资源探测工具(添加代理)的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Oracle-计算岁数
- 下一篇: mvc:default-servlet-