當前位置：首頁 > 编程资源 > 编程问答 > 内容正文

编程问答

howdoi 简单分析

發布時間：2023/12/15 编程问答 23 豆豆

生活随笔收集整理的這篇文章主要介紹了 howdoi 简单分析，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

對howdoi的一個簡單分析。

曾經看到過下面的這樣一段js代碼：

try {
    doSth();
} catch (e) {
    // On failure, open a Stack Overflow search for the error text.
    var ask_url = "https://stackoverflow.com/search?q=";
    window.location.href = ask_url + encodeURIComponent(e);
}

howdoi基本就是把這個流程做成了Python腳本。其基本流程如下：

step1：利用site語法組裝搜索語句（默認指定搜索stackoverflow網站）
step2：利用google搜索接口獲取搜索引擎第一頁排名第一的鏈接
step3：訪問該鏈接，根據排名從高到低，提取代碼塊文本
step4：提取到就顯示到終端，沒有提取到就提示未找到答案

當然，howdoi也做了一些其他的工作：

代理設置
對既往問題進行緩存，提高下次查詢的速度
查詢的目標網站可配置
做成Python script腳本命令，方便快捷
代碼高亮格式化輸出

更多分析請看代碼注釋：

#!/usr/bin/env python

######################################################
#
# howdoi - instant coding answers via the command line
# written by Benjamin Gleitzman (gleitz@mit.edu)
# inspired by Rich Jones (rich@anomos.info)
#
######################################################

import argparse  # command-line argument parsing
import glob
import os
import random
import re
import requests  # HTTP(S) requests
import requests_cache
import sys
from . import __version__

# Coloured, syntax-highlighted terminal output
from pygments import highlight
from pygments.lexers import guess_lexer, get_lexer_by_name
from pygments.formatters.terminal import TerminalFormatter
from pygments.util import ClassNotFound

# HTML parsing
from pyquery import PyQuery as pq
from requests.exceptions import ConnectionError
from requests.exceptions import SSLError

# Python 2.x / 3.x compatibility shims.
# Compare the numeric major version: comparing the ``sys.version`` string
# lexicographically would misclassify any major version >= 10.
if sys.version_info[0] < 3:
    import codecs
    from urllib import quote as url_quote
    from urllib import getproxies

    # Handling unicode: http://stackoverflow.com/a/6633040/305414
    def u(x):
        """Decode unicode escape sequences in *x* (Python 2 only)."""
        return codecs.unicode_escape_decode(x)[0]
else:
    from urllib.request import getproxies
    from urllib.parse import quote as url_quote

    def u(x):
        """Return *x* unchanged (Python 3 strings are already unicode)."""
        return x


# Google search URL template: {0} is the target site, {1} the quoted query.
if os.getenv('HOWDOI_DISABLE_SSL'):
    # The environment asks for plain HTTP instead of HTTPS.
    SEARCH_URL = 'http://www.google.com/search?q=site:{0}%20{1}'
    VERIFY_SSL_CERTIFICATE = False
else:
    SEARCH_URL = 'https://www.google.com/search?q=site:{0}%20{1}'
    VERIFY_SSL_CERTIFICATE = True

# Target Q&A site; overridable through the HOWDOI_URL environment variable.
URL = os.getenv('HOWDOI_URL') or 'stackoverflow.com'

# Browser User-Agent strings; one is picked at random for each request so the
# request looks like it comes from a browser rather than a script.
USER_AGENTS = (
    'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:11.0) Gecko/20100101 Firefox/11.0',
    'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:22.0) Gecko/20100101 Firefox/22.0',
    'Mozilla/5.0 (Windows NT 6.1; rv:11.0) Gecko/20100101 Firefox/11.0',
    ('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_4) AppleWebKit/536.5 (KHTML, like Gecko) '
     'Chrome/19.0.1084.46 Safari/536.5'),
    ('Mozilla/5.0 (Windows; Windows NT 6.1) AppleWebKit/536.5 (KHTML, like Gecko) '
     'Chrome/19.0.1084.46 Safari/536.5'),
)

# Answer output formatting
ANSWER_HEADER = u('--- Answer {0} ---\n{1}')
NO_ANSWER_MSG = '< no answer given >'

# Cache file location
XDG_CACHE_DIR = os.environ.get('XDG_CACHE_HOME',
                               os.path.join(os.path.expanduser('~'), '.cache'))
CACHE_DIR = os.path.join(XDG_CACHE_DIR, 'howdoi')
CACHE_FILE = os.path.join(CACHE_DIR, 'cache{0}'.format(
    sys.version_info[0] if sys.version_info[0] == 3 else ''))


def get_proxies():
    """Return the system proxies whose scheme key starts with ``http``.

    Proxy values that do not already start with ``http`` are prefixed with
    ``http://``.
    """
    proxies = getproxies()
    filtered_proxies = {}
    for key, value in proxies.items():
        if key.startswith('http'):
            if not value.startswith('http'):
                filtered_proxies[key] = 'http://%s' % value
            else:
                filtered_proxies[key] = value
    return filtered_proxies


def _get_result(url):
    """Fetch *url* and return the response body as text.

    A random entry of ``USER_AGENTS`` is sent as the User-Agent header and the
    proxies from ``get_proxies()`` are used.

    Raises:
        requests.exceptions.SSLError: re-raised after printing a hint about
            the HOWDOI_DISABLE_SSL environment variable.
    """
    try:
        return requests.get(url, headers={'User-Agent': random.choice(USER_AGENTS)},
                            proxies=get_proxies(),
                            verify=VERIFY_SSL_CERTIFICATE).text
    except requests.exceptions.SSLError:
        print('[ERROR] Encountered an SSL Error. Try using HTTP instead of '
              'HTTPS by setting the environment variable "HOWDOI_DISABLE_SSL".\n')
        # Bare raise keeps the original traceback intact.
        raise


def _get_links(query):
    """Return the result links found on the search page for *query*.

    Links are taken from elements matching ``.l``; if there are none, from
    anchors inside elements matching ``.r``.
    """
    result = _get_result(SEARCH_URL.format(URL, url_quote(query)))
    html = pq(result)  # parse the page with pyquery
    return [a.attrib['href'] for a in html('.l')] or \
        [a.attrib['href'] for a in html('.r')('a')]


def get_link_at_pos(links, position):
    """Return the link at 1-based *position*, or the last link if *position*
    exceeds the number of links. Returns ``False`` when *links* is empty.
    """
    if not links:
        return False

    if len(links) >= position:
        link = links[position - 1]
    else:
        link = links[-1]
    return link


def _format_output(code, args):
    """Return *code* syntax-highlighted for the terminal when
    ``args['color']`` is set; otherwise return it unchanged.
    """
    if not args['color']:
        return code
    lexer = None

    # try to find a lexer using the StackOverflow tags
    # or the query arguments
    for keyword in args['query'].split() + args['tags']:
        try:
            lexer = get_lexer_by_name(keyword)
            break
        except ClassNotFound:
            pass

    # no lexer found above, use the guesser
    if not lexer:
        try:
            lexer = guess_lexer(code)
        except ClassNotFound:
            return code

    return highlight(code,
                     lexer,
                     TerminalFormatter(bg='dark'))


def _is_question(link):
    """Return a match object if *link* contains ``questions/<digits>/``,
    else ``None`` (regular-expression check).
    """
    # Raw string: '\d' in a normal string literal is an invalid escape.
    return re.search(r'questions/\d+/', link)


def _get_questions(links):
    """Return only those *links* that look like question links."""
    return [link for link in links if _is_question(link)]


def _get_answer(args, links):
    """Fetch and extract the answer text for the link at ``args['pos']``.

    Returns ``False`` if no question link is available, the link itself when
    ``args['link']`` is set, and otherwise the stripped answer text. As a side
    effect ``args['tags']`` is set to the text of the page's ``.post-tag``
    elements.
    """
    links = _get_questions(links)
    link = get_link_at_pos(links, args['pos'])
    if not link:
        return False
    if args.get('link'):
        return link
    page = _get_result(link + '?answertab=votes')
    html = pq(page)

    first_answer = html('.answer').eq(0)  # first answer on the page
    # <pre> and <code> elements hold the code we are after
    instructions = first_answer.find('pre') or first_answer.find('code')
    args['tags'] = [t.text for t in html('.post-tag')]

    if not instructions and not args['all']:
        text = first_answer.find('.post-text').eq(0).text()
    elif args['all']:
        texts = []
        for html_tag in first_answer.items('.post-text > *'):
            current_text = html_tag.text()
            if current_text:
                if html_tag[0].tag in ['pre', 'code']:
                    texts.append(_format_output(current_text, args))
                else:
                    texts.append(current_text)
        texts.append('\n---\nAnswer from {0}'.format(link))
        text = '\n'.join(texts)
    else:
        text = _format_output(instructions.eq(0).text(), args)
    if text is None:
        text = NO_ANSWER_MSG
    text = text.strip()
    return text


def _get_instructions(args):
    """Collect ``args['num_answers']`` answers starting at ``args['pos']``.

    Returns ``False`` when the search yields no links, otherwise the answers
    joined by newlines. ``args['pos']`` is updated for each answer fetched.
    """
    links = _get_links(args['query'])

    if not links:
        return False
    answers = []
    append_header = args['num_answers'] > 1
    initial_position = args['pos']
    for answer_number in range(args['num_answers']):
        current_position = answer_number + initial_position
        args['pos'] = current_position
        answer = _get_answer(args, links)
        if not answer:
            continue
        if append_header:
            answer = ANSWER_HEADER.format(current_position, answer)
        answer += '\n'
        answers.append(answer)
    return '\n'.join(answers)


def _enable_cache():
    """Create the cache directory if needed and install the requests cache."""
    if not os.path.exists(CACHE_DIR):
        os.makedirs(CACHE_DIR)
    requests_cache.install_cache(CACHE_FILE)


def _clear_cache():
    """Remove every file whose path starts with ``CACHE_FILE``."""
    for cache in glob.glob('{0}*'.format(CACHE_FILE)):
        os.remove(cache)


def howdoi(args):
    """Main entry point: answer the query in *args* and return the text.

    ``args['query']`` (a list of words) is joined into one string with
    question marks removed. Returns an apology message when nothing is found
    and a network-failure message on ``ConnectionError`` / ``SSLError``.
    """
    # Build the query string (mainly: drop question marks).
    args['query'] = ' '.join(args['query']).replace('?', '')
    try:
        return _get_instructions(args) or 'Sorry, couldn\'t find any help with that topic\n'
    except (ConnectionError, SSLError):
        return 'Failed to establish network connection\n'


def get_parser():
    """Build and return the command-line argument parser."""
    parser = argparse.ArgumentParser(description='instant coding answers via the command line')
    parser.add_argument('query', metavar='QUERY', type=str, nargs='*',
                        help='the question to answer')
    parser.add_argument('-p', '--pos', help='select answer in specified position (default: 1)',
                        default=1, type=int)
    parser.add_argument('-a', '--all', help='display the full text of the answer',
                        action='store_true')
    parser.add_argument('-l', '--link', help='display only the answer link',
                        action='store_true')
    parser.add_argument('-c', '--color', help='enable colorized output',
                        action='store_true')
    parser.add_argument('-n', '--num-answers', help='number of answers to return',
                        default=1, type=int)
    parser.add_argument('-C', '--clear-cache', help='clear the cache',
                        action='store_true')
    parser.add_argument('-v', '--version', help='displays the current version of howdoi',
                        action='store_true')
    return parser


def command_line_runner():
    """Parse the command line, handle the utility flags and print the answer."""
    parser = get_parser()
    args = vars(parser.parse_args())

    # Print the script version.
    if args['version']:
        print(__version__)
        return

    # Clear the cache.
    if args['clear_cache']:
        _clear_cache()
        print('Cache cleared successfully')
        return

    # Without a query, show the help text.
    if not args['query']:
        parser.print_help()
        return

    # Enable the cache unless HOWDOI_DISABLE_CACHE is set in the environment.
    if not os.getenv('HOWDOI_DISABLE_CACHE'):
        _enable_cache()

    # Coloured output requested through the environment.
    if os.getenv('HOWDOI_COLORIZE'):
        args['color'] = True

    # On Python 2 encode the output as UTF-8; on Python 3 print it directly.
    if sys.version_info[0] < 3:
        print(howdoi(args).encode('utf-8', 'ignore'))
    else:
        print(howdoi(args))


if __name__ == '__main__':
    command_line_runner()

轉載于：https://www.cnblogs.com/taceywong/p/8051095.html

總結

以上是生活随笔為你收集整理的howdoi 简单分析的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇：完全模仿lol的手游
下一篇： dns_probe_possible连不