爬取中国大学排名
import requests
from bs4 import BeautifulSoup
import bs4
# Ranking page scraped by this script (2019 edition, per the URL).
url = "http://www.zuihaodaxue.cn/zuihaodaxuepaiming2019.html"
# Accumulator for the scraped rows; passed to and returned by getUniv().
ulist = []
# Fetch the HTML page
def getHTML(url):
    """Download a page and return its text, or "" if the request fails.

    Args:
        url: Address of the page to fetch.

    Returns:
        The decoded response body, or an empty string when the request
        raises a ``requests`` error (connection failure, timeout, or an
        error status reported by ``raise_for_status``).
    """
    try:
        r = requests.get(url, timeout=30)
        r.raise_for_status()
        # Decode with the encoding detected from the body rather than the
        # one announced in the response headers.
        r.encoding = r.apparent_encoding
        return r.text
    except requests.RequestException:
        # Only network/HTTP failures are treated as "no page"; anything else
        # (including Ctrl-C) propagates instead of being silently swallowed.
        print("異常")
        return ""


# Extract the ranking information
def getUniv(ulist, html):
    """Append the first four cells of every table row in *html* to *ulist*.

    Args:
        ulist: List extended in place with one ``[td0, td1, td2, td3]`` entry
            per row; each value is the cell's ``.string`` (which may be None).
        html: HTML text containing the ranking table; may be empty.

    Returns:
        The same *ulist* object that was passed in.
    """
    soup = BeautifulSoup(html, "html.parser")
    tbody = soup.tbody
    if tbody is None:
        # Empty or unexpected page (e.g. the download failed): nothing to add.
        return ulist
    for tr in tbody.children:
        # Children also include plain text nodes; keep real tags only.
        if not isinstance(tr, bs4.element.Tag):
            continue
        tds = tr('td')  # calling a Tag is shorthand for find_all
        if len(tds) < 4:
            # Row without the four expected cells; skip instead of crashing.
            continue
        ulist.append([td.string for td in tds[:4]])
    return ulist


# Print the ranking information
def readUniv(ulist):
    """Ask how many entries to show and print that many rows of *ulist*.

    Args:
        ulist: Sequence of rows, each indexable as
            ``[rank, name, city, score]``.

    Raises:
        ValueError: If the typed value is not an integer.
    """
    # int() rather than eval(): the value comes straight from the user and
    # must never be executed as code.
    s = int(input("請輸入要查詢前多少名大學:"))
    print("前{}名的排名如下:".format(s))
    row_fmt = "{:^3}\t{:^20}\t{:^10}\t{:^5}"
    print(row_fmt.format("排名", "學校名稱", "城市", "分數"))
    # Slicing caps the request at the rows actually available; max() keeps a
    # negative count from being read as "all but the last n".
    for u in ulist[:max(s, 0)]:
        print(row_fmt.format(u[0], u[1], u[2], u[3]))


# Script entry: download the page, parse the table, then show the top rows.
html = getHTML(url)
ulist = getUniv(ulist, html)
readUniv(ulist)
?
總結
- 上一篇: Python之网络图片爬取
- 下一篇: 信息提取的一般方法