【Grades Crawler】Writing a Python crawler to scrape grades from the Xidian Academic Affairs system and save them locally

Published: 2024/3/12
  • Grades Crawler: project introduction
    Grades in the Academic Affairs Office system are not easy to print out yourself, so I wrote Grades Crawler. Given a valid student account (your own credentials), it scrapes your grades from the system and saves them locally, either as an Excel file or directly into an SQLite database.
  • Additional libraries to install
    requests, bs4, xlrd, xlwt (xlrd and xlwt are used to save the grades in Excel format)
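    If these are not already present, the usual pip package names should be requests, beautifulsoup4 (which provides bs4), xlrd and xlwt, so an install along these lines is expected to work:

    pip install requests beautifulsoup4 xlrd xlwt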

  • Python code (a detailed walkthrough will follow when I have time)

# -*-encoding:utf-8-*-
# coding=utf-8
__author__ = 'ysc'
import requests
from bs4 import BeautifulSoup
import xlrd
import xlwt


class ScrapeGrade:
    def __init__(self, auth_url=None, log_url=None):
        if not auth_url:
            self.auth_url = "http://ids.xidian.edu.cn/authserver/login?service=http%3A%2F%2Fjwxt.xidian.edu.cn%2Fcaslogin.jsp"
            self.log_url = "http://jwxt.xidian.edu.cn/caslogin.jsp"
        else:
            self.auth_url = auth_url
            self.log_url = log_url
        self.session = requests.Session()

    def login(self, id='1302051****', password='****'):
        r = self.session.get(self.auth_url)
        data = r.text
        bsObj = BeautifulSoup(data, "html.parser")
        # the CAS login form carries one-time hidden tokens "lt" and "execution"
        # that have to be posted back together with the credentials
        lt_value = bsObj.find(attrs={"name": "lt"})['value']
        exe_value = bsObj.find(attrs={"name": "execution"})['value']
        params = {'username': id, 'password': password,
                  "submit": "", "lt": lt_value, "execution": exe_value,
                  "_eventId": "submit", "rmShown": '1'}
        headers = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; WOW64; rv:44.0) Gecko/20100101 Firefox/44.0",
                   'Accept': "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
                   # "Host": "ids.xidian.edu.cn",
                   "Accept-Language": "zh-CN,zh;q=0.8,en-US;q=0.5,en;q=0.3",
                   "Accept-Encoding": "gzip, deflate",
                   "Referer": "http://ids.xidian.edu.cn/authserver/login?service=http%3A%2F%2Fjwxt.xidian.edu.cn%2Fcaslogin.jsp",
                   # 'X-Requested-With': "XMLHttpRequest",
                   "Content-Type": "application/x-www-form-urlencoded"}
        s = self.session.post(self.auth_url, data=params, headers=headers)
        s = self.session.get(self.log_url)

    def store_into_db_by_term(self):
        # group the grades by term
        import sqlite3
        conn = sqlite3.connect('grades_term.db')
        # conn.text_factory = str  ##!!!
        c = conn.cursor()
        try:
            # init the counter of the sheet
            row = 0
            # open the all-terms grade page (lnxndm is the GBK URL-encoded term name)
            grade_page = self.session.get("http://jwxt.xidian.edu.cn/gradeLnAllAction.do?type=ln&oper=qbinfo&lnxndm=2015-2016%D1%A7%C4%EA%B5%DA%D2%BB%D1%A7%C6%DA(%C1%BD%D1%A7%C6%DA)")
            bsObj2 = BeautifulSoup(grade_page.text, "html.parser")
            # datas contains every term's grades, one table per term
            datas = bsObj2.find_all("table", attrs={"class": "titleTop2"})
            # seme is one term's grade table
            for i, seme in enumerate(datas):
                # write a header row from the th cells
                ths = seme.find_all('th')
                titles = []
                for col, th in enumerate(ths):
                    print(th.string.strip(), end=' ')
                    th = th.string.strip()
                    # the credit (学分) and grade (成绩) columns are declared REAL, everything else TEXT
                    if th != '学分' and th != "成绩":
                        titles.append(th + r' text')
                    else:
                        titles.append(th + r' real')
                    # table.write(row, col, th.string.strip(), self.set_style('Times New Roman', 220, True))
                # Create table
                sent = '''CREATE TABLE {0} ( '''.format('table' + str(i+1))
                for ith, title in enumerate(titles):
                    sent += title
                    if ith < len(titles) - 1:
                        sent += ", "
                sent += ")"
                try:
                    c.execute(sent)
                    conn.commit()
                except sqlite3.OperationalError:
                    pass
                print('\n')
                row += 1
                # each course's grade cells
                subs = seme.findAll('td', attrs={"align": "center"})
                col_iter = 0
                len_ths = len(ths)
                grade_subs = []
                # sub is a single cell of one course's grade row
                for sub in subs:
                    if sub.string:
                        if sub.string.strip() != '':
                            print(sub.string.strip(), end=' ')
                            grade_subs.append("'" + sub.string.strip() + "'")
                        else:
                            print("' '", end=' ')
                            grade_subs.append("' '")
                    else:
                        print(sub.find('p').string.strip(), end=' ')
                        grade_subs.append("'" + sub.find('p').string.strip() + "'")
                    col_iter += 1
                    if col_iter == len_ths:
                        # one course's row has now been visited; move on to the next course
                        print('\n')
                        # Insert a row of data
                        sent = '''INSERT INTO {0} VALUES( '''.format('table' + str(i+1))
                        for ith, grade_sub in enumerate(grade_subs):
                            sent += grade_sub
                            if ith < len(grade_subs) - 1:
                                sent += ", "
                        sent += ")"
                        try:
                            c.execute(sent)
                            conn.commit()
                        except sqlite3.OperationalError as e:
                            print(e)
                            print(sent)
                            exit(-2)
                        row += 1
                        col_iter = 0
                        grade_subs = []
                print("\n")
            # save into xls (leftover comment from store_into_xls)
        finally:
            conn.close()

    def store_into_db_by_prop(self):
        # group the grades by course property (compulsory / elective)
        import sqlite3
        conn = sqlite3.connect('grades_prop.db')
        c = conn.cursor()
        try:
            # init the counter of the sheet
            row = 0
            # open the grades page grouped by course property
            grade_page = self.session.get("http://jwxt.xidian.edu.cn/gradeLnAllAction.do?type=ln&oper=sxinfo&lnsxdm=001")
            bsObj2 = BeautifulSoup(grade_page.text, "html.parser")
            # datas contains every group's grades, one table per group
            datas = bsObj2.find_all("table", attrs={"class": "titleTop2"})
            # seme is one group's grade table
            for i, seme in enumerate(datas):
                # write a header row from the th cells
                ths = seme.find_all('th')
                titles = []
                for col, th in enumerate(ths):
                    print(th.string.strip(), end=' ')
                    th = th.string.strip()
                    if th != '学分' and th != "成绩":
                        titles.append(th + r' text')
                    else:
                        titles.append(th + r' real')
                    # table.write(row, col, th.string.strip(), self.set_style('Times New Roman', 220, True))
                # Create table
                sent = '''CREATE TABLE {0} ( '''.format('table' + str(i+1))
                for ith, title in enumerate(titles):
                    sent += title
                    if ith < len(titles) - 1:
                        sent += ", "
                sent += ")"
                try:
                    c.execute(sent)
                    conn.commit()
                except sqlite3.OperationalError:
                    pass
                print('\n')
                row += 1
                # each course's grades, one tr per course
                subs = seme.findAll('tr', attrs={'class': "odd"})
                col_iter = 0
                len_ths = len(ths)
                grade_subs = []
                # sub is one course's table row
                for sub in subs:
                    infors = sub.findAll('td')  # , attrs={"align": "center"})
                    for infor in infors:
                        if infor.string:
                            if infor.string.strip() != '':
                                print(infor.string.strip(), end=' ')
                                grade_subs.append("'" + infor.string.strip() + "'")
                            else:
                                print("' '", end=' ')
                                grade_subs.append("' '")
                        else:
                            infor = infor.find('p').string.strip()
                            if infor != '':
                                print(infor, end=' ')
                                grade_subs.append("'" + infor + "'")
                            else:
                                print("' '", end=' ')
                                grade_subs.append("' '")
                    # one course's row has now been visited; move on to the next course
                    print('\n')
                    # Insert a row of data
                    sent = '''INSERT INTO {0} VALUES( '''.format('table' + str(i+1))
                    for ith, grade_sub in enumerate(grade_subs):
                        sent += grade_sub
                        if ith < len(grade_subs) - 1:
                            sent += ", "
                    sent += ")"
                    try:
                        c.execute(sent)
                        conn.commit()
                    except sqlite3.OperationalError as e:
                        print(e)
                        print(sent)
                        exit(-2)
                    row += 1
                    col_iter = 0
                    grade_subs = []
                print("\n")
            # save into xls (leftover comment from store_into_xls)
        finally:
            conn.close()

    def set_style(self, name, height, bold=False):
        style = xlwt.XFStyle()
        font = xlwt.Font()
        font.name = name  # 'Times New Roman'
        font.bold = bold
        font.color_index = 4
        font.height = height
        '''
        borders = xlwt.Borders()
        borders.left = 6
        borders.right = 6
        borders.top = 6
        borders.bottom = 6
        '''
        style.font = font
        # style.borders = borders
        return style

    def store_into_xls(self):
        file = xlwt.Workbook()
        table = file.add_sheet('grades', cell_overwrite_ok=True)
        # init the counter of the sheet
        row = 0
        # open the all-terms grade page
        grade_page = self.session.get("http://jwxt.xidian.edu.cn/gradeLnAllAction.do?type=ln&oper=qbinfo&lnxndm=2015-2016%D1%A7%C4%EA%B5%DA%D2%BB%D1%A7%C6%DA(%C1%BD%D1%A7%C6%DA)")
        bsObj2 = BeautifulSoup(grade_page.text, "html.parser")
        # datas contains every term's grades, one table per term
        datas = bsObj2.find_all("table", attrs={"class": "titleTop2"})
        # seme is one term's grade table
        for seme in datas:
            # write a header row from the th cells
            ths = seme.find_all('th')
            for col, th in enumerate(ths):
                print(th.string.strip(), end=' ')
                table.write(row, col, th.string.strip(), self.set_style('Times New Roman', 220, True))
            print('\n')
            row += 1
            # each course's grade cells
            subs = seme.findAll('td', attrs={"align": "center"})
            col_iter = 0
            len_ths = len(ths)
            # sub is a single cell of one course's grade row
            for sub in subs:
                if sub.string:
                    print(sub.string.strip(), end=' ')
                    table.write(row, col_iter, sub.string.strip())
                else:
                    print(sub.find('p').string.strip(), end=' ')
                    table.write(row, col_iter, sub.find('p').string.strip())
                col_iter += 1
                if col_iter == len_ths:
                    print('\n')
                    row += 1
                    col_iter = 0
            print("\n")
        # save into the xls file
        file.save('demo.xls')


if __name__ == '__main__':
    # initialise the crawler object
    sg = ScrapeGrade()
    # log in (pass your own valid student id and password here)
    sg.login(id='1302051***', password='1234567')
    # save the grades as an Excel file
    sg.store_into_xls()
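
    As a quick way to check the result, here is a minimal sketch (not part of the original post) that reads back the grades_term.db file written by store_into_db_by_term(). It assumes only what the code above creates, namely one table per term named table1, table2, and so on:

import sqlite3

# open the database produced by ScrapeGrade.store_into_db_by_term()
conn = sqlite3.connect('grades_term.db')
c = conn.cursor()
# list every table the crawler created (one per term)
c.execute("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name")
tables = [name for (name,) in c.fetchall()]
for name in tables:
    print('==', name, '==')
    # print each course row exactly as it was inserted
    for row in c.execute('SELECT * FROM {0}'.format(name)):
        print(row)
conn.close()

    The same idea works for grades_prop.db produced by store_into_db_by_prop(); only the file name changes.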
