日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

歡迎訪問 生活随笔!

生活随笔

當前位置: 首頁 > 编程语言 > python >内容正文

python

路飞学院-Python爬虫实战密训班-第2章

發布時間:2025/7/14 python 30 豆豆
生活随笔 收集整理的這篇文章主要介紹了 路飞学院-Python爬虫实战密训班-第2章 小編覺得挺不錯的,現在分享給大家,幫大家做個參考.

通過架設flask或django web服務器,同時后臺采用requests和bs4模塊來爬取web微信程序相關信息。

?

在編寫代碼的過程中遇到了一些問題,總結如下:

1、需要認真分析每一個網絡請求包。

2、在收發消息時,請求需要攜帶登錄過程中獲取的 cookies。

3、SyncKey 等相關信息在之前的請求中就已經返回了(下面的代碼是從 webwxinit 初始化請求的返回值中讀取 SyncKey),一定要仔細分析每個網絡請求:所有請求中的參數都可以在之前的服務器返回值中找到;有些參數是固定的,如 deviceid;有些用 time.time() 生成即可。

?

?

import re
import time
import json
import requests
from django.shortcuts import render
from django.shortcuts import HttpResponse
# Module-level state shared by all the views below.

# Timestamp string generated in login(); reused by long_polling() as the `_`
# query parameter of the login-status request.
CURRENT_TIME = None
# QR-code uuid extracted from the jslogin response in login().
QCODE = None

# Cookies returned by the login-status request once window.code=200 is seen.
LOGIN_COOKIE_DICT = {}
# Cookies returned by the follow-up request to the redirect_uri.
TICKET_COOKIE_DICT = {}
# Tag-name -> text pairs parsed from the redirect_uri response; the views read
# the 'pass_ticket', 'wxsid', 'skey' and 'wxuin' entries from it.
TICKET_DICT = {}
# Value sent as the `tip` query parameter when polling the login status;
# long_polling() sets it to 0 after it sees window.code=201.
TIPS = 1

# Decoded webwxinit response stored by index(); get_msg() reads its 'SyncKey'
# entry and overwrites it after each webwxsync call.
USER_INIT_DATA = {}

def login(request):
    """Request a fresh WeChat web-login uuid and render the QR-code page.

    Side effects: stores the request timestamp in CURRENT_TIME and the uuid in
    QCODE (both are read by long_polling()), and resets TIPS to 1 because a new
    QR code has not been scanned yet.

    :param request: Django HttpRequest.
    :return: the rendered 'login.html' with the uuid passed as ``code``, or a
        502 response when the upstream request fails or carries no uuid.
    """
    global CURRENT_TIME, QCODE, TIPS

    base_qcode_url = 'https://login.wx.qq.com/jslogin?appid=wx782c26e4c19acffb&redirect_uri=https%3A%2F%2Fwx.qq.com%2Fcgi-bin%2Fmmwebwx-bin%2Fwebwxnewloginpage&fun=new&lang=zh_CN&_={0}'
    CURRENT_TIME = str(time.time())
    q_code_url = base_qcode_url.format(CURRENT_TIME)

    try:
        # Bound the wait so an unresponsive upstream cannot hang the worker.
        response = requests.get(q_code_url, timeout=10)
    except requests.RequestException as exc:
        print(exc)
        return HttpResponse('failed to reach the login server', status=502)

    # QR-code suffix (uuid). Non-greedy so the match stops at the first
    # closing quote; search() lets us detect "no uuid" instead of raising
    # IndexError.
    match = re.search(r'uuid = "(.*?)";', response.text)
    if match is None:
        return HttpResponse('failed to obtain a login QR code', status=502)

    QCODE = match.group(1)
    # A previous login may have left TIPS at 0; restore the initial value for
    # the new, not-yet-scanned QR code.
    TIPS = 1
    return render(request, 'login.html', {'code': QCODE})

def long_polling(request):
    """Poll the login-status endpoint once and report the result as JSON.

    The JSON body is ``{'status': ..., 'data': ...}``:

    * 408 (default) - neither marker was found, or any exception occurred;
    * 201 - 'window.code=201' was seen; ``data`` is the extracted avatar
      string and TIPS is set to 0;
    * 200 - 'window.code=200' was seen; the login cookies, the ticket cookies
      and the parsed ticket fields have been stored in LOGIN_COOKIE_DICT,
      TICKET_COOKIE_DICT and TICKET_DICT.

    :param request: Django HttpRequest.
    :return: HttpResponse carrying the JSON-encoded status dict.
    """
    global TIPS

    print('polling....')
    result = {'status': 408, 'data': None}
    # Example request:
    # https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid=IZpsHyzTNw==&tip=1&r=-897465901&_=1486956149964
    # Possible codes: 408, 201, 200
    try:
        status_url = 'https://login.wx.qq.com/cgi-bin/mmwebwx-bin/login?loginicon=true&uuid={0}&tip={1}&r=-897465901&_={2}'.format(
            QCODE, TIPS, CURRENT_TIME)
        status_response = requests.get(status_url)
        status_text = status_response.text

        if "window.code=201" in status_text:
            TIPS = 0
            result['data'] = re.findall("userAvatar = '(.*)';", status_text)[0]
            result['status'] = 201
        elif 'window.code=200' in status_text:
            # The scan has been confirmed: keep the cookies of this response.
            LOGIN_COOKIE_DICT.update(status_response.cookies.get_dict())
            ticket_url = re.findall('redirect_uri="(.*)";', status_text)[0] + '&fun=new&version=v2&lang=zh_CN'

            # Fetch the ticket: keep both its cookies and its body.
            ticket_response = requests.get(ticket_url, cookies=LOGIN_COOKIE_DICT)
            TICKET_COOKIE_DICT.update(ticket_response.cookies.get_dict())
            print(ticket_response.text)

            from bs4 import BeautifulSoup
            document = BeautifulSoup(ticket_response.text, 'html.parser')
            # Iterating the first tag walks its children; store name -> text.
            for child in document.find():
                TICKET_DICT[child.name] = child.string

            result['status'] = 200
    except Exception as error:
        print(error)

    return HttpResponse(json.dumps(result))


def index(request):
    """Initialise the logged-in user's data via webwxinit and render it.

    Posts a BaseRequest built from the ticket fields in TICKET_DICT, together
    with the cookies collected during login, to the webwxinit endpoint. The
    decoded JSON response is merged into USER_INIT_DATA (read later by
    send_msg() and get_msg()) and passed to the template.

    :param request: Django HttpRequest.
    :return: the rendered 'index.html' with the decoded response as ``data``.
    :raises KeyError: if TICKET_DICT lacks the ticket fields, i.e. the login
        flow has not completed.
    """
    # Example request:
    # https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?r=-909239606&lang=zh_CN&pass_ticket=Tpc2XEec%252BJ0q2qNRw6nqWzGSsQ3jM2LZtBCVJZfjvMTDxjiyJ9mO5eRtCNOveeXO
    #
    # Use https like every other endpoint in this module so the pass_ticket
    # and session cookies are not sent in clear text.
    user_init_url = 'https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxinit?pass_ticket=%s&r=%s' % (
        TICKET_DICT['pass_ticket'], int(time.time()))

    form_data = {
        'BaseRequest': {
            'DeviceID': 'e531777446530354',
            'Sid': TICKET_DICT['wxsid'],
            'Skey': TICKET_DICT['skey'],
            'Uin': TICKET_DICT['wxuin'],
        }
    }

    # Ticket cookies are applied last, so they win on duplicate names.
    all_cookie_dict = {}
    all_cookie_dict.update(LOGIN_COOKIE_DICT)
    all_cookie_dict.update(TICKET_COOKIE_DICT)

    response_init = requests.post(user_init_url, json=form_data, cookies=all_cookie_dict)
    # Force UTF-8 before reading .text so the body is decoded as UTF-8.
    response_init.encoding = 'utf-8'
    user_init_data = json.loads(response_init.text)
    USER_INIT_DATA.update(user_init_data)

    return render(request, 'index.html', {'data': user_init_data})


def contact_list(request):
    """
    Fetch the contact list from webwxgetcontact and render it.

    :param request: Django HttpRequest
    :return: the rendered 'contact_list.html' with the decoded JSON response
        passed as ``obj``
    """
    # Endpoint template:
    # https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?lang=zh_CN&pass_ticket={0}&r={1}&seq=0&skey={2}
    url_template = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxgetcontact?lang=zh_CN&pass_ticket={0}&r={1}&seq=0&skey={2}"
    pass_ticket = TICKET_DICT['pass_ticket']
    timestamp = str(time.time())
    contact_url = url_template.format(pass_ticket, timestamp, TICKET_DICT['skey'])

    # Login cookies first, ticket cookies second (the latter win on clashes).
    session_cookies = {**LOGIN_COOKIE_DICT, **TICKET_COOKIE_DICT}

    contact_response = requests.get(contact_url, cookies=session_cookies)
    contact_response.encoding = 'utf-8'
    contacts = json.loads(contact_response.text)
    return render(request, 'contact_list.html', {'obj': contacts})


def send_msg(request):
    """Send a text message to a contact through webwxsendmsg.

    Reads the recipient (``user_id``) and the text (``user_msg``) from
    ``request.POST``; the sender is the 'UserName' of the 'User' entry in
    USER_INIT_DATA.

    :param request: Django HttpRequest carrying the POST fields above.
    :return: HttpResponse('ok') once the request has been sent, or a 400
        response when either POST field is missing or empty.
    :raises KeyError: if USER_INIT_DATA / TICKET_DICT have not been populated
        by the login and init views.
    """
    to_user_id = request.POST.get('user_id')
    msg = request.POST.get('user_msg')
    if not to_user_id or not msg:
        # Previously a missing message was sent as the literal text "None".
        return HttpResponse('user_id and user_msg are required', status=400)

    from_user_id = USER_INIT_DATA['User']['UserName']
    send_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsendmsg?lang=zh_CN&pass_ticket=" + TICKET_DICT['pass_ticket']

    form_data = {
        'BaseRequest': {
            'DeviceID': 'e531777446530354',
            'Sid': TICKET_DICT['wxsid'],
            'Skey': TICKET_DICT['skey'],
            'Uin': TICKET_DICT['wxuin'],
        },
        'Msg': {
            "ClientMsgId": str(time.time()),
            "Content": msg,
            "FromUserName": from_user_id,
            "LocalID": str(time.time()),
            "ToUserName": to_user_id,
            "Type": 1,
        },
        'Scene': 0,
    }

    # Serialise once, with the message already in place. ensure_ascii=False
    # keeps non-ASCII text as raw UTF-8 instead of \uXXXX escapes, while
    # json.dumps still escapes quotes, backslashes and newlines. Substituting
    # the raw message into an already-serialised string did not escape them,
    # so such input produced invalid (or injected) JSON, and a '%' in any
    # other field broke the %-formatting.
    form_data_bytes = json.dumps(form_data, ensure_ascii=False).encode('utf-8')

    all_cookie_dict = {}
    all_cookie_dict.update(LOGIN_COOKIE_DICT)
    all_cookie_dict.update(TICKET_COOKIE_DICT)

    response = requests.post(
        send_url,
        data=form_data_bytes,
        cookies=all_cookie_dict,
        headers={'Content-Type': 'application/json'},
    )
    print(response.text)

    return HttpResponse('ok')

def get_msg(request):
    """Check for new messages and, if there are any, fetch and print them.

    First calls synccheck with the current sync key. When the reply contains
    ``selector:"2"``, posts to webwxsync, stores the returned 'SyncKey' back
    into USER_INIT_DATA for the next check, and prints each entry of
    'AddMsgList'.

    :param request: Django HttpRequest.
    :return: HttpResponse('ok') in all non-error cases.
    :raises KeyError: if USER_INIT_DATA / TICKET_DICT have not been populated,
        or the webwxsync reply lacks 'SyncKey' / 'AddMsgList'.
    """
    # Check whether new messages have arrived.
    sync_url = "https://webpush.weixin.qq.com/cgi-bin/mmwebwx-bin/synccheck"

    # The sync key is sent as "Key_Val" pairs joined with '|'.
    sync_data_str = "|".join(
        "%s_%s" % (item['Key'], item['Val'])
        for item in USER_INIT_DATA['SyncKey']['List']
    )
    sync_dict = {
        "r": int(time.time()),
        "skey": TICKET_DICT['skey'],
        "sid": TICKET_DICT['wxsid'],
        "uin": TICKET_DICT['wxuin'],
        "deviceid": "e531777446530354",
        "synckey": sync_data_str,
    }

    all_cookie = {}
    all_cookie.update(LOGIN_COOKIE_DICT)
    all_cookie.update(TICKET_COOKIE_DICT)

    response_sync = requests.get(sync_url, params=sync_dict, cookies=all_cookie)
    print(response_sync.text)

    if 'selector:"2"' in response_sync.text:
        fetch_msg_url = "https://wx.qq.com/cgi-bin/mmwebwx-bin/webwxsync?sid=%s&skey=%s&lang=zh_CN&pass_ticket=%s" % (
            TICKET_DICT['wxsid'], TICKET_DICT['skey'], TICKET_DICT['pass_ticket'])

        form_data = {
            'BaseRequest': {
                'DeviceID': 'e531777446530354',
                'Sid': TICKET_DICT['wxsid'],
                'Skey': TICKET_DICT['skey'],
                'Uin': TICKET_DICT['wxuin'],
            },
            'SyncKey': USER_INIT_DATA['SyncKey'],
            'rr': str(time.time()),
        }
        # Carry the session cookies here as well: every other authenticated
        # request in this module sends them, and this one previously did not.
        response_fetch_msg = requests.post(fetch_msg_url, json=form_data, cookies=all_cookie)
        response_fetch_msg.encoding = 'utf-8'
        res_fetch_msg_dict = json.loads(response_fetch_msg.text)

        # Remember the new sync key so the next synccheck starts from here.
        USER_INIT_DATA['SyncKey'] = res_fetch_msg_dict['SyncKey']
        for item in res_fetch_msg_dict['AddMsgList']:
            print(item['Content'], ":::::", item['FromUserName'], "---->", item['ToUserName'], )

    return HttpResponse('ok')

轉載于:https://www.cnblogs.com/aili10000/p/9282184.html

總結

以上是生活随笔為你收集整理的路飞学院-Python爬虫实战密训班-第2章的全部內容,希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯,歡迎將生活随笔推薦給好友。