日韩性视频-久久久蜜桃-www中文字幕-在线中文字幕av-亚洲欧美一区二区三区四区-撸久久-香蕉视频一区-久久无码精品丰满人妻-国产高潮av-激情福利社-日韩av网址大全-国产精品久久999-日本五十路在线-性欧美在线-久久99精品波多结衣一区-男女午夜免费视频-黑人极品ⅴideos精品欧美棵-人人妻人人澡人人爽精品欧美一区-日韩一区在线看-欧美a级在线免费观看

欢迎访问 生活随笔!

生活随笔

当前位置: 首页 > 编程资源 > 编程问答 > 内容正文

编程问答

(golang)HTTP基本认证机制及使用gocolly登录爬取

发布时间:2024/4/17 编程问答 29 豆豆
生活随笔收集整理的这篇文章主要介绍了《(golang)HTTP基本认证机制及使用gocolly登录爬取》,小编觉得挺不错的,现在分享给大家,帮大家做个参考。

内网有个网页用了HTTP基本认证机制,想用gocolly爬取,不知道怎么登录,只好研究HTTP基本认证机制。

参考这里:https://www.jb51.net/article/89070.htm

下面开始参考作者dotcoo了 :-)

看了《HTTP权威指南》第12章HTTP基本认证机制(本站下载地址:https://www.jb51.net/books/93254.html),感觉讲得蛮详细的,写了一个小小的例子测试。

请求响应过程:

==>
GET /hello HTTP/1.1
Host: 127.0.0.1:12345

<==
HTTP/1.1 401 Unauthorized
WWW-Authenticate: Basic realm="Dotcoo User Login"

==>
GET /hello HTTP/1.1
Host: 127.0.0.1:12345
Authorization: Basic YWRtaW46YWRtaW5wd2Q=

<==
HTTP/1.1 200 OK
Content-Type: text/plain; charset=utf-8

golang HTTP基本认证机制的实现代码

package main

import (
	"encoding/base64"
	"fmt"
	"io"
	"log"
	"net/http"
	"strings"
)

// basicChallenge is the value sent in the WWW-Authenticate header whenever a
// request is rejected.
const basicChallenge = `Basic realm="Dotcoo User Login"`

// unauthorized rejects the request with 401 Unauthorized and repeats the
// Basic challenge so the client knows it may retry with credentials.
func unauthorized(w http.ResponseWriter) {
	w.Header().Set("WWW-Authenticate", basicChallenge)
	w.WriteHeader(http.StatusUnauthorized)
	io.WriteString(w, "Unauthorized!\n")
}

// HelloServer is a demo handler that walks through HTTP Basic authentication
// by hand: it reads the Authorization header, splits it into scheme and
// payload, base64-decodes the payload and splits it into username and
// password, printing each intermediate value to stdout.
//
// The demo does not compare the credentials against anything: any
// well-formed "user:password" pair is answered with "hello, world!".
// A missing header, a malformed header, a non-Basic scheme, invalid base64
// or a payload without ':' is answered with 401 plus the challenge.
func HelloServer(w http.ResponseWriter, req *http.Request) {
	auth := req.Header.Get("Authorization")
	if auth == "" {
		unauthorized(w)
		return
	}
	fmt.Println(auth)

	// The header has the form "<scheme> <credentials>".
	auths := strings.SplitN(auth, " ", 2)
	if len(auths) != 2 {
		fmt.Println("error")
		unauthorized(w)
		return
	}
	authMethod, authB64 := auths[0], auths[1]

	// Authentication scheme names are case-insensitive, so "basic" and
	// "BASIC" are accepted as well.
	if !strings.EqualFold(authMethod, "Basic") {
		fmt.Println("error")
		unauthorized(w)
		return
	}

	authstr, err := base64.StdEncoding.DecodeString(authB64)
	if err != nil {
		fmt.Println(err)
		unauthorized(w)
		return
	}
	fmt.Println(string(authstr))

	// Split on the first ':' only, so the password itself may contain ':'.
	userPwd := strings.SplitN(string(authstr), ":", 2)
	if len(userPwd) != 2 {
		fmt.Println("error")
		unauthorized(w)
		return
	}
	username, password := userPwd[0], userPwd[1]
	fmt.Println("Username:", username)
	fmt.Println("Password:", password)
	fmt.Println()

	io.WriteString(w, "hello, world!\n")
}

// main serves HelloServer at /hello on port 8000.
func main() {
	http.HandleFunc("/hello", HelloServer)
	if err := http.ListenAndServe(":8000", nil); err != nil {
		log.Fatal("ListenAndServe: ", err)
	}
}

试验了上面的例子后,基本明白了HTTP基本认证的过程。但是怎么用gocolly访问呢?

参考:https://stackoverflow.com/questions/50576248/using-colly-framework-i-cant-login-to-the-evernote-account

但是答复者Matías Insaurralde提供的模拟浏览器访问的例子编译不通过,不明白其中的hptsKey的意思。代码放在下面供参考(可跳过):

package evernote

import (
	"bytes"
	"errors"
	"fmt"
	"io/ioutil"
	"net/http"
	"net/http/cookiejar"
	"net/url"
	"regexp"
	"strings"
)

const (
	evernoteLoginURL = "https://www.evernote.com/Login.action"
)

var (
	evernoteJSParamsExpr = regexp.MustCompile(`document.getElementById\("(.*)"\).value = "(.*)"`)
	evernoteRedirectExpr = regexp.MustCompile(`Redirecting to <a href="(.*)">`)
	errNoMatches         = errors.New("No matches")
	errRedirectURL       = errors.New("Redirect URL not found")

	// hptsKey and hptshKey are the element IDs looked for in the login
	// page's inline JavaScript. The snippet referenced them without
	// declaring them, so it could not compile.
	// NOTE(review): the values are assumed to equal the form field names
	// "hpts" and "hptsh" that Login submits — confirm against the page.
	hptsKey  = []byte("hpts")
	hptshKey = []byte("hptsh")
)

// EvernoteClient wraps all methods required to interact with the website.
type EvernoteClient struct {
	Username   string
	Password   string
	httpClient *http.Client

	// These parameters persist during the login process:
	hpts  string
	hptsh string
}

// NewEvernoteClient initializes a new Evernote client for the given
// credentials. The returned client owns an HTTP client with its own cookie
// jar, so cookies set by one request are sent with the following ones.
func NewEvernoteClient(username, password string) *EvernoteClient {
	// Allocate a new cookie jar to mimic the browser behavior. The error is
	// dropped because this constructor has no error result.
	// NOTE(review): cookiejar.New is not expected to fail for nil options —
	// confirm.
	cookieJar, _ := cookiejar.New(nil)

	// Copy the default values from http.DefaultClient and attach the jar.
	return &EvernoteClient{
		Username: username,
		Password: password,
		httpClient: &http.Client{
			Transport:     http.DefaultTransport,
			CheckRedirect: http.DefaultClient.CheckRedirect,
			Jar:           cookieJar,
			Timeout:       http.DefaultClient.Timeout,
		},
	}
}

// extractJSParams scans body for `document.getElementById("<id>").value =
// "<value>"` assignments and stores the values whose id equals hptsKey or
// hptshKey in e.hpts and e.hptsh. It returns errNoMatches when body contains
// no such assignment.
func (e *EvernoteClient) extractJSParams(body []byte) error {
	matches := evernoteJSParamsExpr.FindAllSubmatch(body, -1)
	if len(matches) == 0 {
		return errNoMatches
	}
	for _, submatches := range matches {
		if len(submatches) < 3 {
			// Report the problem instead of breaking out and returning nil.
			return errNoMatches
		}
		key, val := submatches[1], submatches[2]
		if bytes.Equal(key, hptsKey) {
			e.hpts = string(val)
		}
		if bytes.Equal(key, hptshKey) {
			e.hptsh = string(val)
		}
	}
	return nil
}

// readAndClose reads the whole response body and closes it, so every
// response obtained during the login flow is released.
func readAndClose(res *http.Response) ([]byte, error) {
	if res.Body == nil {
		return nil, errors.New("No response body")
	}
	defer res.Body.Close()
	return ioutil.ReadAll(res.Body)
}

// postLoginForm POSTs values as a URL-encoded form to the login URL with the
// given Accept header and returns the response body.
func (e *EvernoteClient) postLoginForm(values url.Values, accept string) ([]byte, error) {
	req, err := http.NewRequest(http.MethodPost, evernoteLoginURL, bytes.NewBufferString(values.Encode()))
	if err != nil {
		return nil, err
	}
	req.Header.Set("Accept", accept)
	req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
	req.Header.Set("x-requested-with", "XMLHttpRequest")
	req.Header.Set("referer", evernoteLoginURL)

	res, err := e.httpClient.Do(req)
	if err != nil {
		return nil, err
	}
	return readAndClose(res)
}

// Login handles the login action in three requests: fetch the login page and
// extract the "hpts"/"hptsh" parameters, submit the username alone, then
// submit username and password. On success it prints the redirect URL found
// in the final response and returns nil.
func (e *EvernoteClient) Login() error {
	// First step: fetch the login page as a browser visitor would do:
	res, err := e.httpClient.Get(evernoteLoginURL)
	if err != nil {
		return err
	}
	body, err := readAndClose(res)
	if err != nil {
		return err
	}
	if err = e.extractJSParams(body); err != nil {
		return err
	}

	// Second step: we have extracted the "hpts" and "hptsh" parameters.
	// We send a request using only the username and setting "evaluateUsername":
	values := url.Values{}
	values.Set("username", e.Username)
	values.Set("evaluateUsername", "")
	values.Set("analyticsLoginOrigin", "login_action")
	values.Set("clipperFlow", "false")
	values.Set("showSwitchService", "true")
	values.Set("hpts", e.hpts)
	values.Set("hptsh", e.hptsh)

	body, err = e.postLoginForm(values, "application/json")
	if err != nil {
		return err
	}
	if !strings.Contains(string(body), `"usePasswordAuth":true`) {
		return errors.New("Password auth not enabled")
	}

	// Third step: do the final request, append password to form data:
	values.Del("evaluateUsername")
	values.Set("password", e.Password)
	values.Set("login", "Sign in")

	body, err = e.postLoginForm(values, "text/html")
	if err != nil {
		return err
	}

	// Check the body in order to find the redirect URL:
	matches := evernoteRedirectExpr.FindAllStringSubmatch(string(body), -1)
	if len(matches) == 0 {
		return errRedirectURL
	}
	m := matches[0]
	if len(m) < 2 {
		return errRedirectURL
	}
	redirectURL := m[1]
	fmt.Println("Login is ok, redirect URL:", redirectURL)
	return nil
}

// After you successfully get the redirect URL, you should be able to send
// authenticated requests as long as you keep using the HTTP client that was
// used for the login process; the cookie jar plays a very important role here.
//
// To call this code use:

func main() {
	evernoteClient := NewEvernoteClient("user@company", "password")
	err := evernoteClient.Login()
	if err != nil {
		panic(err)
	}
}

只好自己写,经反复试验,发现对于本文开头自己写的server,只需以下代码即可通过验证,输出了hello, world!(将访问方式改为POST也一样。)

package main

import (
	"fmt"
	"io/ioutil"
	"net/http"
	"os"
	"time"
)

// Login sends one GET request with HTTP Basic credentials to the demo server
// at http://localhost:8000/hello and prints the response headers, the
// Www-Authenticate header and the response body to stdout.
//
// Failures (request construction, transport, reading the body) are reported
// on stderr and end the call early instead of being ignored.
func Login() {
	// A timeout keeps the call from hanging forever on an unresponsive server.
	client := &http.Client{Timeout: 10 * time.Second}

	// URL to visit.
	url := "http://localhost:8000/hello"

	// Request to submit.
	req, err := http.NewRequest("GET", url, nil)
	if err != nil {
		fmt.Fprintln(os.Stderr, "build request:", err)
		return
	}

	// The key line: attach the Authorization header. The credentials here
	// are arbitrary placeholders.
	req.SetBasicAuth("aa", "bb")

	// Print the method actually used; the label used to say POST although
	// the request is a GET.
	fmt.Println(req.Method + "訪問")

	// Send the request. res is only valid when err is nil, so check before
	// deferring Close.
	res, err := client.Do(req)
	if err != nil {
		fmt.Fprintln(os.Stderr, "send request:", err)
		return
	}
	defer res.Body.Close()

	fmt.Println("header:")
	fmt.Println(res.Header)

	fmt.Println("realm:")
	fmt.Println(res.Header.Get("Www-Authenticate"))

	fmt.Println("body:")
	body, err := ioutil.ReadAll(res.Body)
	if err != nil {
		fmt.Fprintln(os.Stderr, "read body:", err)
		return
	}
	fmt.Println(string(body))
}

func main() {
	Login()
}

查看SetBasicAuth的定义为(liteide中在光标位置按Ctrl+Shift+J):

// SetBasicAuth sets the request's Authorization header to "Basic " followed
// by the value basicAuth returns for the given username and password.
func (r *Request) SetBasicAuth(username, password string) {
	credentials := basicAuth(username, password)
	r.Header.Set("Authorization", "Basic "+credentials)
}

而basicAuth的定义为:

// basicAuth joins username and password with a ':' and returns the result
// encoded with standard base64.
func basicAuth(username, password string) string {
	raw := []byte(username + ":" + password)
	return base64.StdEncoding.EncodeToString(raw)
}

那么,用gocolly访问的代码如下:

package mainimport ("encoding/base64""fmt""net/http""github.com/gocolly/colly" )func basicAuth(username, password string) string {auth := username + ":" + passwordreturn base64.StdEncoding.EncodeToString([]byte(auth)) } func main() {c := colly.NewCollector()h := http.Header{}h.Set("Authorization", "Basic "+basicAuth("aaaa", "bbbb"))c.OnResponse(func(r *colly.Response) {//fmt.Println(r)fmt.Println(string(r.Body))})c.Request("GET", "http://localhost:8000/hello", nil, nil, h) }

注:对于其他网站,也许要用Fiddler抓包,设置相应的header和cookie才行。

转载于:https://www.cnblogs.com/pu369/p/10408898.html

总结

以上是生活随笔为你收集整理的《(golang)HTTP基本认证机制及使用gocolly登录爬取》的全部内容,希望文章能够帮你解决所遇到的问题。

如果觉得生活随笔网站内容还不错,欢迎将生活随笔推荐给好友。