

A C++ Crawler Project for Downloading Images


This is a C++ crawler project that downloads images.

Note that images on some sites cannot be fetched this way: those sites have anti-crawling mechanisms, so a simple crawler like this one will not get their images.

The main source files

The code in main.cpp:

#include "CHttp.h" #include <urlmon.h>#pragma comment(lib, "urlmon.lib")queue<string> q;//url隊列 queue<string> p;//圖片url隊列void StartCatch(string url); void loadImage();int main() {cout << "*****************************************" << endl << endl;cout << " 歡迎使用網(wǎng)絡(luò)爬蟲系統(tǒng) " << endl;cout << " 開發(fā)者:admin " << endl << endl;cout << "*****************************************" << endl << endl;//創(chuàng)建一個文件夾,點表示當(dāng)前目錄CreateDirectory("./image", NULL);//從鍵盤輸入一個起始urlstring url;//cout<<"請輸入起始url:";cin>>url;//url = "http://desk.zol.com.cn/";//爬的是這個網(wǎng)站,可自行修改//開始抓取StartCatch(url);system("pause");return 0; }void StartCatch(string url) {q.push(url);while (!q.empty()){//取出urlstring currenturl = q.front();q.pop();CHttp http;//發(fā)送一個Get請求string html = http.FetchGet(currenturl);//cout<<html;http.AnalyseHtml(html);loadImage();} }//下載圖片的線程 static int num = 0; void loadImage() {while (!p.empty()){string currenturl = p.front();p.pop();char Name[20] = { 0 };num++;sprintf_s(Name, "./image/%d.jpg", num);if (S_OK == URLDownloadToFile(NULL, currenturl.c_str(), Name, 0, 0)){cout << "download ok" << endl;if (num == 24)//爬24張就結(jié)束了,也可以去掉這句話{exit(0);}}else{cout << "download error" << endl;}}}

The code in CHttp.h:

#include<iostream> #include<windows.h> #include<string> #include<queue> //#include<WinSock2.h>在windows里邊 using namespace std;#pragma comment(lib,"ws2_32.lib")//網(wǎng)絡(luò)的庫class CHttp { private:string m_host;string m_object;SOCKET m_socket;bool AnalyseUrl(string url);//解析URL\httpbool AnalyseUrl2(string url);//\httpsbool init();//初始化套接字bool Connect();//連接web服務(wù)器 public:CHttp(void);~CHttp(void);string FetchGet(string url);//通過Get方式獲取網(wǎng)頁void AnalyseHtml(string html);//解析網(wǎng)頁,獲得圖片地址和其他的鏈接 };

The code in the implementation file CHttp.cpp:

#include "CHttp.h"CHttp::CHttp(void) {}CHttp::~CHttp(void) {closesocket(m_socket);WSACleanup(); }//解析URL\http bool CHttp::AnalyseUrl(string url) {if (string::npos == url.find("http://"))return false;if (url.length() <= 7)return false;int pos = url.find('/', 7);if (pos == string::npos){m_host = url.substr(7);m_object = '/';}else{m_host = url.substr(7, pos - 7);m_object = url.substr(pos);}if (m_host.empty())return false;return true; }//解析URL\https bool CHttp::AnalyseUrl2(string url) {if (string::npos == url.find("https://"))return false;if (url.length() <= 8)return false;int pos = url.find('/', 8);if (pos == string::npos){m_host = url.substr(8);m_object = '/';}else{m_host = url.substr(8, pos - 8);m_object = url.substr(pos);}if (m_host.empty())return false;return true; }bool CHttp::init() {//1 請求協(xié)議版本WSADATA wsaData;WSAStartup(MAKEWORD(2, 2), &wsaData);if (LOBYTE(wsaData.wVersion) != 2 ||HIBYTE(wsaData.wVersion) != 2) {printf("請求協(xié)議版本失敗!\n");return false;}//printf("請求協(xié)議成功!\n");//2 創(chuàng)建socketm_socket = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);if (SOCKET_ERROR == m_socket) {printf("創(chuàng)建socket失敗!\n");WSACleanup();return false;}//printf("創(chuàng)建socket成功!\n");return true; }//連接web服務(wù)器 bool CHttp::Connect() {//DNS服務(wù)器:將域名解析成IP地址hostent *p = gethostbyname(m_host.c_str());if (p == NULL)return false;SOCKADDR_IN sa;sa.sin_family = AF_INET;sa.sin_port = htons(80);//http的默認(rèn)端口,https的默認(rèn)端口443memcpy(&sa.sin_addr, p->h_addr, 4);if (-1 == connect(m_socket, (SOCKADDR*)&sa, sizeof(sa))){cout << "服務(wù)器連接失敗" << endl;return false;}else{//cout<<"服務(wù)器連接成功"<<endl;return true;} }string CHttp::FetchGet(string url)//通過Get方式獲取網(wǎng)頁 {string html;//解析urlif (false == AnalyseUrl(url)){if (false == AnalyseUrl2(url)){cout << "Html解析失敗" << endl;return "";}}//cout<<"主機名"<<m_host<<"\t\t"<<"資源名"<<m_object<<endl;if (false == init())//初始化套接字{return "";}if (false == Connect())//連接服務(wù)器{return "";}//發(fā)送Get請求 Get請求數(shù)據(jù)string request = "GET " + m_object +" HTTP/1.1\r\nHost:" + m_host +"\r\nConnection: Close\r\n\r\n";if (SOCKET_ERROR == send(m_socket, request.c_str(), request.size(), 0)){cout << "send request error" << endl;closesocket(m_socket);return "";}//接收數(shù)據(jù)char ch;while (recv(m_socket, &ch, 1, 0)){html += ch;}return html; } //判斷是否以什么結(jié)尾 bool hasEnding(char *& strFull, char*& strEnd) {char * pFull = strFull;while (*pFull != 0)pFull++;char * pEnd = strEnd;while (*pEnd != 0)pEnd++;while (1){pFull--;pEnd--;if (*pEnd == 0){break;}if (*pFull != *pEnd){return false;}}return true; } void CHttp::AnalyseHtml(string html)//解析網(wǎng)頁,獲得圖片地址和其他的鏈接 {int startIndex = 0;int endIndex = 0;//找到所有的圖片for (int pos = 0;pos < html.length();){startIndex = html.find("src=\"", startIndex);if (startIndex == -1){break;}startIndex += 5;endIndex = html.find("\"", startIndex);//找到資源鏈接string src = html.substr(startIndex, endIndex - startIndex);char *src1 = (char *)src.c_str();//cout<<src<<endl;//判斷連接是否是想要的資源 // char *strend = ".jpg";// char* strend = new char[20];// strcpy(strend, ".jpg");char* strend = new char[20]{ ".jpg" };if (hasEnding(src1, strend) == true){/*if(-1!=src.find("t_s960x600c5"))*/if (-1 != src.find("t_s1920x1080c5")){cout << src << endl;//新建一個線程來下載圖片extern queue<string> p;p.push(src);extern void loadImage();CreateThread(NULL, NULL, (LPTHREAD_START_ROUTINE)loadImage,NULL, NULL, NULL);}/*system("pause");*/}startIndex = endIndex + 1;//system("pause");}startIndex = 0;//找到其他URL地址for (int pos = 0;pos < html.length();){startIndex = html.find("href=\"", startIndex);if 
(startIndex == -1){break;}startIndex += 6;endIndex = html.find("\"", startIndex);//找到資源鏈接string src = html.substr(startIndex, endIndex - startIndex);char *src1 = (char *)src.c_str();//cout<<src<<endl;//判斷連接是否是想要的資源//char *strend = ".html";//char* strend = new char[20];//strcpy(strend, ".html");char* strend = new char[100]{ ".html" };if (hasEnding(src1, strend) == true){if ((-1 != src.find("bizhi") || -1 != src.find("showpic")) && -1 == src.find("http://")){string url = "http://desk.zol.com.cn" + src;extern queue<string> q;q.push(url);//cout<<url<<endl;}}startIndex = endIndex + 1;//system("pause");}}
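One detail worth flagging: the pointer-based hasEnding check forces AnalyseHtml to allocate char buffers with new that are never freed. A simpler suffix check built on std::string (not part of the original code) could replace it:

// Sketch of a std::string-based replacement for hasEnding.
#include <string>

static bool endsWith(const std::string& full, const std::string& suffix)
{
    if (full.size() < suffix.size())
        return false;                                 // a shorter string can never match
    return full.compare(full.size() - suffix.size(),  // compare only the tail of the string
                        suffix.size(), suffix) == 0;
}

// Inside AnalyseHtml the call would become: if (endsWith(src, ".jpg")) { ... }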

Summary

That is the complete code for this C++ image-crawler project; hopefully it helps you solve the problem you were working on.
