當前位置：首頁 > 编程语言 > C# >内容正文

C#

今日发现的：一个类似Google Baidu的搜索引擎[C#]代码比较简单

發布時間：2024/4/14 C# 36 豆豆

生活随笔收集整理的這篇文章主要介紹了《今日发现的：一个类似Google Baidu的搜索引擎[C#]代码比较简单》，小編覺得挺不錯的，現在分享給大家，幫大家做個參考。

里面實現很簡單，當然只能算是一個演示版本吧！不可能有Google、Baidu的那么強大。其中包含爬蟲代碼和Web代碼。

主要的代碼分析如下：

/// <summary>
        /// Downloads the page at <c>_URL</c> into <c>_HTMLData</c>, then runs the
        /// analysis steps (<c>GetTitle</c>, <c>GetMeta</c>, <c>GetLink</c>), raises
        /// <c>OnFinishAnalyze</c> and finally clears the cached results via <c>Dispose</c>.
        /// Does nothing when <c>_HTMLData</c> is already populated. If the download
        /// fails, a message is written to the console and no analysis is performed.
        /// </summary>
        private void GetHTML() {
            if (_HTMLData != null) return;
            try {
                // 'using' releases the WebClient even when DownloadData throws;
                // previously it was only disposed on the success path.
                using (WebClient wc = new WebClient()) {
                    byte[] binData = wc.DownloadData(_URL);
                    // NOTE(review): Encoding.Default is the platform default encoding, so pages
                    // served in a different charset will decode incorrectly - confirm whether
                    // the response charset should be honoured instead.
                    _HTMLData = Encoding.Default.GetString(binData);
                }
            }
            catch (Exception) { Console.WriteLine("Can not read this page!"); return; }

            // Order matters: GetLink reads _MetaWords and _Title for the entry describing this page.
            GetTitle();
            GetMeta();
            GetLink();
            if (OnFinishAnalyze != null) { OnFinishAnalyze(this, new EventArgs()); }
            Console.WriteLine("Finish!");
            // Results are reset right after the event, so subscribers have to read them inside the handler.
            this.Dispose();
        }
? ? ? ?/// <summary>
? ? ? ?/// 分析URL里的Title
? ? ? ?/// </summary>
? ? ? ?private void GetTitle() {
? ? ? ? ? ?if (_Title != null) return;

? ? ? ? ? ?//"(<title>([\S\s]+)</title>)" ?提取網頁標題的正則表達式
? ? ? ? ? ?Regex reg = new Regex(@"<title>([\S\s]+)</title>", RegexOptions.IgnoreCase);
? ? ? ? ? ?try{
? ? ? ? ? ? ? ?Match m = reg.Matches(_HTMLData)[0];
? ? ? ? ? ? ? ?if (m.Success) _Title = m.Groups[1].Captures[0].ToString();
? ? ? ? ? ?}
? ? ? ? ? ?catch (Exception) { _Title = ""; }
? ? ? ? ? ?
? ? ? ?}
? ? ? ?/// <summary>
? ? ? ?/// 解析Html代碼里的超鏈接.獲得子URL集.
? ? ? ?/// </summary>
? ? ? ?private void GetLink() {
? ? ? ? ? ?if (_ChildURLSet != null) return;
? ? ? ? ? ?ArrayList urlset=new ArrayList();
? ? ? ? ? ?//"<a[\\s]+href=\"?([\\S]+)\"?[^ <>]+>([^ <>]+)</a>" //提取超鏈接的正則表達式
? ? ? ? ? ?Regex reg = new Regex("<a[\\s]+href=\"?([\\S]+)\"?[^<>]+>([^<>]+?)</a>", RegexOptions.IgnoreCase);
? ? ? ? ? ?MatchCollection mm;
? ? ? ? ? ?try { mm = reg.Matches(_HTMLData); } catch (Exception) { return; }
? ? ? ? ? ?urlset.Add(new QLinkURL(this._URL, "", this._MetaWords,this._Title));
? ? ? ? ? ?foreach (Match m in mm) {
? ? ? ? ? ? ? ?urlset.Add(new QLinkURL(URLJoin(m.Groups[1].Captures[0].ToString()), m.Groups[2].Captures[0].ToString().Replace(" ",""), ""));
? ? ? ? ? ?}
? ? ? ? ? ?_ChildURLSet = (QLinkURL[])urlset.ToArray(System.Type.GetType("QSplider.QLinkURL"));
? ? ? ?}
? ? ? ?private void GetMeta() {
? ? ? ? ? ?if (_MetaWords != null) return;

? ? ? ? ? ?//<meta[\S\s]+name="?keywords"?[\S\s]+content="?([\S\s]+)"?[\S\s]+></meta>
? ? ? ? ? ?Regex reg = new Regex("<meta[\\S\\s]+name=\"?keywords\"?[\\S\\s]+content=\"?([\\S\\s]+)\"?[\\S\\s]+></meta>", RegexOptions.IgnoreCase);
? ? ? ? ? ?try{
? ? ? ? ? ? ? ?Match m = reg.Matches(_HTMLData)[0];
? ? ? ? ? ? ? ?if (m.Success) _MetaWords = m.Groups[1].Captures[0].ToString();
? ? ? ? ? ?}catch(Exception){ _MetaWords = "";}
? ? ? ?}
? ? ? ?public void Dispose() {
? ? ? ? ? ?this._HTMLData = null;
? ? ? ? ? ?this._ChildURLSet = null;
? ? ? ? ? ?this._MetaWords = null;
? ? ? ? ? ?this._Title = null;
? ? ? ?}
? ? ? ?public string URLJoin(string s2) {
? ? ? ? ? ?s2=s2.Trim(_SplitChar);
? ? ? ? ? ?if (s2.StartsWith("http://", true,null)) return s2;
? ? ? ? ? ?if(s2.StartsWith("/")) ?s2.Substring(1,s2.Length-1);
? ? ? ? ? ?if (_URL.LastIndexOf("/") > 9) _URL = _URL.Substring(0,_URL.LastIndexOf("/"));
? ? ? ? ? ?return (_URL + "/" + s2).Trim(_SplitChar);
? ? ? ?}

主要工作的函數就是上面的幾個了！這是一個爬蟲的代碼！

當然還有數據庫的，還有web的！
下載地址如下：
http://www.libing.net.cn/attachment.php?f=attachment%2F%2Fqsearch.splider.zip
http://www.libing.net.cn/attachment.php?f=attachment%2F%2Fqsearch.website.zip
http://59.70.157.222/QSearch.WebSite.zip

轉載于:https://www.cnblogs.com/wbbady/archive/2007/07/10/812006.html

總結

以上是生活随笔為你收集整理的今日发现的：一个类似Google Baidu的搜索引擎[C#]代码比较简单的全部內容，希望文章能夠幫你解決所遇到的問題。

如果覺得生活随笔網站內容還不錯，歡迎將生活随笔推薦給好友。

上一篇： Struts 体系结构与工作原理(图)
下一篇： ASP.NET 2.0(C#) - Th