数据采集与网站统计实现全过程
網站統計需要數據的支持,那么數據從哪里來?這就需要網站記錄用戶瀏覽過的痕跡,記錄用戶訪問每個頁面的流向。這就好比給頁面加上百度統計或cnzz的統計代碼,將用戶的訪問數據記錄下來,
進而生成統計數據。現在就來自己實現一個這樣的數據采集與統計功能,具體步驟與相關代碼如下:
一、設計表結構
先考慮數據的記錄方式與保存位置。這些用戶數據后期還要使用,因此考慮將數據存儲到數據庫中。根據需要創建了三個表:Visitor用于記錄來訪用戶的相關數據;VisitorRecord用于記錄來訪用戶的訪問時間、著陸頁面與跳出頁面;ViewRecord是訪問記錄,用于記錄每次頁面訪問的基礎數據。表結構如下:
二、插入數據
表創建好了,如何向其中插入數據?既然ViewRecord記錄的是基礎數據,毫無疑問,用戶每訪問一次頁面,就要向該表中插入一條數據;每個訪問者則向Visitor表中插入一條數據。現在問題來了:如何判斷該用戶是否來過呢?這里我們定義一臺PC端為一個用戶,如果表Visitor中已存在該用戶的數據,只需更新表中的訪問次數visitingNum,而不需要再向Visitor表中添加數據。至于應該如何判斷,這里采用插件fingerprint2.js來幫助識別是否為同一用戶,這種做法類似于指紋識別,插件請移步至下載頻道下載(下載地址:指紋識別插件 fingerprint2.js)。另外,表VisitorRecord用來存儲用戶的來訪時間、跳出時間及訪問時長。
至于各個表插入數據的方法這里就不再贅述了,相信看到此文的讀者都不在話下,這里只闡述具體的調用與實現,功能是放到一般處理程序中實現的,讀者可自由變通。費話不多說,上代碼: Stat.ashx
/// <summary>
/// Generic handler (Stat.ashx) that receives the statistics posts sent by the
/// page script. A post carrying <c>InPage</c> records a page view (creating the
/// visitor and visit records when needed); a post carrying <c>OutPage</c>
/// stamps the exit time and exit page on the visitor's current visit record.
/// </summary>
public class Stat : IHttpHandler
{
    /// <summary>Name of the cookie that carries the visitor GUID.</summary>
    private const string VisitorCookieName = "lbGUID";

    /// <summary>
    /// Number of minutes since the visitor's last recorded activity after which
    /// a new visit record is started instead of continuing the current one.
    /// </summary>
    private const int SessionTimeoutMinutes = 5;

    /// <summary>
    /// Entry point of the handler. Ignores requests from listed local IPs and
    /// from search-engine crawlers, then processes the <c>InPage</c> and/or
    /// <c>OutPage</c> form values and answers with an empty text/plain body.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    public void ProcessRequest(HttpContext context)
    {
        string IP = Labbase.Common.Utils.GetIP();
        Labbase.BLL.TJ_LocalIP bllIP = new Labbase.BLL.TJ_LocalIP();

        // Requests from IPs present in the TJ_LocalIP list are not counted.
        if (bllIP.Exists("LIP= '" + EscapeSqlLiteral(IP) + "'"))
        {
            return;
        }

        // Crawlers are not counted either.
        if (isEngine(IP, context.Request.ServerVariables["HTTP_USER_AGENT"]))
        {
            return;
        }

        if (!string.IsNullOrEmpty(context.Request["InPage"]))
        {
            if (!RecordPageEntry(context, IP))
            {
                return;
            }
        }

        if (!string.IsNullOrEmpty(context.Request["OutPage"]))
        {
            if (!RecordPageExit(context))
            {
                return;
            }
        }

        context.Response.ContentType = "text/plain";
        context.Response.Write("");
    }

    /// <summary>
    /// Handles an <c>InPage</c> post. When the visitor cookie is missing only
    /// the cookie is issued (the page script posts again afterwards); otherwise
    /// the visitor/visit records are resolved and a view record is stored.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    /// <param name="IP">Client IP address.</param>
    /// <returns><c>false</c> when the request must be abandoned (unusable
    /// cookie value or no usable page URL); otherwise <c>true</c>.</returns>
    private bool RecordPageEntry(HttpContext context, string IP)
    {
        if (context.Request.Cookies[VisitorCookieName] == null)
        {
            // First contact: hand out the visitor GUID and record nothing yet.
            HttpCookie cooklbGUID = new HttpCookie(VisitorCookieName);
            cooklbGUID.Value = Guid.NewGuid().ToString();
            cooklbGUID.Expires = DateTime.MaxValue;
            context.Response.Cookies.Add(cooklbGUID);
            return true;
        }

        Guid lbGUID;
        if (!TryGetVisitorId(context, out lbGUID))
        {
            return false;
        }

        // Resolve the page URL before writing anything, so a request without a
        // usable URL cannot leave a visitor row behind with no visit record.
        Uri pageUri = ResolvePageUri(context, context.Request["InPage"]);
        if (pageUri == null)
        {
            return false;
        }

        Labbase.Model.TJ_VisitorRecord mdTJVisitorRecord = ResolveVisitRecord(context, lbGUID, IP, pageUri);
        SaveViewRecord(context, IP, pageUri, mdTJVisitorRecord);
        return true;
    }

    /// <summary>
    /// Handles an <c>OutPage</c> post: updates the exit time and exit page of
    /// the visit record returned by <c>GetVRIDByGUID</c> for the visitor.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    /// <returns><c>false</c> when the cookie is missing/invalid or the visitor
    /// is unknown; otherwise <c>true</c>.</returns>
    private bool RecordPageExit(HttpContext context)
    {
        Guid lbGUID;
        if (!TryGetVisitorId(context, out lbGUID))
        {
            return false;
        }

        Labbase.BLL.TJ_Visitor bllTJVisitor = new Labbase.BLL.TJ_Visitor();
        if (!bllTJVisitor.Exists(lbGUID))
        {
            return false;
        }

        Labbase.BLL.MVisitorRecord bllMVisitorRecord = new Labbase.BLL.MVisitorRecord();
        string vrid = bllMVisitorRecord.GetVRIDByGUID(lbGUID);
        if (string.IsNullOrEmpty(vrid))
        {
            return true;
        }

        Labbase.Model.TJ_VisitorRecord mdTJVisitorRecord = bllMVisitorRecord.GetModel(new Guid(vrid));
        if (mdTJVisitorRecord == null)
        {
            return true;
        }

        mdTJVisitorRecord.outTime = DateTime.Now;

        // The exit page is only overwritten when the page URL is known.
        Uri pageUri = ResolvePageUri(context, context.Request["OutPage"]);
        if (pageUri != null)
        {
            mdTJVisitorRecord.ExitPage = pageUri.ToString();
        }

        bllMVisitorRecord.Update(mdTJVisitorRecord);
        return true;
    }

    /// <summary>Reads the visitor GUID from the request cookie.</summary>
    /// <param name="context">The current HTTP context.</param>
    /// <param name="visitorId">Receives the parsed GUID, or <c>Guid.Empty</c>.</param>
    /// <returns><c>true</c> when the cookie exists and holds a valid GUID.</returns>
    private static bool TryGetVisitorId(HttpContext context, out Guid visitorId)
    {
        visitorId = Guid.Empty;
        HttpCookie cookie = context.Request.Cookies[VisitorCookieName];
        return cookie != null && Guid.TryParse(cookie.Value, out visitorId);
    }

    /// <summary>
    /// Returns the URL of the page that sent the post: the Referer header when
    /// present and well-formed, otherwise the URL-decoded value posted by the
    /// script.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    /// <param name="postedPage">The posted <c>InPage</c>/<c>OutPage</c> value.</param>
    /// <returns>The page URI, or <c>null</c> when neither source is usable.</returns>
    private static Uri ResolvePageUri(HttpContext context, string postedPage)
    {
        Uri pageUri = null;
        try
        {
            pageUri = context.Request.UrlReferrer;
        }
        catch (UriFormatException)
        {
            // Malformed Referer header: fall back to the posted value below.
        }

        if (pageUri == null && !string.IsNullOrEmpty(postedPage))
        {
            Uri.TryCreate(HttpUtility.UrlDecode(postedPage), UriKind.Absolute, out pageUri);
        }

        return pageUri;
    }

    /// <summary>
    /// Finds or creates the visit record a new page view belongs to. Unknown
    /// visitors get a visitor row and a first visit record; known visitors get
    /// a new visit record once they have been idle longer than
    /// <see cref="SessionTimeoutMinutes"/>, else the current one is reused.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    /// <param name="lbGUID">Visitor GUID taken from the cookie.</param>
    /// <param name="IP">Client IP address.</param>
    /// <param name="pageUri">URL of the page being viewed.</param>
    /// <returns>The visit record model; its VRID keeps its default value when
    /// no current visit record id is available.</returns>
    private Labbase.Model.TJ_VisitorRecord ResolveVisitRecord(HttpContext context, Guid lbGUID, string IP, Uri pageUri)
    {
        Labbase.BLL.TJ_Visitor bllTJVisitor = new Labbase.BLL.TJ_Visitor();
        Labbase.BLL.MVisitorRecord bllMVisitorRecord = new Labbase.BLL.MVisitorRecord();
        Labbase.Model.TJ_VisitorRecord mdTJVisitorRecord = new Labbase.Model.TJ_VisitorRecord();

        if (!bllTJVisitor.Exists(lbGUID))
        {
            // NOTE(review): visitingNum is only ever set to 1 here; nothing in
            // this class increments it on later visits - confirm whether the
            // BLL Update call is expected to do that.
            Labbase.Model.TJ_Visitor mdTJVisitor = new Labbase.Model.TJ_Visitor();
            mdTJVisitor.visitingNum = 1;
            mdTJVisitor.VisitorID = lbGUID;
            mdTJVisitor.Bfingerprinting = context.Request["fingerprint"];
            mdTJVisitor.LastVisitingTime = DateTime.Now;
            mdTJVisitor.VIp = IP;
            mdTJVisitor.Loction = IPShowAddress(IP); // look up the IP database
            bllTJVisitor.Add(mdTJVisitor);

            StartVisit(bllMVisitorRecord, mdTJVisitorRecord, lbGUID, pageUri);
            return mdTJVisitorRecord;
        }

        Labbase.Model.TJ_Visitor visitor = bllTJVisitor.GetModel(lbGUID);

        // TotalMinutes, not Minutes: Minutes is only the 0-59 component of the
        // interval, so an absence of e.g. 1 h 02 min would look like 2 minutes.
        if ((DateTime.Now - (DateTime)visitor.LastVisitingTime).TotalMinutes > SessionTimeoutMinutes)
        {
            StartVisit(bllMVisitorRecord, mdTJVisitorRecord, lbGUID, pageUri);
            visitor.LastVisitingTime = DateTime.Now;
            bllTJVisitor.Update(visitor);
        }
        else
        {
            string vrid = bllMVisitorRecord.GetVRIDByGUID(lbGUID);
            if (!string.IsNullOrEmpty(vrid))
            {
                visitor.LastVisitingTime = DateTime.Now;
                bllTJVisitor.Update(visitor);
                mdTJVisitorRecord.VRID = new Guid(vrid);
            }
        }

        return mdTJVisitorRecord;
    }

    /// <summary>
    /// Fills <paramref name="record"/> as a new visit that enters (and, until an
    /// exit is reported, leaves) on <paramref name="pageUri"/> and stores it.
    /// </summary>
    private static void StartVisit(Labbase.BLL.MVisitorRecord bll, Labbase.Model.TJ_VisitorRecord record, Guid visitorId, Uri pageUri)
    {
        record.VRID = Guid.NewGuid();
        record.VisitorID = visitorId;
        record.InTime = DateTime.Now;
        record.outTime = DateTime.Now.AddSeconds(1);
        record.Entrance = pageUri.ToString();
        record.ExitPage = pageUri.ToString();
        bll.Add(record);
    }

    /// <summary>
    /// Builds and stores the view record for one page view, including the
    /// search-engine id of the referrer, the page classification and the
    /// search keyword bookkeeping.
    /// </summary>
    /// <param name="context">The current HTTP context.</param>
    /// <param name="IP">Client IP address.</param>
    /// <param name="pageUri">URL of the page being viewed.</param>
    /// <param name="mdTJVisitorRecord">Visit record the view belongs to.</param>
    private void SaveViewRecord(HttpContext context, string IP, Uri pageUri, Labbase.Model.TJ_VisitorRecord mdTJVisitorRecord)
    {
        Labbase.Model.ViewRecord mdViewRecord = new Labbase.Model.ViewRecord();
        Labbase.BLL.MViewRecord bllMViewRecord = new Labbase.BLL.MViewRecord();

        mdViewRecord.VRID = mdTJVisitorRecord.VRID;
        mdViewRecord.ViewID = Guid.NewGuid();
        mdViewRecord.referenceUrl = HttpUtility.UrlDecode(context.Request["referrer"]);
        if (Utils.IsNullOrEmpty(mdViewRecord.referenceUrl))
        {
            mdViewRecord.referenceUrl = "";
        }
        mdViewRecord.FullPagePath = HttpUtility.UrlDecode(context.Request["InPage"]);
        mdViewRecord.ViewIP = IP;
        mdViewRecord.Localarea = IPShowAddress(IP); // look up the IP database
        mdViewRecord.Vtitle = HttpUtility.UrlDecode(context.Request["title"]);
        mdViewRecord.ViewTime = DateTime.Now;

        // A referrer that is not a well-formed absolute URL counts as "no engine".
        Uri referrerUri;
        if (!Utils.IsNullOrEmpty(mdViewRecord.referenceUrl)
            && Uri.TryCreate(mdViewRecord.referenceUrl, UriKind.Absolute, out referrerUri))
        {
            mdViewRecord.SId = Enginer(referrerUri.DnsSafeHost);
        }
        else
        {
            mdViewRecord.SId = 0;
        }

        int companyID = ClassifyPage(pageUri, mdViewRecord);
        RecordSearchWord(mdViewRecord, IP, companyID);
        bllMViewRecord.Add(mdViewRecord);
    }

    /// <summary>
    /// Derives the channel and query parameters of a view record from the page
    /// URL. The page name is the URL path without the leading slash, cut at
    /// the first '-' and then at the first '.'.
    /// </summary>
    /// <param name="pageUri">URL of the page being viewed.</param>
    /// <param name="mdViewRecord">View record to fill.</param>
    /// <returns>The company id associated with the page, or 0.</returns>
    private int ClassifyPage(Uri pageUri, Labbase.Model.ViewRecord mdViewRecord)
    {
        string pageName = pageUri.AbsolutePath.TrimStart(new char[] { '/' });
        int cut = pageName.IndexOf('-');
        if (cut >= 0)
        {
            pageName = pageName.Substring(0, cut);
        }
        cut = pageName.IndexOf('.');
        if (cut >= 0)
        {
            pageName = pageName.Substring(0, cut);
        }

        int parID;
        int companyID = 0;

        switch (pageName)
        {
            case "IndustryNewsDetial":
                mdViewRecord.Querypar1 = 0;
                if (TryParsePathId(pageUri, -1, out parID))
                {
                    Labbase.BLL.News bllNews = new Labbase.BLL.News();
                    object objCompany = bllNews.GetCompanyID("NewsID=" + parID);
                    int newsCompanyID;
                    if (objCompany != null && int.TryParse(objCompany.ToString(), out newsCompanyID))
                    {
                        mdViewRecord.Querypar1 = newsCompanyID;
                    }
                }
                mdViewRecord.channel = "技術資料";
                break;

            case "SupplyDetial": // supply detail
                mdViewRecord.Querypar1 = 0;
                if (TryParsePathId(pageUri, -1, out parID))
                {
                    Labbase.BLL.supply bllsupply = new Labbase.BLL.supply();
                    Labbase.BLL.product_category bllcategory = new Labbase.BLL.product_category();
                    Labbase.Model.supplyInfo mdSupply = bllsupply.GetModel(parID);
                    if (mdSupply != null)
                    {
                        DataTable dtclass = bllcategory.GetProductClassInfo(" ClassXXID=" + mdSupply.ClassXXID);
                        mdViewRecord.Querypar1 = mdSupply.CompanyID;
                        int.TryParse(Convert.ToString(mdSupply.CompanyID), out companyID);
                        if (dtclass.Rows.Count > 0)
                        {
                            mdViewRecord.QueryPar2 = Int32.Parse(dtclass.Rows[0]["ClassID"].ToString());
                            mdViewRecord.QueryPar3 = Int32.Parse(dtclass.Rows[0]["ClassXID"].ToString());
                        }
                        mdViewRecord.QueryPar4 = mdSupply.ClassXXID;
                    }
                }
                mdViewRecord.channel = "供求信息";
                break;

            case "ProductLDetail": // product detail
                if (TryParsePathId(pageUri, -1, out parID))
                {
                    companyID = new Labbase.BLL.product().GetCompanyIDByProductID(parID);
                }
                mdViewRecord.Querypar1 = companyID;
                mdViewRecord.channel = "產品信息";
                break;

            case "CompanyIndex":    // company home page
            case "CompanyNewsList": // company technical articles
            case "CompanyContact":
            case "CompanyInfo":
                // The company id is the last '-' separated part of the path;
                // parID is 0 when it cannot be parsed.
                TryParsePathId(pageUri, -1, out parID);
                mdViewRecord.Querypar1 = parID;
                companyID = parID;
                mdViewRecord.channel = "公司主頁";
                break;

            case "CompanyProduct":      // company supply list
            case "CompanyProdutDetail": // company supply detail
            case "CompanyNews":         // company technical article detail
                // The company id is the second '-' separated part of the path.
                TryParsePathId(pageUri, 1, out parID);
                mdViewRecord.Querypar1 = parID;
                companyID = parID;
                mdViewRecord.channel = "公司主頁";
                break;

            case "NewsLDetails":
            case "PrimeList":
                mdViewRecord.Querypar1 = 0;
                mdViewRecord.channel = "資訊";
                break;

            case "IndustryNewsList":
                mdViewRecord.Querypar1 = 0;
                mdViewRecord.channel = "技術資料";
                break;

            case "SupplyList":
            case "ProClass":
                mdViewRecord.Querypar1 = 0;
                mdViewRecord.channel = "供求信息";
                break;

            case "ProductBList":
                mdViewRecord.Querypar1 = 0;
                mdViewRecord.channel = "產品信息";
                break;

            case "BrandsList":
                mdViewRecord.Querypar1 = 0;
                mdViewRecord.channel = "品牌專區";
                break;

            case "CompanyList":
                mdViewRecord.Querypar1 = 0;
                mdViewRecord.channel = "公司庫";
                break;

            case "Exhibition":
            case "Exhibition/Detail":
                mdViewRecord.Querypar1 = 0;
                mdViewRecord.channel = "展會信息";
                break;

            case "ProductSearch":
            case "SupplySearch":
            case "NewsSearch":
            case "CompanySearch":
            case "BrandSearch":
                mdViewRecord.Querypar1 = 0;
                mdViewRecord.channel = "搜索";
                break;

            case "purchase":
            case "AboutmUs":
            case "ContactmUs":
            default:
                mdViewRecord.Querypar1 = 0;
                mdViewRecord.channel = "其它";
                break;
        }

        return companyID;
    }

    /// <summary>
    /// Parses an integer id from one '-' separated part of the URL path. Only
    /// the path is used (never the query string) and anything from the first
    /// '.' of the part onwards (such as ".html") is ignored.
    /// </summary>
    /// <param name="pageUri">URL of the page.</param>
    /// <param name="index">Zero-based part index; a negative value selects the last part.</param>
    /// <param name="id">Receives the parsed id, or 0 on failure.</param>
    /// <returns><c>true</c> when the part exists and is a valid integer.</returns>
    private static bool TryParsePathId(Uri pageUri, int index, out int id)
    {
        id = 0;
        string[] parts = pageUri.AbsolutePath.Split('-');
        if (index < 0)
        {
            index = parts.Length - 1;
        }
        if (index >= parts.Length)
        {
            return false;
        }

        string part = parts[index];
        int dot = part.IndexOf('.');
        if (dot >= 0)
        {
            part = part.Substring(0, dot);
        }

        return int.TryParse(part, out id);
    }

    /// <summary>
    /// When the referrer carries a search keyword, adds the keyword to the
    /// search-word table or bumps its counter, and links it to the view record.
    /// </summary>
    /// <param name="mdViewRecord">View record being built; its SWId is set.</param>
    /// <param name="IP">Client IP address.</param>
    /// <param name="companyID">Company id associated with the viewed page.</param>
    private void RecordSearchWord(Labbase.Model.ViewRecord mdViewRecord, string IP, int companyID)
    {
        if (Utils.IsNullOrEmpty(mdViewRecord.referenceUrl))
        {
            return;
        }

        string keyws = GetKeyWord(mdViewRecord.referenceUrl);
        if (string.IsNullOrEmpty(keyws))
        {
            return;
        }

        Labbase.BLL.SearchWord bllSearchWord = new Labbase.BLL.SearchWord();
        Labbase.Model.SearchWord mdSearchWord = new Labbase.Model.SearchWord();

        // The keyword comes straight from a client-supplied URL, so it is
        // escaped before being placed in the filter string.
        object obj = bllSearchWord.GetSearchWordId("Word='" + EscapeSqlLiteral(keyws) + "'");
        if (obj == null)
        {
            mdSearchWord.SWID = Guid.NewGuid();
            mdSearchWord.LastDate = DateTime.Now;
            mdSearchWord.SumNum = 1;
            mdSearchWord.UserIP = IP;
            mdSearchWord.Word = keyws;
            mdSearchWord.CompanyID = companyID;
            bllSearchWord.Add(mdSearchWord);
            mdViewRecord.SWId = mdSearchWord.SWID;
        }
        else
        {
            Guid searchWordId = new Guid(obj.ToString());
            mdViewRecord.SWId = searchWordId;
            mdSearchWord = bllSearchWord.GetModel(searchWordId);
            mdSearchWord.SumNum = mdSearchWord.SumNum + 1;
            mdSearchWord.LastDate = DateTime.Now;
            mdSearchWord.CompanyID = companyID;
            bllSearchWord.Update(mdSearchWord);
        }
    }

    /// <summary>
    /// Doubles single quotes so a value can be embedded in the quoted SQL
    /// filter strings the BLL methods accept.
    /// NOTE(review): this only mitigates injection; a parameterized BLL API
    /// would be the proper fix.
    /// </summary>
    /// <param name="value">Raw value; may be null.</param>
    /// <returns>The escaped value, or an empty string for null.</returns>
    private static string EscapeSqlLiteral(string value)
    {
        return value == null ? "" : value.Replace("'", "''");
    }

    /// <summary>
    /// Holder for a search-engine flag and keyword. Not referenced by the
    /// other members of this class.
    /// </summary>
    private class Engine_wrod
    {
        public bool isEngine { get; set; }
        public string keyWord { get; set; }
    }

    /// <summary>
    /// Returns the id of the first configured search engine whose domain
    /// (<c>SDomin</c>) is contained in <paramref name="input"/>.
    /// </summary>
    /// <param name="input">Host name of the referrer.</param>
    /// <returns>The engine's <c>SID</c>, or 0 when none matches.</returns>
    private int Enginer(string input)
    {
        Labbase.BLL.SearchEngine bll = new Labbase.BLL.SearchEngine();
        foreach (DataRow dr in bll.GetList("").Tables[0].Rows)
        {
            string domain = dr["SDomin"].ToString();

            // An empty domain would match every host, so it is skipped.
            if (domain.Length > 0 && input.Contains(domain))
            {
                return int.Parse(dr["SID"].ToString());
            }
        }
        return 0;
    }

    /// <summary>
    /// Decides whether a request comes from a search engine: a missing user
    /// agent, a user agent containing a configured marker (<c>SEMString</c>),
    /// or an IP listed in the search-engine IP table.
    /// </summary>
    /// <param name="IP">Client IP address.</param>
    /// <param name="useragent">Value of the User-Agent header.</param>
    /// <returns><c>true</c> when the request must not be counted.</returns>
    private bool isEngine(string IP, string useragent)
    {
        Labbase.BLL.SerchEngineMark bllSerchEngineMark = new Labbase.BLL.SerchEngineMark();
        Labbase.BLL.SearchEngineIP bllSearchEngineIP = new Labbase.BLL.SearchEngineIP();

        // No user agent at all: treat as a crawler and stop counting.
        if (string.IsNullOrEmpty(useragent))
        {
            return true;
        }

        foreach (DataRow dr in bllSerchEngineMark.GetList("").Tables[0].Rows)
        {
            string marker = dr["SEMString"].ToString();

            // An empty marker would match every user agent, so it is skipped.
            if (marker.Length > 0 && useragent.Contains(marker))
            {
                return true;
            }
        }

        return bllSearchEngineIP.Exists("SEIP='" + EscapeSqlLiteral(IP) + "'");
    }

    /// <summary>
    /// Extracts the search keyword from a referrer URL of one of the known
    /// search engines and decodes it.
    /// </summary>
    /// <param name="url">Referrer URL.</param>
    /// <returns>The decoded keyword, or an empty string when none is found.</returns>
    private string GetKeyWord(string url)
    {
        string keyword = "";

        // Engines and, at the same index, their '|' separated keyword parameter names.
        string[] _uOsr = { "google", "yahoo", "baidu", "soso", "bing", "sogou", "so.com" };
        string[] _uOkw = { "q", "q", "wd|word|kw|keyword", "w", "q", "query", "q" };

        for (int i = 0; i < _uOsr.Length; i++)
        {
            if (url.Contains(_uOsr[i]))
            {
                // Try each candidate parameter name in order until one has a value.
                foreach (string queryName in _uOkw[i].Split('|'))
                {
                    keyword = GetQuerystring(queryName, url);
                    if (!string.IsNullOrEmpty(keyword))
                    {
                        break;
                    }
                }
                break;
            }
        }

        string ecode = GBorUTF(keyword, url); // encoding of the raw keyword
        keyword = HttpUtility.UrlDecode(keyword, Encoding.GetEncoding(ecode));
        keyword = HttpUtility.UrlEncode(keyword, Encoding.GetEncoding("UTF-8"));
        keyword = HttpUtility.UrlDecode(keyword, Encoding.GetEncoding("UTF-8"));
        return keyword;
    }

    /// <summary>
    /// Guesses whether a raw keyword is GB2312 or UTF-8 encoded. Only baidu,
    /// sogou and so.com URLs are inspected: an <c>ie=</c> parameter decides
    /// when present; otherwise the keyword is taken as GB2312 when it does not
    /// survive a UTF-8 decode/encode round trip.
    /// </summary>
    /// <param name="input">Raw (still URL-encoded) keyword.</param>
    /// <param name="url">Referrer URL.</param>
    /// <returns>"GB2312" or "UTF-8".</returns>
    private string GBorUTF(string input, string url)
    {
        bool isBaidu = url.Contains("baidu");
        bool isSogou = !isBaidu && url.Contains("sogou");
        bool isSo = !isBaidu && !isSogou && url.Contains("so.com");
        if (!isBaidu && !isSogou && !isSo)
        {
            return "UTF-8";
        }

        if (url.Contains("ie="))
        {
            return url.Contains("ie=gb2312") ? "GB2312" : "UTF-8";
        }

        string decoded = HttpUtility.UrlDecode(input, Encoding.GetEncoding("UTF-8"));
        string reEncoded = HttpUtility.UrlEncode(decoded, Encoding.GetEncoding("UTF-8"));
        if (isSogou)
        {
            // For sogou the parentheses are percent-encoded before comparing.
            reEncoded = reEncoded.Replace("(", "%28").Replace(")", "%29");
        }

        return string.Equals(input, reEncoded, StringComparison.OrdinalIgnoreCase) ? "UTF-8" : "GB2312";
    }

    /// <summary>
    /// Returns the raw value of a query parameter from a URL. When the
    /// parameter occurs more than once the first occurrence is used.
    /// </summary>
    /// <param name="queryname">Parameter name: wd, word, q, query, w...</param>
    /// <param name="url">URL to search.</param>
    /// <returns>The raw value, or an empty string when absent.</returns>
    private string GetQuerystring(string queryname, string url)
    {
        if (string.IsNullOrEmpty(url))
        {
            return string.Empty;
        }

        string re = "[?&]([^=]+)(?:=([^&]*))?"; // splits the URL into name/value pairs
        foreach (Match item in Regex.Matches(url, re))
        {
            if (item.Groups[1].Value == queryname)
            {
                return item.Groups[2].Value;
            }
        }

        return string.Empty;
    }

    /// <summary>
    /// Returns the value of a parameter from a query string.
    /// </summary>
    /// <param name="strQuery">Query string, with or without the leading '?'.</param>
    /// <param name="strSplit">Parameter name.</param>
    /// <returns>Everything after "name=" of the first matching pair, or an
    /// empty string when absent.</returns>
    protected string wordFromUrlQuery(string strQuery, string strSplit)
    {
        if (string.IsNullOrEmpty(strQuery))
        {
            return "";
        }

        strQuery = strQuery.TrimStart('?');
        string prefix = strSplit + "=";
        foreach (string str in strQuery.Split('&'))
        {
            if (str.StartsWith(prefix, StringComparison.Ordinal))
            {
                // Substring keeps values that themselves contain '='.
                return str.Substring(prefix.Length);
            }
        }
        return "";
    }

    /// <summary>
    /// Looks up the location of an IP address in the QQWry.Dat data file.
    /// </summary>
    /// <param name="strChar">IP address.</param>
    /// <returns>The location string returned by the IP scanner.</returns>
    protected string IPShowAddress(string strChar)
    {
        IPScaner objScan = new IPScaner();
        objScan.DataPath = System.Web.HttpContext.Current.Server.MapPath(@"/js/QQWry.Dat");
        objScan.IP = strChar;
        return objScan.IPLocation();
    }

    /// <summary>Always <c>false</c>: handler instances are not reused.</summary>
    public bool IsReusable
    {
        get
        {
            return false;
        }
    }
}
三、數據采集 具體功能得以實現之后,這個時候我們需要一個js腳本來控制該一般處理程序Stat.aspx的運行,具體腳本如下: statistics.js:
$(function () {
var urlreferrer = escape(document.referrer);var locationurl = escape(document.location); var ffreashed = getCookie("freash"); if (ffreashed == document.location) {return; }if (getCookie("lbGUID") == null) {$.ajax({url: "/js/fingerprint2.js",dataType: "script",cache: true}).done(function (data, status, jqxhr) {var fp = new Fingerprint2();fp.get(function (result) {$.ajax({type: "POST",url: "/tools/Stat.ashx",data: { InPage: locationurl, referrer: urlreferrer, title: escape(document.title) },async: false,success: function () {$.ajax({type: "POST",url: "/tools/Stat.ashx",data: { InPage: locationurl, fingerprint: result, referrer: urlreferrer, title: escape(document.title) },async: false});},error: function (XMLHttpRequest, textStatus, errorThrown) {alert(XMLHttpRequest.status)}});});}); } else {$.ajax({type: "POST",url: "/tools/Stat.ashx",data: { InPage: locationurl, referrer: urlreferrer, title: escape(document.title) },async: false,error: function (XMLHttpRequest, textStatus, errorThrown) {alert(XMLHttpRequest.status)}}); } 復制代碼}); //給重新刷新設置一個cookie $(window).unload(function () { $.ajax({ type: "POST", url: "/tools/Stat.ashx", data: { OutPage: escape(document.location) }, async: false }); setCookie("freash", document.location); }); function setCookie(name, value) { var exp = new Date(); exp.setTime(exp.getTime() + 2.5 * 1000); document.cookie = name + "=" + escape(value) + ";expires=" + exp.toGMTString(); } function getCookie(name) { var arr, reg = new RegExp("(^| )" + name + "=([^;]*)(;|$)"); if (arr = document.cookie.match(reg)) return unescape(arr[2]); else return null; } function delCookie(name) { var exp = new Date(); exp.setTime(exp.getTime() - 1); var cval = getCookie(name); if (cval != null) document.cookie = name + "=" + cval + ";expires=" + exp.toGMTString(); } 到了該步驟,基本的工作已經完成了,剩下的工作就是直接將腳本引用到頁面中,用戶點擊相應的頁面自然也就可以實現數據采集與數據統計了。為了避免重復動作,最好將腳本引用到頁面共用的用戶控件中,下面是經過規范化處理的引用示例:
<script type="text/javascript">
    // Site-internal statistics: append an async <script> element for
    // /js/statistics.js to the document head.
    (function () {
        var loader = document.createElement("script");
        loader.type = "text/javascript";
        loader.async = true;
        loader.src = "/js/statistics.js";
        document.getElementsByTagName('HEAD').item(0).appendChild(loader);
    })();
</script>
總的來說,數據采集無非就是記錄下用戶在本網站各頁面的具體瀏覽軌跡,從而用于對用戶的需求進行分析,采集就是統計的數據來源
轉載于:https://juejin.im/post/5d07440d6fb9a07f0a2de0a9
《新程序員》:云原生和全面數字化實踐50位技術專家共同創作,文字、視頻、音頻交互閱讀總結
以上是生活随笔為你收集整理的数据采集与网站统计实现全过程的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: Delphi 关键字详解
- 下一篇: 安装源码包(这里主要写了redis,其他