//get alexa rank for some domain url
//tries several third-party lookup pages one after another and
//stops at the first one whose answer can be parsed into a number
//para:
//  domainUrl: the domain url to look up, eg: http://answers.yahoo.com
//return:
//  the alexa rank, eg: 4
//  0 if domainUrl is null/empty or none of the lookup methods gave a parsable rank
public int getDomainAlexaRank(string domainUrl)
{
    int alexaRank = 0;

    //nothing to look up -> do not send any request
    if (string.IsNullOrEmpty(domainUrl))
    {
        return alexaRank;
    }

    string queryUrl = "";
    string respHtml = "";
    Dictionary<string, string> postDict;
    string alexaRankStr = "";

    //stays true until one method has successfully parsed a rank;
    //every failure path below simply leaves it untouched
    bool prevMethodFail = true;

    if ((alexaRank == 0) && prevMethodFail)
    {
        //Method 1: use http://www.searchbliss.com/rank.asp
        queryUrl = "http://www.searchbliss.com/rank.asp";

        //first get the form page, it contains the access code that must be posted back:
        //<input type="hidden" name="RAC" value="EIS">
        respHtml = getUrlRespHtml_multiTry(queryUrl);

        string accessCode = "";
        if (extractSingleStr(@"<input\s+type=""hidden""\s+name=""RAC""\s+value=""([A-Z]+)"">", respHtml, out accessCode))
        {
            //posted fields, eg:
            //AC    EIS
            //RAC   EIS
            //rank  http://hubpages.com
            postDict = new Dictionary<string, string>();
            postDict.Add("AC", accessCode);
            postDict.Add("RAC", accessCode);
            postDict.Add("rank", domainUrl);
            respHtml = getUrlRespHtml_multiTry(queryUrl, postDict: postDict);

            //<a href="http://www.alexa.com/data/details/main/http://hubpages.com" target="_blank">444</a>
            if (extractSingleStr(@"<a\s+href=""http://www\.alexa\.com/data/details/main/.+?""\s+target=""_blank"">(\d+)</a>", respHtml, out alexaRankStr))
            {
                //only a successfully parsed number counts as success,
                //otherwise the next method must still be tried
                prevMethodFail = !Int32.TryParse(alexaRankStr, out alexaRank);
            }
        }
    }

#if USE_HTML_PARSER_HTMLAGILITYPACK
    if ((alexaRank == 0) && prevMethodFail)
    {
        //Method 2: use http://www.alexa.com/

        //request the home page first, to get cookies; its content is not needed
        getUrlRespHtml_multiTry("http://www.alexa.com");

        //then do work, eg:
        //http://www.alexa.com/search?q=crifan.com&r=home_home&p=bigtop
        //escape the domain url so that chars like & # ? in it can not break the query string
        queryUrl = "http://www.alexa.com/search";
        queryUrl += "?q=" + Uri.EscapeDataString(domainUrl);
        queryUrl += "&r=" + "home_home";
        queryUrl += "&p=" + "bigtop";
        respHtml = getUrlRespHtml_multiTry(queryUrl);

        HtmlAgilityPack.HtmlDocument htmlDoc = htmlToHtmlDoc(respHtml);
        HtmlNode rootHtmlNode = htmlDoc.DocumentNode;

        //<span>
        //<img class="align-top" src="/images/icons/globe-sm.gif" />
        //<span class="traffic-stat-label">Alexa Traffic Rank:</span>
        //<a href="/siteinfo/crifan.com#trafficstats">
        //170,557</a>
        //</span>
        HtmlNode trafficHtmlNode = rootHtmlNode.SelectSingleNode("//span/span[@class='traffic-stat-label']");
        if ((trafficHtmlNode != null) && (trafficHtmlNode.InnerText.StartsWith("Alexa Traffic Rank:")))
        {
            //the rank number is inside the link which is a sibling of the label
            HtmlNode parentHtmlNode = trafficHtmlNode.ParentNode;
            HtmlNode aHrefNode = parentHtmlNode.SelectSingleNode(".//a[@href]");

            //SelectSingleNode gives null when no link exists -> treat as failure instead of crashing
            if (aHrefNode != null)
            {
                //remove the thousands separator: "170,557" -> "170557"
                alexaRankStr = aHrefNode.InnerText.Trim().Replace(",", "");

                //special: the text can be "No Data", then parsing fails and next method is tried
                prevMethodFail = !Int32.TryParse(alexaRankStr, out alexaRank);
            }
        }
    }
#endif

    if ((alexaRank == 0) && prevMethodFail)
    {
        //Method 3: use http://moonsy.com/alexa_rank/
        queryUrl = "http://moonsy.com/alexa_rank/";
        postDict = new Dictionary<string, string>();
        postDict.Add("domain", domainUrl);
        postDict.Add("Submit", "CHECK");
        respHtml = getUrlRespHtml_multiTry(queryUrl, postDict: postDict);

        //<h2>Alexa Rank of <b>ANSWERS.YAHOO.COM</b> is : <b>4</b></h2>
        alexaRankStr = "";
        if (extractSingleStr(@"<h2>Alexa Rank of.+?is.+?(\d+).+?</h2>", respHtml, out alexaRankStr))
        {
            //same as above: success only if the extracted text really is a number
            prevMethodFail = !Int32.TryParse(alexaRankStr, out alexaRank);
        }
    }

    //TODO:
    //maybe future can use:
    //http://www.dakola.com/tools/alexa/

    return alexaRank;
}
例 9.13. getDomainAlexaRank 的使用范例
// Holds one search result together with the rank values of its domain.
public struct searchItemInfo
{
    // the search result itself
    public string title;
    public string googleUrl;    // with google appendix
    public string originalUrl;
    public string description;

    // domain url and its rank values
    public string domainUrl;
    public int pageRank, alexaRank;
}

// Usage excerpt (singleItemInfo and crifanLib are declared outside this snippet):
// derive the domain url from the original url first, then look up both ranks for that domain.
singleItemInfo.domainUrl = crifanLib.getDomainUrl(singleItemInfo.originalUrl);
singleItemInfo.pageRank  = crifanLib.getDomainPageRank(singleItemInfo.domainUrl);
singleItemInfo.alexaRank = crifanLib.getDomainAlexaRank(singleItemInfo.domainUrl);