9.12. 查找获得域名的Alexa Rank:getDomainAlexaRank


    //get alexa rank for some domain url
    //para: http://answers.yahoo.com
    //return: 4
    public int getDomainAlexaRank(string domainUrl)
    {
        int alexaRank = 0;
        string queryUrl = "";
        string respHtml = "";
        Dictionary<string, string> postDict = new Dictionary<string, string>();
        string alexaRankStr = "";
        bool prevMethodFail = true;

        //string noHttpPreDomainUrl = Regex.Replace(domainUrl, "((https)|(http)|(ftp))://", "");
                
        if ((alexaRank == 0) && prevMethodFail)
        {
            //Method 1: use http://www.searchbliss.com/rank.asp
            string mainUrl = "http://www.searchbliss.com/rank.asp";
            respHtml = getUrlRespHtml_multiTry(mainUrl);
            //<input type="hidden" name="RAC" value="EIS">
            string accessCode = "";
            if (extractSingleStr(@"<input\s+type=""hidden""\s+name=""RAC""\s+value=""([A-Z]+)"">", respHtml, out accessCode))
            {
                queryUrl = "http://www.searchbliss.com/rank.asp";
                //AC	EIS
                //RAC	EIS
                //rank	http://hubpages.com
                postDict = new Dictionary<string, string>();
                //postDict.Add("domain", noHttpPreDomainUrl);
                postDict.Add("AC", accessCode);
                postDict.Add("RAC", accessCode);
                postDict.Add("rank", domainUrl);
                respHtml = getUrlRespHtml_multiTry(queryUrl, postDict: postDict);
                //<a href="http://www.alexa.com/data/details/main/http://hubpages.com" target="_blank">444</a>
                if (extractSingleStr(@"<a\s+href=""http://www\.alexa\.com/data/details/main/.+?""\s+target=""_blank"">(\d+)</a>", respHtml, out alexaRankStr))
                {
                    //alexaRank = Int32.Parse(alexaRankStr);
                    if (Int32.TryParse(alexaRankStr, out alexaRank))
                    {
                        prevMethodFail = false;
                    }
                    else
                    {
                        prevMethodFail = true;
                    }

                    prevMethodFail = false;
                }
                else
                {
                    prevMethodFail = true;
                }
            }
            else 
            {
                prevMethodFail = true;
            }
        }
        
        #if USE_HTML_PARSER_HTMLAGILITYPACK
        if ((alexaRank == 0) && prevMethodFail)
        {
            //Method 2: use http://www.alexa.com/
            string tmpUrl = "http://www.alexa.com";
            //to get cookies
            string tmpRespHtml = getUrlRespHtml_multiTry(tmpUrl);
            //then do work
            queryUrl = "http://www.alexa.com/search";
            //http://www.alexa.com/search?q=crifan.com&r=home_home&p=bigtop
            queryUrl += "?q=" + domainUrl;
            queryUrl += "&r=" + "home_home";
            queryUrl += "&p=" + "bigtop";
            respHtml = getUrlRespHtml_multiTry(queryUrl);

            HtmlAgilityPack.HtmlDocument htmlDoc = htmlToHtmlDoc(respHtml);
            HtmlNode rootHtmlNode = htmlDoc.DocumentNode;

            //<span>
            //<img class="align-top" src="/images/icons/globe-sm.gif" />
            //<span class="traffic-stat-label">Alexa Traffic Rank:</span>
            //<a href="/siteinfo/yahoo.com#trafficstats">
            //4</a>
            //</span>

            //<span class="traffic-stat-label">Alexa Traffic Rank:</span>
            //<a href="/siteinfo/crifan.com#trafficstats">
            //170,557</a>
            //</span>
            //HtmlNode trafficHtmlNode = rootHtmlNode.SelectSingleNode("//span/span[@class='traffic-stat-label']/a[@href]");
            //HtmlNode trafficHtmlNode = rootHtmlNode.SelectSingleNode("//span/span[@class='traffic-stat-label']/a]");
            //HtmlNodeCollection trafficHtmlNodes = rootHtmlNode.SelectNodes("//span/span[@class='traffic-stat-label']");
            HtmlNode trafficHtmlNode = rootHtmlNode.SelectSingleNode("//span/span[@class='traffic-stat-label']");
            if ((trafficHtmlNode != null) && (trafficHtmlNode.InnerText.StartsWith("Alexa Traffic Rank:")))
            {
                HtmlNode parentHtmlNode = trafficHtmlNode.ParentNode;
                HtmlNode aHrefNode = parentHtmlNode.SelectSingleNode(".//a[@href]");
                string tracfficNumberStr = aHrefNode.InnerText;
                alexaRankStr = tracfficNumberStr.Trim().Replace(",", "");
                                
                //speical:
                //"No Data"
                //alexaRank = Int32.Parse(alexaRankStr);
                if(Int32.TryParse(alexaRankStr, out alexaRank))
                {
                    prevMethodFail = false;
                }
                else
                {
                    prevMethodFail = true;
                }
            }
            else
            {
                prevMethodFail = true;
            }
        }
        #endif
        
        if ((alexaRank == 0) && prevMethodFail)
        {
            //Method 3: use http://moonsy.com/alexa_rank/

            //(1) http://moonsy.com/alexa_rank/
            queryUrl = "http://moonsy.com/alexa_rank/";
            postDict = new Dictionary<string, string>();
            //postDict.Add("domain", noHttpPreDomainUrl);
            postDict.Add("domain", domainUrl);
            postDict.Add("Submit", "CHECK");

            respHtml = getUrlRespHtml_multiTry(queryUrl, postDict: postDict);

            //<h2>Alexa Rank of <b>ANSWERS.YAHOO.COM</b> is : <b>4</b></h2>
            alexaRankStr = "";
            if (extractSingleStr(@"<h2>Alexa Rank of.+?is.+?(\d+).+?</h2>", respHtml, out alexaRankStr))
            {
                //alexaRank = Int32.Parse(alexaRankStr);
                if (Int32.TryParse(alexaRankStr, out alexaRank))
                {
                    prevMethodFail = false;
                }
                else
                {
                    prevMethodFail = true;
                }

                prevMethodFail = false;
            }
            else
            {
                prevMethodFail = true;
            }
        }

        //TODO:
        //maybe future can use:
        //http://www.dakola.com/tools/alexa/
        
        return alexaRank;
    }

    

例 9.13. getDomainAlexaRank 的使用范例


        public struct searchItemInfo
        {
            public string title;
            public string googleUrl; // with google appendix
            public string originalUrl;
            public string description;
            //add domain url and rank
            public string domainUrl;
            public int pageRank;
            public int alexaRank;
        };
        
        singleItemInfo.domainUrl = crifanLib.getDomainUrl(singleItemInfo.originalUrl);
        singleItemInfo.pageRank = crifanLib.getDomainPageRank(singleItemInfo.domainUrl);
        singleItemInfo.alexaRank = crifanLib.getDomainAlexaRank(singleItemInfo.domainUrl);