//get page rank for some domain url
//Three third-party checker sites are asked in order:
//  (1) http://www.pagerankme.com/
//  (2) http://moonsy.com/pagerank_checker/
//  (3) http://pagerank.webmasterhome.cn/
//The first site whose response contains a parsable rank wins; the remaining
//sites are not queried. A rank of 0 reported by a site counts as an answer.
//para: http://answers.yahoo.com
//return: 7
//        0 if domainUrl is null/empty, or if no site returned a parsable rank
public int getDomainPageRank(string domainUrl)
{
    int pageRank = 0;

    // Nothing to look up. This also avoids two useless posts followed by
    // Regex.Replace throwing ArgumentNullException on a null url in method 3.
    if (string.IsNullOrEmpty(domainUrl))
    {
        return pageRank;
    }

    // Short-circuit evaluation keeps the original "stop at first success" order.
    if (tryGetPageRankFromPagerankme(domainUrl, out pageRank)
        || tryGetPageRankFromMoonsy(domainUrl, out pageRank)
        || tryGetPageRankFromWebmasterhome(domainUrl, out pageRank))
    {
        return pageRank;
    }

    //TODO:
    //Google PR (PageRank) Checker
    //http://www.searchbliss.com/seo-tools/google-pagerank-checker.php
    //tmp is "We're sorry, the Google PR check is currently being repaired."
    //future: if OK, maybe can use it

    return 0;
}

//Method 1: post the url to http://www.pagerankme.com/ and read the rank from the returned badge link
private bool tryGetPageRankFromPagerankme(string domainUrl, out int pageRank)
{
    Dictionary<string, string> postDict = new Dictionary<string, string>();
    postDict.Add("url", domainUrl);
    string respHtml = getUrlRespHtml_multiTry("http://www.pagerankme.com/", postDict: postDict);

    //<a href="http://www.pagerankme.com" target="_blank" style="text-decoration:none;color:#000000;">PageRank 7</a>
    return tryExtractPageRank(
        @"<a href=""http://www\.pagerankme\.com"" target=""_blank"" style="".+?"">PageRank (\d+)</a>",
        respHtml,
        out pageRank);
}

//Method 2: post the url to http://moonsy.com/pagerank_checker/ and read the "N/10" heading
private bool tryGetPageRankFromMoonsy(string domainUrl, out int pageRank)
{
    Dictionary<string, string> postDict = new Dictionary<string, string>();
    postDict.Add("domain", domainUrl);
    postDict.Add("Submit", "CHECK");
    string respHtml = getUrlRespHtml_multiTry("http://moonsy.com/pagerank_checker/", postDict: postDict);

    //<h3>Your Page Rank: 7/10
    return tryExtractPageRank(@"<h3>Your Page Rank.+?(\d+)/10", respHtml, out pageRank);
}

//Method 3: walk through the pages of http://pagerank.webmasterhome.cn/ and read the "(N/10)" text
private bool tryGetPageRankFromWebmasterhome(string domainUrl, out int pageRank)
{
    // The query urls below carry the domain without its scheme, e.g. ?domain=answers.yahoo.com
    // Anchored + case-insensitive: only a leading scheme is removed ("HTTP://" included),
    // a scheme embedded later in the url is left alone.
    string noHttpPreDomainUrl = Regex.Replace(domainUrl, "^(https?|ftp)://", "", RegexOptions.IgnoreCase);

    //(1)to get cookies
    // Response body is not needed; the request is made only for its side effect.
    // NOTE(review): presumably getUrlRespHtml_multiTry keeps the cookies for the later requests - confirm.
    string pageRankMainUrl = "http://pagerank.webmasterhome.cn/";
    getUrlRespHtml_multiTry(pageRankMainUrl);

    //(2)ask page rank
    //http://pagerank.webmasterhome.cn/?domain=answers.yahoo.com
    string firstWholeUrl = "http://pagerank.webmasterhome.cn/?domain=" + noHttpPreDomainUrl;
    Dictionary<string, string> headerDict = new Dictionary<string, string>();
    headerDict.Add("referer", pageRankMainUrl);
    // Again only the request itself matters, the returned html is not used.
    getUrlRespHtml_multiTry(firstWholeUrl, headerDict: headerDict);

    //(3)fetch the page which contains the rank
    //http://pagerank.webmasterhome.cn/prLoading.asp?domain=answers.yahoo.com
    string queryUrl = "http://pagerank.webmasterhome.cn/prLoading.asp?domain=" + noHttpPreDomainUrl;
    headerDict = new Dictionary<string, string>();
    headerDict.Add("referer", firstWholeUrl);
    string respHtml = getUrlRespHtml_multiTry(queryUrl, headerDict: headerDict);

    //'<img src=\"http://primg.webmasterhome.cn/pr7.gif\" style=\"width:40px;height:5px;border:0px;\" alt=PageRank align=absmiddle> (7/10)'
    return tryExtractPageRank(@"\((\d+)/10\)", respHtml, out pageRank);
}

//extract the rank digits from respHtml with rankPattern (first capture group) and convert them to int
//return: true and the rank in pageRank on success;
//        false and pageRank = 0 if the pattern did not match or the digits are not a valid int
private bool tryExtractPageRank(string rankPattern, string respHtml, out int pageRank)
{
    pageRank = 0;

    string rankStr = "";
    if (!extractSingleStr(rankPattern, respHtml, out rankStr))
    {
        return false;
    }

    // TryParse instead of Parse: scraped text must not be able to throw
    // (overflow on a long digit run, or non-ASCII digits that \d accepts);
    // a bad value just means "this site failed, try the next one".
    return int.TryParse(
        rankStr,
        System.Globalization.NumberStyles.None,
        System.Globalization.CultureInfo.InvariantCulture,
        out pageRank);
}
例 9.12. getDomainPageRank 的使用范例
// One search result entry, extended with ranking data about the domain it lives on.
public struct searchItemInfo
{
    public string title;
    // with google appendix
    public string googleUrl;
    public string originalUrl;
    public string description;

    // add domain url and rank
    // (filled in from originalUrl by the usage excerpt below)
    public string domainUrl;
    public int pageRank;
    public int alexaRank;
}

// Usage excerpt: singleItemInfo is declared outside this snippet
// (presumably a searchItemInfo - confirm against the caller).
// The domain url is derived first because both rank lookups take it as input.
singleItemInfo.domainUrl = crifanLib.getDomainUrl(singleItemInfo.originalUrl);
singleItemInfo.pageRank = crifanLib.getDomainPageRank(singleItemInfo.domainUrl);
singleItemInfo.alexaRank = crifanLib.getDomainAlexaRank(singleItemInfo.domainUrl);