此处顺便把对应的全局变量、初始化代码、私有函数等也贴出来，供参考：
/// <summary>Simple key/value pair of strings.</summary>
public struct pairItem
{
    public string key;
    public string value;
};

// Dictionary used for time calculation ("calc time"); created in the constructor.
private Dictionary<string, DateTime> calcTimeList;

// Character that temporarily stands in for ',' (see _processExpireField / _recoverExpireField).
const char replacedChar = '_';

// Cookie attribute names; copied into cookieFieldList by the constructor.
string[] cookieFieldArr = { "expires", "domain", "secure", "path", "httponly", "version" };

//IE7
const string constUserAgent_IE7_x64 = "Mozilla/4.0 (compatible; MSIE 7.0; Windows NT 6.1; WOW64; Trident/5.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E)";
//IE8
// NOTE(review): unlike the IE7 string, this literal has no closing ')' - looks like a typo;
// left byte-identical because it is sent on the wire as the default User-Agent. Confirm before changing.
const string constUserAgent_IE8_x64 = "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E";
//IE9
const string constUserAgent_IE9_x64 = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; WOW64; Trident/5.0)"; // x64
const string constUserAgent_IE9_x86 = "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"; // x86
//Chrome
const string constUserAgent_Chrome = "Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US) AppleWebKit/533.4 (KHTML, like Gecko) Chrome/5.0.375.99 Safari/533.4";
//Mozilla Firefox
const string constUserAgent_Firefox = "Mozilla/5.0 (Windows; U; Windows NT 6.1; rv:1.9.2.6) Gecko/20100625 Firefox/3.6.6";

// User-Agent sent by _getUrlResponse unless the caller overrides it via headerDict.
private string gUserAgent;
// Proxy assigned to every request built by _getUrlResponse (null by default).
private WebProxy gProxy = null;

//default values:
//getUrlResponse
private const Dictionary<string, string> defHeaderDict = null;
private const Dictionary<string, string> defPostDict = null;
private const int defTimeout = 30 * 1000;
private const string defPostDataStr = null;
private const int defReadWriteTimeout = 30 * 1000;
//getUrlRespHtml
private const string defCharset = null;
//getUrlRespHtml_multiTry
private const int defMaxTryNum = 5;
private const int defRetryFailSleepTime = 100; //sleep time in ms when retry fail for getUrlRespHtml

List<string> cookieFieldList = new List<string>();

// Cookies attached to each request and refreshed from each response in _getUrlResponse.
CookieCollection curCookies = null;

#if USE_GETURLRESPONSE_BW
//indicate background worker complete or not
bool bNotCompleted_resp = true;
//store response of http request
private HttpWebResponse gCurResp = null;
#endif

private BackgroundWorker gBgwDownload;
//indicate download complete or not
bool bNotCompleted_download = true;
//store current read out data len
private int gRealReadoutLen = 0;
// Optional progress callback (percentage 0..100), invoked from bgwDownload_ProgressChanged.
Action<int> gFuncUpdateProgress = null;

/// <summary>
/// Registers the embedded-dll resolve handler and initialises the HTTP related state:
/// default user agent, connection limit, cookie collection, cookie field list and the
/// time-calculation dictionary.
/// </summary>
public crifanLib()
{
    //!!! for load embedded dll: (1) register resolve handler
    AppDomain.CurrentDomain.AssemblyResolve += new ResolveEventHandler(CurrentDomain_AssemblyResolve);

    //http related
    gUserAgent = constUserAgent_IE8_x64;
    //set max enough to avoid http request is used out -> avoid dead while get response
    System.Net.ServicePointManager.DefaultConnectionLimit = 200;

    curCookies = new CookieCollection();

    // init const cookie keys
    cookieFieldList.AddRange(cookieFieldArr);

    //init for calc time
    calcTimeList = new Dictionary<string, DateTime>();

#if USE_GETURLRESPONSE_BW
    gBgwDownload = new BackgroundWorker();
#endif

    //debug
    //gProxy = new WebProxy("127.0.0.1", 8087);
}

/*------------------------Private Functions------------------------------*/

//!!! for load embedded dll: (2) implement this handler
/// <summary>
/// Tries to load a missing assembly from this assembly's embedded
/// "&lt;namespace&gt;.Properties.Resources" resource set.
/// </summary>
/// <param name="sender">Unused.</param>
/// <param name="args">Carries the name of the assembly that failed to resolve.</param>
/// <returns>
/// The loaded assembly, or null when the request is for a "*_resources" satellite
/// assembly or no matching byte[] resource exists.
/// </returns>
System.Reflection.Assembly CurrentDomain_AssemblyResolve(object sender, ResolveEventArgs args)
{
    // Reduce "Name, Version=..., Culture=..." (or "Name.dll") to the bare name, then map
    // '.' to '_' to obtain the resource key that is looked up below.
    string dllName = args.Name.Contains(",")
        ? args.Name.Substring(0, args.Name.IndexOf(','))
        : args.Name.Replace(".dll", "");
    dllName = dllName.Replace(".", "_");

    if (dllName.EndsWith("_resources", StringComparison.Ordinal))
    {
        return null;
    }

    System.Resources.ResourceManager rm = new System.Resources.ResourceManager(
        GetType().Namespace + ".Properties.Resources",
        System.Reflection.Assembly.GetExecutingAssembly());

    // A missing (or non-binary) resource must yield "not resolved" (null);
    // passing null on to Assembly.Load would throw from inside the resolve handler.
    byte[] bytes = rm.GetObject(dllName) as byte[];
    if (bytes == null)
    {
        return null;
    }

    return System.Reflection.Assembly.Load(bytes);
}

// replace the replacedChar back to original ','
private string _recoverExpireField(Match foundPprocessedExpire)
{
    return foundPprocessedExpire.Value.Replace(replacedChar, ',');
}

//replace ',' with replacedChar
private string _processExpireField(Match foundExpire)
{
    return foundExpire.Value.Replace(',', replacedChar);
}

//replace "0A" (in \x0A) into '\n'
/// <summary>
/// Converts a matched escape sequence into the character it denotes: the two characters
/// at index 2 and 3 of the match are parsed as a hexadecimal number.
/// </summary>
/// <param name="foundEscapeSequence">Match whose text is at least four characters long.</param>
/// <returns>A one-character string.</returns>
private string _replaceEscapeSequenceToChar(Match foundEscapeSequence)
{
    string matchedEscape = foundEscapeSequence.ToString();
    string hexDigits = matchedEscape.Substring(2, 2);
    int charCode = int.Parse(hexDigits, NumberStyles.HexNumber, NumberFormatInfo.InvariantInfo);
    return Convert.ToChar(charCode).ToString();
}

/// <summary>
/// Decides whether a cookie should be added/retained.
/// Not added when: ck is null, ck has no name, or ck has no domain and curDomain is
/// empty as well. A cookie without a domain gets curDomain assigned as a side effect.
/// </summary>
/// <param name="ck">Cookie to check; its Domain may be filled in.</param>
/// <param name="curDomain">Fallback domain for cookies that carry none.</param>
/// <returns>true when the cookie should be added.</returns>
private bool needAddThisCookie(Cookie ck, string curDomain)
{
    if ((ck == null) || string.IsNullOrEmpty(ck.Name))
    {
        return false;
    }

    if (!string.IsNullOrEmpty(ck.Domain))
    {
        return true;
    }

    // not set current domain (null counts as not set), omit this:
    // should not add empty domain cookie, for this will lead execute CookieContainer.Add() fail !!!
    if (string.IsNullOrEmpty(curDomain))
    {
        return false;
    }

    ck.Domain = curDomain;
    return true;
}

/// <summary>
/// Builds a "k1=v1&amp;k2=v2" string from the dictionary with URL-encoded values.
/// Keys are emitted as-is; the result has no leading '&amp;'.
/// </summary>
/// <param name="paras">Parameters to join; a null value is treated as an empty string.</param>
/// <param name="spaceToPercent20">
/// true: the '+' in the encoded value is rewritten to "%20"; false: it is kept.
/// </param>
/// <returns>The joined string; empty when <paramref name="paras"/> has no entries.</returns>
private string _quoteParas(Dictionary<string, string> paras, bool spaceToPercent20 = true)
{
    StringBuilder quotedParas = new StringBuilder();

    foreach (KeyValuePair<string, string> para in paras)
    {
        // UrlEncode(null) returns null, which would break the Replace below.
        string encodedVal = HttpUtility.UrlEncode(para.Value ?? ""); //space to +
        if (spaceToPercent20)
        {
            encodedVal = encodedVal.Replace("+", "%20");
        }

        if (quotedParas.Length > 0)
        {
            quotedParas.Append('&');
        }
        quotedParas.Append(para.Key).Append('=').Append(encodedVal);
    }

    return quotedParas.ToString();
}

/// <summary>
/// Sends a GET request, or a POST request when post data is supplied, and returns the response.
/// Cookies in curCookies are attached to the request; the response cookies are handed to
/// updateLocalCookies.
/// </summary>
/// <param name="url">Target url.</param>
/// <param name="headerDict">
/// Optional headers. Referer, allow-autoredirect, accept, keep-alive, accept-language,
/// user-agent and content-type (case-insensitive, with '-', ' ' or no separator where
/// listed below) overwrite the matching request setting; any other entry is set as a raw header.
/// </param>
/// <param name="postDict">Optional form fields; when non-empty they replace <paramref name="postDataStr"/>.</param>
/// <param name="timeout">Request timeout in ms; applied only when &gt; 0.</param>
/// <param name="postDataStr">Optional raw post body, sent UTF-8 encoded.</param>
/// <param name="readWriteTimeout">Stream read/write timeout in ms; applied only when &gt; 0.</param>
/// <returns>The response, or null when sending the post body or getting the response raised a WebException.</returns>
private HttpWebResponse _getUrlResponse(string url,
                                        Dictionary<string, string> headerDict = defHeaderDict,
                                        Dictionary<string, string> postDict = defPostDict,
                                        int timeout = defTimeout,
                                        string postDataStr = defPostDataStr,
                                        int readWriteTimeout = defReadWriteTimeout)
{
    HttpWebResponse resp = null;

    HttpWebRequest req = (HttpWebRequest)WebRequest.Create(url);
    req.AllowAutoRedirect = true;
    req.Accept = "*/*";
    req.KeepAlive = true;
    req.UserAgent = gUserAgent;
    req.Headers["Accept-Encoding"] = "gzip, deflate";
    req.AutomaticDecompression = DecompressionMethods.GZip | DecompressionMethods.Deflate;
    req.Proxy = gProxy;

    if (timeout > 0)
    {
        req.Timeout = timeout;
    }

    if (readWriteTimeout > 0)
    {
        //default ReadWriteTimeout is 300000 = 300 seconds = 5 minutes !!!
        //too long, so here change to 30000 = 30 seconds
        //for support TimeOut for later StreamReader's ReadToEnd
        req.ReadWriteTimeout = readWriteTimeout;
    }

    if (curCookies != null)
    {
        req.CookieContainer = new CookieContainer();
        req.CookieContainer.PerDomainCapacity = 40; // following will exceed max default 20 cookie per domain
        req.CookieContainer.Add(curCookies);
    }

    if ((headerDict != null) && (headerDict.Count > 0))
    {
        foreach (KeyValuePair<string, string> header in headerDict)
        {
            string headerValue = header.Value;

            // following are allow the caller overwrite the default header setting
            // (invariant lower-casing keeps the match independent of the current culture)
            switch (header.Key.ToLowerInvariant())
            {
                case "referer":
                    req.Referer = headerValue;
                    break;

                case "allow-autoredirect":
                case "allowautoredirect":
                case "allow autoredirect":
                {
                    bool isAllow;
                    if (bool.TryParse(headerValue, out isAllow))
                    {
                        req.AllowAutoRedirect = isAllow;
                    }
                    break;
                }

                case "accept":
                    req.Accept = headerValue;
                    break;

                case "keep-alive":
                case "keepalive":
                case "keep alive":
                {
                    bool isKeepAlive;
                    if (bool.TryParse(headerValue, out isKeepAlive))
                    {
                        req.KeepAlive = isKeepAlive;
                    }
                    break;
                }

                case "accept-language":
                case "acceptlanguage":
                case "accept language":
                    req.Headers["Accept-Language"] = headerValue;
                    break;

                case "user-agent":
                case "useragent":
                case "user agent":
                    req.UserAgent = headerValue;
                    break;

                case "content-type":
                case "contenttype":
                case "content type":
                    req.ContentType = headerValue;
                    break;

                default:
                    req.Headers[header.Key] = headerValue;
                    break;
            }
        }
    }

    bool hasPostDict = (postDict != null) && (postDict.Count > 0);
    if (hasPostDict || !string.IsNullOrEmpty(postDataStr))
    {
        req.Method = "POST";
        if (req.ContentType == null)
        {
            req.ContentType = "application/x-www-form-urlencoded";
        }

        if (hasPostDict)
        {
            postDataStr = _quoteParas(postDict);
        }

        byte[] postBytes = Encoding.UTF8.GetBytes(postDataStr);
        req.ContentLength = postBytes.Length;

        try
        {
            // using: the request stream is released even when Write throws
            using (Stream postDataStream = req.GetRequestStream())
            {
                postDataStream.Write(postBytes, 0, postBytes.Length);
            }
        }
        catch (WebException)
        {
            //for prev has set ReadWriteTimeout, so here also may timeout.
            //The body could not be sent, so asking for a response makes no sense.
            return null;
        }
    }
    else
    {
        req.Method = "GET";
    }

    //may timeout, has fixed in:
    //http://www.crifan.com/fixed_problem_sometime_httpwebrequest_getresponse_timeout/
    try
    {
        resp = (HttpWebResponse)req.GetResponse();
        updateLocalCookies(resp.Cookies, ref curCookies);
    }
    catch (WebException webEx)
    {
        // Release the error response (if any) so its connection is not held until GC.
        if (webEx.Response != null)
        {
            webEx.Response.Close();
        }
        resp = null;
    }

    return resp;
}

#if USE_GETURLRESPONSE_BW
/// <summary>
/// Starts _getUrlResponse on a background worker. Completion is signalled by
/// bNotCompleted_resp becoming false; the response is stored in gCurResp.
/// </summary>
private void getUrlResponse_bw(string url,
                               Dictionary<string, string> headerDict = defHeaderDict,
                               Dictionary<string, string> postDict = defPostDict,
                               int timeout = defTimeout,
                               string postDataStr = defPostDataStr,
                               int readWriteTimeout = defReadWriteTimeout)
{
    // Create a background thread
    BackgroundWorker bgwGetUrlResp = new BackgroundWorker();
    bgwGetUrlResp.DoWork += new DoWorkEventHandler(bgwGetUrlResp_DoWork);
    bgwGetUrlResp.RunWorkerCompleted += new RunWorkerCompletedEventHandler(bgwGetUrlResp_RunWorkerCompleted);

    //init
    bNotCompleted_resp = true;

    // run in another thread
    object paraObj = new object[] { url, headerDict, postDict, timeout, postDataStr, readWriteTimeout };
    bgwGetUrlResp.RunWorkerAsync(paraObj);
}

/// <summary>Worker body: unpacks the argument array and runs _getUrlResponse.</summary>
private void bgwGetUrlResp_DoWork(object sender, DoWorkEventArgs e)
{
    object[] paraObj = (object[])e.Argument;
    string url = (string)paraObj[0];
    Dictionary<string, string> headerDict = (Dictionary<string, string>)paraObj[1];
    Dictionary<string, string> postDict = (Dictionary<string, string>)paraObj[2];
    int timeout = (int)paraObj[3];
    string postDataStr = (string)paraObj[4];
    int readWriteTimeout = (int)paraObj[5];

    e.Result = _getUrlResponse(url, headerDict, postDict, timeout, postDataStr, readWriteTimeout);
}

/// <summary>
/// Stores the worker's response in gCurResp (null after an error or a cancel) and then
/// clears bNotCompleted_resp.
/// </summary>
private void bgwGetUrlResp_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    // e.Result must not be read when the worker failed or was cancelled.
    if ((e.Error != null) || e.Cancelled)
    {
        gCurResp = null;
    }
    else
    {
        gCurResp = (HttpWebResponse)e.Result;
    }

    // The worker has finished in every case; publish the result before clearing the flag.
    // NOTE(review): presumably a caller polls bNotCompleted_resp and would otherwise wait
    // forever after an error - confirm against the caller.
    bNotCompleted_resp = false;
}
#endif

/// <summary>
/// Starts a background download of <paramref name="url"/> into <paramref name="respBytesBuf"/>.
/// Completion is signalled by bNotCompleted_download becoming false; the number of bytes
/// read (or -1 on failure) is stored in gRealReadoutLen.
/// </summary>
/// <param name="respBytesBuf">Caller-allocated buffer that receives the response bytes.</param>
/// <param name="url">Url to download.</param>
/// <param name="headerDict">Optional request headers.</param>
/// <param name="postDict">Optional post fields.</param>
/// <param name="timeout">Packed into the worker argument; see the note in bgwDownload_DoWork.</param>
/// <param name="funcUpdateProgress">Optional callback receiving the progress percentage.</param>
private void getUrlRespStreamBytes_bw(ref Byte[] respBytesBuf,
                                      string url,
                                      Dictionary<string, string> headerDict,
                                      Dictionary<string, string> postDict,
                                      int timeout,
                                      Action<int> funcUpdateProgress)
{
    // Create a background thread
    gBgwDownload = new BackgroundWorker();
    gBgwDownload.DoWork += bgwDownload_DoWork;
    gBgwDownload.RunWorkerCompleted += bgwDownload_RunWorkerCompleted;
    gBgwDownload.WorkerReportsProgress = true;
    gBgwDownload.ProgressChanged += bgwDownload_ProgressChanged;

    //init
    bNotCompleted_download = true;
    gFuncUpdateProgress = funcUpdateProgress;

    // run in another thread
    object paraObj = new object[] { respBytesBuf, url, headerDict, postDict, timeout };
    gBgwDownload.RunWorkerAsync(paraObj);
}

/// <summary>Forwards the reported percentage to gFuncUpdateProgress, when one is set.</summary>
private void bgwDownload_ProgressChanged(object sender, ProgressChangedEventArgs e)
{
    if (gFuncUpdateProgress != null)
    {
        // Update the progress with the integer supplied to us from the ReportProgress() function.
        gFuncUpdateProgress(e.ProgressPercentage);
    }
}

/// <summary>
/// Worker body: requests the url and reads the whole response body into the supplied buffer,
/// reporting progress as a percentage of the response's ContentLength.
/// e.Result is the number of bytes read, or -1 when no response was obtained, the content
/// length is unknown, the buffer is too small, or any exception occurred.
/// </summary>
private void bgwDownload_DoWork(object sender, DoWorkEventArgs e)
{
    // The sender is the BackgroundWorker that runs this handler; report through it rather
    // than through the gBgwDownload field, which is replaced whenever a new download starts.
    BackgroundWorker worker = (BackgroundWorker)sender;

    object[] paraObj = (object[])e.Argument;
    Byte[] respBytesBuf = (Byte[])paraObj[0];
    string url = (string)paraObj[1];
    Dictionary<string, string> headerDict = (Dictionary<string, string>)paraObj[2];
    Dictionary<string, string> postDict = (Dictionary<string, string>)paraObj[3];
    // paraObj[4] carries the caller's timeout; it is not forwarded because getUrlResponse
    // is called without a timeout argument below.

    int realReadoutLen = 0;

    try
    {
        HttpWebResponse resp = getUrlResponse(url, headerDict, postDict);
        if (resp == null)
        {
            e.Result = -1;
            return;
        }

        // using: response and stream are released on every path
        using (resp)
        using (Stream binStream = resp.GetResponseStream())
        {
            long totalLength = resp.ContentLength;

            // An unknown length (negative) or a body larger than the buffer cannot be read
            // with this scheme; fail explicitly instead of relying on Stream.Read to throw.
            if ((totalLength < 0) || (totalLength > respBytesBuf.Length))
            {
                e.Result = -1;
                return;
            }

            long remaining = totalLength;
            int curBufPos = 0;

            // here download logic is:
            // once request, return some data
            // request multiple time, until no more data
            while (remaining > 0)
            {
                int curReadoutLen = binStream.Read(respBytesBuf, curBufPos, (int)remaining);
                if (curReadoutLen <= 0)
                {
                    break; // stream ended before the announced length was reached
                }

                curBufPos += curReadoutLen;
                remaining -= curReadoutLen;
                realReadoutLen += curReadoutLen;

                int currentPercent = (int)(((long)curBufPos * 100) / totalLength);
                if (currentPercent < 0)
                {
                    currentPercent = 0;
                }
                if (currentPercent > 100)
                {
                    currentPercent = 100;
                }
                worker.ReportProgress(currentPercent);
            }
        }
    }
    catch (Exception)
    {
        // best effort: every failure is reported to the completion handler as -1
        realReadoutLen = -1;
    }

    e.Result = realReadoutLen;
}

/// <summary>
/// Stores the worker's result in gRealReadoutLen (-1 after an error or a cancel) and then
/// clears bNotCompleted_download.
/// </summary>
private void bgwDownload_RunWorkerCompleted(object sender, RunWorkerCompletedEventArgs e)
{
    // e.Result must not be read when the worker failed or was cancelled.
    if ((e.Error != null) || e.Cancelled)
    {
        gRealReadoutLen = -1;
    }
    else
    {
        gRealReadoutLen = (int)e.Result;
    }

    // The worker has finished in every case; publish the result before clearing the flag.
    // NOTE(review): presumably a caller polls bNotCompleted_download and would otherwise
    // wait forever after an error - confirm against the caller.
    bNotCompleted_download = false;
}