【Problem】
While running Python code that uses urllib2 to access the network, the following error occasionally appears:
urllib2.URLError: <urlopen error [Errno 10060] >
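For reference, Errno 10060 is the Windows socket timeout error (the remote host did not respond in time). A minimal sketch of how such a call raises, and can be caught; the URL here is just a placeholder:

import urllib2

try:
    resp = urllib2.urlopen("http://example.com", timeout=10);
    respHtml = resp.read();
except urllib2.URLError as err:
    # on an unstable network this occasionally prints:
    # <urlopen error [Errno 10060] ...>
    print "access url failed:", err;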
【Solution Process】
1. It turned out that the program itself was fine; the error was caused by occasional network instability.
2. So the natural idea was: when this kind of network error occurs, just retry a few more times, which should work around the problem.
So the original:

#itemRespHtml = crifanLib.getUrlRespHtml(itemLink);

was changed to:

itemRespHtml = crifanLib.getUrlRespHtml_multiTry(itemLink);

The corresponding code, from crifanLib, is:
# imports needed by this excerpt from crifanLib
import urllib;
import urllib2;
import logging;
import zlib;

# gConst and gVal are module-level globals of crifanLib (user agent string,
# cookie jar, cookie-file flag, etc.), defined elsewhere in the library

#------------------------------------------------------------------------------
def getUrlResponse(url, postDict={}, headerDict={}, timeout=0, useGzip=False, postDataDelimiter="&"):
    """Get response from url, support optional postDict, headerDict, timeout, useGzip

    Note:
    1. if postDict not null, url request auto become to POST instead of default GET
    2. if you want to auto handle cookies, should call initAutoHandleCookies() before use this function,
       then following urllib2.Request will auto handle cookies
    """
    # make sure url is string, not unicode, otherwise urllib2.urlopen will error
    url = str(url);

    if (postDict):
        if (postDataDelimiter == "&"):
            postData = urllib.urlencode(postDict);
        else:
            postData = "";
            for eachKey in postDict.keys():
                postData += str(eachKey) + "=" + str(postDict[eachKey]) + postDataDelimiter;
            postData = postData.strip();
        logging.info("postData=%s", postData);
        req = urllib2.Request(url, postData);
        logging.info("req=%s", req);
        req.add_header('Content-Type', "application/x-www-form-urlencoded");
    else:
        req = urllib2.Request(url);

    defHeaderDict = {
        'User-Agent'    : gConst['UserAgent'],
        'Cache-Control' : 'no-cache',
        'Accept'        : '*/*',
        'Connection'    : 'Keep-Alive',
    };

    # add default headers firstly
    for eachDefHd in defHeaderDict.keys():
        #print "add default header: %s=%s"%(eachDefHd, defHeaderDict[eachDefHd]);
        req.add_header(eachDefHd, defHeaderDict[eachDefHd]);

    if (useGzip):
        #print "use gzip for", url;
        req.add_header('Accept-Encoding', 'gzip, deflate');

    # add customized header later -> allow overwrite default header
    if (headerDict):
        #print "added header:", headerDict;
        for key in headerDict.keys():
            req.add_header(key, headerDict[key]);

    if (timeout > 0):
        # set timeout value if necessary
        resp = urllib2.urlopen(req, timeout=timeout);
    else:
        resp = urllib2.urlopen(req);

    # update cookies into local file
    if (gVal['cookieUseFile']):
        gVal['cj'].save();
        logging.info("gVal['cj']=%s", gVal['cj']);

    return resp;

#------------------------------------------------------------------------------
# get response html == body from url
#def getUrlRespHtml(url, postDict={}, headerDict={}, timeout=0, useGzip=False):
def getUrlRespHtml(url, postDict={}, headerDict={}, timeout=0, useGzip=True, postDataDelimiter="&"):
    resp = getUrlResponse(url, postDict, headerDict, timeout, useGzip, postDataDelimiter);
    respHtml = resp.read();
    if (useGzip):
        #print "---before unzip, len(respHtml)=", len(respHtml);
        respInfo = resp.info();

        # Server: nginx/1.0.8
        # Date: Sun, 08 Apr 2012 12:30:35 GMT
        # Content-Type: text/html
        # Transfer-Encoding: chunked
        # Connection: close
        # Vary: Accept-Encoding
        # ...
        # Content-Encoding: gzip

        # sometimes the request uses gzip,deflate, but what is actually returned is un-gzipped html
        # -> response info does not include the above "Content-Encoding: gzip"
        # -> so only decode here when it indeed is gzipped data
        if (("Content-Encoding" in respInfo) and (respInfo['Content-Encoding'] == "gzip")):
            respHtml = zlib.decompress(respHtml, 16 + zlib.MAX_WBITS);
            #print "+++ after unzip, len(respHtml)=", len(respHtml);
    return respHtml;

def getUrlRespHtml_multiTry(url, postDict={}, headerDict={}, timeout=0, useGzip=True, postDataDelimiter="&", maxTryNum=5):
    """Get url response html, multiple-try version: if it fails, retry."""
    respHtml = "";

    # access url with multiple retries, in case of some (mostly network) error
    for tries in range(maxTryNum):
        try:
            respHtml = getUrlRespHtml(url, postDict, headerDict, timeout, useGzip, postDataDelimiter);
            #logging.debug("Successfully access url %s", url);
            break; # succeeded, so break now
        except:
            if tries < (maxTryNum - 1):
                #logging.warning("Access url %s fail, do %d retry", url, (tries + 1));
                continue;
            else: # last try also failed, so exit
                logging.error("Has tried %d times to access url %s, all failed!", maxTryNum, url);
                break;

    return respHtml;
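For illustration, a usage sketch of the retry version; itemLink here is a placeholder URL, and maxTryNum just overrides the default of 5:

itemLink = "http://example.com/some/item"; # placeholder URL
# retry up to 10 times instead of the default 5
itemRespHtml = crifanLib.getUrlRespHtml_multiTry(itemLink, maxTryNum=10);
if itemRespHtml:
    print "got html of length %d" % len(itemRespHtml);
else:
    print "all tries failed";

Note that on total failure the function logs an error and returns an empty string, so callers should check the result rather than rely on an exception.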
【Summary】
When network access fails with:
urllib2.URLError: <urlopen error [Errno 10060] >
just retry a few more times and it will usually succeed.
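The same multi-try idea works for any urllib2 call. Below is a minimal self-contained sketch for readers who do not want the full crifanLib; urlopenMultiTry is a hypothetical helper name, and the short time.sleep() pause between attempts is an addition, not part of the original code:

import time
import urllib2

def urlopenMultiTry(url, maxTryNum=5, timeout=10, retryDelaySec=1):
    """Read url content, retrying on (mostly network) errors such as Errno 10060."""
    for tries in range(maxTryNum):
        try:
            return urllib2.urlopen(url, timeout=timeout).read()
        except urllib2.URLError:
            if tries < (maxTryNum - 1):
                time.sleep(retryDelaySec) # brief pause before the next attempt
                continue
            raise # the last try also failed, so re-raise to the caller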