2.4.1. 检查/判断/校验网络上某个文件是否有效:isFileValid
#------------------------------------------------------------------------------
# check whether a file url is valid:
# open the file url and check whether the returned info matches the request
# with exception support
# note: should also handle the case where the file url is redirected
# eg :
# http://publish.it168.com/2007/0627/images/500754.jpg ->
# http://img.publish.it168.com/2007/0627/images/500754.jpg
# other special one:
# sina pic url:
# http://s14.sinaimg.cn/middle/3d55a9b7g9522d474a84d&690
# http://s14.sinaimg.cn/orignal/3d55a9b7g9522d474a84d
# the real url is the same as the url above
def isFileValid(fileUrl):
    """Check whether the file behind a URL can be opened and is the same file.

    The URL is opened (following redirects) and considered valid when the
    response code is 200 and the last path component of the final URL equals
    the last path component of the requested URL, compared after
    percent-unquoting and lower-casing.

    Args:
        fileUrl: URL string of the file to check.

    Returns:
        A tuple ``(fileIsValid, errReason)``. ``fileIsValid`` is a bool.
        ``errReason`` is a string describing the failure; on success it is
        left at its initial value ``"Unknown error"`` and carries no meaning.

    No exception raised inside the check is propagated except
    non-``Exception`` ones such as ``KeyboardInterrupt``; failures are
    reported through the returned tuple.
    """
    fileIsValid = False
    errReason = "Unknown error"
    try:
        # The same file may be served under a percent-encoded name, e.g.
        #   old: .../IMG_5214_thumb[1].jpg
        #   new: .../IMG_5214_thumb%5b1%5d.jpg
        # so names are compared unquoted and case-insensitively.
        origFileName = fileUrl.split('/')[-1]
        lowUnquotedOrigFilename = urllib.unquote(origFileName).lower()

        # note: Python 2.6 has added timeout support.
        resp = urllib2.urlopen(fileUrl, timeout=gConst['defaultTimeout'])
        try:
            realUrl = resp.geturl()
            respInfo = resp.info()
            respCode = resp.getcode()
        finally:
            # Only the headers are needed; release the connection right away.
            resp.close()

        newFilename = realUrl.split('/')[-1]
        unquotedLowNewFilename = urllib.unquote(newFilename).lower()

        # For a redirect, a 200 (OK) response with an unchanged filename is
        # still considered valid. Content-Length is deliberately not part of
        # the check because some servers do not return it, e.g.
        # http://116.img.pp.sohu.com/images/blog/2007/5/24/17/24/11355bf42a9.jpg
        if (lowUnquotedOrigFilename == unquotedLowNewFilename) and (respCode == 200):
            fileIsValid = True
        else:
            # eg: Content-Type= image/gif, ContentTypes : audio/mpeg
            # Headers may be missing, so look them up with a default rather
            # than indexing, and format the length as text (it is a header
            # string, not an int).
            contentType = respInfo.get('Content-Type', 'unknown')
            contentLen = respInfo.get('Content-Length', 'unknown')
            errReason = "file url returned info: type=%s, len=%s, realUrl=%s" % (
                contentType, contentLen, realUrl)
    except urllib2.URLError as err:
        # urllib2.HTTPError is a subclass of URLError, so this single clause
        # covers both; convert the exception object to its string form.
        errReason = str(err)
    except Exception:
        # Best-effort check: any other failure is reported, not raised.
        errReason = "Unknown error"
    return (fileIsValid, errReason)
例 2.20. isFileValid的使用范例
# The url really is a picture: check that it can actually be fetched.
# errReason describes the failure when picIsValid is False.
picIsValid, errReason = isFileValid(curUrl)