#------------------------------------------------------------------------------
# check whether the strToDect is ASCII string
def strIsAscii(strToDect):
    """Return True if strToDect consists solely of ASCII characters.

    The check is exact: the value is run through the strict 'ascii' codec,
    which fails on the first character/byte outside the 0-127 range. No
    statistical encoding detection is involved, so the answer does not
    depend on input length or on byte patterns that merely resemble
    another encoding.

    Args:
        strToDect: the string to inspect; either a byte string
            (bytes/bytearray, i.e. ``str`` on Python 2) or a text string
            (``unicode`` on Python 2, ``str`` on Python 3).

    Returns:
        True when every character is ASCII (an empty string counts as
        ASCII), False otherwise.
    """
    try:
        if isinstance(strToDect, (bytes, bytearray)):
            # Byte input: must be decodable as ASCII.
            strToDect.decode('ascii')
        else:
            # Text input: must be encodable as ASCII.
            strToDect.encode('ascii')
    except UnicodeError:
        # Covers both UnicodeDecodeError and UnicodeEncodeError.
        return False
    return True
例 2.27. strIsAscii的使用范例
# A blog user name that is not plain ASCII, for example
#   http://hi.baidu.com/资料收集
# has to be percent-quoted here; otherwise the later output to WXR fails.
if not strIsAscii(extractedBlogUser):
    extractedBlogUser = urllib.quote(extractedBlogUser)