【问题】
用Python代码:
photoInfoJsonAddQuote = re.sub(r"(,?)(\w+?)\s*?:", r"\1'\2':", photoInfoJson); logging.debug("photoInfoJsonAddQuote=%s", photoInfoJsonAddQuote); photoInfoJsonDoubleQuote = photoInfoJsonAddQuote.replace("'", "\""); logging.debug("photoInfoJsonDoubleQuote=%s", photoInfoJsonDoubleQuote); #remove comma before end of list afterRemoveLastCommaInList = re.sub(r",\s*?]", "]", photoInfoJsonDoubleQuote); logging.debug("photoInfoJsonDoubleQuote=%s", afterRemoveLastCommaInList); #photoInfoJsonDoubleQuoteUni = photoInfoJsonDoubleQuote.decode("UTF-8"); #photoInfoJsonAddQuoteAnsi = photoInfoJsonDoubleQuoteUni.encode("GB18030"); #print "type(photoInfoJson)=",type(photoInfoJsonDoubleQuoteUni); #print crifanLib.getStrPossibleCharset(photoInfoJsonDoubleQuoteUni); #photoInfoJsonDoubleQuote = photoInfoJson.replace("'", '"'); #logging.debug("photoInfoJsonDoubleQuote=%s", photoInfoJsonDoubleQuote); #photoInfoDict = json.loads(photoInfoJsonDoubleQuote, "UTF-8"); #photoInfoDict = json.loads(photoInfoJsonDoubleQuote); #photoInfoDict = json.loads(photoInfoJsonAddQuoteAnsi, "GB18030"); #photoInfoDict = json.loads(photoInfoJsonDoubleQuote); #http://www.yupoo.com/photos/314159subin/87556247/ #{"id":"34393-87556247","owner":"34393","ownername":"314159subin","title":"雪舞飞扬","description":"1月3日新年伊始,一场大雪让杭城\x26quot;雪舞飞扬\x26quot;.","bucket":"314159subin","key":"CxsKKLUc","license":0,"stats_notes": 0,"albums": ["34393-1693900"],"tags":[{"name":"城事", "author": "34393"},{"name":"西湖", "author": "34393"},{"name":"纪实", "author": "34393"},{"name":"街头", "author": "34393"},{"name":"城市", "author": "34393"},{"name":"杭州", "author": "34393"},{"name":"动物", "author": "34393"},{"name":"下雪喽", "author": "34393"},{"name":"雪景", "author": "34393"}],"owner":{"id": 34393,"username": "314159subin","nickname": "三点一四一屋酒"}} replacedQuoteJson = re.sub(r"\\x26([a-zA-Z]{2,6});", r"&\1;", afterRemoveLastCommaInList); logging.debug("replacedQuoteJson=%s", replacedQuoteJson); photoInfoDict = json.loads(replacedQuoteJson); 
logging.debug("photoInfoDict=%s", photoInfoDict);
处理
http://www.yupoo.com/photos/sinaweibo723506601/85924357/
中的html:
{id:'4015182-85924357',owner:'4015182',ownername:'sinaweibo723506601',title:'Scene: crouching tiger, hidden dragon',description:'',bucket:'sinaweibo723506601',key:'C4CSjbe8',license:4,stats_notes: 0,albums: ['4015182-4857285',],tags:[{name:'2012', author: '4015182'},{name:'Nikon', author: '4015182'},{name:'动物', author: '4015182'},{name:'旅游', author: '4015182'},{name:'美国', author: '4015182'},{name:'鸟', author: '4015182'},{name:'鹤', author: '4015182'},{name:'Everglades', author: '4015182'},{name:'Miami', author: '4015182'}],owner:{id: 4015182,username: 'sinaweibo723506601',nickname: 'mrtom1999'}}
结果出错:
photoInfoDict = json.loads(replacedQuoteJson); return _default_decoder.decode(s) File "D:\tmp\dev_install_root\Python27_x64\lib\json\decoder.py", line 366, in decode obj, end = self.raw_decode(s, idx=_w(s, 0).end()) File "D:\tmp\dev_install_root\Python27_x64\lib\json\decoder.py", line 382, in raw_decode obj, end = self.scan_once(s, idx) ValueError: Expecting , delimiter: line 1 column 86 (char 86) |
【解决过程】
1.格式化对应json为:
{ id : '4015182-85924357', owner : '4015182', ownername : 'sinaweibo723506601', title : 'Scene: crouching tiger, hidden dragon', description : '', bucket : 'sinaweibo723506601', key : 'C4CSjbe8', license : 4, stats_notes : 0, albums : ['4015182-4857285', ], tags : [{ name : '2012', author : '4015182' }, { name : 'Nikon', author : '4015182' }, { name : '动物', author : '4015182' }, { name : '旅游', author : '4015182' }, { name : '美国', author : '4015182' }, { name : '鸟', author : '4015182' }, { name : '鹤', author : '4015182' }, { name : 'Everglades', author : '4015182' }, { name : 'Miami', author : '4015182' } ], owner : { id : 4015182, username : 'sinaweibo723506601', nickname : 'mrtom1999' } }
后来找到相关log为:
LINE 147 DEBUG photoInfoJson={id:'4015182-85924357',owner:'4015182',ownername:'sinaweibo723506601',title:'Scene: crouching tiger, hidden dragon',description:'',bucket:'sinaweibo723506601',key:'C4CSjbe8',license:4,stats_notes: 0,albums: ['4015182-4857285',],tags:[{name:'2012', author: '4015182'},{name:'Nikon', author: '4015182'},{name:'动物', author: '4015182'},{name:'旅游', author: '4015182'},{name:'美国', author: '4015182'},{name:'鸟', author: '4015182'},{name:'鹤', author: '4015182'},{name:'Everglades', author: '4015182'},{name:'Miami', author: '4015182'}],owner:{id: 4015182,username: 'sinaweibo723506601',nickname: 'mrtom1999'}} LINE 158 DEBUG photoInfoJsonDoubleQuote={"id":"4015182-85924357","owner":"4015182","ownername":"sinaweibo723506601","title":""Scene": crouching tiger, hidden dragon","description":"","bucket":"sinaweibo723506601","key":"C4CSjbe8","license":4,"stats_notes": 0,"albums": ["4015182-4857285",],"tags":[{"name":"2012", "author": "4015182"},{"name":"Nikon", "author": "4015182"},{"name":"动物", "author": "4015182"},{"name":"旅游", "author": "4015182"},{"name":"美国", "author": "4015182"},{"name":"鸟", "author": "4015182"},{"name":"鹤", "author": "4015182"},{"name":"Everglades", "author": "4015182"},{"name":"Miami", "author": "4015182"}],"owner":{"id": 4015182,"username": "sinaweibo723506601","nickname": "mrtom1999"}} LINE 162 DEBUG photoInfoJsonDoubleQuote={"id":"4015182-85924357","owner":"4015182","ownername":"sinaweibo723506601","title":""Scene": crouching tiger, hidden dragon","description":"","bucket":"sinaweibo723506601","key":"C4CSjbe8","license":4,"stats_notes": 0,"albums": ["4015182-4857285"],"tags":[{"name":"2012", "author": "4015182"},{"name":"Nikon", "author": "4015182"},{"name":"动物", "author": "4015182"},{"name":"旅游", "author": "4015182"},{"name":"美国", "author": "4015182"},{"name":"鸟", "author": "4015182"},{"name":"鹤", "author": "4015182"},{"name":"Everglades", "author": "4015182"},{"name":"Miami", "author": "4015182"}],"owner":{"id": 
4015182,"username": "sinaweibo723506601","nickname": "mrtom1999"}} LINE 182 DEBUG replacedQuoteJson={"id":"4015182-85924357","owner":"4015182","ownername":"sinaweibo723506601","title":""Scene": crouching tiger, hidden dragon","description":"","bucket":"sinaweibo723506601","key":"C4CSjbe8","license":4,"stats_notes": 0,"albums": ["4015182-4857285"],"tags":[{"name":"2012", "author": "4015182"},{"name":"Nikon", "author": "4015182"},{"name":"动物", "author": "4015182"},{"name":"旅游", "author": "4015182"},{"name":"美国", "author": "4015182"},{"name":"鸟", "author": "4015182"},{"name":"鹤", "author": "4015182"},{"name":"Everglades", "author": "4015182"},{"name":"Miami", "author": "4015182"}],"owner":{"id": 4015182,"username": "sinaweibo723506601","nickname": "mrtom1999"}} |
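发现了,是不小心把:
title:'Scene: crouching tiger, hidden dragon',
当成两个属性和值了:原来的正则 (\w+?)\s*?: 把title的值里面的 Scene: 也当成了属性名并加上了引号,结果变成 "title":""Scene": crouching tiger, hidden dragon",其中 Scene 的 S 正好位于第86个字符(char 86),json.loads在这里期望的是逗号分隔符,所以报错。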
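2.所以需要去改代码:在正则末尾加上 \s*(?='|\d|\[|{) 这个前瞻,要求冒号后面(允许有空白)紧跟着的必须是值的开头(单引号、数字、[ 或 {),才把冒号前面的单词当作属性名加上引号。title的值里面的 Scene: 后面跟的是空格和普通字母,所以不会再被误匹配。
最后改为: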
#http://www.yupoo.com/photos/sinaweibo723506601/85924357/ #{id:'4015182-85924357',owner:'4015182',ownername:'sinaweibo723506601',title:'Scene: crouching tiger, hidden dragon',description:'',bucket:'sinaweibo723506601',key:'C4CSjbe8',license:4,stats_notes: 0,albums: ['4015182-4857285',],tags:[{name:'2012', author: '4015182'},{name:'Nikon', author: '4015182'},{name:'动物', author: '4015182'},{name:'旅游', author: '4015182'},{name:'美国', author: '4015182'},{name:'鸟', author: '4015182'},{name:'鹤', author: '4015182'},{name:'Everglades', author: '4015182'},{name:'Miami', author: '4015182'}],owner:{id: 4015182,username: 'sinaweibo723506601',nickname: 'mrtom1999'}} photoInfoJsonAddQuote = re.sub(r"(,?)(\w+?)\s*?:\s*(?='|\d|\[|{)", r"\1'\2':", photoInfoJson); logging.info("photoInfoJsonAddQuote=%s", photoInfoJsonAddQuote); photoInfoJsonDoubleQuote = photoInfoJsonAddQuote.replace("'", "\""); logging.info("photoInfoJsonDoubleQuote=%s", photoInfoJsonDoubleQuote); #remove comma before end of list afterRemoveLastCommaInList = re.sub(r",\s*?]", "]", photoInfoJsonDoubleQuote); logging.info("photoInfoJsonDoubleQuote=%s", afterRemoveLastCommaInList); #photoInfoJsonDoubleQuoteUni = photoInfoJsonDoubleQuote.decode("UTF-8"); #photoInfoJsonAddQuoteAnsi = photoInfoJsonDoubleQuoteUni.encode("GB18030"); #print "type(photoInfoJson)=",type(photoInfoJsonDoubleQuoteUni); #print crifanLib.getStrPossibleCharset(photoInfoJsonDoubleQuoteUni); #photoInfoJsonDoubleQuote = photoInfoJson.replace("'", '"'); #logging.debug("photoInfoJsonDoubleQuote=%s", photoInfoJsonDoubleQuote); #photoInfoDict = json.loads(photoInfoJsonDoubleQuote, "UTF-8"); #photoInfoDict = json.loads(photoInfoJsonDoubleQuote); #photoInfoDict = json.loads(photoInfoJsonAddQuoteAnsi, "GB18030"); #photoInfoDict = json.loads(photoInfoJsonDoubleQuote); #http://www.yupoo.com/photos/314159subin/87556247/ 
#{"id":"34393-87556247","owner":"34393","ownername":"314159subin","title":"雪舞飞扬","description":"1月3日新年伊始,一场大雪让杭城\x26quot;雪舞飞扬\x26quot;.","bucket":"314159subin","key":"CxsKKLUc","license":0,"stats_notes": 0,"albums": ["34393-1693900"],"tags":[{"name":"城事", "author": "34393"},{"name":"西湖", "author": "34393"},{"name":"纪实", "author": "34393"},{"name":"街头", "author": "34393"},{"name":"城市", "author": "34393"},{"name":"杭州", "author": "34393"},{"name":"动物", "author": "34393"},{"name":"下雪喽", "author": "34393"},{"name":"雪景", "author": "34393"}],"owner":{"id": 34393,"username": "314159subin","nickname": "三点一四一屋酒"}} replacedQuoteJson = re.sub(r"\\x26([a-zA-Z]{2,6});", r"&\1;", afterRemoveLastCommaInList); logging.info("replacedQuoteJson=%s", replacedQuoteJson); photoInfoDict = json.loads(replacedQuoteJson); logging.info("photoInfoDict=%s", photoInfoDict);
就可以了。
LINE 191 : INFO photoInfoDict={u'description': u'', u'license': 4, u'title': u'Scene: crouching tiger, hidden dragon', u'tags': [{u'name': u'2012', u'author': u'4015182'}, {u'name': u'Nikon', u'author': u'4015182'}, {u'name': u'\u52a8\u7269', u'author': u'4015182'}, {u'name': u'\u65c5\u6e38', u'author': u'4015182'}, {u'name': u'\u7f8e\u56fd', u'author': u'4015182'}, {u'name': u'\u9e1f', u'author': u'4015182'}, {u'name': u'\u9e64', u'author': u'4015182'}, {u'name': u'Everglades', u'author': u'4015182'}, {u'name': u'Miami', u'author': u'4015182'}], u'bucket': u'sinaweibo723506601', u'stats_notes': 0, u'ownername': u'sinaweibo723506601', u'key': u'C4CSjbe8', u'owner': {u'username': u'sinaweibo723506601', u'nickname': u'mrtom1999', u'id': 4015182}, u'albums': [u'4015182-4857285'], u'id': u'4015182-85924357'}
【总结】
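非法的JSON字符串(属性名没有加引号、字符串用单引号、列表末尾多了逗号等)不能直接用json库解析,必须先自己预处理成合法的JSON,才能交给json.loads。另外要注意,这种基于正则的修补只是权宜之计:如果值里面本身包含单引号,或者包含"单词:数字"这类内容(比如时间 12:30),仍然会被误处理。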
转载请注明:在路上 » 【已解决】Python中json.loads出错:ValueError: Expecting , delimiter: line 1 column 86 (char 86)