【背景】
之前自己写了BlogsToWordPress,后来希望为其添加新的支持:导出网易163博客中的心情随笔的内容。
之前已经通过代码,可以获得返回的DWR-REPLY数据了:
【记录】给BlogsToWordPress添加支持导出网易的心情随笔
现在要做的,就是想办法在Python中分析并解析这些数据:
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 | LINE 511 DEBUG getFeelingCardDwrUrl=http://api.blog.163.com/ni_chen/dwr/call/plaincall/FeelingsBeanNew.getRecentFeelingCards.dwr LINE 519 DEBUG feelingCardRespHtml=//#DWR-INSERT //#DWR-REPLY var s0={};var s1={};var s2={};var s3={};var s4={};var s5={};var s6={};var s7={};var s8={};var s9={};var s10={};var s11={};var s12={};var s13={};var s14={};var s15={};var s16={};var s17={};var s18={};var s19={};var s20={};var s21={};var s22={};var s23={};var s24={};var s25={};var s26={};var s27={};var s28={};var s29={};var s30={};var s31={};var s32={};var s33={};var s34={};var s35={};var s36={};var s37={};var s38={};var s39={};var s40={};var s41={};var s42={};var s43={};var s44={};var s45={};var s46={};var s47={};var s48={};var s49={};var s50={};var s51={};var s52={};var s53={};var s54={};var s55={};var s56={};var s57={};var s58={};var s59={};var s60={};var s61={};var s62={};var s63={};var s64={};var s65={};var s66={};var s67={};var s68={};var s69={};var s70={};var s71={};var s72={};var s73={};var s74={};var s75={};var s76={};var s77={};var s78={};var s79={};var s80={};var s81={};var s82={};var s83={};var s84={};var s85={};var s86={};var s87={};var s88={};var s89={};var s90={};var s91={};var s92={};var s93={};var s94={};var s95={};var s96={};var s97={};var s98={};var s99={};var s100={};var s101={};var s102={};var s103={};var s104={};var s105={};var s106={};var s107={};var s108={};var s109={};var s110={};var s111={};var s112={};var s113={};var s114={};var s115={};var s116={};var s117={};var s118={};var s119={};var s120={};var s121={};var s122={};var s123={};var s124={};var s125={};var s126={};var s127={};var s128={};var s129={};var s130={};var s131={};var s132={};var s133={};var s134={};var s135={};var s136={};var s137={};var s138={};var s139={};var s140={};var s141={};var s142={};var s143={};var s144={};var s145={};var s146={};var s147={};var s148={};var s149={};var s150={};var s151={};var 
s152={};var s153={};var s154={};var s155={};var s156={};var s157={};var s158={};var s159={};var s160={};var s161={};var s162={};var s163={};var s164={};var s165={};var s166={};var s167={};var s168={};var s169={};var s170={};var s171={};var s172={};var s173={};var s174={};var s175={};var s176={};var s177={};var s178={};var s179={};var s180={};var s181={};var s182={};var s183={};var s184={};var s185={};var s186={};var s187={};var s188={};var s189={};var s190={};var s191={};var s192={};var s193={};var s194={};var s195={};var s196={};var s197={};var s198={};var s199={};var s200={};var s201={};var s202={};var s203={};var s204={};var s205={};var s206={};var s207={};var s208={};var s209={};var s210={};var s211={};var s212={};var s213={};var s214={};var s215={};var s216={};var s217={};var s218={};var s219={};var s220={};var s221={};var s222={};var s223={};var s224={};var s225={};var s226={};var s227={};var s228={};var s229={};var s230={};var s231={};var s232={};var s233={};var s234={};var s235={};var s236={};var s237={};var s238={};var s239={};var s240={};var s241={};var s242={};var s243={};var s244={};var s245={};var s246={};var s247={};var s248={};var s249={};var s250={};var s251={};var s252={};var s253={};var s254={};var s255={};var s256={};var s257={};var s258={};var s259={};var s260={};var s261={};var s262={};var s263={};var s264={};var s265={};var s266={};var s267={};var s268={};var s269={};var s270={};var s271={};var s272={};var s273={};var s274={};var s275={};var s276={};var s277={};var s278={};var s279={};var s280={};var s281={};var s282={};var s283={};var s284={};var s285={};var s286={};var s287={};var s288={};var s289={};var s290={};var s291={};var s292={};var s293={};var s294={};var s295={};var s296={};var s297={};var s298={};var s299={};var s300={};var s301={};var s302={};var s303={};var s304={};var s305={};var s306={};var s307={};var s308={};var s309={};var s310={};var s311={};var s312={};var s313={};var s314={};var s315={};var s316={};var s317={};var 
s318={};var s319={};var s320={};var s321={};var s322={};var s323={};var s324={};var s325={};var s326={};var s327={};var s328={};var s329={};var s330={};var s331={};var s332={};var s333={};var s334={};var s335={};var s336={};var s337={};var s338={};var s339={};var s340={};var s341={};var s342={};var s343={};var s344={};var s345={};var s346={};var s347={};var s348={};var s349={};var s350={};var s351={};var s352={};var s353={};var s354={};var s355={};var s356={};var s357={};var s358={};var s359={};var s360={};var s361={};var s362={};var s363={};var s364={};var s365={};var s366={};var s367={};var s368={};var s369={};var s370={};var s371={};var s372={};var s373={};var s374={};var s375={};var s376={};var s377={};var s378={};var s379={};var s380={};var s381={};var s382={};var s383={};var s384={};var s385={};var s386={};var s387={};var s388={};var s389={};var s390={};var s391={};var s392={};var s393={};var s394={};var s395={};var s396={};var s397={};var s398={};var s399={};var s400={};var s401={};var s402={};var s403={};var s404={};var s405={};var s406={};var s407={};var s408={};var s409={};var s410={};var s411={};var s412={};var s413={};var s414={};var s415={};var s416={};var s417={};var s418={};var s419={};var s420={};var s421={};var s422={};var s423={};var s424={};var s425={};var s426={};var s427={};var s428={};var s429={};var s430={};var s431={};var s432={};var s433={};var s434={};var s435={};var s436={};var s437={};var s438={};var s439={};var s440={};var s441={};var s442={};var s443={};var s444={};var s445={};var s446={};var s447={};var s448={};var s449={};var s450={};var s451={};var s452={};var s453={};var s454={};var s455={};var s456={};var s457={};var s458={};var s459={};var s460={};var s461={};var s462={};var s463={};var s464={};var s465={};var s466={};var s467={};var s468={};var s469={};var s470={};var s471={};var s472={};var s473={};var s474={};var s475={};var s476={};var s477={};var s478={};var s479={};var s480={};var s481={};var s482={};var s483={};var 
s484={};var s485={};var s486={};var s487={};var s488={};var s489={};var s490={};var s491={};var s492={};var s493={};var s494={};var s495={};var s496={};var s497={};var s498={};var s499={};var s500={};var s501={};var s502={};var s503={};var s504={};var s505={};var s506={};var s507={};var s508={};var s509={};var s510={};var s511={};var s512={};var s513={};var s514={};var s515={};var s516={};var s517={};var s518={};var s519={};var s520={};var s521={};var s522={};var s523={};var s524={};var s525={};var s526={};var s527={};var s528={};var s529={};var s530={};var s531={};var s532={};var s533={};var s534={};var s535={};var s536={};var s537={};var s538={};var s539={};var s540={};var s541={};var s542={};var s543={};var s544={};var s545={};var s546={};var s547={};var s548={};var s549={};var s550={};var s551={};var s552={};var s553={};var s554={};var s555={};var s556={};var s557={};var s558={};var s559={};var s560={};var s561={};var s562={};var s563={};var s564={};var s565={};var s566={};var s567={};var s568={};var s569={};var s570={};var s571={};var s572={};var s573={};var s574={};var s575={};var s576={};var s577={};var s578={};var s579={};var s580={};var s581={};var s582={};var s583={};var s584={};var s585={};var s586={};var s587={};var s588={};var s589={};var s590={};var s591={};var s592={};var s593={};var s594={};var s595={};var s596={};var s597={};var s598={};var s599={};var s600={};var s601={};var s602={};var s603={};var s604={};var s605={};var s606={};var s607={};var s608={};var s609={};var s610={};var s611={};var s612={};var s613={};var s614={};var s615={};var s616={};var s617={};var s618={};var s619={};var s620={};var s621={};var s622={};var s623={};var s624={};var s625={};var s626={};var s627={};var s628={};var s629={};var s630={};var s631={};var s632={};var s633={};var s634={};var s635={};var s636={};var s637={};var s638={};var s639={};var s640={};var s641={};var s642={};var s643={};var s644={};var s645={};var s646={};var s647={};var s648={};var s649={};var 
s650={};var s651={};var s652={};var s653={};var s654={};var s655={};var s656={};var s657={};var s658={};var s659={};var s660={};var s661={};var s662={};var s663={};var s664={};var s665={};var s666={};var s667={};var s668={};var s669={};var s670={};var s671={};var s672={};var s673={};var s674={};var s675={};var s676={};var s677={};var s678={};var s679={};var s680={};var s681={};var s682={};var s683={};var s684={};var s685={};var s686={};var s687={};var s688={};var s689={};var s690={};var s691={};var s692={};var s693={};var s694={};var s695={};var s696={};var s697={};var s698={};var s699={};var s700={};var s701={};var s702={};var s703={};var s704={};var s705={};var s706={};var s707={};var s708={};var s709={};var s710={};var s711={};var s712={};var s713={};var s714={};var s715={};var s716={};var s717={};var s718={};var s719={};var s720={};var s721={};var s722={};var s723={};var s724={};var s725={};var s726={};var s727={};var s728={};var s729={};var s730={};var s731={};var s732={};var s733={};var s734={};var s735={};var s736={};var s737={};var s738={};var s739={};var s740={};var s741={};var s742={};var s743={};var s744={};var s745={};var s746={};var s747={};var s748={};var s749={};var s750={};var s751={};var s752={};var s753={};var s754={};var s755={};var s756={};var s757={};var s758={};var s759={};var s760={};var s761={};var s762={};var s763={};var s764={};var s765={};var s766={};var s767={};var s768={};var s769={};var s770={};var s771={};var s772={};var s773={};var s774={};var s775={};var s776={};var s777={};var s778={};var s779={};var s780={};var s781={};var s782={};var s783={};var s784={};var s785={};var s786={};var s787={};var 
s788={};s0.commentCount=0;s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="148749270";s0.mainCommentCount=0;s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;s0.synchMiniBlog=-1;s0.userAvatar=0;s0.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s0.userId=186541395;s0.userName="ni_chen";s0.userNickname="Neysa"; s1.commentCount=1;s1.content="\u4E3B\u9875\u7EC8\u4E8E\u7545\u901A\u4E86\uFF0C\u4FFA\u53C8\u8981\u632A\u7A9D\u4E86[P]\u5FAE\u7B11[/P]";s1.id="134875456";s1.mainCommentCount=1;s1.moodType=1;s1.moveFrom=null;s1.publishTime=1350461318140;s1.synchMiniBlog=-1;s1.userAvatar=0;s1.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s1.userId=186541395;s1.userName="ni_chen";s1.userNickname="Neysa"; s2.commentCount=0;s2.content="BK\u529E\u516C\u5BA4\u5927\u5988\u771F\u662F\u540D\u4E0D\u865A\u4F20\uFF0C\u5976\u5976\u7684\u4ECA\u5929\u8FD8\u4E0D\u7ED3\u675F\u6211\u8981\u6297\u8BAE\u4E86";s2.id="134892431";s2.mainCommentCount=0;s2.moodType=1;s2.moveFrom=null;s2.publishTime=1350454487895;s2.synchMiniBlog=-1;s2.userAvatar=0;s2.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s2.userId=186541395;s2.userName="ni_chen";s2.userNickname="Neysa"; s3.commentCount=0;s3.content="\u7814\u7A76\u5BA4\u5C0F\u5B69\u8981\u53BB\u89C1IU\uFF0C\u8FD8\u9884\u7EA6\u4E86\u76AE\u80A4\u7BA1\u7406\uFF0C\u4E24\u4E2A\u4EBA\u5728\u90A3\u5600\u5495\u201C\u8FD9\u6837\u62FF\u4E0D\u5230IU\u7B7E\u540D\u7684.....\u201D[P]\u5931\u671B[/P][P]\u5931\u671B[/P]";s3.id="134892313";s3.mainCommentCount=0;s3.moodType=1;s3.moveFrom=null;s3.publishTime=1350443478140;s3.synchMiniBlog=-1;s3.userAvatar=0;s3.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s3.userId=186541395;s3.userName="ni_chen";s3.userNickname="Neysa"; 
s4.commentCount=0;s4.content="\u5B66\u957F\u8001\u8BA9\u6211\u5E2E\u4ED6\u4E0B\u7535\u5B50\u4E66\uFF0C\u89C9\u5F97\u6211\u5F88\u5389\u5BB3\uFF0C\u97E9\u56FD\u8FD8\u6CA1\u4E70\u7684\u4E66\u6211\u90FD\u80FD\u4E0B\u6765\uFF0C\u8FD8\u8BF4\u662F\u4E0D\u662F\u7684\u7ED9\u6211\u4E70\u597D\u5403\u7684[P]\u5F00\u6000\u7B11[/P][P]\u5F00\u6000\u7B11[/P]";s4.id="134829323";s4.mainCommentCount=0;s4.moodType=1;s4.moveFrom=null;s4.publishTime=1350366965636;s4.synchMiniBlog=-1;s4.userAvatar=0;s4.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s4.userId=186541395;s4.userName="ni_chen";s4.userNickname="Neysa"; s5.commentCount=0;s5.content="\u6628\u5929\u770B\u5B8C\u300A\u9700\u8981\u6D6A\u6F2B\u300B\uFF0C\u7528\u4E86\u5DEE\u4E0D\u591A\u4E00\u76D2\u7EB8\u5DFE\uFF0C\u773C\u775B\u75BC";s5.id="134829040";s5.mainCommentCount=0;s5.moodType=1;s5.moveFrom=null;s5.publishTime=1350342912629;s5.synchMiniBlog=-1;s5.userAvatar=0;s5.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s5.userId=186541395;s5.userName="ni_chen";s5.userNickname="Neysa"; s6.commentCount=0;s6.content="\"\u751F\u6D3B\u4E2D\u7684\u5899\u4E0D\u662F\u7528\u6765\u649E\u5934\u7684\uFF0C\u662F\u7528\u6765\u8F6C\u5F2F\u7684\". 
";s6.id="134829014";s6.mainCommentCount=0;s6.moodType=1;s6.moveFrom=null;s6.publishTime=1350340791901;s6.synchMiniBlog=-1;s6.userAvatar=0;s6.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s6.userId=186541395;s6.userName="ni_chen";s6.userNickname="Neysa"; s7.commentCount=1;s7.content="\u90A3\u5E2E\u4EBA\u53BB\u515C\u98CE\u4E86\uFF0C\u7814\u7A76\u5BA4\u5C31\u5269\u6211\u4E00\u4E2A\u4E86[P]\u5931\u671B[/P][P]\u5931\u671B[/P]";s7.id="134768355";s7.mainCommentCount=1;s7.moodType=1;s7.moveFrom=null;s7.publishTime=1350278946278;s7.synchMiniBlog=-1;s7.userAvatar=0;s7.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s7.userId=186541395;s7.userName="ni_chen";s7.userNickname="Neysa"; s8.commentCount=0;s8.content="\u4E09\u5341\u5C81\u524D\u603B\u7B97\u6709\u4E86\u4E2A\u5C0F\u7A9D[IMG]http://img4.ph.126.net/NdQSyqju67sds7xsP9wWVA==/6597146732471627934.jpg[/IMG]";s8.id="134757722";s8.mainCommentCount=0;s8.moodType=0;s8.moveFrom="wap";s8.publishTime=1350211074706;s8.synchMiniBlog=-1;s8.userAvatar=0;s8.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s8.userId=186541395;s8.userName="ni_chen";s8.userNickname="Neysa"; ...... 
s786.commentCount=0;s786.content="\u6211\u8BBE\u8BA1\u7684\u6A21\u7248\uFF0C\u563B\u563B\u3002";s786.id="111039880";s786.mainCommentCount=0;s786.moodType=1;s786.moveFrom="";s786.publishTime=1231545101647;s786.synchMiniBlog=-1;s786.userAvatar=0;s786.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s786.userId=186541395;s786.userName="ni_chen";s786.userNickname="Niya"; s787.commentCount=0;s787.content="\u6211\u542C\u4EBA\u8BF4\uFF0C\u5BC2\u5BDE\u7684\u4EBA\uFF0C\u611F\u5192\u4F1A\u62D6\u5F97\u7279\u522B\u957F\uFF0C\u56E0\u4E3A\u4ED6\u81EA\u5DF1\u4E5F\u4E0D\u60F3\u597D\u3002";s787.id="111039881";s787.mainCommentCount=0;s787.moodType=1;s787.moveFrom="";s787.publishTime=1231329494484;s787.synchMiniBlog=-1;s787.userAvatar=0;s787.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s787.userId=186541395;s787.userName="ni_chen";s787.userNickname="Niya"; s788.commentCount=0;s788.content="\u4E0D\u8981\u8FFD\u6C42\u4EC0\u4E48\u7ED3\u679C\uFF0C\u6BCF\u4E2A\u4EBA\u7ED3\u679C\u90FD\u4E00\u6837\uFF0C\u5C31\u662F\u6B7B\u4EA1\u3002";s788.id="111039882";s788.mainCommentCount=0;s788.moodType=1;s788.moveFrom="";s788.publishTime=1231158155439;s788.synchMiniBlog=-1;s788.userAvatar=0;s788.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s788.userId=186541395;s788.userName="ni_chen";s788.userNickname="Niya"; 
dwr.engine._remoteHandleCallback('1','0',[s0,s1,s2,s3,s4,s5,s6,s7,s8,s9,s10,s11,s12,s13,s14,s15,s16,s17,s18,s19,s20,s21,s22,s23,s24,s25,s26,s27,s28,s29,s30,s31,s32,s33,s34,s35,s36,s37,s38,s39,s40,s41,s42,s43,s44,s45,s46,s47,s48,s49,s50,s51,s52,s53,s54,s55,s56,s57,s58,s59,s60,s61,s62,s63,s64,s65,s66,s67,s68,s69,s70,s71,s72,s73,s74,s75,s76,s77,s78,s79,s80,s81,s82,s83,s84,s85,s86,s87,s88,s89,s90,s91,s92,s93,s94,s95,s96,s97,s98,s99,s100,s101,s102,s103,s104,s105,s106,s107,s108,s109,s110,s111,s112,s113,s114,s115,s116,s117,s118,s119,s120,s121,s122,s123,s124,s125,s126,s127,s128,s129,s130,s131,s132,s133,s134,s135,s136,s137,s138,s139,s140,s141,s142,s143,s144,s145,s146,s147,s148,s149,s150,s151,s152,s153,s154,s155,s156,s157,s158,s159,s160,s161,s162,s163,s164,s165,s166,s167,s168,s169,s170,s171,s172,s173,s174,s175,s176,s177,s178,s179,s180,s181,s182,s183,s184,s185,s186,s187,s188,s189,s190,s191,s192,s193,s194,s195,s196,s197,s198,s199,s200,s201,s202,s203,s204,s205,s206,s207,s208,s209,s210,s211,s212,s213,s214,s215,s216,s217,s218,s219,s220,s221,s222,s223,s224,s225,s226,s227,s228,s229,s230,s231,s232,s233,s234,s235,s236,s237,s238,s239,s240,s241,s242,s243,s244,s245,s246,s247,s248,s249,s250,s251,s252,s253,s254,s255,s256,s257,s258,s259,s260,s261,s262,s263,s264,s265,s266,s267,s268,s269,s270,s271,s272,s273,s274,s275,s276,s277,s278,s279,s280,s281,s282,s283,s284,s285,s286,s287,s288,s289,s290,s291,s292,s293,s294,s295,s296,s297,s298,s299,s300,s301,s302,s303,s304,s305,s306,s307,s308,s309,s310,s311,s312,s313,s314,s315,s316,s317,s318,s319,s320,s321,s322,s323,s324,s325,s326,s327,s328,s329,s330,s331,s332,s333,s334,s335,s336,s337,s338,s339,s340,s341,s342,s343,s344,s345,s346,s347,s348,s349,s350,s351,s352,s353,s354,s355,s356,s357,s358,s359,s360,s361,s362,s363,s364,s365,s366,s367,s368,s369,s370,s371,s372,s373,s374,s375,s376,s377,s378,s379,s380,s381,s382,s383,s384,s385,s386,s387,s388,s389,s390,s391,s392,s393,s394,s395,s396,s397,s398,s399,s400,s401,s402,s403,s404,s405,s406,s407,s408,s409,s410,s411,s412,s41
3,s414,s415,s416,s417,s418,s419,s420,s421,s422,s423,s424,s425,s426,s427,s428,s429,s430,s431,s432,s433,s434,s435,s436,s437,s438,s439,s440,s441,s442,s443,s444,s445,s446,s447,s448,s449,s450,s451,s452,s453,s454,s455,s456,s457,s458,s459,s460,s461,s462,s463,s464,s465,s466,s467,s468,s469,s470,s471,s472,s473,s474,s475,s476,s477,s478,s479,s480,s481,s482,s483,s484,s485,s486,s487,s488,s489,s490,s491,s492,s493,s494,s495,s496,s497,s498,s499,s500,s501,s502,s503,s504,s505,s506,s507,s508,s509,s510,s511,s512,s513,s514,s515,s516,s517,s518,s519,s520,s521,s522,s523,s524,s525,s526,s527,s528,s529,s530,s531,s532,s533,s534,s535,s536,s537,s538,s539,s540,s541,s542,s543,s544,s545,s546,s547,s548,s549,s550,s551,s552,s553,s554,s555,s556,s557,s558,s559,s560,s561,s562,s563,s564,s565,s566,s567,s568,s569,s570,s571,s572,s573,s574,s575,s576,s577,s578,s579,s580,s581,s582,s583,s584,s585,s586,s587,s588,s589,s590,s591,s592,s593,s594,s595,s596,s597,s598,s599,s600,s601,s602,s603,s604,s605,s606,s607,s608,s609,s610,s611,s612,s613,s614,s615,s616,s617,s618,s619,s620,s621,s622,s623,s624,s625,s626,s627,s628,s629,s630,s631,s632,s633,s634,s635,s636,s637,s638,s639,s640,s641,s642,s643,s644,s645,s646,s647,s648,s649,s650,s651,s652,s653,s654,s655,s656,s657,s658,s659,s660,s661,s662,s663,s664,s665,s666,s667,s668,s669,s670,s671,s672,s673,s674,s675,s676,s677,s678,s679,s680,s681,s682,s683,s684,s685,s686,s687,s688,s689,s690,s691,s692,s693,s694,s695,s696,s697,s698,s699,s700,s701,s702,s703,s704,s705,s706,s707,s708,s709,s710,s711,s712,s713,s714,s715,s716,s717,s718,s719,s720,s721,s722,s723,s724,s725,s726,s727,s728,s729,s730,s731,s732,s733,s734,s735,s736,s737,s738,s739,s740,s741,s742,s743,s744,s745,s746,s747,s748,s749,s750,s751,s752,s753,s754,s755,s756,s757,s758,s759,s760,s761,s762,s763,s764,s765,s766,s767,s768,s769,s770,s771,s772,s773,s774,s775,s776,s777,s778,s779,s780,s781,s782,s783,s784,s785,s786,s787,s788]); |
【折腾过程】
1.针对第一个获得的数据:
\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86 |
去分析了一下,结果是:
>>> print '\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86'.decode('unicode-escape')
终于有iphone版的了
即,对应着,第一个评论数据:
所以,接下来,就是针对于这样的数据:
1 2 3 4 5 | s0.commentCount=0;s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="148749270";s0.mainCommentCount=0;s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;s0.synchMiniBlog=-1;s0.userAvatar=0;s0.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s0.userId=186541395;s0.userName="ni_chen";s0.userNickname="Neysa"; s1.commentCount=1;s1.content="\u4E3B\u9875\u7EC8\u4E8E\u7545\u901A\u4E86\uFF0C\u4FFA\u53C8\u8981\u632A\u7A9D\u4E86[P]\u5FAE\u7B11[/P]";s1.id="134875456";s1.mainCommentCount=1;s1.moodType=1;s1.moveFrom=null;s1.publishTime=1350461318140;s1.synchMiniBlog=-1;s1.userAvatar=0;s1.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s1.userId=186541395;s1.userName="ni_chen";s1.userNickname="Neysa"; s2.commentCount=0;s2.content="BK\u529E\u516C\u5BA4\u5927\u5988\u771F\u662F\u540D\u4E0D\u865A\u4F20\uFF0C\u5976\u5976\u7684\u4ECA\u5929\u8FD8\u4E0D\u7ED3\u675F\u6211\u8981\u6297\u8BAE\u4E86";s2.id="134892431";s2.mainCommentCount=0;s2.moodType=1;s2.moveFrom=null;s2.publishTime=1350454487895;s2.synchMiniBlog=-1;s2.userAvatar=0;s2.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";s2.userId=186541395;s2.userName="ni_chen";s2.userNickname="Neysa"; |
如何去用python一点点解析出来,成为单个的评论。
2.所以,就是去写代码去解析评论了。
代码如下:
# Patterns for the simple "sN.<field>=<value>;" assignments of one comment.
# Each entry is (dict key, regex for the text that follows "sN"); the value is
# always captured by the named group "value".
# Sample assignments these are written against:
#   s0.commentCount=0;        s0.id="148749270";      s0.moveFrom="iphone";
#   s2.moveFrom=null;         s699.moveFrom="";       s0.synchMiniBlog=-1;
#   s0.userAvatarUrl="http://img.bimg.126.net/photo/hmZoNQaqzZALvVp0rE7faA==/0.jpg";
_CMT_FIELD_PATTERNS = [
    ('commentCount', r'\.commentCount=(?P<value>\d+);'),
    ('id', r'\.id="(?P<value>\d+)";'),
    ('mainCommentCount', r'\.mainCommentCount=(?P<value>\d+);'),
    ('moodType', r'\.moodType=(?P<value>\d+);'),
    # moveFrom may be a quoted string, an empty string or the bare word null
    ('moveFrom', r'\.moveFrom="?(?P<value>[^"]*?)"?;'),
    ('publishTime', r'\.publishTime=(?P<value>\d+);'),
    ('synchMiniBlog', r'\.synchMiniBlog=(?P<value>.+?);'),
    ('userAvatar', r'\.userAvatar=(?P<value>\d+);'),
    ('userAvatarUrl', r'\.userAvatarUrl="?(?P<value>http://.+?)"?;'),
    ('userId', r'\.userId=(?P<value>\d+);'),
    ('userName', r'\.userName="?(?P<value>.+?)"?;'),
    ('userNickname', r'\.userNickname="?(?P<value>.+?)"?;'),
]


def _searchCmtField(strSn, fieldPattern, cmtStr):
    """Return the "value" group of strSn + fieldPattern in cmtStr, or None if absent."""
    found = re.search(re.escape(strSn) + fieldPattern, cmtStr)
    return found.group("value") if found else None


def parseRespDwrToCmtList(respDwrReplyStr):
    """
    Parse a DWR-REPLY response string into a list of comment dicts.

    The reply is a sequence of JavaScript assignments, one run per comment:
        s0.commentCount=0;s0.content="\\u7EC8...";s0.id="148749270"; ...
        ...;s0.userName="ni_chen";s0.userNickname="Neysa";

    Parameters:
        respDwrReplyStr: the raw DWR-REPLY text; None or "" yields [].

    Returns:
        A list with one dict per comment, in the order found. Every dict has
        the keys cmtIdx (int, the N of "sN"), commentCount, content, id,
        mainCommentCount, moodType, moveFrom, publishTime, synchMiniBlog,
        userAvatar, userAvatarUrl, userId, userName and userNickname.
        All values except cmtIdx are strings exactly as written in the reply
        (a bare null is kept as the string "null"); content additionally has
        its surrounding quotes removed and its \\uXXXX escapes decoded.
        A field that cannot be found keeps the default "" and is logged.
    """
    # Local import so the block does not depend on a file-level import.
    # codecs.decode() accepts both a Python 2 byte string and a Python 3 str,
    # unlike the str.decode() method, which only exists on Python 2.
    import codecs

    cmtDictList = []
    if not respDwrReplyStr:
        return cmtDictList

    # One match per comment: from "sN.commentCount=" up to the end of the
    # userNickname assignment of the *same* sN (named back-reference).
    cmtMatchList = list(re.finditer(
        r's(?P<cmtIdx>\d+)\.commentCount=.+?s(?P=cmtIdx)\.userNickname=".*?";',
        respDwrReplyStr))
    logging.info("len(commentStrList)=%d", len(cmtMatchList))

    for cmtMatch in cmtMatchList:
        eachCommentStr = cmtMatch.group(0)
        strSn = "s" + cmtMatch.group("cmtIdx")
        cmtIdx = int(cmtMatch.group("cmtIdx"))
        logging.info("cmtIdx=%d", cmtIdx)

        singleCmtDict = {'cmtIdx': cmtIdx, 'content': ""}
        for fieldName, _ in _CMT_FIELD_PATTERNS:
            singleCmtDict[fieldName] = ""

        # content is terminated by the following "sN.id=" assignment rather
        # than by the first ';', because the text itself may contain ';'.
        # s0.content="\u7EC8\u4E8E\u6709iphone\u7248\u7684\u4E86";s0.id="
        content = _searchCmtField(
            strSn,
            r'\.content="?(?P<value>.*?)"?;' + re.escape(strSn) + r'\.id="',
            eachCommentStr)
        if content is None:
            logging.warning("can not find content for %s", strSn)
        else:
            # The reply is ASCII with \uXXXX escapes; turn it into real text.
            singleCmtDict['content'] = codecs.decode(content, "unicode-escape")
            logging.info("content=%s", singleCmtDict['content'])

        for fieldName, fieldPattern in _CMT_FIELD_PATTERNS:
            fieldValue = _searchCmtField(strSn, fieldPattern, eachCommentStr)
            if fieldValue is None:
                logging.warning("can not find %s for %s", fieldName, strSn)
                continue
            singleCmtDict[fieldName] = fieldValue
            logging.info("%s=%s", fieldName, fieldValue)

        cmtDictList.append(singleCmtDict)

    return cmtDictList
3.但是,对于mainCommentCount不是0(比如mainCommentCount=1)的评论,说明其下是有对应的子评论的,所以还要想办法把这些子评论也抓取出来。
经过分析,对应的发送的post data和response data分别是:
然后就去将其中的逻辑,用代码,再模拟出来,获得对应的数据,再去分析出来子评论的内容。
4.最后,完整的相关解析部分的代码为:
def getPlaincallRespDwrStr(c0ScriptName, c0MethodName, c0Param0, c0Param1, c0Param2):
    """POST one DWR "plaincall" request to the Netease blog API and return the raw reply.

    The request body is the usual DWR key/value list, for example:
        callCount=1
        scriptSessionId=${scriptSessionId}187
        c0-scriptName=FeelingsBeanNew
        c0-methodName=getRecentFeelingCards
        c0-id=0
        c0-param0=number:186541395
        c0-param1=number:0
        c0-param2=number:20
        batchId=292545

    Known script/method combinations used by this file:
        BlogBeanNew.getComments
        FeelingsBeanNew.getRecentFeelingsComment
        FeelingsBeanNew.getRecentFeelingCards

    Args:
        c0ScriptName: DWR script (bean) name, e.g. "FeelingsBeanNew".
        c0MethodName: DWR method name, e.g. "getRecentFeelingCards".
        c0Param0, c0Param1, c0Param2: already type-prefixed DWR parameters
            such as "number:0" or "string:134875456".

    Returns:
        Whatever crifanLib.getUrlRespHtml returns for the POST (the
        "//#DWR-REPLY ..." text).
    """
    logging.debug(
        "get FeelingsBeanNew reponse DWR string for c0MethodName=%s, c0Param0=%s, c0Param1=%s, c0Param2=%s",
        c0MethodName, c0Param0, c0Param1, c0Param2)

    postDict = {
        'callCount': '1',
        'scriptSessionId': '${scriptSessionId}187',
        'c0-scriptName': c0ScriptName,
        'c0-methodName': c0MethodName,
        'c0-id': '0',
        'c0-param0': c0Param0,
        'c0-param1': c0Param1,
        'c0-param2': c0Param2,
        'batchId': '1',  # fixed value; real browsers send a varying number
    }

    plaincallDwrUrl = (gConst['blogApi163'] + '/' + gVal['blogUser'] + '/'
                       + "dwr/call/plaincall/" + c0ScriptName + "." + c0MethodName + ".dwr")
    logging.debug("plaincallDwrUrl=%s", plaincallDwrUrl)

    headerDict = {'Content-Type': "text/plain"}
    # DWR plaincall expects one key=value pair per line, hence the '\r\n' delimiter
    plaincallRespDwrStr = crifanLib.getUrlRespHtml(
        plaincallDwrUrl,
        postDict=postDict,
        headerDict=headerDict,
        postDataDelimiter='\r\n')
    logging.debug("plaincallRespDwrStr=%s", plaincallRespDwrStr)
    return plaincallRespDwrStr


def fetchComments_feelingCard():
    """Fetch all feeling cards plus their replies, as one flat comment dict list.

    Feeling cards are treated as "main comments"; replies to a card are
    "sub comments". Main comments come first in the returned list, followed
    by all sub comments, each with 'curCmtIdx'/'curCmtNum' continuing after
    the main ones and 'parentCmtNum' pointing at the comment it answers.

    This is best-effort: any failure is logged and whatever was collected
    so far is returned.

    Returns:
        list of comment dicts as produced by parseSingleDwrStrToCmtDict.
    """
    totalCmtDictList = []
    totalMainCmtDictList = []
    totalSubCmtDictList = []

    startIdx = 0
    onceGetNum = 1000  # number of items requested per call
    seenMainCmtIds = set()

    try:
        while True:
            getRecentFeelingCardsRespDwrStr = getPlaincallRespDwrStr(
                "FeelingsBeanNew",
                "getRecentFeelingCards",
                "number:" + str(gVal['userId']),
                "number:" + str(startIdx),
                "number:" + str(onceGetNum))
            logging.debug("getRecentFeelingCardsRespDwrStr=%s", getRecentFeelingCardsRespDwrStr)

            curMainCmtDictList = parseMainCmtDwrStrToMainCmtDictList(getRecentFeelingCardsRespDwrStr)
            curGotMainCmtNum = len(curMainCmtDictList)

            # Safety net: if the server hands back a page we already have,
            # stop instead of looping forever.
            if curMainCmtDictList and (curMainCmtDictList[0]['id'] in seenMainCmtIds):
                logging.debug("Got an already seen page at startIdx=%d, stop fetching", startIdx)
                break

            # The sN index restarts at 0 in every reply, so shift it by the
            # number of items already collected (0 for the first page).
            pageOffset = len(totalMainCmtDictList)
            for eachMainCmtDict in curMainCmtDictList:
                eachMainCmtDict['curCmtIdx'] += pageOffset
                eachMainCmtDict['curCmtNum'] += pageOffset
                seenMainCmtIds.add(eachMainCmtDict['id'])
            totalMainCmtDictList.extend(curMainCmtDictList)

            if curGotMainCmtNum < onceGetNum:
                logging.debug(
                    "Request %d comments, but only response %d comments, so no more comments, has got all comments",
                    onceGetNum, curGotMainCmtNum)
                break

            # NOTE(review): assumes c0-param1 is a zero-based start offset
            # (the captured request used 0 together with a count) - confirm.
            startIdx += onceGetNum

        logging.debug("Total got %d main comments dict", len(totalMainCmtDictList))
        totalCmtDictList.extend(totalMainCmtDictList)
        logging.debug("Total comments %d", len(totalCmtDictList))

        # Fetch the replies of every card that reports at least one.
        for eachMainCmtDict in totalMainCmtDictList:
            mainCommentCountInt = int(eachMainCmtDict['mainCommentCount'] or 0)
            if mainCommentCountInt > 0:
                logging.debug("[%d] main comment has sub %d comments",
                              eachMainCmtDict['curCmtIdx'], mainCommentCountInt)
                subCmtDwrStr = getFeelingCardSubCmtDwrStr(eachMainCmtDict['id'])
                totalSubCmtDictList.extend(parseSubCmtDwrStrToSubCmtDictList(subCmtDwrStr))

        logging.debug("Total got %d sub comment dict", len(totalSubCmtDictList))
        if totalSubCmtDictList:
            # Sub comments are numbered right after the main comments.
            subCmtStartIdx = len(totalMainCmtDictList)
            logging.debug("subCmtStartIdx=%d", subCmtStartIdx)
            for idx, eachSubCmtDict in enumerate(totalSubCmtDictList):
                eachSubCmtDict['curCmtIdx'] = subCmtStartIdx + idx
                eachSubCmtDict['curCmtNum'] = eachSubCmtDict['curCmtIdx'] + 1
            logging.debug("done for update sub comment index")

            # id -> comment number lookups replace the former nested scans
            mainCmtNumById = {d['id']: d['curCmtNum'] for d in totalMainCmtDictList}
            subCmtNumById = {d['id']: d['curCmtNum'] for d in totalSubCmtDictList}
            for eachSubCmtDict in totalSubCmtDictList:
                # default parent: the card this reply belongs to
                cardId = eachSubCmtDict['cardId']
                if cardId in mainCmtNumById:
                    eachSubCmtDict['parentCmtNum'] = mainCmtNumById[cardId]
                # a reply to another reply (replyComId != "-1") overrides it
                replyComId = eachSubCmtDict['replyComId']
                if replyComId in subCmtNumById:
                    logging.debug("sub cmt id=%s replies to sub comment id=%s, whose curCmtNum=%d",
                                  eachSubCmtDict['id'], replyComId, subCmtNumById[replyComId])
                    eachSubCmtDict['parentCmtNum'] = subCmtNumById[replyComId]
            logging.debug("done for update sub comment's parent relation")

            totalCmtDictList.extend(totalSubCmtDictList)
    except Exception:
        # best-effort: keep what we have, but record why we stopped
        logging.debug("Fail for fetch the feeling card (index=[%d-%d])",
                      startIdx, startIdx + onceGetNum - 1, exc_info=True)

    return totalCmtDictList


def getFeelingCardSubCmtDwrStr(subCmtId):
    """Return the raw DWR reply listing the replies of one feeling card.

    Args:
        subCmtId: despite the name, this is the id of the feeling card
            (main comment) whose replies are requested; it is sent as
            "string:<id>" in c0-param0.

    Returns:
        The response string of FeelingsBeanNew.getRecentFeelingsComment.
    """
    logging.debug("get sub comment for %s", subCmtId)
    getRecentFeelingsCommentRespDwrStr = getPlaincallRespDwrStr(
        "FeelingsBeanNew",
        "getRecentFeelingsComment",
        "string:" + str(subCmtId),
        "number:1",
        "number:0")
    logging.debug("getRecentFeelingsCommentRespDwrStr=%s", getRecentFeelingsCommentRespDwrStr)
    return getRecentFeelingsCommentRespDwrStr


# Value patterns (exactly one capture group each) for the "sN.<field>=<value>;"
# pairs of a DWR record.
_DWR_DIGITS = r'(\d+)'               # s0.userId=186541395;
_DWR_RAW = r'(.+?)'                  # s0.popup=false;  (kept verbatim)
_DWR_OPT_QUOTED = r'"?(.*?)"?'       # s0.ip="1.2.3.4"; / s0.ip=null; / s0.x="";

# fields present in both main and sub comment records
_COMMON_CMT_FIELDS = (
    ('id', r'"(\d+)"'),                  # s0.id="148749270";
    ('moveFrom', r'"?([^"]*?)"?'),       # "iphone" / null / "wap" / ""
    ('publishTime', _DWR_DIGITS),        # epoch milliseconds
    ('synchMiniBlog', _DWR_RAW),         # -1 in main records, false in sub records
    ('userId', _DWR_DIGITS),
)

# fields only present in sub comment (reply) records
_SUB_CMT_FIELDS = (
    ('cardId', _DWR_OPT_QUOTED),         # id of the parent feeling card
    ('ip', _DWR_OPT_QUOTED),
    ('ipName', _DWR_RAW),
    ('lastUpdateTime', _DWR_DIGITS),
    ('mainComId', _DWR_OPT_QUOTED),
    ('popup', _DWR_RAW),
    ('publisherAvatar', _DWR_DIGITS),
    ('publisherAvatarUrl', _DWR_OPT_QUOTED),
    ('publisherId', _DWR_DIGITS),
    ('publisherName', _DWR_OPT_QUOTED),
    ('publisherNickname', _DWR_OPT_QUOTED),
    ('publisherUrl', _DWR_OPT_QUOTED),
    ('replyComId', _DWR_OPT_QUOTED),     # "-1" when not a reply to another reply
    ('replyToUserId', _DWR_DIGITS),
    ('replyToUserName', _DWR_OPT_QUOTED),
    ('replyToUserNick', _DWR_OPT_QUOTED),
    ('spam', _DWR_DIGITS),
    ('subComments', _DWR_RAW),
    ('valid', _DWR_DIGITS),
)

# fields only present in main comment (feeling card) records
_MAIN_CMT_FIELDS = (
    ('commentCount', _DWR_DIGITS),
    ('mainCommentCount', _DWR_DIGITS),
    ('moodType', _DWR_DIGITS),
    ('userAvatar', _DWR_DIGITS),
    ('userAvatarUrl', r'"?(http://.+?)"?'),
    ('userName', _DWR_OPT_QUOTED),
    ('userNickname', _DWR_OPT_QUOTED),
)

# fields whose value carries \uXXXX escapes that must be turned into characters
_UNICODE_ESCAPED_CMT_FIELDS = ('publisherNickname', 'userNickname')


def _decodeUnicodeEscape(text):
    """Turn backslash escapes (\\uXXXX, \\") in text into the real characters."""
    if isinstance(text, bytes):
        # Python 2 str / raw bytes: decode directly
        return text.decode("unicode-escape")
    # text string: go through bytes, keeping any non-latin-1 char as \uXXXX
    return text.encode("latin-1", "backslashreplace").decode("unicode-escape")


def _extractDwrField(singleCmtDwrStr, strSn, fieldName, valuePattern):
    """Return the captured value of sN.<fieldName>=...; or "" when it is absent."""
    found = re.search(strSn + r'\.' + fieldName + '=' + valuePattern + ';', singleCmtDwrStr)
    if not found:
        logging.debug("%s.%s not found or has unexpected format, keep empty", strSn, fieldName)
        return ""
    return found.group(1)


def parseSingleDwrStrToCmtDict(singleCmtDwrStr):
    """Parse one DWR record (main or sub comment) into a comment dict.

    A record is a single run of "sN.key=value;" pairs, e.g.
        s0.commentCount=0;s0.content="\\u7EC8...";s0.id="148749270";...;s0.userNickname="Neysa";
    (main comment) or
        s0.cardId="134875456";s0.content="...";s0.id="73300019";...;s0.valid=0;
    (sub comment). A record that starts with "sN.cardId=" is a sub comment.

    Args:
        singleCmtDwrStr: the text of exactly one record.

    Returns:
        dict with 'curCmtIdx', 'curCmtNum', 'parentCmtNum', 'isSubComment',
        the common fields (content, id, moveFrom, publishTime, synchMiniBlog,
        userId) and the main- or sub-specific fields. All field values are
        strings; a field missing from the record is left as "". For main
        comments 'curCmtIdx' is N of sN and 'curCmtNum' is N + 1; for sub
        comments both stay 0 and are assigned later by the caller.
        'content', 'publisherNickname' and 'userNickname' have their
        \\uXXXX escapes decoded; 'content' does not include the enclosing
        double quotes.

    Raises:
        ValueError: when no sN.content="...";sN.id=" pair can be found,
            i.e. the text is not a comment record.
    """
    logging.debug("singleCmtDwrStr=%s", singleCmtDwrStr)

    # 1. main or sub comment?
    isSubComment = bool(re.search(r"^s\d+\.cardId=", singleCmtDwrStr))
    if isSubComment:
        logging.debug("------- is sub comments")
        specificFields = _SUB_CMT_FIELDS
    else:
        logging.debug("======= is main comments")
        specificFields = _MAIN_CMT_FIELDS

    curCmtDict = {
        'curCmtIdx': 0,
        'curCmtNum': 0,
        'parentCmtNum': 0,
        'isSubComment': isSubComment,
        'content': "",
    }
    for fieldName, _ in _COMMON_CMT_FIELDS + specificFields:
        curCmtDict[fieldName] = ""

    # 2. record index and content; content is delimited by the following
    #    ";sN.id=" so that ';' and escaped quotes inside it are harmless
    foundContent = re.search(
        r's(?P<curCmtIdx>\d+)\.content="(?P<content>.*?)";s(?P=curCmtIdx)\.id="',
        singleCmtDwrStr)
    if not foundContent:
        raise ValueError("not a DWR comment record: %r" % (singleCmtDwrStr,))
    curCmtIdx = int(foundContent.group("curCmtIdx"))
    logging.debug("curCmtIdx=%d", curCmtIdx)
    if not isSubComment:
        # sub comments get their index/number from the caller later
        curCmtDict['curCmtIdx'] = curCmtIdx
        curCmtDict['curCmtNum'] = curCmtIdx + 1

    curCmtDict['content'] = _decodeUnicodeEscape(foundContent.group("content"))
    logging.debug("content=%s", curCmtDict['content'])

    # 3. all remaining sN.<field>=<value>; pairs
    strSn = "s" + str(curCmtIdx)
    for fieldName, valuePattern in _COMMON_CMT_FIELDS + specificFields:
        fieldValue = _extractDwrField(singleCmtDwrStr, strSn, fieldName, valuePattern)
        if fieldName in _UNICODE_ESCAPED_CMT_FIELDS:
            fieldValue = _decodeUnicodeEscape(fieldValue)
        elif fieldName == 'ip' and not re.search(r"\d+\.\d+\.\d+\.\d+", fieldValue):
            # s0.ip=null; -> no usable address
            fieldValue = ""
        curCmtDict[fieldName] = fieldValue
        logging.debug("%s=%s", fieldName, fieldValue)

    return curCmtDict


def parseSubCmtDwrStrToSubCmtDictList(subCmtDwrStr):
    """Split a getRecentFeelingsComment reply into records and parse each one.

    The reply looks like:
        //#DWR-INSERT
        //#DWR-REPLY
        var s0={};var s1=[];s0.cardId="134875456";s0.content="...";...;s0.valid=0;
        dwr.engine._remoteHandleCallback('1','0',[s0]);

    Args:
        subCmtDwrStr: the whole reply text.

    Returns:
        list of sub comment dicts (see parseSingleDwrStrToCmtDict), in the
        order the records appear; empty when the reply holds no record.
    """
    # one record runs from "sN.cardId=" to "sN.valid=<n>;" followed by whitespace
    subCmtStrList = re.findall(r's\d+\.cardId=.+?s\d+\.valid=\d+;(?:\s)', subCmtDwrStr)
    logging.debug("len(subCmtStrList)=%d", len(subCmtStrList))
    return [parseSingleDwrStrToCmtDict(singleSubCmtDwrStr)
            for singleSubCmtDwrStr in subCmtStrList]

def parseMainCmtDwrStrToMainCmtDictList(respDwrReplyStr):
    """Split a getRecentFeelingCards reply into records and parse each one.

    Every feeling card is one record of the form
        s0.commentCount=0;s0.content="...";s0.id="148749270";s0.mainCommentCount=0;
        s0.moodType=0;s0.moveFrom="iphone";s0.publishTime=1374626867596;...;
        s0.userName="ni_chen";s0.userNickname="Neysa";
    (all on one line in the real reply).

    Args:
        respDwrReplyStr: the whole DWR-REPLY text.

    Returns:
        list of main comment dicts, one per record, in reply order;
        empty when no record is found.
    """
    # A record runs from "sN.commentCount=" to 'sN.userNickname="...";' followed
    # by whitespace. ".*?" (not ".+?") so an empty nickname does not make
    # the whole record disappear.
    mainCmtDwrStrList = re.findall(
        r's\d+\.commentCount=.+?s\d+\.userNickname=".*?";(?:\s)',
        respDwrReplyStr)
    logging.debug("len(mainCmtDwrStrList)=%d", len(mainCmtDwrStrList))
    return [parseSingleDwrStrToCmtDict(eachMainCmtDwrStr)
            for eachMainCmtDwrStr in mainCmtDwrStrList]


#------------------------------------------------------------------------------
def fillComments_fellingCard(destCmtDict, srcCmtDict):
    """Copy one parsed feeling-card comment into the export comment dict.

    Args:
        destCmtDict: dict to fill; it is modified in place.
        srcCmtDict: a main or sub comment dict (its 'isSubComment' flag
            selects which source fields are used).

    Returns:
        destCmtDict, with keys id, author, author_email, author_url,
        author_IP, date, date_gmt, content, approved, type, parent, user_id.
    """
    logging.debug("--------- source comment: idx=%d, num=%d ---------",
                  srcCmtDict['curCmtIdx'], srcCmtDict['curCmtNum'])

    destCmtDict['id'] = srcCmtDict['curCmtNum']

    if srcCmtDict['isSubComment']:
        # a reply: written by some visitor ("publisher")
        destCmtDict['author'] = srcCmtDict['publisherNickname']
        destCmtDict['author_email'] = srcCmtDict['publisherName']  # e.g. chenlin198412@126
        destCmtDict['author_url'] = saxutils.escape(genNeteaseUserUrl(srcCmtDict['publisherName']))
        destCmtDict['author_IP'] = srcCmtDict['ip']
    else:
        # a feeling card itself: written by the blog owner
        destCmtDict['author'] = srcCmtDict['userNickname']
        destCmtDict['author_email'] = ""
        destCmtDict['author_url'] = saxutils.escape(gVal['blogEntryUrl'])
        destCmtDict['author_IP'] = ""

    # publishTime is epoch milliseconds, e.g. 1374626867596; floor division
    # keeps the seconds an integer no matter which division mode is active
    publishTimeSec = int(srcCmtDict['publishTime']) // 1000
    localTime = crifanLib.timestampToDatetime(str(publishTimeSec))
    gmtTime = crifanLib.convertLocalToGmt(localTime)
    destCmtDict['date'] = localTime.strftime("%Y-%m-%d %H:%M:%S")
    destCmtDict['date_gmt'] = gmtTime.strftime("%Y-%m-%d %H:%M:%S")

    # content was already unescaped while parsing the record
    destCmtDict['content'] = srcCmtDict['content']

    destCmtDict['approved'] = 1
    destCmtDict['type'] = ''
    destCmtDict['parent'] = srcCmtDict['parentCmtNum']
    destCmtDict['user_id'] = 0

    logging.debug("author=%s", destCmtDict['author'])
    logging.debug("author_email=%s", destCmtDict['author_email'])
    logging.debug("author_IP=%s", destCmtDict['author_IP'])
    logging.debug("author_url=%s", destCmtDict['author_url'])
    logging.debug("date=%s", destCmtDict['date'])
    logging.debug("date_gmt=%s", destCmtDict['date_gmt'])
    logging.debug("content=%s", destCmtDict['content'])
    logging.debug("parent=%s", destCmtDict['parent'])

    return destCmtDict

#------------------------------------------------------------------------------
def fetchAndParseComments(url, html):
    """Fetch the comments of one post and convert them to export comment dicts.

    The special feeling-card URL is served by the DWR based fetcher; every
    other URL has its comments extracted from the page html.

    Args:
        url: URL of the post being exported.
        html: html of that post (only used for ordinary posts).

    Returns:
        list of filled comment dicts; empty when there are no comments.
    """
    isFeelingCard = (url == gVal['special']['feelingCard']['url'])

    if isFeelingCard:
        cmtRespDictList = fetchComments_feelingCard()
    else:
        # ordinary post: comments are extracted from the page itself
        soup = htmlToSoup(html)
        cmtRespDictList = fetchComments(url, soup)

    if not cmtRespDictList:
        return []

    fillOneComment = fillComments_fellingCard if isFeelingCard else fillComments
    # every comment is filled into its own fresh dict
    return [fillOneComment({}, cmtDict) for cmtDict in cmtRespDictList]
【总结】
其实分析逻辑,获取评论数据,不是很难。
麻烦的是写代码去提取评论数据:字段多、格式琐碎,是个体力活……
关于抓取网易163博客的FeelingCard心情随笔的完整的代码,可以去看:
转载请注明:在路上 » 【记录】用Python解析网易163博客的心情随笔FeelingCard返回的DWR-REPLY数据