【背景】
之前写过一个Python脚本,用来模拟访问menupix.com的页面,
然后获得返回的jsonp字符串。
【scrape_menupix_com代码分享】
1.截图:
(1)运行效果:
返回的jsonp示例:
jsonp1358141152({"menuHtml" :"<script type='text/javascript'>\n\n ......"});
2.Python项目代码下载:
scrape_menupix_com_2013-01-14.7z
3.代码分享:
(1)scrape_menupix_com.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
-------------------------------------------------------------------------------
Function:
Scrape menupix.com to get the response jsonp string.

Version:    2013-01-14
Author:     Crifan Li
Contact:    https://www.crifan.com/about/me/
-------------------------------------------------------------------------------
"""

#--------------------------------const values-----------------------------------
gConst = {}
gCfg = {}
gVal = {}

#---------------------------------import---------------------------------------
import os
import re
import sys
# Extend the module search path before the imports below; presumably the
# bundled crifanLib / BeautifulSoup modules live under ./libs (verify).
sys.path.append("libs")
from BeautifulSoup import BeautifulSoup, Tag, CData
import crifanLib
import logging
# import urllib
# import json
# import csv
# import argparse
# import codecs


def main():
    """Fetch each page in idList and request the jsonp data for its MenusApi key.

    For every id, the page at ``prefUrl + id`` is downloaded, the key passed to
    ``new MenusApi("...")`` is extracted from the HTML, and a second request is
    sent to ``menusBaseUrl`` with that key and a ``jsonp<timestamp>`` callback
    name. The jsonp response is only logged, not parsed or returned.

    Pages in which no MenusApi key is found are skipped.
    """
    # init cookie
    crifanLib.initAutoHandleCookies()

    # NOTE(review): prefUrl and menusBaseUrl are used below but are not defined
    # anywhere in this listing (likely lost when the code was published).
    # They must be defined at module level before running, otherwise the
    # first use raises NameError.
    idList = [
        "201384",
    ]
    for eachId in idList:
        wholeUrl = prefUrl + str(eachId)
        respHtml = crifanLib.getUrlRespHtml(wholeUrl)
        logging.debug("respHtml=%s", respHtml)

        # The page contains a line such as:
        # var menuApi = new MenusApi("k47dex17opfs7y7nae9a6p8o0");
        # Raw string so that \( and \w reach the regex engine unchanged.
        foundMenusApi = re.search(r'MenusApi\("(?P<menusApi>\w+)"\)', respHtml)
        logging.info("foundMenusApi=%s", foundMenusApi)
        if not foundMenusApi:
            # No API key on this page: nothing to request for this id.
            continue

        menusApi = foundMenusApi.group("menusApi")
        logging.info("menusApi=%s", menusApi)

        # The callback name is "jsonp" followed by the current timestamp
        # (10 digits in the sample response, e.g. jsonp1358141152).
        timeStamp10Digit = crifanLib.getCurTimestamp()
        logging.info("timeStamp10Digit=%s", timeStamp10Digit)
        jsonp = "jsonp" + str(timeStamp10Digit)
        logging.info("jsonp=%s", jsonp)

        paraDict = {
            "apiKey": menusApi,
            "v": "2",
            "callback": jsonp,
        }
        # presumably genFullUrl appends paraDict as a query string -- verify
        # against crifanLib.
        menusWholeUrl = crifanLib.genFullUrl(menusBaseUrl, paraDict)
        logging.info("menusWholeUrl=%s", menusWholeUrl)

        menusRespHtml = crifanLib.getUrlRespHtml(menusWholeUrl)
        logging.info("menusRespHtml=%s", menusRespHtml)
        # here successfully got:
        # menusRespHtml=jsonp1358141152({"menuHtml" :"<script type='text/javascript'>\n\n ......"});


###############################################################################
if __name__ == "__main__":
    scriptSelfName = crifanLib.extractFilename(sys.argv[0])

    # Full DEBUG output goes to "<script name>.log", overwritten on each run.
    logging.basicConfig(
        level=logging.DEBUG,
        format='LINE %(lineno)-4d  %(levelname)-8s %(message)s',
        datefmt='%m-%d %H:%M',
        filename=scriptSelfName + ".log",
        filemode='w',
    )
    # define a Handler which writes INFO messages or higher to the sys.stderr
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    # set a format which is simpler for console use
    formatter = logging.Formatter('LINE %(lineno)-4d : %(levelname)-8s %(message)s')
    # tell the handler to use this format
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    try:
        main()
    except Exception:
        # Record the traceback in the log, then let the error propagate so
        # the process still exits with a failure.
        logging.exception("Unknown Error !")
        raise
【总结】
转载请注明:在路上 » 【代码分享】Python代码:scrape_menupix_com – 抓取menupix.com以获得返回的jsonp字符串