【背景】
之前写了一段Python代码,去模拟访问menupix.com的菜单页面,
然后获得返回的jsonp字符串。
【scrape_menupix_com代码分享】
1.截图:
(1)运行效果:
返回的jsonp示例:
jsonp1358141152({"menuHtml" :"<script type='text/javascript'>\n\n ......"});
2.Python项目代码下载:
scrape_menupix_com_2013-01-14.7z
3.代码分享:
(1)scrape_menupix_com.py
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
-------------------------------------------------------------------------------
Function:
Scrape menupix.com to get the response JSONP string.
https://www.elance.com/j/scrape-website/36786225/

Version:    2013-01-14
Author:     Crifan Li
Contact:    https://www.crifan.com/about/me/

-------------------------------------------------------------------------------
"""

#--------------------------------const values-----------------------------------
# Module-level placeholders; all three are empty and unused by the code below.
gConst = {
}

gCfg = {
}

gVal = {
}

#---------------------------------import---------------------------------------
import os
import re
import sys
# The bundled third-party modules are looked up in the local "libs" folder,
# so the path must be extended before they are imported.
sys.path.append("libs")
from BeautifulSoup import BeautifulSoup, Tag, CData
import crifanLib
import logging
# import urllib;
# import json;
# import csv;
# import argparse;
# import codecs;


def main():
    """Fetch each restaurant's menupix.com menu page and request its menu JSONP.

    For every id in the hard-coded id list:
      1. download the menupix.com ``menu_link.php`` page for that id;
      2. extract the API key passed to ``MenusApi("...")`` in that page;
      3. build the menus.singleplatform.co URL with that key and a
         ``jsonp<timestamp>`` callback name, and download it.

    The JSONP response is only written to the log; nothing is returned.

    NOTE(review): the original indentation was lost, so the loop/if nesting
    here is reconstructed from the statement order -- confirm against the
    downloadable project archive.
    """
    #init cookie
    crifanLib.initAutoHandleCookies()

    prefUrl = "http://www.menupix.com/menudirectory/menu_link.php?mxresto_id="
    idList = [
        "201384",
    ]
    for eachId in idList:
        wholeUrl = prefUrl + str(eachId)
        respHtml = crifanLib.getUrlRespHtml(wholeUrl)
        logging.debug("respHtml=%s", respHtml)

        # Target request looks like:
        #http://menus.singleplatform.co/restaurants/saketini/menu?apiKey=k47dex17opfs7y7nae9a6p8o0&v=2&callback=jsonp1358010219463
        # The API key comes from this line of the page's JavaScript:
        #var menuApi = new MenusApi("k47dex17opfs7y7nae9a6p8o0");
        # Raw string so that \( and \w reach the regex engine untouched.
        foundMenusApi = re.search(r'MenusApi\("(?P<menusApi>\w+)"\)', respHtml)
        logging.info("foundMenusApi=%s", foundMenusApi)
        if not foundMenusApi:
            # Without the API key the menus URL cannot be built; say so
            # explicitly instead of silently moving on to the next id.
            logging.warning("Can not find MenusApi key for id=%s, url=%s", eachId, wholeUrl)
            continue

        menusApi = foundMenusApi.group("menusApi")
        logging.info("menusApi=%s", menusApi)

        # Used as the suffix of the JSONP callback name, e.g. jsonp1358141152.
        timeStamp10Digit = crifanLib.getCurTimestamp()
        logging.info("timeStamp10Digit=%s", timeStamp10Digit)
        jsonp = "jsonp" + str(timeStamp10Digit)
        logging.info("jsonp=%s", jsonp)

        # NOTE(review): the restaurant name "saketini" is hard-coded here
        # regardless of eachId -- confirm whether it should be derived from
        # the fetched page when more ids are added to idList.
        menusBaseUrl = "http://menus.singleplatform.co/restaurants/saketini/menu"
        paraDict = {
            "apiKey"    : menusApi,
            "v"         : "2",
            "callback"  : jsonp,
        }
        # presumably appends paraDict to the base url as a query string;
        # see crifanLib.genFullUrl
        menusWholeUrl = crifanLib.genFullUrl(menusBaseUrl, paraDict)
        logging.info("menusWholeUrl=%s", menusWholeUrl)

        menusRespHtml = crifanLib.getUrlRespHtml(menusWholeUrl)
        logging.info("menusRespHtml=%s", menusRespHtml)
        #here successfully got:
        #menusRespHtml=jsonp1358141152({"menuHtml" :"<script type='text/javascript'>\n\n ......"});


###############################################################################
if __name__ == "__main__":
    scriptSelfName = crifanLib.extractFilename(sys.argv[0])

    # Everything (DEBUG and above) goes to "<script name>.log", which is
    # overwritten on every run.
    logging.basicConfig(
        level=logging.DEBUG,
        format='LINE %(lineno)-4d %(levelname)-8s %(message)s',
        datefmt='%m-%d %H:%M',
        filename=scriptSelfName + ".log",
        filemode='w')
    # define a Handler which writes INFO messages or higher to the sys.stderr
    console = logging.StreamHandler()
    console.setLevel(logging.INFO)
    # set a format which is simpler for console use
    formatter = logging.Formatter('LINE %(lineno)-4d : %(levelname)-8s %(message)s')
    # tell the handler to use this format
    console.setFormatter(formatter)
    logging.getLogger('').addHandler(console)

    try:
        main()
    except Exception:
        # Log the traceback, then re-raise so the failure is still visible to
        # the caller. SystemExit/KeyboardInterrupt are deliberately not
        # reported as "Unknown Error".
        logging.exception("Unknown Error !")
        raise
【总结】
转载请注明:在路上 » 【代码分享】Python代码:scrape_menupix_com – 抓取menupix.com以获得返回的jsonp字符串