【背景】
折腾:
期间,用如下代码:
/** Get response from url */ public HttpResponse getUrlResponse( String url, List<NameValuePair> headerDict, List<NameValuePair> postDict, int timeout ) { // init HttpResponse response = null; HttpUriRequest request = null; DefaultHttpClient httpClient = new DefaultHttpClient(); //HttpParams headerParams = new HttpParams(); //HttpParams headerParams = new DefaultedHttpParams(headerParams, headerParams); //HttpParams headerParams = new BasicHttpParams(); BasicHttpParams headerParams = new BasicHttpParams(); //HttpConnectionParams. //default enable auto redirect headerParams.setParameter(CoreProtocolPNames.USER_AGENT, gUserAgent); headerParams.setParameter(ClientPNames.HANDLE_REDIRECTS, Boolean.TRUE); if(postDict != null) { HttpPost postReq = new HttpPost(url); try{ HttpEntity postBodyEnt = new UrlEncodedFormEntity(postDict); postReq.setEntity(postBodyEnt); } catch(Exception e){ e.printStackTrace(); } request = postReq; } else { HttpGet getReq = new HttpGet(url); request = getReq; } if(headerParams != null) { //HttpProtocolParams.setUserAgent(headerParams, gUserAgent); //headerParams.setHeader(HttpMethodParams.USER_AGENT, gUserAgent); request.setParams(headerParams); } //request.setHeader("User-Agent", gUserAgent); try{ HttpContext localContext = new BasicHttpContext(); localContext.setAttribute(ClientContext.COOKIE_STORE, gCurCookieStore); response = httpClient.execute(request, localContext); //response HeaderGroup value: //[Via: 1.1 SC-SZ-06, Connection: Keep-Alive, Proxy-Connection: Keep-Alive, Content-Length: 11006, Expires: Tue, 17 Sep 2013 01:43:44 GMT, Date: Tue, 17 Sep 2013 01:43:44 GMT, Content-Type: text/html;charset=utf-8, Server: BWS/1.0, Cache-Control: private, BDPAGETYPE: 1, BDUSERID: 0, BDQID: 0xaaa869770d8d5dcd, Set-Cookie: BDSVRTM=2; path=/, Set-Cookie: H_PS_PSSID=3361_2777_1465_2975_3109; path=/; domain=.baidu.com, Set-Cookie: BAIDUID=C0C2EAA4B1805EF21EE097E2C6A3D448:FG=1; expires=Tue, 17-Sep-43 01:43:44 GMT; path=/; domain=.baidu.com, P3P: CP=" OTI 
DSP COR IVA OUR IND COM "] //gCurCookieStore (formatted ouput) value: /*{ [version: 0][name: BAIDUID][value: C0C2EAA4B1805EF21EE097E2C6A3D448:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 09:43:44 CST 2043]=java.lang.Object@55ba1c2b, [version: 0][name: BDSVRTM][value: 2][domain: www.baidu.com][path: /][expiry: null]=java.lang.Object@55ba1c2b, [version: 0][name: H_PS_PSSID][value: 3361_2777_1465_2975_3109][domain: .baidu.com][path: /][expiry: null]=java.lang.Object@55ba1c2b }*/ } catch (ClientProtocolException cpe) { // TODO Auto-generated catch block cpe.printStackTrace(); } catch (IOException ioe) { // TODO Auto-generated catch block ioe.printStackTrace(); } return response; }
和:
/** * [File] * EmulateLoginBaidu.java * * [Function] * Use Java code to emulate login baidu * * 【教程】模拟登陆百度之Java代码版 * https://www.crifan.com/emulate_login_baidu_use_java_code * * [Version] * v1.0 * 2013-09-16 * * [Note] * 1. need add apache http lib: * 【已解决】Eclipse的java代码出错:The import org.apache cannot be resolved * https://www.crifan.com/java_eclipse_the_import_org_apache_cannot_be_resolved/ * 2.need crifanLib.java * http://code.google.com/p/crifanlib/source/browse/trunk/java/crifanLib.java * * [History] * [v1.0] * 1. initial version. */ import java.io.IOException; import java.util.ArrayList; import java.util.Calendar; import java.util.Date; import java.util.GregorianCalendar; import java.util.Hashtable; import java.util.List; import java.util.Map; import java.util.Scanner; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.http.HttpResponse; import org.apache.http.NameValuePair; import org.apache.http.cookie.Cookie; import org.apache.http.impl.cookie.BasicClientCookie; import org.apache.http.impl.cookie.BasicClientCookie2; import org.apache.http.message.BasicNameValuePair; //import crifanLib; /** * @author CLi * */ public class EmulateLoginBaidu { static crifanLib crl; /** * @param args */ public static void main(String[] args) { crl = new crifanLib(); // TODO Auto-generated method stub EmulateLoginBaiduUsingJava(); } // emulate login baidu using java code public static void EmulateLoginBaiduUsingJava() { boolean bLoginBaiduOk = false; List<Cookie> curCookieList; //step1: login baidu, got cookie BAIDUID String strTokenValue = ""; boolean bGotCookieBaiduid = false; String strBaiduUrl = "http://www.baidu.com/"; HttpResponse baiduResp = crl.getUrlResponse(strBaiduUrl); List<Cookie> cookieList =crl.getCurCookieStore().getCookies(); crl.dbgPrintCookies(cookieList, strBaiduUrl); for(Cookie ck : cookieList) { String cookieName = ck.getName(); if(cookieName.equals("BAIDUID")) { bGotCookieBaiduid = true; } } if (bGotCookieBaiduid) { 
System.out.println("正确:已找到cookie BAIDUID"); } else { System.out.println("错误:没有找到cookie BAIDUID !"); } //step2: login, pass paras, extract resp cookie boolean bExtractTokenValueOK = false; if(bGotCookieBaiduid) { //https://passport.baidu.com/v2/api/?getapi&class=login&tpl=mn&tangram=true String getapiUrl = "https://passport.baidu.com/v2/api/?getapi&class=login&tpl=mn&tangram=true"; String getApiRespHtml = crl.getUrlRespHtml(getapiUrl); //bdPass.api.params.login_token='5ab690978812b0e7fbbe1bfc267b90b3'; //bdPass.api.params.login_token='3cf421493884e0fe9080593d05f4744f'; Pattern tokenValP = Pattern.compile("bdPass\\.api\\.params\\.login_token='(?<tokenVal>\\w+)';"); Matcher tokenValMatcher = tokenValP.matcher(getApiRespHtml); //boolean foundTokenValue = tokenValMatcher.matches(); boolean foundTokenValue = tokenValMatcher.find(); if(foundTokenValue) { strTokenValue = tokenValMatcher.group("tokenVal"); //3cf421493884e0fe9080593d05f4744f System.out.println("正确:找到 bdPass.api.params.login_token=" + strTokenValue); bExtractTokenValueOK = true; } else { System.out.println("错误:没找到bdPass.api.params.login_token !"); } } //step3: verify returned cookies if (bGotCookieBaiduid && bExtractTokenValueOK) { //do some workaround to makesure here cookie H_PS_PSSID not expire //[version: 0][name: H_PS_PSSID][value: 3359_3341_2776_1424_2981][domain: .baidu.com][path: /][expiry: null] //Date newExpiryDate = new Date("Thu Sep 17 14:22:08 CST 2043"); //Date newExpiryDate = new Date(); //Date newExpiryDate = new Date(2043, 9, 17); Date newExpiryDate = new Date(143, 9, 17); //Date newExpiryDate = new Date(2020, 9, 17); //Calendar newExpiryCalendar = new Calendar(); //Calendar newExpiryCalendar = new GregorianCalendar(); //Calendar newExpiryCalendar = new GregorianCalendar(2043, 9, 17); //Calendar newExpiryCalendar = new GregorianCalendar(2043, 9, 17, 14, 22, 8); BasicClientCookie hPsPssidCookie = null; //int hPsPssidCookieIdx = 0; curCookieList = crl.getCurCookieList(); for(Cookie ck : 
curCookieList) { if(ck.getName().equalsIgnoreCase("H_PS_PSSID")) { //hPsPssidCookieIdx = curCookieList.indexOf(ck); hPsPssidCookie = (BasicClientCookie) ck; hPsPssidCookie.setExpiryDate(newExpiryDate); ck = hPsPssidCookie; //break; } } crl.setCurCookieList(curCookieList); String staticPageUrl = "http://www.baidu.com/cache/user/html/jump.html"; //List<NameValuePair> headerDict = new List<NameValuePair>(); List<NameValuePair> postDict = new ArrayList<NameValuePair>(); //ArrayList<NameValuePair> headerDict = new ArrayList<NameValuePair>(); //postDict.Add("ppui_logintime", ""); postDict.add(new BasicNameValuePair("charset", "utf-8")); //postDict.add(new BasicNameValuePair("codestring", "")); //postDict.Add("", ""); postDict.add(new BasicNameValuePair("token", strTokenValue)); postDict.add(new BasicNameValuePair("isPhone", "false")); postDict.add(new BasicNameValuePair("index", "0")); //postDict.add(new BasicNameValuePair("u", "")); //postDict.add(new BasicNameValuePair("safeflg", "0")); postDict.add(new BasicNameValuePair("staticpage", staticPageUrl)); postDict.add(new BasicNameValuePair("loginType", "1")); postDict.add(new BasicNameValuePair("tpl", "mn")); postDict.add(new BasicNameValuePair("callback", "parent.bdPass.api.login._postCallback")); String strBaiduUsername = ""; String strBaiduPassword = ""; Scanner inputReader = new Scanner(System.in); System.out.println("Please Enter Your:" ); System.out.println("Baidu Username:" ); strBaiduUsername = inputReader.nextLine(); System.out.println("You Entered Username=" + strBaiduUsername); System.out.println("Baidu Password:" ); strBaiduPassword = inputReader.nextLine(); System.out.println("You Entered Password=" + strBaiduPassword); postDict.add(new BasicNameValuePair("verifycode", "")); postDict.add(new BasicNameValuePair("mem_pass", "on")); String baiduMainLoginUrl = "https://passport.baidu.com/v2/api/?login"; String loginBaiduRespHtml = crl.getUrlRespHtml(baiduMainLoginUrl, null, postDict); /* List<NameValuePair> 
cookieNamePairList = new ArrayList<NameValuePair>(); cookieNamePairList.add(new BasicNameValuePair("BDUSS", false)); cookieNamePairList.add(new BasicNameValuePair("PTOKEN", false)); cookieNamePairList.add(new BasicNameValuePair("STOKEN", false)); cookieNamePairList.add(new BasicNameValuePair("SAVEUSERID", false));*/ //Map cookieNameDict = new Map(); Map cookieNameDict = new Hashtable(); //Map<Object, Object> cookieNameDict = new Hashtable<Object, Object>; cookieNameDict.put("BDUSS", false); cookieNameDict.put("PTOKEN", false); cookieNameDict.put("STOKEN", false); cookieNameDict.put("SAVEUSERID", false); curCookieList = crl.getCurCookieList(); for(Object objCookieName : cookieNameDict.keySet().toArray()) { String strCookieName = objCookieName.toString(); for(Cookie ck: curCookieList) { if(strCookieName.equalsIgnoreCase(ck.getName())) { cookieNameDict.put(strCookieName, true); } } } boolean bAllCookiesFound = true; for (Object objFoundCurCookie : cookieNameDict.values()) { bAllCookiesFound = bAllCookiesFound && Boolean.parseBoolean(objFoundCurCookie.toString()); } bLoginBaiduOk = bAllCookiesFound; if (bLoginBaiduOk) { System.out.println("成功模拟登陆百度首页!" ); } else { System.out.println("模拟登陆百度首页 失败!"); System.out.println("所返回的HTML源码为:" + loginBaiduRespHtml); } } return; } /* public EmulateLoginBaidu() { //EmulateLoginBaiduUsingJava(); }*/ }
始终还是无法获得对应的返回的html和cookie,从而无法正常模拟登陆。
【折腾过程】
1.很是怀疑,是不是其中的自动跳转,没有工作。
即:
headerParams.setParameter(ClientPNames.HANDLE_REDIRECTS, Boolean.TRUE);
没有起到对应的自动跳转的效果,即:
正常,自动跳转后的结果,应该是:
{Connection: keep-alive Pragma: public Content-Length: 515 Cache-Control: public Content-Type: text/html Date: Tue, 17 Sep 2013 06:20:24 GMT Expires: 0 ETag: w/"iIuDLhaYSmAaLXqM5VeFg8Iy2wvCnATm:1379398824" Last-Modified: Tue, 17 Sep 2013 06:20:24 6SepGMT P3P: CP=" OTI DSP COR IVA OUR IND COM " Set-Cookie: BDUSS=43TERMeExpdWVQaUtVLVYxZG4tU0JZSHpiWTJBTVVFVmhqc1QydElqbW9nVjlTQVFBQUFBJCQAAAAAAAAAAAEAAAAZP0gCYWdhaW5pbnB1dDMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKj0N1Ko9DdSLX; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=baidu.com; httponly,PTOKEN=deleted; expires=Mon, 17-Sep-2012 06:20:23 GMT; path=/; domain=baidu.com; httponly,PTOKEN=a530e5fed0d27f1600c5be851accbedb; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=passport.baidu.com; httponly,STOKEN=964d5d78f925418579c09f050b76354b; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=passport.baidu.com; httponly,SAVEUSERID=deleted; expires=Mon, 17-Sep-2012 06:20:23 GMT; path=/; domain=passport.baidu.com; httponly,USERNAMETYPE=1; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=passport.baidu.com; httponly Server: Vary: Accept-Encoding Content-Encoding: }
而现在,返回的结果,是没有跳转时,所返回的结果:
{Transfer-Encoding: chunked Connection: keep-alive Pragma: public Vary: Accept-Encoding Content-Encoding: Cache-Control: public Content-Type: text/html Date: Tue, 17 Sep 2013 08:02:32 GMT Expires: 0 ETag: w/"TL3qSUIdbJn8y4PtWqjPr1EFB8W22Q1g:1379404952" Last-Modified: Tue, 17 Sep 2013 08:02:32 8SepGMT P3P: CP=" OTI DSP COR IVA OUR IND COM " Server: }
2.参考:
How auto redirect in HttpClient (java, apache)
说是:
HttpClient 3.x不支持自动跳转
HttpClient 4.x 就支持了。
3.我这里的HttpClient,是之前:
【已解决】Eclipse的java代码出错:The import org.apache cannot be resolved
中加的,去看了下,的确已经是4.2.5,是支持的了。
4.看到:
已不推荐再使用ClientPNames了,改推荐:
但是好像是4.3的,我此处,用的是4.2的库,应该是不支持的。
5.我此处调试的时候,其实可以看到对应的设置的:
{http.useragent=Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E, http.protocol.handle-redirects=true}
但是,现实的返回结果,貌似是没有自动跳转的。
6.再继续调试,经过:
request.setParams(headerParams);
是可以将对应的上面的header信息,设置给request的:
7.后来继续调试,发现好像不是自动跳转的问题。
貌似是cookie的问题。
正常的话的,C#中,访问:
https://passport.baidu.com/v2/api/?login
之前的cookie是:
而此处,调试看到的cookie却是:
[ [version: 0][name: BAIDUID][value: 8EFD622E00F965E706C2F4CC3A392AF9:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 16:38:11 CST 2043], [version: 0][name: BDSVRTM][value: 1][domain: www.baidu.com][path: /][expiry: null], [version: 0][name: HOSUPPORT][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 16:38:12 CST 2021], [version: 0][name: H_PS_PSSID][value: 3361_3381_1447_2976][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043] ]
很明显,其中的:
- BDSVRTM:
- domain不对
- expiry不对
- H_PS_PSSID:
- 已经经过上述手动加代码,修复了不对的expiry
所以,去手动添加代码修正这些cookie:
BasicClientCookie hPsPssidCookie = null; BasicClientCookie dbsvrtmCookie = null; //int hPsPssidCookieIdx = 0; curCookieList = crl.getCurCookieList(); for(Cookie ck : curCookieList) { if(ck.getName().equalsIgnoreCase("H_PS_PSSID")) { //hPsPssidCookieIdx = curCookieList.indexOf(ck); hPsPssidCookie = (BasicClientCookie) ck; hPsPssidCookie.setExpiryDate(newExpiryDate); ck = hPsPssidCookie; //break; } if(ck.getName().equalsIgnoreCase("BDSVRTM")) { dbsvrtmCookie = (BasicClientCookie) ck; dbsvrtmCookie.setDomain(".baidu.com"); dbsvrtmCookie.setExpiryDate(newExpiryDate); ck = dbsvrtmCookie; //break; } } crl.setCurCookieList(curCookieList);
修改后为:
[ [version: 0][name: BAIDUID][value: B3CCB8F5D6DA04D26A3CF718FDE8A753:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 16:57:38 CST 2043], [version: 0][name: BDSVRTM][value: 1][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043], [version: 0][name: HOSUPPORT][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 16:57:39 CST 2021], [version: 0][name: H_PS_PSSID][value: 3359_3381_1422_2975_2981_3092][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043] ]
看看结果如何。
结果问题依旧。
8.但是,注意到此处的输出的html是:
<!DOCTYPE html> <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> </head> <body> <script type="text/javascript"> var url = encodeURI('http://www.baidu.com/cache/user/html/jump.html?callback=parent.bdPass.api.login._postCallback&index=0&codestring=captchaservice33303435645673725939414876674e4464472b634173726b77736e62576b674e6f7071306c7074722f39346645676b58634b496e52492b5849394e77744d75554648724931322b7378416873447777766f6853372f4a6e6f572f5555707047635035626746797679386a364269555979625874444a67347a686e454f567171755a79507658386e424d6533754e7a59516b654334507a78492b695a664d784c765945426e584d525764305150666837645a4265324f434e334b796a704c43464966654b527551506e5a737541494d5751526c38566176556f304634307a61334c30376a4a5452756a68614e6f5775785849474c66634a565a67497a3337454a48524c336b564d526a686d506c49413558462b39446933653142627746474338&username=&phonenumber=&mail=&tpl=mn&u=https%3A%2F%2Fpassport.baidu.com%2F&needToModifyPassword=&gotourl=&auth=&error=257'); //parent.callback(url) window.location.replace(url); </script> </body> </html>
看起来,貌似又像是正常返回的html。
所以,真的再次去C#项目中,设置autoredirect为false,看看其返回的结果如何。
结果是:
C#中,如果也是设置autoredirect为false的话,那么返回的内容,也是:
<!DOCTYPE html> <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> </head> <body> <script type="text/javascript"> var url = encodeURI('http://www.baidu.com/cache/user/html/jump.html?callback=parent.bdPass.api.login._postCallback&index=0&codestring=captchaservice64346332423033614479666e5a4c6f336a67534864484939683445457846644676452f4734437858723973515236505979696c4d7a726a385447706a6b7349497055316c426e4a577335553945432b3452433361374b325a475a6863396d496e7866477a5771524a4a734457795147442f3445704d533071747a6633386e70543651785663725837515344316b4a74423473614447676653547a7461596b6c726c75334a624d38556a3679706f4d48567875736565624b32584f796d476e646f363179486b426831767745443946397635725250516e305552417432663566486a6176787675757070636a4d53452b3575796a434434456d4b6b4a56305837594b6c41725965636974794a59494c3848423858413771325542657a6a393464326a5973&username=xxx&phonenumber=&mail=&tpl=mn&u=https%3A%2F%2Fpassport.baidu.com%2F&needToModifyPassword=&gotourl=&auth=&error=257'); //parent.callback(url) window.location.replace(url); </script> </body> </html>
但是,注意到,此处C#中,获得的html中,是有:
username=xxx
的,而java中返回的html中是没有的。
9.然后想到,会不会是由于connection不是keep-alive而导致的?
所以,去确认java中的connection,的确是keep-alive
参考:
Tomcat, HTTP Keep-Alive and Java’s HttpsUrlConnection
【整理】关于Java中的httpClient中可以传入的参数
而加入了:
headerParams.setParameter(CoreConnectionPNames.SO_KEEPALIVE, Boolean.TRUE);
试试效果。
结果问题依旧,返回的html中,还是没有username:
<!DOCTYPE html> <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> </head> <body> <script type="text/javascript"> var url = encodeURI('http://www.baidu.com/cache/user/html/jump.html?callback=parent.bdPass.api.login._postCallback&index=0&codestring=captchaservice6330386433734b6a45483661695042706663673772305a65396d2b6261636753473545314b41795932745136774576595746576a465557376a2f513332523230596f675a68434b63553738734337645567664643304878665839306e56696d74335535415637716c437379376b2f496f3451626e492f4d50684e4f616d59316b346131476e6163366f6945764d7a476a7976455543614954533066307a59333555646f6a46594f616f3078324b42585870515866496f5473676b3632566e4d36732b47682b494342314837702f572f38635757636f6a4a6e7845564a6f6c68416377596b362b725a2b314742475562435a4844315a376f4b75555942416f7239716f4b6a5762514d516b7a453749484d4766426e3044424f7462615169494d35&username=&phonenumber=&mail=&tpl=mn&u=https%3A%2F%2Fpassport.baidu.com%2F&needToModifyPassword=&gotourl=&auth=&error=257'); //parent.callback(url) window.location.replace(url); </script> </body> </html>
另外,参考:
What is HTTP Persistent Connections?
其实已经说是默认是true了。
10.另外,注意到,之前C#正常的时候,cookie中的:
BDSVRTM的值是2
而此处的java的
BDSVRTM的值,始终是1
所以,要去研究一下,原先的C#中,什么时候得到的BDSVRTM是2的。
调试C#项目,确定了:
autoredirect为false的时候,用某个账户(3),也是可以正常模拟登陆的;
且BDSVRTM值也是1,且返回的html也是上述的html,但是其中是包含username的。
所以,再去java中看看。
结果java中,还是返回html中不带username的。
还是无法获得对应的cookie。
11.然后的然后,去检查自己代码,看看post data中是否少传了什么参数,结果发现一个惊天大问题:
之前只注意去java中从控制台中获得用户名和密码,结果竟然忘了将用户名和密码传入到post data中。。。。
你妹的,这个也太疏忽了。。。
所以,去加上,变成:
String staticPageUrl = "http://www.baidu.com/cache/user/html/jump.html"; //List<NameValuePair> headerDict = new List<NameValuePair>(); List<NameValuePair> postDict = new ArrayList<NameValuePair>(); //ArrayList<NameValuePair> headerDict = new ArrayList<NameValuePair>(); //postDict.Add("ppui_logintime", ""); postDict.add(new BasicNameValuePair("charset", "utf-8")); //postDict.add(new BasicNameValuePair("codestring", "")); //postDict.Add("", ""); postDict.add(new BasicNameValuePair("token", strTokenValue)); postDict.add(new BasicNameValuePair("isPhone", "false")); postDict.add(new BasicNameValuePair("index", "0")); //postDict.add(new BasicNameValuePair("u", "")); //postDict.add(new BasicNameValuePair("safeflg", "0")); postDict.add(new BasicNameValuePair("staticpage", staticPageUrl)); postDict.add(new BasicNameValuePair("loginType", "1")); postDict.add(new BasicNameValuePair("tpl", "mn")); postDict.add(new BasicNameValuePair("callback", "parent.bdPass.api.login._postCallback")); String strBaiduUsername = ""; String strBaiduPassword = ""; Scanner inputReader = new Scanner(System.in); System.out.println("Please Enter Your:" ); System.out.println("Baidu Username:" ); strBaiduUsername = inputReader.nextLine(); System.out.println("You Entered Username=" + strBaiduUsername); System.out.println("Baidu Password:" ); strBaiduPassword = inputReader.nextLine(); System.out.println("You Entered Password=" + strBaiduPassword); postDict.add(new BasicNameValuePair("username", strBaiduUsername)); postDict.add(new BasicNameValuePair("password", strBaiduPassword)); postDict.add(new BasicNameValuePair("verifycode", "")); postDict.add(new BasicNameValuePair("mem_pass", "on")); String baiduMainLoginUrl = "https://passport.baidu.com/v2/api/?login"; String loginBaiduRespHtml = crl.getUrlRespHtml(baiduMainLoginUrl, null, postDict);
然后再去试试:
就可以了:
也可以返回cookie了:
[ [version: 0][name: BAIDUID][value: 2804E22D17A174919725D664400B91F5:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 17:39:10 CST 2043], [version: 0][name: BDSVRTM][value: 2][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043], [version: 0][name: BDUSS][value: hVT3RTT2pJcE45azE2R3dxbmVMVXBPaXBnQmlzLVhWNEkyZU5kaEVIaFhzRjlTQVFBQUFBJCQAAAAAAAAAAAEAAAAZP0gCYWdhaW5pbnB1dDMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFcjOFJXIzhSb0][domain: baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021], [version: 0][name: HOSUPPORT][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:11 CST 2021], [version: 0][name: H_PS_PSSID][value: 3361_2776_1466_2976_2980_3092_3109][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043], [version: 0][name: PTOKEN][value: 7de481b4f02704457006d4a457041ceb][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021], [version: 0][name: STOKEN][value: e35d5d2ab6ad6300cbf92a47ddc90337][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021], [version: 0][name: USERNAMETYPE][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021] ]
返回的html中,也带username了:
<!DOCTYPE html> <html> <head> <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> </head> <body> <script type="text/javascript"> var url = encodeURI('http://www.baidu.com/cache/user/html/jump.html?hao123Param=aFZUM1JUVDJwSmNFNDVhekUyUjNkeGJtVk1WWEJQYVhCblFtbHpMVmhXTkVreVpVNWthRVZJYUZoelJqbFRRVkZCUVVGQkpDUUFBQUFBQUFBQUFBRUFBQUFaUDBnQ1lXZGhhVzVwYm5CMWRETUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFBQUFGY2pPRkpYSXpoU2Iw&callback=parent.bdPass.api.login._postCallback&index=0&codestring=&username=xxx&phonenumber=&mail=&tpl=mn&u=https%3A%2F%2Fpassport.baidu.com%2F&needToModifyPassword=0&gotourl=&auth=&error=0'); //parent.callback(url) window.location.replace(url); </script> </body> </html>
12.然后,此处,其实又出现一点点小意外:
java中,返回的cookie中,没有:SAVEUSERID
而C#中,返回的cookie中是有:SAVEUSERID的。
然后去看看获得的response,结果是,其中的cookie中,是有SAVEUSERID的:
Set-Cookie: BDUSS=stVkRscHpnak5yVXJLcFpUVENqczF1Q3NhbmltV2Rwc2V-bmp6Q3JzT2ZzVjlTQVFBQUFBJCQAAAAAAAAAAAEAAAAZP0gCYWdhaW5pbnB1dDMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJ8kOFKfJDhSaG; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=baidu.com; httponly, Set-Cookie: PTOKEN=deleted; expires=Mon, 17-Sep-2012 09:45:03 GMT; path=/; domain=baidu.com; httponly, Set-Cookie: PTOKEN=d75d78bd32c23c5cdb9c27b2f484ea9c; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=passport.baidu.com; httponly, Set-Cookie: STOKEN=75ba9addf1f921c25a64dd7f7d278d40; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=passport.baidu.com; httponly, Set-Cookie: SAVEUSERID=deleted; expires=Mon, 17-Sep-2012 09:45:03 GMT; path=/; domain=passport.baidu.com; httponly, Set-Cookie: USERNAMETYPE=1; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=passport.baidu.com; httponly
但是很明显,SAVEUSERID是deleted,
所以被java中的代码解析后,就被丢掉了。
所以,此处,java中,获得的CookieStore,是没有SAVEUSERID的。其实就是正常的现象了。
所以,java中,判断模拟登陆百度成功后的cookie的判断,就需要修改一下,去掉针对于SAVEUSERID的判断,变成:
//Map cookieNameDict = new Map(); Map cookieNameDict = new Hashtable(); //Map<Object, Object> cookieNameDict = new Hashtable<Object, Object>; cookieNameDict.put("BDUSS", false); cookieNameDict.put("PTOKEN", false); cookieNameDict.put("STOKEN", false); //cookieNameDict.put("SAVEUSERID", false); curCookieList = crl.getCurCookieList(); for(Object objCookieName : cookieNameDict.keySet().toArray()) { String strCookieName = objCookieName.toString(); for(Cookie ck: curCookieList) { if(strCookieName.equalsIgnoreCase(ck.getName())) { cookieNameDict.put(strCookieName, true); } } } boolean bAllCookiesFound = true; for (Object objFoundCurCookie : cookieNameDict.values()) { bAllCookiesFound = bAllCookiesFound && Boolean.parseBoolean(objFoundCurCookie.toString()); } bLoginBaiduOk = bAllCookiesFound; if (bLoginBaiduOk) { System.out.println("成功模拟登陆百度首页!" ); } else { System.out.println("模拟登陆百度首页 失败!"); System.out.println("所返回的HTML源码为:" + loginBaiduRespHtml); }
即可。
成功模拟登陆百度后的输出是:
Cookies for http://www.baidu.com/ [version: 0][name: BAIDUID][value: EB16BC3D9EB401EAD360D7F49C635382:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 17:49:06 CST 2043] [version: 0][name: BDSVRTM][value: 2][domain: www.baidu.com][path: /][expiry: null] [version: 0][name: H_PS_PSSID][value: 3360_3380_1466_2976][domain: .baidu.com][path: /][expiry: null] 正确:已找到cookie BAIDUID 正确:找到 bdPass.api.params.login_token=7744b9c37d759c47709e44b98527af0c Please Enter Your: Baidu Username: xxx Baidu Password: yyy 成功模拟登陆百度首页!
如图:
随后,再稍微整理一下代码去,即可。
【总结】
此处,java中模拟登陆百度访问login的url,没有获得所希望的返回的html和cookie,原因是:
自己粗心大意,导致忘了发送用户名和密码
所以必然登陆失败。
解决办法:
(1)传递对应的username和password,即可成功返回所希望的cookie;
(2)此处java中,将:
Set-Cookie: SAVEUSERID=deleted; expires=Mon, 17-Sep-2012 09:45:03 GMT; path=/; domain=passport.baidu.com; httponly,
解析后,丢掉了。
所以,java中,判断模拟登陆百度是否成功的检测cookie时,就不去判断对应的SAVEUSERID,只判断:BDUSS,PTOKEN,STOKEN即可。
(3)完整的,成功的,模拟登陆百度的代码,自己去看:
转载请注明:在路上 » 【已解决】Java中模拟登陆百度期间通过HttpClient无法获得所希望的返回的html和cookie