最新消息:20210816 当前crifan.com域名已被污染,为防止失联,请关注(页面右下角的)公众号

【已解决】Java中模拟登陆百度期间通过HttpClient无法获得所希望的返回的html和cookie

Java crifan 6018浏览 0评论

【背景】

折腾:

【教程】模拟登陆百度之Java代码版

期间,用如下代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
/** Get response from url  */
public HttpResponse getUrlResponse(
        String url,
        List<NameValuePair> headerDict,
        List<NameValuePair> postDict,
        int timeout
        )
{
    // init
    HttpResponse response = null;
    HttpUriRequest request = null;
    DefaultHttpClient httpClient = new DefaultHttpClient();
     
    //HttpParams headerParams = new HttpParams();
    //HttpParams headerParams = new DefaultedHttpParams(headerParams, headerParams);
    //HttpParams headerParams = new BasicHttpParams();
    BasicHttpParams headerParams = new BasicHttpParams();
    //HttpConnectionParams.
    //default enable auto redirect
    headerParams.setParameter(CoreProtocolPNames.USER_AGENT, gUserAgent);
    headerParams.setParameter(ClientPNames.HANDLE_REDIRECTS, Boolean.TRUE);
     
    if(postDict != null)
    {
        HttpPost postReq = new HttpPost(url);
         
        try{
            HttpEntity postBodyEnt = new UrlEncodedFormEntity(postDict);
            postReq.setEntity(postBodyEnt);
        }
        catch(Exception e){
            e.printStackTrace();
        }
 
        request = postReq;
    }
    else
    {
        HttpGet getReq = new HttpGet(url);
         
        request = getReq;
    }
 
    if(headerParams != null)
    {
        //HttpProtocolParams.setUserAgent(headerParams, gUserAgent);
        //headerParams.setHeader(HttpMethodParams.USER_AGENT, gUserAgent);
        request.setParams(headerParams);
    }
     
    //request.setHeader("User-Agent", gUserAgent);
     
 
    try{           
        HttpContext localContext = new BasicHttpContext();
        localContext.setAttribute(ClientContext.COOKIE_STORE, gCurCookieStore);
        response = httpClient.execute(request, localContext);
         
        //response HeaderGroup value:
        //[Via: 1.1 SC-SZ-06, Connection: Keep-Alive, Proxy-Connection: Keep-Alive, Content-Length: 11006, Expires: Tue, 17 Sep 2013 01:43:44 GMT, Date: Tue, 17 Sep 2013 01:43:44 GMT, Content-Type: text/html;charset=utf-8, Server: BWS/1.0, Cache-Control: private, BDPAGETYPE: 1, BDUSERID: 0, BDQID: 0xaaa869770d8d5dcd, Set-Cookie: BDSVRTM=2; path=/, Set-Cookie: H_PS_PSSID=3361_2777_1465_2975_3109; path=/; domain=.baidu.com, Set-Cookie: BAIDUID=C0C2EAA4B1805EF21EE097E2C6A3D448:FG=1; expires=Tue, 17-Sep-43 01:43:44 GMT; path=/; domain=.baidu.com, P3P: CP=" OTI DSP COR IVA OUR IND COM "]
         
        //gCurCookieStore (formatted ouput) value:
        /*{
            [version: 0][name: BAIDUID][value: C0C2EAA4B1805EF21EE097E2C6A3D448:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 09:43:44 CST 2043]=java.lang.Object@55ba1c2b,
            [version: 0][name: BDSVRTM][value: 2][domain: www.baidu.com][path: /][expiry: null]=java.lang.Object@55ba1c2b,
            [version: 0][name: H_PS_PSSID][value: 3361_2777_1465_2975_3109][domain: .baidu.com][path: /][expiry: null]=java.lang.Object@55ba1c2b
        }*/
    } catch (ClientProtocolException cpe) {
        // TODO Auto-generated catch block
        cpe.printStackTrace();
    } catch (IOException ioe) {
        // TODO Auto-generated catch block
        ioe.printStackTrace();
    }
     
    return response;
}

和:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
/**
 * [File]
 * EmulateLoginBaidu.java
 *
 * [Function]
 * Use Java code to emulate login baidu
 *
 * 【教程】模拟登陆百度之Java代码版
 *
 * [Version]
 * v1.0
 * 2013-09-16
 *
 * [Note]
 * 1. need add apache http lib:
 * 【已解决】Eclipse的java代码出错:The import org.apache cannot be resolved
 * 2.need crifanLib.java
 *
 * [History]
 * [v1.0]
 * 1. initial version.
 */
 
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.Hashtable;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
 
import org.apache.http.HttpResponse;
import org.apache.http.NameValuePair;
import org.apache.http.cookie.Cookie;
import org.apache.http.impl.cookie.BasicClientCookie;
import org.apache.http.impl.cookie.BasicClientCookie2;
import org.apache.http.message.BasicNameValuePair;
 
//import crifanLib;
 
/**
 * @author CLi
 *
 */
public class EmulateLoginBaidu {
    static crifanLib crl;
 
    /**
     * @param args
     */
    public static void main(String[] args) {
        crl = new crifanLib();
                 
        // TODO Auto-generated method stub
        EmulateLoginBaiduUsingJava();
    }
 
    // emulate login baidu using java code 
    public static void EmulateLoginBaiduUsingJava()
    {
        boolean bLoginBaiduOk = false;
        List<Cookie> curCookieList;
         
        //step1: login baidu, got cookie BAIDUID
        String strTokenValue = "";
        boolean bGotCookieBaiduid = false;
        String strBaiduUrl = "http://www.baidu.com/";
        HttpResponse baiduResp = crl.getUrlResponse(strBaiduUrl);
         
        List<Cookie> cookieList =crl.getCurCookieStore().getCookies();
        crl.dbgPrintCookies(cookieList, strBaiduUrl);
        for(Cookie ck : cookieList)
        {
            String cookieName = ck.getName();
            if(cookieName.equals("BAIDUID"))
            {
                bGotCookieBaiduid = true;
            }
        }
        if (bGotCookieBaiduid)
        {
            System.out.println("正确:已找到cookie BAIDUID");
        }
        else
        {
            System.out.println("错误:没有找到cookie BAIDUID !");
        }
         
        //step2: login, pass paras, extract resp cookie
        boolean bExtractTokenValueOK = false;
        if(bGotCookieBaiduid)
        {
            String getApiRespHtml = crl.getUrlRespHtml(getapiUrl);
             
            //bdPass.api.params.login_token='5ab690978812b0e7fbbe1bfc267b90b3';
            //bdPass.api.params.login_token='3cf421493884e0fe9080593d05f4744f';
            Pattern tokenValP = Pattern.compile("bdPass\\.api\\.params\\.login_token='(?<tokenVal>\\w+)';");
            Matcher tokenValMatcher = tokenValP.matcher(getApiRespHtml);
            //boolean foundTokenValue = tokenValMatcher.matches();
            boolean foundTokenValue = tokenValMatcher.find();
            if(foundTokenValue)
            {
                strTokenValue = tokenValMatcher.group("tokenVal"); //3cf421493884e0fe9080593d05f4744f
                System.out.println("正确:找到 bdPass.api.params.login_token=" + strTokenValue);
                 
                bExtractTokenValueOK = true;
            }
            else
            {
                System.out.println("错误:没找到bdPass.api.params.login_token !");
            }
        }
         
        //step3: verify returned cookies
        if (bGotCookieBaiduid && bExtractTokenValueOK)
        {
            //do some workaround to makesure here cookie H_PS_PSSID not expire
            //[version: 0][name: H_PS_PSSID][value: 3359_3341_2776_1424_2981][domain: .baidu.com][path: /][expiry: null]
             
            //Date newExpiryDate = new Date("Thu Sep 17 14:22:08 CST 2043");
            //Date newExpiryDate = new Date();
            //Date newExpiryDate = new Date(2043, 9, 17);
            Date newExpiryDate = new Date(143, 9, 17);
            //Date newExpiryDate = new Date(2020, 9, 17);
            //Calendar newExpiryCalendar = new Calendar();
            //Calendar newExpiryCalendar = new GregorianCalendar();
            //Calendar newExpiryCalendar = new GregorianCalendar(2043, 9, 17);
            //Calendar newExpiryCalendar = new GregorianCalendar(2043, 9, 17, 14, 22, 8);
             
            BasicClientCookie hPsPssidCookie = null;
            //int hPsPssidCookieIdx = 0;
             
            curCookieList = crl.getCurCookieList();
            for(Cookie ck : curCookieList)
            {
                if(ck.getName().equalsIgnoreCase("H_PS_PSSID"))
                {
                    //hPsPssidCookieIdx = curCookieList.indexOf(ck);
                    hPsPssidCookie = (BasicClientCookie) ck;
                    hPsPssidCookie.setExpiryDate(newExpiryDate);
                    ck = hPsPssidCookie;
                    //break;
                }
            }
             
            crl.setCurCookieList(curCookieList);
 
             
            String staticPageUrl = "http://www.baidu.com/cache/user/html/jump.html";
 
            //List<NameValuePair> headerDict = new List<NameValuePair>();
            List<NameValuePair> postDict = new ArrayList<NameValuePair>();
            //ArrayList<NameValuePair> headerDict = new ArrayList<NameValuePair>();
            //postDict.Add("ppui_logintime", "");
            postDict.add(new BasicNameValuePair("charset", "utf-8"));
            //postDict.add(new BasicNameValuePair("codestring", ""));
            //postDict.Add("", "");
            postDict.add(new BasicNameValuePair("token", strTokenValue));
            postDict.add(new BasicNameValuePair("isPhone", "false"));
            postDict.add(new BasicNameValuePair("index", "0"));
            //postDict.add(new BasicNameValuePair("u", ""));
            //postDict.add(new BasicNameValuePair("safeflg", "0"));
            postDict.add(new BasicNameValuePair("staticpage", staticPageUrl));
            postDict.add(new BasicNameValuePair("loginType", "1"));
            postDict.add(new BasicNameValuePair("tpl", "mn"));
            postDict.add(new BasicNameValuePair("callback", "parent.bdPass.api.login._postCallback"));
             
            String strBaiduUsername = "";
            String strBaiduPassword = "";
            Scanner inputReader = new Scanner(System.in);
            System.out.println("Please Enter Your:" );
            System.out.println("Baidu Username:" );
            strBaiduUsername = inputReader.nextLine();
            System.out.println("You Entered Username=" + strBaiduUsername);
            System.out.println("Baidu Password:" );
            strBaiduPassword = inputReader.nextLine();
            System.out.println("You Entered Password=" + strBaiduPassword);
             
            postDict.add(new BasicNameValuePair("verifycode", ""));
             
            postDict.add(new BasicNameValuePair("mem_pass", "on"));
 
            String baiduMainLoginUrl = "https://passport.baidu.com/v2/api/?login";
            String loginBaiduRespHtml = crl.getUrlRespHtml(baiduMainLoginUrl, null, postDict);
                   
/*            List<NameValuePair> cookieNamePairList = new ArrayList<NameValuePair>();
            cookieNamePairList.add(new BasicNameValuePair("BDUSS", false));
            cookieNamePairList.add(new BasicNameValuePair("PTOKEN", false));
            cookieNamePairList.add(new BasicNameValuePair("STOKEN", false));
            cookieNamePairList.add(new BasicNameValuePair("SAVEUSERID", false));*/
             
            //Map cookieNameDict = new Map();
            Map cookieNameDict = new Hashtable();
            //Map<Object, Object> cookieNameDict = new Hashtable<Object, Object>;
            cookieNameDict.put("BDUSS", false);
            cookieNameDict.put("PTOKEN", false);
            cookieNameDict.put("STOKEN", false);
            cookieNameDict.put("SAVEUSERID", false);
             
            curCookieList = crl.getCurCookieList();
            for(Object objCookieName : cookieNameDict.keySet().toArray())
            {
                String strCookieName = objCookieName.toString();
                for(Cookie ck: curCookieList)
                {
                    if(strCookieName.equalsIgnoreCase(ck.getName()))
                    {
                        cookieNameDict.put(strCookieName, true);
                    }
                }
            }
             
            boolean bAllCookiesFound = true;
            for (Object  objFoundCurCookie : cookieNameDict.values())
            {
                bAllCookiesFound = bAllCookiesFound && Boolean.parseBoolean(objFoundCurCookie.toString());
            }
 
            bLoginBaiduOk = bAllCookiesFound;
                         
            if (bLoginBaiduOk)
            {
                System.out.println("成功模拟登陆百度首页!" );
            }
            else
            {
                System.out.println("模拟登陆百度首页 失败!");
                System.out.println("所返回的HTML源码为:" + loginBaiduRespHtml);
            }
        }
 
        return;
    }
 
/*  public EmulateLoginBaidu()
    {
         
        //EmulateLoginBaiduUsingJava();
    }*/
}

始终还是无法获得对应的返回的html和cookie,从而无法正常模拟登陆。

【折腾过程】

1.很是怀疑,是不是其中的自动跳转,没有工作。

即:

1
headerParams.setParameter(ClientPNames.HANDLE_REDIRECTS, Boolean.TRUE);

没有起到对应的自动跳转的效果,即:

正常,自动跳转后的结果,应该是:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
{Connection: keep-alive
Pragma: public
Content-Length: 515
Cache-Control: public
Content-Type: text/html
Date: Tue, 17 Sep 2013 06:20:24 GMT
Expires: 0
ETag: w/"iIuDLhaYSmAaLXqM5VeFg8Iy2wvCnATm:1379398824"
Last-Modified: Tue, 17 Sep 2013 06:20:24 6SepGMT
P3P: CP=" OTI DSP COR IVA OUR IND COM "
Set-Cookie: BDUSS=43TERMeExpdWVQaUtVLVYxZG4tU0JZSHpiWTJBTVVFVmhqc1QydElqbW9nVjlTQVFBQUFBJCQAAAAAAAAAAAEAAAAZP0gCYWdhaW5pbnB1dDMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAKj0N1Ko9DdSLX; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=baidu.com; httponly,PTOKEN=deleted; expires=Mon, 17-Sep-2012 06:20:23 GMT; path=/; domain=baidu.com; httponly,PTOKEN=a530e5fed0d27f1600c5be851accbedb; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=passport.baidu.com; httponly,STOKEN=964d5d78f925418579c09f050b76354b; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=passport.baidu.com; httponly,SAVEUSERID=deleted; expires=Mon, 17-Sep-2012 06:20:23 GMT; path=/; domain=passport.baidu.com; httponly,USERNAMETYPE=1; expires=Sat, 04-Dec-2021 06:20:24 GMT; path=/; domain=passport.baidu.com; httponly
Server:
Vary: Accept-Encoding
Content-Encoding:
 
}

而现在,返回的结果,是没有跳转时,所返回的结果:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
{Transfer-Encoding: chunked
Connection: keep-alive
Pragma: public
Vary: Accept-Encoding
Content-Encoding:
Cache-Control: public
Content-Type: text/html
Date: Tue, 17 Sep 2013 08:02:32 GMT
Expires: 0
ETag: w/"TL3qSUIdbJn8y4PtWqjPr1EFB8W22Q1g:1379404952"
Last-Modified: Tue, 17 Sep 2013 08:02:32 8SepGMT
P3P: CP=" OTI DSP COR IVA OUR IND COM "
Server:
 
}

2.参考:

How auto redirect in HttpClient (java, apache)

说是:

HttpClient 3.x不支持自动跳转

HttpClient 4.x 就支持了。

3.我这里的HttpClient,是之前:

【已解决】Eclipse的java代码出错:The import org.apache cannot be resolved

中加的,去看了下,的确已经是4.2.5,是支持的了。

4.看到:

Interface ClientPNames

已不推荐再使用ClientPNames了,改推荐:

Class RequestConfig

但是好像是4.3的,我此处,用的是4.2的库,应该是不支持的。

5.我此处调试的时候,其实可以看到对应的设置的:

{http.useragent=Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; WOW64; Trident/4.0; SLCC2; .NET CLR 2.0.50727; .NET CLR 3.5.30729; .NET CLR 3.0.30729; Media Center PC 6.0; InfoPath.3; .NET4.0C; .NET4.0E, http.protocol.handle-redirects=true}

但是,现实的返回结果,貌似是没有自动跳转的。

6.再继续调试,经过:

1
request.setParams(headerParams);

是可以将对应的上面的header信息,设置给request的:

parameters contains the auto redirects true

7.后来继续调试,发现好像不是自动跳转的问题。

貌似是cookie的问题。

正常的话,C#中,访问:

https://passport.baidu.com/v2/api/?login

之前的cookie是:

cookie BAIDUID value before login

cookie H_PS_PSSID value before login

cookie HOSUPPORT value before login

cookie BDSVRTM value before login

而此处,调试看到的cookie却是:

1
2
3
4
5
6
[
    [version: 0][name: BAIDUID][value: 8EFD622E00F965E706C2F4CC3A392AF9:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 16:38:11 CST 2043],
    [version: 0][name: BDSVRTM][value: 1][domain: www.baidu.com][path: /][expiry: null],
    [version: 0][name: HOSUPPORT][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 16:38:12 CST 2021],
    [version: 0][name: H_PS_PSSID][value: 3361_3381_1447_2976][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043]
]

很明显,其中的:

  • BDSVRTM:
    • domain不对
    • expiry不对
  • H_PS_PSSID:
    • 已经经过上述手动加代码,修复了不对的expiry

所以,去手动添加代码修正这些cookie:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
// Patch the cookies whose domain/expiry differ from the ones seen in the working C# run,
// then hand the list back to the lib.
BasicClientCookie hPsPssidCookie = null;
BasicClientCookie dbsvrtmCookie = null;

curCookieList = crl.getCurCookieList();
for(Cookie curCookie : curCookieList)
{
    String curCookieName = curCookie.getName();

    if(curCookieName.equalsIgnoreCase("H_PS_PSSID"))
    {
        // H_PS_PSSID arrives without expiry: give it one
        hPsPssidCookie = (BasicClientCookie) curCookie;
        hPsPssidCookie.setExpiryDate(newExpiryDate);
    }
    else if(curCookieName.equalsIgnoreCase("BDSVRTM"))
    {
        // BDSVRTM arrives with domain www.baidu.com and no expiry: fix both
        dbsvrtmCookie = (BasicClientCookie) curCookie;
        dbsvrtmCookie.setDomain(".baidu.com");
        dbsvrtmCookie.setExpiryDate(newExpiryDate);
    }
}

crl.setCurCookieList(curCookieList);

修改后为:

1
2
3
4
5
6
[
    [version: 0][name: BAIDUID][value: B3CCB8F5D6DA04D26A3CF718FDE8A753:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 16:57:38 CST 2043],
    [version: 0][name: BDSVRTM][value: 1][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043],
    [version: 0][name: HOSUPPORT][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 16:57:39 CST 2021],
    [version: 0][name: H_PS_PSSID][value: 3359_3381_1422_2975_2981_3092][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043]
]

看看结果如何。

结果问题依旧。

8.但是,注意到此处的输出的html是:

看起来,貌似又像是正常的返回的html。

所以,真的再次去C#项目中,设置autoredirect为false,看看其返回的结果如何。

结果是:

C#中,如果也是设置autoredirect为false的话,那么返回的内容,也是:

但是,注意到,此处C#中,获得的html中,是有:

username=xxx

的,而java中返回的html中是没有的。

9.然后想到,会不会是由于connection不是keep-alive而导致的?

所以,去确认java中的connection,的确是keep-alive

参考:

Tomcat, HTTP Keep-Alive and Java’s HttpsUrlConnection

【整理】关于Java中的httpClient中可以传入的参数

而加入了:

1
headerParams.setParameter(CoreConnectionPNames.SO_KEEPALIVE, Boolean.TRUE);

试试效果。

结果问题依旧,返回的html中,还是没有username:

另外,参考:

What is HTTP Persistent Connections?

其实已经说是默认是true了。

10.另外,注意到,之前C#正常的时候,cookie中的:

BDSVRTM的值是2

而此处的java的

BDSVRTM的值,始终是1

所以,要去研究一下,原先的C#中,什么时候得到的BDSVRTM是2的。

调试C#项目,确定了:

autoredirect为false的时候,用某个账户(3),也是可以正常模拟登陆的;

且BDSVRTM值也是1,且返回的html也是上述的html,但是其中是包含username的。

所以,再去java中看看。

结果java中,还是返回html中不带username的。

还是无法获得对应的cookie。

11.然后的然后,去检查自己代码,看看post data中是否少传了什么参数,结果发现一个惊天大问题:

之前只注意去java中从控制台中获得用户名和密码,结果竟然忘了将用户名和密码传入到post data中。。。。

你妹的,这个也太疏忽了。。。

所以,去加上,变成:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
 
// Build the form fields for the login POST.
List<NameValuePair> postDict = new ArrayList<NameValuePair>();
postDict.add(new BasicNameValuePair("charset", "utf-8"));
postDict.add(new BasicNameValuePair("token", strTokenValue));
postDict.add(new BasicNameValuePair("isPhone", "false"));
postDict.add(new BasicNameValuePair("index", "0"));
postDict.add(new BasicNameValuePair("staticpage", staticPageUrl));
postDict.add(new BasicNameValuePair("loginType", "1"));
postDict.add(new BasicNameValuePair("tpl", "mn"));
postDict.add(new BasicNameValuePair("callback", "parent.bdPass.api.login._postCallback"));

// Read the credentials from the console.
// The Scanner is intentionally not closed: closing it would also close System.in.
Scanner inputReader = new Scanner(System.in);
System.out.println("Please Enter Your:" );
System.out.println("Baidu Username:" );
String strBaiduUsername = inputReader.nextLine();
System.out.println("You Entered Username=" + strBaiduUsername);
System.out.println("Baidu Password:" );
// the password is deliberately not echoed back to the console
String strBaiduPassword = inputReader.nextLine();

// the credentials must be part of the post data, otherwise the login can never succeed
postDict.add(new BasicNameValuePair("username", strBaiduUsername));
postDict.add(new BasicNameValuePair("password", strBaiduPassword));

postDict.add(new BasicNameValuePair("verifycode", ""));

postDict.add(new BasicNameValuePair("mem_pass", "on"));

String baiduMainLoginUrl = "https://passport.baidu.com/v2/api/?login";
String loginBaiduRespHtml = crl.getUrlRespHtml(baiduMainLoginUrl, null, postDict);

然后再去试试:

就可以了:

也可以返回cookie了:

1
2
3
4
5
6
7
8
9
10
[
    [version: 0][name: BAIDUID][value: 2804E22D17A174919725D664400B91F5:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 17:39:10 CST 2043],
    [version: 0][name: BDSVRTM][value: 2][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043],
    [version: 0][name: BDUSS][value: hVT3RTT2pJcE45azE2R3dxbmVMVXBPaXBnQmlzLVhWNEkyZU5kaEVIaFhzRjlTQVFBQUFBJCQAAAAAAAAAAAEAAAAZP0gCYWdhaW5pbnB1dDMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAFcjOFJXIzhSb0][domain: baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021],
    [version: 0][name: HOSUPPORT][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:11 CST 2021],
    [version: 0][name: H_PS_PSSID][value: 3361_2776_1466_2976_2980_3092_3109][domain: .baidu.com][path: /][expiry: Sat Oct 17 00:00:00 CST 2043],
    [version: 0][name: PTOKEN][value: 7de481b4f02704457006d4a457041ceb][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021],
    [version: 0][name: STOKEN][value: e35d5d2ab6ad6300cbf92a47ddc90337][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021],
    [version: 0][name: USERNAMETYPE][value: 1][domain: passport.baidu.com][path: /][expiry: Sat Dec 04 17:39:35 CST 2021]
]

返回的html中,也带username了:

12.然后,此处,其实又出现一点点小意外:

java中,返回的cookie中,没有:SAVEUSERID

而C#中,返回的cookie中是有:SAVEUSERID的。

然后去看看获得的response,结果是,其中的cookie中,是有SAVEUSERID的:

1
2
3
4
5
6
Set-Cookie: BDUSS=stVkRscHpnak5yVXJLcFpUVENqczF1Q3NhbmltV2Rwc2V-bmp6Q3JzT2ZzVjlTQVFBQUFBJCQAAAAAAAAAAAEAAAAZP0gCYWdhaW5pbnB1dDMAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAJ8kOFKfJDhSaG; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=baidu.com; httponly,
Set-Cookie: PTOKEN=deleted; expires=Mon, 17-Sep-2012 09:45:03 GMT; path=/; domain=baidu.com; httponly,
Set-Cookie: PTOKEN=d75d78bd32c23c5cdb9c27b2f484ea9c; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=passport.baidu.com; httponly,
Set-Cookie: STOKEN=75ba9addf1f921c25a64dd7f7d278d40; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=passport.baidu.com; httponly,
Set-Cookie: SAVEUSERID=deleted; expires=Mon, 17-Sep-2012 09:45:03 GMT; path=/; domain=passport.baidu.com; httponly,
Set-Cookie: USERNAMETYPE=1; expires=Sat, 04-Dec-2021 09:45:04 GMT; path=/; domain=passport.baidu.com; httponly

但是很明显,SAVEUSERID是deleted,

所以被java中的代码解析后,就被丢掉了。

所以,此处,java中,获得的CookieStore,是没有SAVEUSERID的。其实就是正常的现象了。

所以,java中,判断模拟登陆百度成功后的cookie的判断,就需要修改一下,去掉针对于SAVEUSERID的判断,变成:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// Cookie name -> "was it found after the login POST".
// SAVEUSERID is not checked: the server only sends "SAVEUSERID=deleted" with an
// expiry in the past, so that cookie does not end up in the cookie store.
Map<String, Boolean> cookieNameDict = new Hashtable<String, Boolean>();
cookieNameDict.put("BDUSS", Boolean.FALSE);
cookieNameDict.put("PTOKEN", Boolean.FALSE);
cookieNameDict.put("STOKEN", Boolean.FALSE);

curCookieList = crl.getCurCookieList();
// iterate over a copy of the keys, because the map is updated inside the loop
for(String strCookieName : cookieNameDict.keySet().toArray(new String[0]))
{
    for(Cookie ck: curCookieList)
    {
        if(strCookieName.equalsIgnoreCase(ck.getName()))
        {
            cookieNameDict.put(strCookieName, Boolean.TRUE);
        }
    }
}

// all cookies found <=> no entry is still false
boolean bAllCookiesFound = !cookieNameDict.containsValue(Boolean.FALSE);

bLoginBaiduOk = bAllCookiesFound;

if (bLoginBaiduOk)
{
    System.out.println("成功模拟登陆百度首页!" );
}
else
{
    System.out.println("模拟登陆百度首页 失败!");
    System.out.println("所返回的HTML源码为:" + loginBaiduRespHtml);
}

即可。

成功模拟登陆百度后的输出是:

1
2
3
4
5
6
7
8
9
10
11
12
[version: 0][name: BAIDUID][value: EB16BC3D9EB401EAD360D7F49C635382:FG=1][domain: .baidu.com][path: /][expiry: Thu Sep 17 17:49:06 CST 2043]
[version: 0][name: BDSVRTM][value: 2][domain: www.baidu.com][path: /][expiry: null]
[version: 0][name: H_PS_PSSID][value: 3360_3380_1466_2976][domain: .baidu.com][path: /][expiry: null]
正确:已找到cookie BAIDUID
正确:找到 bdPass.api.params.login_token=7744b9c37d759c47709e44b98527af0c
Please Enter Your:
Baidu Username:
xxx
Baidu Password:
yyy
成功模拟登陆百度首页!

如图:

can found cookie and token and emulate login baidu ok

随后,再稍微整理一下代码去,即可。

 

【总结】

此处,java中模拟登陆百度访问login的url,没有获得所希望的返回的html和cookie,原因是:

自己粗心大意,导致忘了发送用户名和密码

所以必然登陆失败。

解决办法:

(1)传递对应的username和password,即可成功返回所希望的cookie;

(2)此处java中,将:

1
Set-Cookie: SAVEUSERID=deleted; expires=Mon, 17-Sep-2012 09:45:03 GMT; path=/; domain=passport.baidu.com; httponly,

解析后,丢掉了。

所以,java中,判断模拟登陆百度是否成功的检测cookie时,就不去判断对应的SAVEUSERID,只判断:BDUSS,PTOKEN,STOKEN即可。

(3)完整的,成功的,模拟登陆百度的代码,自己去看:

【教程】模拟登陆百度之Java代码版

转载请注明:在路上 » 【已解决】Java中模拟登陆百度期间通过HttpClient无法获得所希望的返回的html和cookie

发表我的评论
取消评论

表情

Hi,您需要填写昵称和邮箱!

  • 昵称 (必填)
  • 邮箱 (必填)
  • 网址
82 queries in 0.210 seconds, using 22.27MB memory