/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.protocol.httpclient;

import java.lang.invoke.MethodHandles;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import org.apache.commons.httpclient.Header;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.NameValuePair;
import org.apache.commons.httpclient.cookie.CookiePolicy;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.methods.PostMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.reflect.FieldUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Performs an HTML form login: fetches the login page with a GET, collects the
 * {@code <input>} fields of the configured form, merges them with the
 * configured post data and submits the result with a POST to the same URL.
 *
 * <p>
 * The form is looked up by its {@code id} attribute first and by its
 * {@code name} attribute second. Only {@code <input>} elements are collected.
 * </p>
 */
public class HttpFormAuthentication {

  private static final Logger LOG = LoggerFactory
      .getLogger(MethodHandles.lookup().lookupClass());

  /**
   * Headers sent with every login POST unless overridden. Read-only: each
   * instance works on its own copy (see {@link #loginHeaders}).
   */
  private static final Map<String, String> DEFAULT_LOGIN_HEADERS;

  static {
    Map<String, String> defaults = new HashMap<>();
    defaults.put("User-Agent", "Mozilla/5.0");
    defaults.put("Accept",
        "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    defaults.put("Accept-Language", "en-US,en;q=0.5");
    defaults.put("Connection", "keep-alive");
    defaults.put("Content-Type", "application/x-www-form-urlencoded");
    DEFAULT_LOGIN_HEADERS = Collections.unmodifiableMap(defaults);
  }

  /**
   * Per-instance base headers for the login POST. Starts as a copy of
   * {@link #DEFAULT_LOGIN_HEADERS}; kept per instance so that one instance's
   * settings never leak into another.
   */
  private final Map<String, String> loginHeaders = new HashMap<>(
      DEFAULT_LOGIN_HEADERS);

  private HttpClient client;
  private HttpFormAuthConfigurer authConfigurer = new HttpFormAuthConfigurer();

  /** Raw value of the {@code Set-Cookie} header seen on the login page GET. */
  private String cookies;

  /**
   * Creates a form authenticator that uses an existing client and takes its
   * {@code Accept}, {@code Accept-Language} and {@code User-Agent} headers
   * from the given protocol instance.
   *
   * @param authConfigurer
   *          login URL, form id, post data and related settings
   * @param client
   *          the client used for both the GET and the POST
   * @param http
   *          source of the header values; a null or empty value leaves the
   *          built-in default for that header in place
   */
  public HttpFormAuthentication(HttpFormAuthConfigurer authConfigurer,
      HttpClient client, Http http) {
    this.authConfigurer = authConfigurer;
    this.client = client;
    overrideHeader("Accept", http.getAccept());
    overrideHeader("Accept-Language", http.getAcceptLanguage());
    overrideHeader("User-Agent", http.getUserAgent());
  }

  /**
   * Creates a form authenticator with its own {@link HttpClient} and the
   * built-in default headers.
   *
   * @param loginUrl
   *          URL of the login page; also the target of the POST
   * @param loginForm
   *          {@code id} (or, as a fallback, {@code name}) of the login form
   * @param loginPostData
   *          field values to post; null is treated as empty
   * @param additionalPostHeaders
   *          extra request headers; null is treated as empty
   * @param removedFormFields
   *          names of form fields that must not be posted; null is treated as
   *          empty
   */
  public HttpFormAuthentication(String loginUrl, String loginForm,
      Map<String, String> loginPostData,
      Map<String, String> additionalPostHeaders,
      Set<String> removedFormFields) {
    this.authConfigurer.setLoginUrl(loginUrl);
    this.authConfigurer.setLoginFormId(loginForm);
    this.authConfigurer.setLoginPostData(
        loginPostData == null ? new HashMap<String, String>() : loginPostData);
    this.authConfigurer.setAdditionalPostHeaders(additionalPostHeaders == null
        ? new HashMap<String, String>() : additionalPostHeaders);
    this.authConfigurer.setRemovedFormFields(
        removedFormFields == null ? new HashSet<String>() : removedFormFields);
    this.client = new HttpClient();
  }

  /**
   * Fetches the login page, builds the form parameters and posts them to the
   * login URL.
   *
   * @throws Exception
   *           on I/O failure, when the configured form cannot be found
   *           ({@link IllegalArgumentException}) or when the configured cookie
   *           policy cannot be resolved
   */
  public void login() throws Exception {
    // Installs a JVM-wide default cookie handler for java.net connections.
    // NOTE(review): this replaces any handler installed elsewhere in the JVM;
    // kept because other code may rely on it - confirm whether it is needed.
    CookieHandler.setDefault(new CookieManager());
    String pageContent = httpGetPageContent(authConfigurer.getLoginUrl());
    List<NameValuePair> params = getLoginFormParams(pageContent);
    sendPost(authConfigurer.getLoginUrl(), params);
  }

  /**
   * Posts the login parameters to the given URL and reads the response body.
   *
   * @param url
   *          target of the POST
   * @param params
   *          form parameters to submit
   * @throws Exception
   *           on I/O failure or when the cookie policy cannot be resolved
   */
  private void sendPost(String url, List<NameValuePair> params)
      throws Exception {
    PostMethod post;
    if (authConfigurer.isLoginRedirect()) {
      // post.setFollowRedirects(true) cannot be used here: it throws
      // IllegalArgumentException ("Entity enclosing requests cannot be
      // redirected without user intervention"), so the getter is overridden.
      post = new PostMethod(url) {
        @Override
        public boolean getFollowRedirects() {
          return true;
        }
      };
    } else {
      post = new PostMethod(url);
    }
    try {
      setLoginHeader(post);

      // NUTCH-2280
      LOG.debug("FormAuth: set cookie policy");
      this.setCookieParams(authConfigurer, post.getParams());

      post.addParameters(params.toArray(new NameValuePair[0]));
      int rspCode = client.executeMethod(post);
      if (LOG.isDebugEnabled()) {
        LOG.debug("Sending 'POST' request to URL : {}", url);
        // Names only: the values include the login credentials.
        LOG.debug("Post parameter names : {}", parameterNames(params));
        LOG.debug("Response Code : {}", rspCode);
        for (Header header : post.getResponseHeaders()) {
          LOG.debug("Response headers : {}", header);
        }
      }
      // The body is always read, even when debug logging is off.
      String rst = readBody(post.getResponseBodyAsStream());
      LOG.debug("login post result: {}", rst);
    } finally {
      post.releaseConnection();
    }
  }

  /**
   * NUTCH-2280 Set the cookie policy value from httpclient-auth.xml for the
   * Post httpClient action. The configured value is resolved as the name of a
   * static field declared on {@link CookiePolicy}; nothing is set when no
   * policy is configured.
   *
   * @param formConfigurer
   *          - the httpclient-auth.xml values
   *
   * @param params
   *          - the HttpMethodParams from the current httpclient instance
   *
   * @throws NoSuchFieldException
   *           declared for compatibility with the original signature
   * @throws SecurityException
   *           if reflective access is denied
   * @throws IllegalArgumentException
   *           if the configured name cannot be resolved to a static field
   * @throws IllegalAccessException
   *           if the field is not accessible
   */
  private void setCookieParams(HttpFormAuthConfigurer formConfigurer,
      HttpMethodParams params) throws NoSuchFieldException, SecurityException,
      IllegalArgumentException, IllegalAccessException {
    // NUTCH-2280 - set the HttpClient cookie policy
    String policy = formConfigurer.getCookiePolicy();
    if (policy == null) {
      return;
    }
    Object p = FieldUtils.readDeclaredStaticField(CookiePolicy.class, policy);
    if (p != null) {
      LOG.debug("reflection of cookie value: {}", p);
      params.setParameter(HttpMethodParams.COOKIE_POLICY, p);
    }
  }

  /**
   * Adds the request headers for the login POST: this instance's base headers,
   * overlaid with the configured additional headers, plus a {@code Cookie}
   * header when a cookie was captured from the login page.
   *
   * @param post
   *          the request to decorate
   */
  private void setLoginHeader(PostMethod post) {
    Map<String, String> headers = new HashMap<>(loginHeaders);
    // additionalPostHeaders can overwrite values in the base headers
    headers.putAll(authConfigurer.getAdditionalPostHeaders());
    for (Entry<String, String> entry : headers.entrySet()) {
      post.addRequestHeader(entry.getKey(), entry.getValue());
    }
    String capturedCookies = getCookies();
    if (capturedCookies != null) {
      post.addRequestHeader("Cookie", capturedCookies);
    }
  }

  /**
   * Fetches the given URL with a GET, remembers the {@code Set-Cookie} response
   * header (if any) and returns the response body.
   *
   * @param url
   *          the page to fetch
   * @return the response body, or an empty string when there is none
   * @throws IOException
   *           on I/O failure
   */
  private String httpGetPageContent(String url) throws IOException {
    GetMethod get = new GetMethod(url);
    try {
      for (Entry<String, String> entry : authConfigurer
          .getAdditionalPostHeaders().entrySet()) {
        get.addRequestHeader(entry.getKey(), entry.getValue());
      }
      client.executeMethod(get);
      Header cookieHeader = get.getResponseHeader("Set-Cookie");
      if (cookieHeader != null) {
        setCookies(cookieHeader.getValue());
      }
      return readBody(get.getResponseBodyAsStream());
    } finally {
      get.releaseConnection();
    }
  }

  /**
   * Builds the POST parameters from the login page: every named
   * {@code <input>} of the login form that is neither overridden by the
   * configured post data nor listed as removed, followed by all configured
   * post data entries.
   *
   * @param pageContent
   *          HTML of the login page
   * @return the parameters to post
   * @throws IllegalArgumentException
   *           if no form with the configured id or name exists
   * @throws UnsupportedEncodingException
   *           declared for compatibility with the original signature
   */
  private List<NameValuePair> getLoginFormParams(String pageContent)
      throws UnsupportedEncodingException {
    List<NameValuePair> params = new ArrayList<NameValuePair>();
    Document doc = Jsoup.parse(pageContent);
    Element loginform = findLoginForm(doc, authConfigurer.getLoginFormId());
    Elements inputElements = loginform.getElementsByTag("input");

    for (Element inputElement : inputElements) {
      String key = inputElement.attr("name");
      String value = inputElement.attr("value");
      if (key.isEmpty()) {
        // attr() yields "" for a missing attribute; an input without a name
        // has nothing meaningful to submit
        continue;
      }
      // skip fields in removedFormFields or loginPostData
      if (authConfigurer.getLoginPostData().containsKey(key)
          || authConfigurer.getRemovedFormFields().contains(key)) {
        continue;
      }
      params.add(new NameValuePair(key, value));
    }
    // add key and value in loginPostData
    for (Entry<String, String> entry : authConfigurer.getLoginPostData()
        .entrySet()) {
      params.add(new NameValuePair(entry.getKey(), entry.getValue()));
    }
    return params;
  }

  /**
   * Locates the login form by {@code id}, falling back to the first
   * {@code <form>} whose {@code name} attribute matches.
   *
   * @param doc
   *          parsed login page
   * @param formId
   *          configured form id / name
   * @return the form element, never null
   * @throws IllegalArgumentException
   *           if no matching form exists
   */
  private static Element findLoginForm(Document doc, String formId) {
    Element byId = doc.getElementById(formId);
    if (byId != null) {
      return byId;
    }
    LOG.debug("No form element found with 'id' = {}, trying 'name'.", formId);
    // Attribute lookup instead of a concatenated CSS selector, so that ids
    // containing selector metacharacters cannot break the query.
    for (Element candidate : doc.getElementsByAttributeValue("name", formId)) {
      if ("form".equalsIgnoreCase(candidate.tagName())) {
        return candidate;
      }
    }
    LOG.debug("No form element found with 'name' = {}", formId);
    throw new IllegalArgumentException("No form exists: " + formId);
  }

  /**
   * Reads a response body into a string using the platform default charset.
   *
   * @param body
   *          the response stream; may be null when the response has no body
   * @return the decoded body, or an empty string for a null stream
   * @throws IOException
   *           on read failure
   */
  private static String readBody(InputStream body) throws IOException {
    if (body == null) {
      return "";
    }
    return new String(IOUtils.toByteArray(body), Charset.defaultCharset());
  }

  /** Returns the parameter names only, for logging without values. */
  private static List<String> parameterNames(List<NameValuePair> params) {
    List<String> names = new ArrayList<>(params.size());
    for (NameValuePair param : params) {
      names.add(param.getName());
    }
    return names;
  }

  /** Replaces a base header unless the new value is null or empty. */
  private void overrideHeader(String name, String value) {
    if (value != null && !value.isEmpty()) {
      loginHeaders.put(name, value);
    }
  }

  /**
   * @return the raw {@code Set-Cookie} value captured from the login page, or
   *         null if none was seen or set
   */
  public String getCookies() {
    return cookies;
  }

  /**
   * @param cookies
   *          value to send as the {@code Cookie} header of the login POST
   */
  public void setCookies(String cookies) {
    this.cookies = cookies;
  }

  /** @return whether the login POST follows redirects */
  public boolean isRedirect() {
    return authConfigurer.isLoginRedirect();
  }

  /**
   * @param redirect
   *          whether the login POST should follow redirects
   */
  public void setRedirect(boolean redirect) {
    this.authConfigurer.setLoginRedirect(redirect);
  }
}