/**
 * Copyright (C) 2012-2014 Gist Labs, LLC. (http://gistlabs.com)
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 */
package com.gistlabs.mechanize.impl;

import java.io.IOException;
import java.io.UnsupportedEncodingException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

import org.apache.http.Header;
import org.apache.http.HttpException;
import org.apache.http.HttpResponse;
import org.apache.http.HttpResponseInterceptor;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpRequestBase;
import org.apache.http.entity.BufferedHttpEntity;
import org.apache.http.impl.client.AbstractHttpClient;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.params.HttpProtocolParams;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;

import com.gistlabs.mechanize.Mechanize;
import com.gistlabs.mechanize.Resource;
import com.gistlabs.mechanize.ResourceFactory;
import com.gistlabs.mechanize.cache.HttpCacheFilter;
import com.gistlabs.mechanize.cache.api.HttpCache;
import com.gistlabs.mechanize.cache.inMemory.InMemoryHttpCache;
import com.gistlabs.mechanize.cookie.Cookies;
import com.gistlabs.mechanize.exceptions.MechanizeExceptionFactory;
import com.gistlabs.mechanize.filters.DefaultMechanizeChainFilter;
import com.gistlabs.mechanize.filters.MechanizeChainFilter;
import com.gistlabs.mechanize.parameters.Parameters;
import com.gistlabs.mechanize.requestor.PageRequestor;
import com.gistlabs.mechanize.requestor.RequestBuilder;
import com.gistlabs.mechanize.requestor.RequestBuilderFactory;
import com.gistlabs.mechanize.util.apache.ContentType;

/**
 * Mechanize agent acts as a focal point for HTTP interactions and also as a factory for Page objects
 * from responses.
 *
 * <p>Interesting resources: http://en.wikipedia.org/wiki/List_of_HTTP_header_fields</p>
 *
 * <p>NOTE: The mechanize library is not synchronized and should be used in a single thread environment or with custom synchronization.</p>
 *
 * @author Martin Kersten<Martin.Kersten.mk@gmail.com>
 * @author John Heintz <john@gistlabs.com>
 */
public class MechanizeAgent implements PageRequestor<Resource>, RequestBuilderFactory<Resource>, Mechanize {

    /** Registry of resource factories, keyed by the MIME type each factory declares it matches. */
    static final Map<String, ResourceFactory> PAGE_FACTORIES = new HashMap<String, ResourceFactory>();

    /**
     * Looks up the factory registered for the given MIME type.
     *
     * @param mimeType the MIME type to look up
     * @return the registered factory, or {@code null} if none is registered for that type
     */
    static ResourceFactory lookupFactory(final String mimeType) {
        return PAGE_FACTORIES.get(mimeType);
    }

    /**
     * Registers the factory under every MIME type returned by its {@code getContentMatches()}.
     * A later registration for the same MIME type replaces the earlier one.
     *
     * @param factory the factory to register
     */
    static void registerFactory(final ResourceFactory factory) {
        Collection<String> contentMatches = factory.getContentMatches();
        for (String mimeType : contentMatches) {
            PAGE_FACTORIES.put(mimeType, factory);
        }
    }

    /** Version string, set through {@link #setVersion(String)}. */
    static String VERSION;

    /**
     * Sets the static version string.
     *
     * @param version the version value to store
     */
    public static void setVersion(final String version) {
        VERSION = version;
    }

    // Must stay below PAGE_FACTORIES so the map is initialized before the initializer runs.
    static {
        MechanizeInitializer.initialize();
    }

    private final DefaultMechanizeChainFilter requestChain;
    private final AbstractHttpClient client;
    private final Cookies cookies;

    /** Creates an agent with the default HTTP client and an in-memory HTTP cache. */
    public MechanizeAgent() {
        this(buildDefaultHttpClient());
    }

    /**
     * Creates an agent using the given HTTP client and an in-memory HTTP cache.
     *
     * @param client the HTTP client to send requests with
     */
    public MechanizeAgent(final AbstractHttpClient client) {
        this(client, new InMemoryHttpCache());
    }

    /**
     * Creates an agent using the default HTTP client and the given HTTP cache.
     *
     * @param httpCache the cache handed to the {@link HttpCacheFilter}
     */
    public MechanizeAgent(final HttpCache httpCache) {
        this(buildDefaultHttpClient(), httpCache);
    }

    /**
     * Creates an agent using the given HTTP client and HTTP cache.
     *
     * @param client the HTTP client to send requests with
     * @param httpCache the cache handed to the {@link HttpCacheFilter}
     */
    public MechanizeAgent(final AbstractHttpClient client, final HttpCache httpCache) {
        this.client = client;
        setupClient(client);
        this.requestChain = new DefaultMechanizeChainFilter(new MechanizeHttpClientFilter(this.client));
        addFilter(new HttpCacheFilter(httpCache));
        this.cookies = new Cookies(this.client);
    }

    /**
     * This method is used to capture Location headers after HttpClient redirect handling.
     * The first Location header of each response, if any, is stored in the request context
     * under the attribute name "Location".
     *
     * @param client the HTTP client to install the response interceptor on
     */
    private void setupClient(final AbstractHttpClient client) {
        client.addResponseInterceptor(new HttpResponseInterceptor() {
            @Override
            public void process(final HttpResponse response, final HttpContext context) throws HttpException, IOException {
                Header header = response.getFirstHeader("Location");
                if (header != null) {
                    context.setAttribute("Location", header.getValue());
                }
            }
        });
    }

    /**
     * Passes the filter to the request chain's {@code prefix} operation.
     *
     * @param filter the filter to prefix
     * @return this agent, for chaining
     */
    public MechanizeAgent prefixFilter(final MechanizeChainFilter filter) {
        this.requestChain.prefix(filter);
        return this;
    }

    /**
     * Passes the filter to the request chain's {@code add} operation.
     *
     * @param filter the filter to add
     * @return this agent, for chaining
     */
    public MechanizeAgent addFilter(final MechanizeChainFilter filter) {
        this.requestChain.add(filter);
        return this;
    }

    /**
     * Configure the default HttpClient used by mechanize.
     *
     * @return a new {@link DefaultHttpClient}
     */
    public static AbstractHttpClient buildDefaultHttpClient() {
        return new DefaultHttpClient();
    }

    /**
     * Sets the User-Agent on the parameters of the underlying HTTP client.
     *
     * @param userAgent The value to set User-Agent HTTP parameter to for requests
     * @return this agent, for chaining
     */
    public MechanizeAgent setUserAgent(final String userAgent) {
        HttpProtocolParams.setUserAgent(this.client.getParams(), userAgent);
        return this;
    }

    /**
     * @return the User-Agent that HttpClient is currently using.
     */
    public String getUserAgent() {
        return HttpProtocolParams.getUserAgent(this.client.getParams());
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#getClient()
     */
    @Override
    public AbstractHttpClient getClient() {
        return client;
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#doRequest(java.lang.String)
     */
    @Override
    public RequestBuilder<Resource> doRequest(final String uri) {
        return new RequestBuilder<Resource>(this, uri);
    }

    /**
     * Returns the resource received upon the request. The resource can be cast to any expected subclass of resource
     * but will fail with ClassCastException if the expected type of resource is not the actual returned resource.
     *
     * <p>Any exception raised while executing the request or building the resource is converted by
     * {@link MechanizeExceptionFactory#newException} and rethrown.</p>
     *
     * @param request the HTTP request to execute
     * @return the resource built from the response
     */
    @SuppressWarnings("unchecked")
    @Override
    public <T extends Resource> T request(final HttpRequestBase request) {
        try {
            HttpResponse response = execute(client, request);
            Resource resource = toPage(request, response);
            return (T) resource;
        } catch (Exception e) {
            throw MechanizeExceptionFactory.newException(e);
        }
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#get(java.lang.String)
     */
    @Override
    public <T extends Resource> T get(final String uri) {
        return doRequest(uri).get();
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#post(java.lang.String, java.util.Map)
     */
    @Override
    public <T extends Resource> T post(final String uri, final Map<String, String> params) throws UnsupportedEncodingException {
        return post(uri, new Parameters(unsafeCast(params)));
    }

    /**
     * Views a {@code Map<String, String>} as a {@code Map<String, Object>} so it can be handed to
     * {@link Parameters}. No copy is made; the same map instance is returned.
     */
    @SuppressWarnings({ "unchecked", "rawtypes" })
    private Map<String, Object> unsafeCast(final Map<String, String> params) {
        return (Map) params;
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#post(java.lang.String, com.gistlabs.mechanize.parameters.Parameters)
     */
    @Override
    public <T extends Resource> T post(final String uri, final Parameters params) {
        return doRequest(uri).set(params).post();
    }

    /**
     * Idles / waits for the given amount of milliseconds. Useful to prevent being blocked for mass sending
     * requests or to appear as an artificial user.
     *
     * <p>The method always waits for the full duration, even if the calling thread is interrupted
     * while waiting. In that case the thread's interrupt status is restored before returning so
     * that callers can still observe the interruption.</p>
     *
     * @param milliseconds how long to wait; zero or negative values return immediately
     */
    public void idle(final int milliseconds) {
        final long startTime = System.currentTimeMillis();
        boolean interrupted = false;
        try {
            long remaining = milliseconds - (System.currentTimeMillis() - startTime);
            while (remaining > 0) {
                try {
                    Thread.sleep(remaining);
                } catch (InterruptedException e) {
                    // Remember the interrupt but keep waiting; re-asserting the flag here
                    // would make the next sleep() throw immediately and spin the loop.
                    interrupted = true;
                }
                remaining = milliseconds - (System.currentTimeMillis() - startTime);
            }
        } finally {
            if (interrupted) {
                Thread.currentThread().interrupt();
            }
        }
    }

    /* (non-Javadoc)
     * @see com.gistlabs.mechanize.Mechanize#cookies()
     */
    @Override
    public Cookies cookies() {
        return cookies;
    }

    /**
     * Builds a resource from the response using the factory registered for the response's MIME
     * type, falling back to the factory registered for the wildcard MIME type.
     *
     * @param request the request that produced the response
     * @param response the response to convert
     * @return the resource built by the selected factory
     * @throws IOException declared for factories and overriding implementations
     * @throws UnsupportedEncodingException declared for factories and overriding implementations
     */
    protected Resource toPage(final HttpRequestBase request, final HttpResponse response)
            throws IOException, UnsupportedEncodingException {
        ContentType contentType = getContentType(response);

        ResourceFactory factory = lookupFactory(contentType.getMimeType());
        if (factory == null) {
            factory = lookupFactory(ContentType.WILDCARD.getMimeType());
        }
        if (factory == null) {
            throw MechanizeExceptionFactory.newMechanizeException("No viable page type found, and no wildcard mime type factory registered.");
        }

        return factory.buildPage(this, request, response);
    }

    /**
     * Determines the content type of the response via {@code ContentType.getOrDefault}.
     *
     * @param response the response to inspect
     * @return the content type reported for the response entity
     */
    protected ContentType getContentType(final HttpResponse response) {
        return ContentType.getOrDefault(response.getEntity());
    }

    /**
     * Executes the request through the filter chain and prepares the response for consumption.
     *
     * <p>If the response interceptor recorded a "Location" attribute in the request context, it is
     * copied to the {@code MECHANIZE_LOCATION} header of the response. The response entity, when
     * present, is wrapped in a {@link BufferedHttpEntity}.</p>
     *
     * @param client not used by this implementation; the request is sent through the filter chain
     * @param request the request to execute
     * @return the response, with a buffered entity if it has one
     * @throws Exception if the filter chain or entity buffering fails
     */
    protected HttpResponse execute(final HttpClient client, final HttpRequestBase request) throws Exception {
        HttpContext context = new BasicHttpContext();
        HttpResponse response = requestChain.execute(request, context);

        Object location = context.getAttribute("Location");
        if (location != null) {
            response.setHeader(MECHANIZE_LOCATION, (String) location);
        }

        // Responses without a body (e.g. HEAD, 204, 304) have no entity, and
        // BufferedHttpEntity rejects a null entity, so only buffer when one exists.
        if (response.getEntity() != null) {
            response.setEntity(new BufferedHttpEntity(response.getEntity()));
        }

        return response;
    }

    /**
     * Parses the given string as a {@link URL} and returns its external form.
     *
     * @param uri the URL string to parse
     * @return the external form of the parsed URL
     */
    @Override
    public String absoluteUrl(final String uri) {
        try {
            return new URL(uri).toExternalForm();
        } catch (MalformedURLException e) {
            throw MechanizeExceptionFactory.newException(e);
        }
    }
}