/*
* This is eMonocot, a global online biodiversity information resource.
*
* Copyright © 2011–2015 The Board of Trustees of the Royal Botanic Gardens, Kew and The University of Oxford
*
* eMonocot is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* eMonocot is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even
* the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* The complete text of the GNU Affero General Public License is in the source repository as the file
* ‘COPYING’. It is also available from <http://www.gnu.org/licenses/>.
*/
package org.emonocot.harvest.common;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.net.HttpURLConnection;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.http.Header;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHost;
import org.apache.http.HttpResponse;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.HttpClient;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.conn.params.ConnRoutePNames;
import org.apache.http.entity.BufferedHttpEntity;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.cookie.DateUtils;
import org.apache.http.message.BasicHeader;
import org.apache.http.util.EntityUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.batch.core.ExitStatus;
import org.springframework.batch.retry.RetryCallback;
import org.springframework.batch.retry.RetryContext;
import org.springframework.batch.retry.RetryListener;
import org.springframework.batch.retry.backoff.FixedBackOffPolicy;
import org.springframework.batch.retry.policy.SimpleRetryPolicy;
import org.springframework.batch.retry.support.RetryTemplate;
/**
 * HTTP client that downloads remote resources to a local file (conditional
 * GET using <code>If-Modified-Since</code>) or issues a POST and captures the
 * response, retrying failed attempts with a fixed back-off.
 *
 * <p>Every operation reports its outcome as a Spring Batch {@link ExitStatus}
 * instead of throwing: {@link ExitStatus#COMPLETED} on success, an exit
 * status with the code <code>NOT_MODIFIED</code> when the server answers
 * 304, and {@link ExitStatus#FAILED} when all attempts fail or a
 * non-retryable exception occurs.</p>
 *
 * <p>The proxy and user-agent settings are written into the parameters of the
 * configured {@link HttpClient} on every call.</p>
 *
 * @author ben
 *
 */
public class GetResourceClient {

    /** Size (in chars or bytes) of the buffer used when copying content. */
    static final int BUFFER = 2048;

    /** Value sent as the <code>http.useragent</code> client parameter. */
    private static final String USER_AGENT = "org.emonocot.ws.GetResourceClient";

    /** Charset used whenever response content is decoded or re-encoded as text. */
    private static final String CHARSET = "UTF-8";

    private Logger logger = LoggerFactory.getLogger(GetResourceClient.class);

    private HttpClient httpClient = new DefaultHttpClient();

    private String proxyHost;

    private Integer proxyPort;

    /** Pause between two attempts, in milliseconds. */
    private Long backoffPeriod = 3000L;

    /** Maximum number of attempts per operation. */
    private int retryAttempts = 3;

    private RetryListener[] retryListeners = new RetryListener[] {};

    /**
     *
     * @param retryListeners Set the listeners registered on every retry template
     */
    public void setRetryListeners(RetryListener[] retryListeners) {
        this.retryListeners = retryListeners;
    }

    /**
     *
     * @param backoffPeriod Set the pause between two attempts
     */
    public void setBackoffPeriod(Long backoffPeriod) {
        this.backoffPeriod = backoffPeriod;
    }

    /**
     *
     * @param retryAttempts Set the maximum number of attempts
     */
    public void setRetryAttempts(int retryAttempts) {
        this.retryAttempts = retryAttempts;
    }

    /**
     *
     * @param newProxyHost Set the proxy host
     */
    public final void setProxyHost(final String newProxyHost) {
        this.proxyHost = newProxyHost;
    }

    /**
     * Sets the proxy port from its textual form. A null or empty value is
     * ignored; a value that cannot be decoded is logged and ignored, leaving
     * the previous port (if any) in place.
     *
     * @param newProxyPort Set the proxy port
     */
    public final void setProxyPort(final String newProxyPort) {
        if (newProxyPort == null || newProxyPort.isEmpty()) {
            return;
        }
        try {
            // Integer.decode also accepts hex ("0x..") and octal (leading "0") notation.
            this.proxyPort = Integer.decode(newProxyPort);
        } catch (NumberFormatException nfe) {
            logger.warn(nfe.getMessage());
        }
    }

    /**
     *
     * @param newHttpClient Set the http client instance to use.
     */
    public final void setHttpClient(final HttpClient newHttpClient) {
        this.httpClient = newHttpClient;
    }

    /**
     * Executes a HTTP GET request with the If-Modified-Since header set to
     * ifModifiedSince and stores the response as UTF-8 text. If the resource
     * has not been modified then the server may respond with the HTTP status
     * 304 NOT MODIFIED, in which case the method returns an ExitStatus with
     * the exit code 'NOT_MODIFIED'.
     *
     * If the server responds with 200 OK, the response body is decoded as
     * UTF-8, written (as UTF-8) to the file named by temporaryFileName and
     * {@link ExitStatus#COMPLETED} is returned. Any other status code is
     * treated as a failed attempt and retried.
     *
     * @param resource
     *            The endpoint (uri) being harvested. Spaces are escaped as
     *            <code>%20</code>.
     * @param ifModifiedSince
     *            The time of the last harvest as milliseconds since the epoch,
     *            in decimal string form. A value that is not a number makes
     *            the method return {@link ExitStatus#FAILED}.
     * @param temporaryFileName
     *            The name of the temporary file to store the response in
     * @return An exit status indicating that the step was completed, failed, or
     *         that the server responded with a 304 NOT MODIFIED response
     */
    public final ExitStatus getResource(final String resource, final String ifModifiedSince,
            final String temporaryFileName) {
        return fetch(resource, ifModifiedSince, temporaryFileName, false);
    }

    /**
     * Executes a HTTP GET request with the If-Modified-Since header set to
     * ifModifiedSince and stores the response byte-for-byte. If the resource
     * has not been modified then the server may respond with the HTTP status
     * 304 NOT MODIFIED, in which case the method returns an ExitStatus with
     * the exit code 'NOT_MODIFIED'.
     *
     * If the server responds with 200 OK, the response body is copied
     * unchanged to the file named by temporaryFileName and
     * {@link ExitStatus#COMPLETED} is returned. Any other status code is
     * treated as a failed attempt and retried.
     *
     * @param resource
     *            The endpoint (uri) being harvested. Spaces are escaped as
     *            <code>%20</code>.
     * @param ifModifiedSince
     *            The time of the last harvest as milliseconds since the epoch,
     *            in decimal string form. A value that is not a number makes
     *            the method return {@link ExitStatus#FAILED}.
     * @param temporaryFileName
     *            The name of the temporary file to store the response in
     * @return An exit status indicating that the step was completed, failed, or
     *         that the server responded with a 304 NOT MODIFIED response
     */
    public final ExitStatus getBinaryResource(final String resource, final String ifModifiedSince,
            final String temporaryFileName) {
        return fetch(resource, ifModifiedSince, temporaryFileName, true);
    }

    /**
     * Executes a HTTP POST request without a request body and captures the
     * response. On 200 OK or 201 CREATED the response headers are copied into
     * responseHeaders and the response body (decoded as UTF-8) is appended to
     * response. Both are only touched once the body has been read completely,
     * so a failed attempt never leaves partial output behind.
     *
     * NOTE(review): params is not used by this method - nothing is sent with
     * the request. Confirm with the callers whether that is intended.
     *
     * @param authorityURI
     *            The uri to post to. Spaces are escaped as <code>%20</code>.
     * @param params
     *            Currently unused
     * @param response
     *            Buffer the response body is appended to
     * @param responseHeaders
     *            Map receiving the response headers (name to value; for
     *            repeated headers the last value wins)
     * @return {@link ExitStatus#COMPLETED} on success, otherwise
     *         {@link ExitStatus#FAILED}
     */
    public ExitStatus postBody(final String authorityURI, final Map<String,String> params, final StringBuffer response, final Map<String,String> responseHeaders) {
        return executeWithRetry(new RetryCallback<ExitStatus>() {
            @Override
            public ExitStatus doWithRetry(RetryContext context)
                    throws Exception {
                return attemptPost(authorityURI, response, responseHeaders);
            }
        });
    }

    /**
     * Shared implementation of the two GET operations.
     *
     * @param binary true to copy the body byte-for-byte, false to transcode it as UTF-8 text
     */
    private ExitStatus fetch(final String resource, final String ifModifiedSince,
            final String temporaryFileName, final boolean binary) {
        return executeWithRetry(new RetryCallback<ExitStatus>() {
            @Override
            public ExitStatus doWithRetry(RetryContext context)
                    throws Exception {
                return attemptGet(resource, ifModifiedSince, temporaryFileName, binary);
            }
        });
    }

    /**
     * Applies the client settings, runs the callback under the retry policy and
     * maps any exception that escapes the retry template to FAILED.
     */
    private ExitStatus executeWithRetry(RetryCallback<ExitStatus> callback) {
        configureHttpClient();
        RetryTemplate retryTemplate = createRetryTemplate();
        try {
            return retryTemplate.execute(callback);
        } catch (Exception e) {
            logger.error("Retry processing failed " + e.getMessage(), e);
            return ExitStatus.FAILED;
        }
    }

    /** Pushes the proxy (when both host and port are set) and the user agent into the client parameters. */
    private void configureHttpClient() {
        if (proxyHost != null && proxyPort != null) {
            HttpHost proxy = new HttpHost(proxyHost, proxyPort);
            httpClient.getParams()
                    .setParameter(ConnRoutePNames.DEFAULT_PROXY, proxy);
        }
        httpClient.getParams().setParameter("http.useragent", USER_AGENT);
    }

    /** Builds a retry template that retries I/O and protocol failures with a fixed back-off. */
    private RetryTemplate createRetryTemplate() {
        FixedBackOffPolicy backOffPolicy = new FixedBackOffPolicy();
        backOffPolicy.setBackOffPeriod(backoffPeriod);

        Map<Class<? extends Throwable>,Boolean> retryableExceptions = new HashMap<Class<? extends Throwable>,Boolean>();
        retryableExceptions.put(ClientProtocolException.class, Boolean.TRUE);
        retryableExceptions.put(IOException.class, Boolean.TRUE);
        SimpleRetryPolicy retryPolicy = new SimpleRetryPolicy();
        retryPolicy.setMaxAttempts(retryAttempts);
        retryPolicy.setRetryableExceptions(retryableExceptions);

        RetryTemplate retryTemplate = new RetryTemplate();
        retryTemplate.setListeners(retryListeners);
        retryTemplate.setBackOffPolicy(backOffPolicy);
        retryTemplate.setRetryPolicy(retryPolicy);
        return retryTemplate;
    }

    /** Performs a single conditional GET attempt; an IOException signals a retryable failure. */
    private ExitStatus attemptGet(String resource, String ifModifiedSince,
            String temporaryFileName, boolean binary) throws IOException {
        HttpGet httpGet = new HttpGet(escapeSpaces(resource));
        httpGet.setHeader("If-Modified-Since", DateUtils
                .formatDate(new Date(Long.parseLong(ifModifiedSince))));
        try {
            HttpResponse httpResponse = httpClient.execute(httpGet);
            int statusCode = httpResponse.getStatusLine().getStatusCode();
            if (statusCode == HttpURLConnection.HTTP_NOT_MODIFIED) {
                return new ExitStatus("NOT_MODIFIED");
            }
            if (statusCode != HttpURLConnection.HTTP_OK) {
                throw unexpectedStatus(httpResponse, resource);
            }
            HttpEntity entity = httpResponse.getEntity();
            if (entity == null) {
                throw missingEntity(httpResponse);
            }
            if (binary) {
                saveAsBinary(entity, temporaryFileName, resource);
            } else {
                saveAsText(entity, temporaryFileName, resource);
            }
            return ExitStatus.COMPLETED;
        } catch (ClientProtocolException cpe) {
            logProtocolFailure(resource, cpe);
            throw cpe;
        } catch (IOException ioe) {
            logger.error("Input Output Exception getting document "
                    + resource + " " + ioe.getLocalizedMessage(), ioe);
            throw ioe;
        } finally {
            httpGet.releaseConnection();
        }
    }

    /** Performs a single POST attempt; an IOException signals a retryable failure. */
    private ExitStatus attemptPost(String authorityURI, StringBuffer response,
            Map<String,String> responseHeaders) throws IOException {
        HttpPost httpPost = new HttpPost(escapeSpaces(authorityURI));
        try {
            HttpResponse httpResponse = httpClient.execute(httpPost);
            int statusCode = httpResponse.getStatusLine().getStatusCode();
            if (statusCode != HttpURLConnection.HTTP_OK
                    && statusCode != HttpURLConnection.HTTP_CREATED) {
                throw unexpectedStatus(httpResponse, authorityURI);
            }
            HttpEntity entity = httpResponse.getEntity();
            if (entity == null) {
                throw missingEntity(httpResponse);
            }
            // Read everything first so that the caller's buffers are only
            // modified by an attempt that fully succeeded.
            String body = readBody(entity, authorityURI);
            for (Header header : httpResponse.getAllHeaders()) {
                responseHeaders.put(header.getName(), header.getValue());
            }
            response.append(body);
            return ExitStatus.COMPLETED;
        } catch (ClientProtocolException cpe) {
            logProtocolFailure(authorityURI, cpe);
            throw cpe;
        } catch (IOException ioe) {
            logger.error("Input Output Exception getting document "
                    + authorityURI + " " + ioe.getLocalizedMessage(), ioe);
            throw ioe;
        } finally {
            httpPost.releaseConnection();
        }
    }

    /** Escapes literal spaces so that the uri can be parsed by the request constructor. */
    private String escapeSpaces(String uri) {
        return uri.replace(" ", "%20");
    }

    /** Decodes the entity as UTF-8 text and writes it, re-encoded as UTF-8, to the named file. */
    private void saveAsText(HttpEntity entity, String temporaryFileName, String resource)
            throws IOException {
        InputStreamReader reader = null;
        OutputStreamWriter writer = null;
        try {
            reader = new InputStreamReader(
                    new BufferedInputStream(entity.getContent()), CHARSET);
            writer = new OutputStreamWriter(
                    new BufferedOutputStream(new FileOutputStream(
                            new File(temporaryFileName))), CHARSET);
            char[] data = new char[BUFFER];
            int count;
            while ((count = reader.read(data, 0, BUFFER)) != -1) {
                writer.write(data, 0, count);
            }
            // Flush and close here so that a failure to write the tail of the
            // file fails the attempt instead of only being logged below.
            writer.flush();
            writer.close();
        } finally {
            closeQuietly(reader, "inputStream", resource);
            closeQuietly(writer, "outputStream", resource);
        }
    }

    /** Copies the entity content unchanged to the named file. */
    private void saveAsBinary(HttpEntity entity, String temporaryFileName, String resource)
            throws IOException {
        BufferedInputStream input = null;
        BufferedOutputStream output = null;
        try {
            input = new BufferedInputStream(entity.getContent());
            output = new BufferedOutputStream(new FileOutputStream(
                    new File(temporaryFileName)));
            byte[] data = new byte[BUFFER];
            int count;
            while ((count = input.read(data, 0, BUFFER)) != -1) {
                output.write(data, 0, count);
            }
            // See saveAsText: write failures must fail the attempt.
            output.flush();
            output.close();
        } finally {
            closeQuietly(input, "inputStream", resource);
            closeQuietly(output, "outputStream", resource);
        }
    }

    /**
     * Reads the whole entity as UTF-8 text.
     *
     * @return the decoded body, or an empty string if there is no entity
     */
    private String readBody(HttpEntity entity, String resource) throws IOException {
        if (entity == null) {
            return "";
        }
        InputStreamReader reader = new InputStreamReader(
                new BufferedInputStream(entity.getContent()), CHARSET);
        try {
            StringBuilder body = new StringBuilder();
            char[] chunk = new char[BUFFER];
            int count;
            while ((count = reader.read(chunk, 0, BUFFER)) != -1) {
                // Only the chars actually read; the rest of the buffer is stale.
                body.append(chunk, 0, count);
            }
            return body.toString();
        } finally {
            closeQuietly(reader, "inputStream", resource);
        }
    }

    /**
     * Logs an unexpected status code together with the response body (best
     * effort) and creates the exception that fails the current attempt.
     */
    private IOException unexpectedStatus(HttpResponse httpResponse, String resource) {
        String message = "Server returned unexpected status code "
                + httpResponse.getStatusLine() + " for document "
                + resource;
        // This is not an error in this application but a server side error
        logger.warn(message);
        try {
            logger.warn("Server Response was: " + readBody(httpResponse.getEntity(), resource));
        } catch (IOException ioe) {
            // The body is diagnostic only; the attempt fails with the status error regardless.
            logger.warn("Unable to read the server response for "
                    + resource + " " + ioe.getLocalizedMessage(), ioe);
        }
        return new IOException(message);
    }

    /** Logs a successful status without a body and creates the exception that fails the attempt. */
    private IOException missingEntity(HttpResponse httpResponse) {
        String message = "Server returned "
                + httpResponse.getStatusLine()
                + " but HttpEntity is null";
        logger.warn(message);
        return new IOException(message);
    }

    /** Logs a protocol level failure, including the status code when the exception carries one. */
    private void logProtocolFailure(String resource, ClientProtocolException cpe) {
        if (cpe instanceof HttpResponseException) {
            HttpResponseException hre = (HttpResponseException) cpe;
            logger.error("HttpResponse Exception getting document "
                    + resource + " " + hre.getMessage()
                    + " with status code " + hre.getStatusCode(), hre);
        } else {
            logger.error("Client Protocol Exception getting document "
                    + resource + " " + cpe.getMessage(), cpe);
        }
    }

    /** Closes the stream if it was opened; a failure to close is logged, never thrown. */
    private void closeQuietly(java.io.Closeable stream, String description, String resource) {
        if (stream == null) {
            return;
        }
        try {
            stream.close();
        } catch (IOException ioe) {
            logger.error(
                    "Input Output Exception closing " + description + " for "
                    + resource + " " + ioe.getLocalizedMessage(), ioe);
        }
    }
}