/*
* Copyright (c) 2009, University of Bristol
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1) Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2) Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* 3) Neither the name of the University of Bristol nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
*/
package org.ilrt.mca.harvester;
import com.hp.hpl.jena.rdf.model.Model;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpMethod;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.httpclient.params.HttpMethodParams;
import org.apache.log4j.Logger;
import java.io.IOException;
import java.io.InputStream;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Locale;
import java.util.Properties;
import java.util.TimeZone;
/**
 * Resolves a {@link Source} with an HTTP GET request and passes the response
 * body to a {@link ResponseHandler}, which builds the resulting {@link Model}.
 *
 * <p>Configuration is read from <code>/httpResolver.properties</code> on the
 * classpath. The only key read by this class is <code>user.agent</code>.</p>
 *
 * @author Mike Jones (mike.a.jones@bristol.ac.uk)
 */
public class HttpResolverImpl implements Resolver {

    /** Classpath location of the resolver configuration. */
    private static final String PROPERTIES_RESOURCE = "/httpResolver.properties";

    /** Property holding the optional User-Agent value. */
    private static final String USER_AGENT_KEY = "user.agent";

    /** RFC 1123 style date pattern used for the If-Modified-Since header. */
    private static final String HTTP_DATE_PATTERN = "EEE, dd MMM yyyy HH:mm:ss z";

    /**
     * Creates a resolver and loads its configuration from the classpath.
     *
     * @throws IOException if the properties resource is missing or cannot be read
     */
    public HttpResolverImpl() throws IOException {
        properties = new Properties();
        InputStream in = this.getClass().getResourceAsStream(PROPERTIES_RESOURCE);
        if (in == null) {
            // Fail with a descriptive error rather than an NPE from Properties.load(null)
            throw new IOException("Unable to find " + PROPERTIES_RESOURCE + " on the classpath");
        }
        try {
            properties.load(in);
        } finally {
            in.close();
        }
    }

    /**
     * Requests the source URL and, if the response is usable, asks the handler
     * to turn the response body into a model.
     *
     * @param source          source we want to resolve
     * @param responseHandler handles the response and creates a model
     * @return a model, or <code>null</code> when the request fails, the server
     *         reports an error or "not modified", the response has no usable
     *         content type or body, or the handler does not support the
     *         content type
     */
    public Model resolve(Source source, ResponseHandler responseHandler) {
        final String url = source.getUrl();
        HttpClient httpClient = new HttpClient();

        // set the user agent (the Apache client provides a default if none is configured)
        String userAgent = properties.getProperty(USER_AGENT_KEY);
        if (userAgent != null) {
            httpClient.getParams().setParameter(HttpMethodParams.USER_AGENT, userAgent);
        }

        HttpMethod httpMethod = new GetMethod(url);
        try {
            // only resolve if the source has been updated since the last visit
            if (source.getLastVisited() != null) {
                httpMethod.addRequestHeader("If-Modified-Since",
                        getDateFormat(source.getLastVisited()));
            }

            // request the url and get the status
            int status;
            try {
                status = httpClient.executeMethod(httpMethod);
            } catch (IOException e) {
                log.error("Error trying to GET " + url + " : " + e.getMessage(), e);
                return null;
            }

            // the conditional GET reported no change, so there is nothing to parse
            if (status == HttpStatus.SC_NOT_MODIFIED) {
                log.info("The requested resource " + url
                        + " returned the following response code: " + status);
                return null;
            }

            // every 4xx/5xx response (including 400 itself) is a failure
            // TODO - what about access to feeds that need authentication?
            if (status >= HttpStatus.SC_BAD_REQUEST) {
                log.info("The requested resource " + url + " failed to return with the "
                        + "following response code: " + status);
                return null;
            }

            log.info("The requested resource " + url
                    + " returned the following response code: " + status);

            // get the content type; the header may legitimately be absent
            String contentType = httpMethod.getResponseHeader("Content-Type") != null
                    ? httpMethod.getResponseHeader("Content-Type").getValue()
                    : null;
            if (contentType == null) {
                log.info("The requested resource " + url + " did not return a content type");
                return null;
            }

            // only handle data if we have an expected data type
            if (!responseHandler.isSupportedMediaType(contentType)) {
                log.info("Handler cannot handle the content type: " + contentType);
                return null;
            }

            try {
                log.info("Handler can respond to the content type: " + contentType);
                InputStream is = httpMethod.getResponseBodyAsStream();
                if (is == null) {
                    log.info("The requested resource " + url + " returned no response body");
                    return null;
                }
                return responseHandler.getModel(url, is);
            } catch (IOException e) {
                log.error("Error occured when handling response: " + e.getMessage(), e);
            }
            return null;
        } finally {
            // Always hand the connection back. NOTE(review): this assumes the
            // handler has finished reading the stream when getModel returns.
            httpMethod.releaseConnection();
        }
    }

    /**
     * Formats a date as an HTTP date in GMT.
     *
     * @param date the date to format
     * @return the date formatted with English day and month names, in GMT
     */
    private String getDateFormat(Date date) {
        // Locale.US keeps day/month names in English regardless of the JVM default
        // locale. A new formatter is created per call because SimpleDateFormat is
        // not thread-safe.
        SimpleDateFormat httpDateFormat = new SimpleDateFormat(HTTP_DATE_PATTERN, Locale.US);
        httpDateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
        return httpDateFormat.format(date);
    }

    // Original package-private visibility is retained for both fields.
    final Logger log = Logger.getLogger(HttpResolverImpl.class);

    /** Settings loaded from the classpath properties resource. */
    final Properties properties;
}