/**
* The contents of this file are subject to the license and copyright
* detailed in the LICENSE and NOTICE files at the root of the source
* tree and available online at
*
* http://www.dspace.org/license/
*/
package org.dspace.ctask.general;
import org.apache.log4j.Logger;
import org.dspace.content.DCValue;
import org.dspace.content.DSpaceObject;
import org.dspace.content.Item;
import org.dspace.curate.AbstractCurationTask;
import org.dspace.curate.Curator;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
/**
* A basic link checker that is designed to be extended. By default this link checker
* will check that all links stored in anyschema.anyelement.uri metadata fields return
* a 20x status code.
*
* This link checker can be enhanced by extending this class, and overriding the
* getURLs and checkURL methods.
*
* @author Stuart Lewis
*/
public class BasicLinkChecker extends AbstractCurationTask
{
    // The log4j logger for this class
    private static final Logger log = Logger.getLogger(BasicLinkChecker.class);

    /**
     * Perform the link checking.
     *
     * Objects that are not items, and items without any URLs to check, yield
     * {@code Curator.CURATE_SKIP}. Otherwise every URL is checked and reported;
     * the outcome is {@code Curator.CURATE_FAIL} if at least one URL failed and
     * {@code Curator.CURATE_SUCCESS} only if all of them passed.
     *
     * @param dso The DSpaceObject to be checked
     * @return The curation task status of the checking
     * @throws java.io.IOException Thrown if something went wrong
     */
    @Override
    public int perform(DSpaceObject dso) throws IOException
    {
        // The textual report that is both stored as the result and reported
        StringBuilder results = new StringBuilder();

        // Assume skip until we hit a URL to check
        int status = Curator.CURATE_SKIP;

        if (dso instanceof Item)
        {
            Item item = (Item) dso;
            List<String> urls = getURLs(item);

            results.append("Item: ").append(getItemHandle(item)).append("\n");

            // Check every URL (so each one is reported), but never let a later
            // success hide an earlier failure.
            for (String url : urls)
            {
                boolean ok = checkURL(url, results);
                if (!ok)
                {
                    status = Curator.CURATE_FAIL;
                }
                else if (status != Curator.CURATE_FAIL)
                {
                    status = Curator.CURATE_SUCCESS;
                }
            }
        }

        String summary = results.toString();
        setResult(summary);
        report(summary);
        return status;
    }

    /**
     * Get the URLs to check
     *
     * @param item The item to extract URLs from
     * @return A list of URL Strings, one per anyschema.anyelement.uri metadata value
     */
    protected List<String> getURLs(Item item)
    {
        // Get URIs from anyschema.anyelement.uri.*
        DCValue[] urls = item.getMetadata(Item.ANY, Item.ANY, "uri", Item.ANY);
        List<String> theURLs = new ArrayList<String>(urls.length);
        for (DCValue url : urls)
        {
            theURLs.add(url.value);
        }
        return theURLs;
    }

    /**
     * Check the URL and perform appropriate reporting
     *
     * @param url The URL to check
     * @param results The report buffer; one line describing the outcome is appended
     * @return If the URL was OK or not (OK means a 2xx response code)
     */
    protected boolean checkURL(String url, StringBuilder results)
    {
        // Link check the URL
        int httpStatus = getResponseStatus(url);
        boolean ok = (httpStatus >= 200) && (httpStatus < 300);

        results.append(" - ").append(url).append(" = ").append(httpStatus)
               .append(ok ? " - OK\n" : " - FAILED\n");
        return ok;
    }

    /**
     * Get the response code for a URL. If something goes wrong opening the URL, or
     * the URL does not use an HTTP(S) connection, a response code of 0 is returned.
     *
     * @param url The url to open
     * @return The HTTP response code (e.g. 200 / 301 / 404 / 500), or 0 on failure
     */
    protected int getResponseStatus(String url)
    {
        // NOTE(review): no connect/read timeouts are set, so an unresponsive
        // server can block this call indefinitely - consider adding them.
        HttpURLConnection connection = null;
        try
        {
            URL theURL = new URL(url);
            URLConnection opened = theURL.openConnection();

            // Well-formed but non-HTTP URIs (ftp:, file:, mailto:, ...) do not
            // produce an HttpURLConnection; treat them as bad links rather than
            // letting a ClassCastException escape.
            if (!(opened instanceof HttpURLConnection))
            {
                log.debug("Bad link: not an HTTP URL: " + url);
                return 0;
            }

            connection = (HttpURLConnection) opened;
            return connection.getResponseCode();
        }
        catch (IOException ioe)
        {
            // Must be a bad URL
            log.debug("Bad link: " + ioe.getMessage());
            return 0;
        }
        finally
        {
            // Release the connection on both the success and the failure path
            if (connection != null)
            {
                connection.disconnect();
            }
        }
    }

    /**
     * Internal utility method to get a description of the handle
     *
     * @param item The item to get a description of
     * @return The handle, or " in workflow" if the item has no handle
     */
    private static String getItemHandle(Item item)
    {
        String handle = item.getHandle();
        return (handle != null) ? handle : " in workflow";
    }
}