package org.agnitas.service;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.Vector;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.log4j.Logger;
public class LinkcheckWorker implements Runnable {
private static final transient Logger logger = Logger.getLogger( LinkcheckWorker.class);
int timeout = 0; // default, no timeout.
// contains a list with links. if the given URL is valid and reachable,
// the given link will be removed from the link-list. Vector because its thread-save.
Vector<String> links;
String linkToCheck = null;
/**
* @param timeout
* @param links
* @param linkToCheck
*
*/
public LinkcheckWorker(int timeout, Vector<String> links, String linkToCheck) {
this.timeout = timeout;
this.links = links;
this.linkToCheck = linkToCheck;
}
@Override
public void run() {
boolean failure = false;
boolean dynamic = false;
// check if the link has dynamic content.
dynamic = dynamicLinkCheck();
if (dynamic) {
if( logger.isInfoEnabled()) {
logger.info( "Link is dynamic - no checking for: " + linkToCheck);
}
failure = false;
} else {
failure = netBasedTest();
}
// remove working link from failure-list
if (!failure) {
if( logger.isInfoEnabled()) {
logger.info( "Link is working: " + linkToCheck);
}
links.remove(linkToCheck);
}
}
/**
* this method checks, if the given link contains dynamic content like ##AGNUID##
* if thats the case, we wont check the link anymore.
* If failure = true, the link is dynamic and has to be removed. "failure" here means dynamic.
* @return
*/
private boolean dynamicLinkCheck() {
boolean dynamic = false;
Pattern pattern = Pattern.compile ("##([^#]+)##");
Matcher aMatch = pattern.matcher(linkToCheck);
if (aMatch.find() ) {
// found dynamic content
return true;
}
return dynamic;
}
/**
* this method checks, if the given link works. It gets a real connection
* to the given server and tries to fetch some answers.
* @param failure
* @return
*/
private boolean netBasedTest() {
boolean failure = false;
URL url;
try {
if( logger.isInfoEnabled()) {
logger.info( "Checking link: " + linkToCheck);
}
url = new URL(linkToCheck); // just for checking, we could use the plain String...
HttpClient client = new HttpClient();
// create get-method.
GetMethod get = new GetMethod(url.toString());
get.getParams().setParameter("http.socket.timeout", new Integer(timeout));
get.getParams().setParameter("User-Agent", "Mozilla/5.0 (Windows; U; Windows NT 6.0; en-US; rv:1.9.2.2) Gecko/20100316 Firefox/3.6.2 (.NET CLR 3.5.30729)");
// lets start working...
client.executeMethod(get);
// check response code
if (get.getStatusCode() == HttpURLConnection.HTTP_NOT_FOUND) {
failure = true;
}
}
catch (MalformedURLException e) {
if( logger.isInfoEnabled()) {
logger.info( "Link URL malformed: " + linkToCheck); // This is no "real error", this is a test result for the link. So we can log this at INFO leven
}
failure = true;
}
catch (UnknownHostException e) {
if( logger.isInfoEnabled()) {
logger.info( "Unknown host: " + linkToCheck); // This is no "real error", this is a test result for the link. So we can log this at INFO leven
}
failure = true;
}
catch (IOException e1) {
logger.warn( "I/O error testing URL: " + linkToCheck, e1); // This is no "real error", this is a test result for the link. Since this could be any IO problem, let us report this at WARN level
// some other connection problem, but link was found, so don't add it to invalid links.
// invalidlinks.add(fullUrl);
}
return failure;
}
}