package com.netifera.platform.net.http.tools;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import com.netifera.platform.api.probe.IProbe;
import com.netifera.platform.api.tools.ITool;
import com.netifera.platform.api.tools.IToolContext;
import com.netifera.platform.api.tools.ToolException;
import com.netifera.platform.net.http.internal.tools.Activator;
import com.netifera.platform.net.http.service.HTTP;
import com.netifera.platform.tools.RequiredOptionMissingException;
/**
 * Tool that crawls a web site with a {@link WebSpider}, starting from the URL
 * supplied in the tool configuration.
 *
 * <p>Configuration options read by this tool:
 * <ul>
 * <li>{@code target} (required) - the {@link HTTP} service to crawl</li>
 * <li>{@code url} (required) - start URL; may be a bare path ({@code /dir/}),
 * a host with optional path ({@code example.com/dir/}) or a full
 * {@code http://} / {@code https://} URL</li>
 * <li>{@code followLinks} (optional Boolean)</li>
 * <li>{@code fetchImages} (optional Boolean)</li>
 * <li>{@code scanWebApplications} (optional Boolean) - when true, every
 * trigger reported by the web application detector is queued as well</li>
 * <li>{@code maximumConnections} (optional Integer)</li>
 * </ul>
 */
public class WebCrawler implements ITool {
    private static final String HTTP_PREFIX = "http://";
    private static final String HTTPS_PREFIX = "https://";

    private IToolContext context;
    private HTTP http;
    private URI base;

    /**
     * Runs the crawl: validates the options, configures a {@link WebSpider}
     * and lets it run. I/O errors and interruption are reported through the
     * context rather than propagated, and {@code context.done()} is always
     * called once the spider section has been entered.
     *
     * @param context the tool context providing configuration and reporting
     * @throws ToolException if a required option is missing or the URL option
     *         is empty or malformed
     */
    public void toolRun(IToolContext context) throws ToolException {
        this.context = context;

        context.setTitle("Web crawler");
        setupToolOptions();

        // XXX hardcode local probe as realm
        IProbe probe = Activator.getInstance().getProbeManager().getLocalProbe();
        long realm = probe.getEntity().getId();

        // Host names are case-insensitive, so compare them that way; this
        // only affects which title is shown.
        String host = base.getHost();
        if (host != null && host.equalsIgnoreCase(http.getURIHost())) {
            context.setTitle("Crawl " + base);
        } else {
            context.setTitle("Crawl " + base + " at " + http.getLocator());
        }

        try {
            WebSpider spider = new WebSpider(http);
            spider.setContext(context);
            spider.setRealm(realm);
            spider.setBaseURL(base);
            spider.addURL(base);
            spider.addURL(base.resolve("/favicon.ico"));

            Object followLinks = option("followLinks");
            if (followLinks != null) {
                spider.setFollowLinks((Boolean) followLinks);
            }

            Object fetchImages = option("fetchImages");
            if (fetchImages != null) {
                spider.setFetchImages((Boolean) fetchImages);
            }

            Object scanWebApplications = option("scanWebApplications");
            if (scanWebApplications != null && (Boolean) scanWebApplications) {
                for (String trigger : Activator.getInstance().getWebApplicationDetector().getTriggers()) {
                    spider.addURL(base.resolve(trigger));
                }
            }

            Object maximumConnections = option("maximumConnections");
            if (maximumConnections != null) {
                spider.setMaximumConnections((Integer) maximumConnections);
            }

            spider.run();
        } catch (IOException e) {
            context.exception("I/O error: " + e.getMessage(), e);
        } catch (InterruptedException e) {
            // Restore the interrupt flag so callers further up can see it.
            Thread.currentThread().interrupt();
            context.warning("Interrupted");
        } finally {
            context.done();
        }
    }

    /**
     * Reads the {@code target} and {@code url} options and derives the
     * normalized base URI of the crawl from them.
     *
     * @throws ToolException if {@code target} or {@code url} is missing, or
     *         if {@code url} is empty or cannot be parsed as a URI
     */
    private void setupToolOptions() throws ToolException {
        http = (HTTP) option("target");
        if (http == null) {
            // Fail with a meaningful error instead of a NullPointerException later on.
            throw new RequiredOptionMissingException("target");
        }

        String url = (String) option("url");
        if (url == null) {
            throw new RequiredOptionMissingException("url");
        }
        url = url.trim();
        if (url.length() == 0) {
            throw new ToolException("Empty URL parameter");
        }

        // A bare path is taken relative to the target service.
        if (url.startsWith("/")) {
            url = http.getURIHostPort() + url;
        }

        if (!hasHttpScheme(url)) {
            // TODO derive the scheme from the target service instead of
            // always assuming plain http
            url = HTTP_PREFIX + url;
        }

        url = ensurePath(url);
        url = url.replace(" ", "%20"); // TODO escape more

        try {
            base = new URI(url).normalize();
        } catch (URISyntaxException e) {
            throw new ToolException("Malformed URL parameter: " + url, e);
        }
    }

    /** Returns the raw value of a configuration option, or null if unset. */
    private Object option(String name) {
        return context.getConfiguration().get(name);
    }

    /**
     * Tells whether the text starts with an explicit http or https scheme.
     * A plain {@code startsWith("http")} would also match host names such as
     * {@code httpbin.org} and would miss upper-case schemes.
     */
    private static boolean hasHttpScheme(String url) {
        return url.regionMatches(true, 0, HTTP_PREFIX, 0, HTTP_PREFIX.length())
                || url.regionMatches(true, 0, HTTPS_PREFIX, 0, HTTPS_PREFIX.length());
    }

    /**
     * Inserts a root path right after the authority when the URL has none,
     * e.g. {@code http://host} becomes {@code http://host/} and
     * {@code http://host?q} becomes {@code http://host/?q}. A base URI with an
     * empty path does not resolve relative references correctly with
     * {@link URI#resolve(String)}.
     *
     * @param url a URL that already carries an http or https scheme
     */
    private static String ensurePath(String url) {
        int authorityStart = url.indexOf("://") + 3;
        int authorityEnd = authorityStart;
        while (authorityEnd < url.length() && "/?#".indexOf(url.charAt(authorityEnd)) < 0) {
            authorityEnd++;
        }
        // An empty authority is left alone so it is still rejected as malformed.
        boolean hasAuthority = authorityEnd > authorityStart;
        boolean hasPath = authorityEnd < url.length() && url.charAt(authorityEnd) == '/';
        if (hasAuthority && !hasPath) {
            return url.substring(0, authorityEnd) + '/' + url.substring(authorityEnd);
        }
        return url;
    }
}