package net.jforum.util.legacy.clickstream;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import javax.servlet.http.HttpServletRequest;
import net.jforum.util.legacy.clickstream.config.ClickstreamConfig;
import net.jforum.util.legacy.clickstream.config.ConfigLoader;
/**
* Determines if a request is actually a bot or spider.
*
* @author <a href="plightbo@hotmail.com">Patrick Lightbody</a>
* @author Rafael Steil (little hacks for JForum)
* @version $Id: BotChecker.java,v 1.6 2005/12/18 02:12:54 rafaelsteil Exp $
*/
public class BotChecker
{
    /**
     * Checks whether the given request appears to come from a bot or spider.
     *
     * Three checks are applied, in this order:
     * <ol>
     * <li>the request URI contains <code>robots.txt</code>;</li>
     * <li>the lower-cased <code>User-Agent</code> header contains one of the
     * configured bot agent patterns;</li>
     * <li>the lower-cased remote host name contains one of the configured
     * bot host patterns.</li>
     * </ol>
     * When no clickstream configuration is available, only the first check
     * is performed.
     *
     * @param request the request
     * @return <code>null</code> if no bot was detected in the current request;
     * otherwise a string identifying the bot: a fixed description for
     * <code>robots.txt</code> requests, or the lower-cased user agent / remote
     * host that matched a configured pattern
     */
    public static String isBot(HttpServletRequest request)
    {
        if (request.getRequestURI().indexOf("robots.txt") != -1) {
            // there is a specific request for the robots.txt file, so we assume
            // it must be a robot (only robots request robots.txt)
            return "Unknown (asked for robots.txt)";
        }

        String userAgent = request.getHeader("User-Agent");
        ClickstreamConfig config = ConfigLoader.instance().getConfig();

        if (config == null) {
            // Without a configuration there are no patterns to match against.
            // Returning here also avoids dereferencing a null config in the
            // host check and skips the DNS lookup done by getRemoteHost().
            return null;
        }

        if (userAgent != null) {
            // Use a fixed locale: the default-locale variant maps 'I' to a
            // dotless 'i' under e.g. Turkish, which would defeat the matching.
            userAgent = userAgent.toLowerCase(Locale.ENGLISH);

            if (containsAny(userAgent, config.getBotAgents())) {
                return userAgent;
            }
        }

        String remoteHost = request.getRemoteHost(); // requires a DNS lookup

        // Only consider the host when its last character has a code above 64
        // ('@'), i.e. it is not a digit or a dot. A value ending in a digit is
        // taken to be an unresolved IP address rather than a host name.
        if (remoteHost != null && remoteHost.length() > 0
                && remoteHost.charAt(remoteHost.length() - 1) > 64) {
            remoteHost = remoteHost.toLowerCase(Locale.ENGLISH);

            if (containsAny(remoteHost, config.getBotHosts())) {
                return remoteHost;
            }
        }

        return null;
    }

    /**
     * Tells whether <code>value</code> contains at least one of the given patterns
     * as a substring. Patterns are compared as-is (no case conversion), so they
     * are presumably expected to be configured in lower case -- TODO confirm.
     *
     * @param value the (already lower-cased) string to search in
     * @param patterns list of <code>String</code> patterns; may be <code>null</code>
     * @return <code>true</code> if any non-empty pattern occurs in <code>value</code>
     */
    private static boolean containsAny(String value, List patterns)
    {
        if (patterns == null) {
            return false;
        }

        for (Iterator iterator = patterns.iterator(); iterator.hasNext(); ) {
            String pattern = (String) iterator.next();

            // An empty pattern would match every value (indexOf("") == 0),
            // so it is skipped together with null entries.
            if (pattern == null || pattern.length() == 0) {
                continue;
            }

            if (value.indexOf(pattern) != -1) {
                return true;
            }
        }

        return false;
    }
}