package com.kkazmierczyk.freestyle;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import java.util.Random;
import javax.persistence.EntityManagerFactory;
import javax.persistence.Persistence;
import javax.servlet.ServletException;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.log4j.Logger;
/**
*
* This servlet logs every visit to a page and saves the visit to the database
* if it was made by a bot.
*
* @version $Id: MainServlet.java 39 2008-06-04 07:48:40Z kazik $
*
*/
public class MainServlet extends HttpServlet {

    /** Comment to have bots.xt.pl page */
    private static final String COMMENT_TO_ALIAS = "<!-- "
            + "XT:gBTFwZEpwhPakNe27iaiTh6X6R0iLspczqsEohuZZeqwadcCYVL2WXoWbfurtHu4 "
            + "-->";

    /** Document type declaration written at the top of every generated page. */
    public static final String DOCTYPE = "<!DOCTYPE html PUBLIC "
            + "\"-//W3C//DTD HTML 4.01 Transitional//EN\" "
            + "\"http://www.w3.org/TR/1999/REC-html401-19991224/loose.dtd\">";

    /** Code to add this page to Google Analytics. */
    private static final String GOOGLE_FOLLOW_CODE = "<script type=\"text/javascript\">"
            + "var gaJsHost = ((\"https:\" == document.location.protocol) ? "
            + "\"https://ssl.\" : \"http://www.\");"
            + "document.write(unescape(\"%3Cscript src='\" + gaJsHost + "
            + "\"google-analytics.com/ga.js' "
            + "type='text/javascript'%3E%3C/script%3E\"));"
            + "</script>"
            + "<script type=\"text/javascript\">"
            + "var pageTracker = _gat._getTracker(\"UA-4545823-1\");"
            + "pageTracker._initData();"
            + "pageTracker._trackPageview();"
            + "</script>";

    /** Logger instance. */
    private static final Logger logger = Logger.getLogger(MainServlet.class);

    /** Used in JPA. */
    private static final String PERSISTENT_UNIT_NAME =
            "com.kkazmierczyk.freestyle";

    /** Label of the link leading one level up in the tree. */
    private static final String UP = "up";

    /** Exclusive upper bound for the length of the random text. */
    private static final int MAX_RANDOM_TEXT_LENGTH = 127;

    /** Characters the random text is drawn from. */
    private static final String CHARSET =
            "1AaBb2CcDdEe3FfGgHh4IiJjKk5LlMmNn6OoPp7QqRr8SsTt9UuVvWw0XxYyZz ";

    /**
     * Guards {@link #counter}. A dedicated lock object is required: locking on
     * a boxed {@code Long} that is reassigned on every increment would let
     * two threads hold different monitors at the same time.
     */
    private static final Object COUNTER_LOCK = new Object();

    /** Counts number of visits on the web site; guarded by COUNTER_LOCK. */
    private static long counter = 0L;

    /** Number of visits of each page; guarded by its own monitor. */
    private static final Map<String, Integer> pagesVisits =
            new HashMap<String, Integer>();

    /** Source of randomness for {@link #getRandomText()}. */
    private final Random random = new Random();

    /**
     * Number of links on each page. Read from the "linksNO" context init
     * parameter in {@link #init()}.
     */
    private int linksNo;

    /** List of bots */
    private Bots bots;

    /** Entity manager factory created in {@link #init()}. */
    private EntityManagerFactory emf;

    /**
     * Increases the site-wide visit counter.
     *
     * @return the new value of the counter
     */
    private static long increaseCounter() {
        synchronized (COUNTER_LOCK) {
            return ++counter;
        }
    }

    /**
     * Increases the number of visits of the given page.
     *
     * @param path page path used as the key of the per-page counter
     * @return the new number of visits of that page
     */
    private static int increaseVisit(String path) {
        synchronized (pagesVisits) {
            Integer previous = pagesVisits.get(path);
            int visits = (previous == null) ? 1 : previous + 1;
            pagesVisits.put(path, visits);
            return visits;
        }
    }

    /**
     * Returns a random sequence of characters taken from {@link #CHARSET}.
     *
     * @return text of random length between 0 and
     *         {@code MAX_RANDOM_TEXT_LENGTH - 1} characters
     */
    private String getRandomText() {
        // java.util.Random is thread-safe, so no extra synchronization here.
        char[] result = new char[random.nextInt(MAX_RANDOM_TEXT_LENGTH)];
        for (int i = 0; i < result.length; i++) {
            result[i] = CHARSET.charAt(random.nextInt(CHARSET.length()));
        }
        return new String(result);
    }

    /**
     * Adds one visit of the given bot to the tally.
     *
     * @param numberOfVisits tally of visits keyed by bot name (modified)
     * @param botName        name of the bot to count
     */
    private void countBot(Map<String, Integer> numberOfVisits, String botName) {
        Integer current = numberOfVisits.get(botName);
        numberOfVisits.put(botName, (current == null) ? 1 : current + 1);
    }

    /**
     * Perform servlet shutdown.
     */
    @Override
    public void destroy() {
        // emf stays null when init() failed before the factory was created.
        if (emf != null) {
            emf.close();
        }
        logger.info("Servlet destroyed.");
        super.destroy();
    }

    /**
     * Serves HTTP GET request: logs the visit, stores it when the user agent
     * is recognized as a bot, and writes the generated page. Responds with
     * 404 when the path contains characters that cannot be link names.
     *
     * @param request  the incoming request
     * @param response the response the page is written to
     * @throws ServletException declared by the overridden method
     * @throws IOException      if writing the response fails
     */
    @Override
    protected void doGet(HttpServletRequest request,
            HttpServletResponse response) throws ServletException, IOException {
        // getPathInfo() returns null when the request has no extra path
        // information; treat that as the root page instead of failing.
        String pathInfo = request.getPathInfo();
        String path = (pathInfo == null || pathInfo.length() == 0)
                ? ""
                : pathInfo.substring(1); // without leading "/"
        String context = request.getContextPath();

        if (!validPath(path)) {
            response.sendError(HttpServletResponse.SC_NOT_FOUND,
                    "Page not exist");
            return;
        }

        String agent = request.getHeader("User-Agent");
        logger.info("Path: " + path + " visited by " + agent);
        if (bots.getBotName(agent) != null) {
            DataManager.saveVisitInDatabase(emf, agent, path);
        }

        response.setHeader("Content-type", "text/html; charset=utf-8");
        final Writer writer = new OutputStreamWriter(
                response.getOutputStream(), "utf-8");

        writer.write(DOCTYPE + "<html>" + COMMENT_TO_ALIAS + "<head>"
                + "<meta http-equiv=\"content-type\" content=\"utf-8\">"
                + "<meta name=\"robots\" content=\"all\">" + "<title>"
                + path + "</title>" + "</head><body>");
        writer.write("<h1> Welcome on follow spy bot web page </h1>");
        writer.write("<p>This server has been visited " + increaseCounter()
                + " times</p>");
        writer.write("<p>This site has been visited " + increaseVisit(path)
                + " times</p>");
        writer.write("<p>Current date: " + (new Date()).toString() + "</p>");
        writer.write("<p>Site name context: " + path + "</p>");
        writer.write("<p>Random sequence of text: " + getRandomText() + "</p>");

        // Writing links: one child page per letter, starting at 'a'.
        writer.write("<h2>Links:</h2>");
        for (int i = 0; i < linksNo; i++) {
            char linkName = (char) ('a' + i);
            writer.write("<a href=\"" + context + "/" + path + linkName
                    + "\">" + linkName + "</a> ");
        }

        // Writing log
        writeLog(DataManager.getPageLog(emf, path), writer);

        writer.write("<p>");
        if (path.length() > 0) {
            // Writing link to the parent page (path without its last letter).
            writer.write("<a href=\"" + context + "/"
                    + path.substring(0, path.length() - 1) + "\">" + UP
                    + "</a><br>");
        } else {
            // Writing links to make it easier for bots to find these pages.
            writer.write("<h2> Other interesting sites: </h2>"
                    + "<a href=\"http://www.google.com\">Google web page</a><br>"
                    + "<a href=\"http://www.yahoo.com\">Yahoo web page</a><br>"
                    + "<a href=\"http://www.msn.com\">MSN web page</a><br>");
        }
        // The document element is <html>, so that is what must be closed here.
        writer.write(GOOGLE_FOLLOW_CODE + "</body></html>");
        writer.close();
    }

    /**
     * Initialize the servlet: reads the "linksNO" context init parameter,
     * creates the entity manager factory and the bot list, and publishes
     * both as servlet context attributes ("emf" and "bots").
     *
     * @throws ServletException if the superclass initialization fails
     * @throws NumberFormatException if "linksNO" is missing or not an integer
     * @throws RuntimeException if the factory or bot list cannot be created
     */
    @Override
    public void init() throws ServletException {
        super.init();
        linksNo = Integer.parseInt(
                getServletContext().getInitParameter("linksNO"));
        try {
            emf = Persistence.createEntityManagerFactory(PERSISTENT_UNIT_NAME);
            getServletContext().setAttribute("emf", emf);
            bots = new Bots();
            getServletContext().setAttribute("bots", bots);
            logger.info("Servlet initialized.");
        } catch (Exception e) {
            logger.fatal("Could not create entity manager factory", e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Checks if the given path is valid, i.e. contains only characters which
     * can be names of links ('a' up to the {@code linksNo}-th letter).
     *
     * @param path path to check; the empty path is valid
     * @return {@code true} if every character is an allowed link name
     */
    private boolean validPath(String path) {
        final char lastChar = (char) ('a' + linksNo - 1);
        for (int i = 0; i < path.length(); i++) {
            char processed = path.charAt(i);
            if (processed < 'a' || processed > lastChar) {
                return false;
            }
        }
        return true;
    }

    /**
     * Writes the per-bot visit summary followed by the table of individual
     * bot visits. Entries whose agent is not recognized as a bot are skipped.
     *
     * @param pageLog logged requests of the page
     * @param writer  destination of the generated HTML
     * @throws IOException if writing fails
     */
    private void writeLog(Collection<LoggedRequest> pageLog, Writer writer)
            throws IOException {
        final Map<String, Integer> numberOfBots =
                new HashMap<String, Integer>(3);
        StringBuilder log = new StringBuilder();
        log.append("<h2>Visits of robots</h2>");
        log.append("<table>");
        for (LoggedRequest loggedRequest : pageLog) {
            String botName = bots.getBotName(loggedRequest.getAgent());
            if (botName != null) {
                log.append("<tr>");
                log.append("<td>").append(loggedRequest.getDate().toString())
                        .append("</td>");
                log.append("<td>").append(botName).append("</td>");
                log.append("</tr>");
                countBot(numberOfBots, botName);
            }
        }
        log.append("</table>");
        // The summary is written first, although it is computed while the
        // detailed table is being built.
        writer.write("<h2>Number of visits of each bot</h2>");
        writer.write(writeNumberOfVisits(numberOfBots).toString());
        writer.write(log.toString());
    }

    /**
     * Writes number of visits of each bot.
     *
     * @param numberOfBots tally of visits keyed by bot name
     * @return String builder with printed table of results
     */
    private StringBuilder writeNumberOfVisits(Map<String, Integer> numberOfBots) {
        StringBuilder result = new StringBuilder();
        result.append("<table>");
        /*
         * NOTE: numberOfBots is empty when no bot has visited the page yet;
         * in that case the table is emitted without any rows.
         */
        for (Map.Entry<String, Integer> entry : numberOfBots.entrySet()) {
            result.append("<tr><td>").append(entry.getKey())
                    .append("</td><td>").append(entry.getValue())
                    .append("</td></tr>");
        }
        result.append("</table>");
        return result;
    }
}