/*
* Copyright (2006-2012) Schibsted ASA
* This file is part of Possom.
*
* Possom is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Possom is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with Possom. If not, see <http://www.gnu.org/licenses/>.
*/
package no.sesat.search.http.servlet;
import java.io.UnsupportedEncodingException;
import java.util.logging.Level;
import javax.servlet.http.HttpServlet;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import javax.servlet.ServletException;
import java.io.IOException;
import java.net.URLDecoder;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import no.sesat.search.datamodel.DataModel;
import no.sesat.search.datamodel.generic.StringDataObject;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.log4j.Logger;
/**
* Provides the user-statistics logging in Possom.
* Links are logged with <b>ceremonial</b> boomerangs that come back (ie with a redirect response).
* Javascript functionality (or user behaviour) is logged with <b>hunting</b> boomerangs that do not come back.
*
* A ceremonial example is:
* http://sesam.no/boomerang/category=results;subcategory=main/http://wever.org
*
* A hunting example is:
* http://sesam.no/hunting/?parameter-list
*
* @version <tt>$Id: 3361 $</tt>
*
*/
public final class BoomerangServlet extends HttpServlet {

    private static final Logger LOG = Logger.getLogger(BoomerangServlet.class);

    /** Logger receiving the {@code <boomerang>} statistics lines. */
    private static final Logger ACCESS = Logger.getLogger("no.sesat.Access");

    /** Request URI prefix that marks a ceremonial (redirecting) boomerang. */
    private static final String CEREMONIAL = "/boomerang/";

    /** User-agents matching this pattern are answered with a permanent redirect. */
    private static final Pattern ROBOTS = Pattern.compile("(Googlebot|Slurp|Crawler|Bot)", Pattern.CASE_INSENSITIVE);

    @Override
    public void destroy() { }

    @Override
    public void init() { }

    /**
     * Logs the request and, for ceremonial boomerangs, redirects the client to the destination url.
     *
     * A request whose URI starts with {@code /boomerang/} is ceremonial: the path segment following the prefix
     * is a {@code ;} separated list of {@code key=value} pairs (the grub) and everything after the next
     * {@code /} is the destination. Any other request is a hunting boomerang whose details are read from the
     * session's DataModel parameters.
     *
     * @param req the incoming request
     * @param res the response, always marked as non-cacheable
     * @throws ServletException declared by the servlet contract
     * @throws IOException if sending the redirect fails
     */
    @Override
    protected void doGet(final HttpServletRequest req, final HttpServletResponse res)
            throws ServletException, IOException {

        // clients must not cache these requests
        res.setHeader("Cache-Control", "no-cache, must-revalidate, post-check=0, pre-check=0");
        res.setHeader("Pragma", "no-cache"); // for old browsers
        res.setDateHeader("Expires", 0); // to be double-safe

        // header may be absent, in which case this is null
        final String userAgent = req.getHeader("User-Agent");

        // entrails is the map of logging information
        final Map<String,Object> entrails = new HashMap<String,Object>();

        // request attribute to keep
        entrails.put("referer", req.getHeader("Referer"));
        entrails.put("method", req.getMethod());
        entrails.put("ipaddress", req.getRemoteAddr());
        entrails.put("user-agent", userAgent);
        entrails.put("user-id", SearchServlet.getCookieValue(req, "SesamID"));
        entrails.put("user", SearchServlet.getCookieValue(req, "SesamUser"));

        if(req.getRequestURI().startsWith(CEREMONIAL)){

            // ceremonial boomerang
            final StringBuffer url = req.getRequestURL();
            if(null != req.getQueryString()){
                url.append('?').append(req.getQueryString());
            }

            // pick out the entrails
            final int boomerangStart = url.indexOf(CEREMONIAL) + CEREMONIAL.length();

            try{
                // the grub ends at the first slash after the prefix. a missing slash gives -1
                //  and the substring call below then throws StringIndexOutOfBoundsException.
                final int grubEnd = url.indexOf("/", boomerangStart);
                final String grub = url.substring(boomerangStart, grubEnd);
                LOG.debug(grub);

                // the url to return to. uses the same separator as the grub so an empty grub still works.
                final String destination = url.substring(grubEnd + 1);

                // the grub details to add. an empty grub simply yields no tokens.
                final StringTokenizer tokeniser = new StringTokenizer(grub, ";");
                while(tokeniser.hasMoreTokens()){
                    final String[] entry = tokeniser.nextToken().split("=");
                    if(0 == entry.length){
                        // a token consisting only of '=' splits to nothing, so there is no key to log
                        continue;
                    }
                    // a key without a value is logged with itself as the value
                    entrails.put(entry[0], 1 < entry.length ? entry[1] : entry[0]);
                }

                entrails.put("boomerang", destination);
                kangerooGrub(entrails);

                LOG.debug("Ceremonial boomerang to " + destination);

                // NOTE(review): destination is taken verbatim from the client supplied url.
                if(null != userAgent && ROBOTS.matcher(userAgent).find()){
                    // robots like permanent redirects. and we're not interested in their clicks so ok to cache.
                    res.setStatus(HttpServletResponse.SC_MOVED_PERMANENTLY);
                    res.setHeader("Location", destination);
                    res.setHeader("Connection", "close");
                }else{
                    // default behaviour for users.
                    res.sendRedirect(destination);
                }

            }catch(StringIndexOutOfBoundsException sioobe){
                // SEARCH-4668
                LOG.error("Boomerang url not to standard --> " + url);
                LOG.debug(sioobe.getMessage(), sioobe);
            }

        }else{

            // hunting boomerang, just grub, and the grub comes as clean parameters.
            final DataModel datamodel = (DataModel) req.getSession().getAttribute(DataModel.KEY);
            if(null != datamodel){
                entrails.putAll(datamodel.getParameters().getValues());
            }else{
                // still log what the request itself tells us
                LOG.warn("Hunting boomerang without a datamodel in the session");
            }
            kangerooGrub(entrails);
        }
    }

    /**
     * Writes the parameters, sorted by key, as one {@code <boomerang>} xml element to the access log.
     *
     * Keys are url-decoded and xml-escaped. StringDataObject values supply their own xml-escaped form,
     * any other value is xml-escaped from its string form. A parameter whose key cannot be decoded is
     * left out of the element and reported through the class logger instead.
     *
     * @param params the logging information, keyed by parameter name
     */
    private void kangerooGrub(final Map<String,?> params){

        final List<String> paramKeys = new ArrayList<String>(params.keySet());
        Collections.sort(paramKeys);

        final StringBuilder bob = new StringBuilder("<boomerang>");
        for(String key : paramKeys){
            try {
                final Object raw = params.get(key);

                // a null value ends up as the text "null" in the attribute
                final String value = raw instanceof StringDataObject
                        ? ((StringDataObject) raw).getXmlEscaped()
                        : StringEscapeUtils.escapeXml(null == raw ? null : raw.toString());

                final String keyEscaped = StringEscapeUtils.escapeXml(URLDecoder.decode(key, "UTF-8"));

                bob.append("<parameter key=\"").append(keyEscaped)
                        .append("\" value=\"").append(value).append("\"/>");

            }catch (UnsupportedEncodingException ex) {
                LOG.error("Failed to kangerooGrub " + key, ex);
            }catch (IllegalArgumentException ex) {
                // URLDecoder rejects malformed %-escapes, and keys can come straight from the client's url
                LOG.warn("Failed to kangerooGrub " + key, ex);
            }
        }
        bob.append("</boomerang>");
        ACCESS.info(bob.toString());
    }
}