/*
* Copyright 2011 cruxframework.org.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package org.cruxframework.crux.core.server.crawling;
import java.io.IOException;
import java.io.InputStream;
import java.net.URLDecoder;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.cruxframework.crux.core.server.Environment;
import org.cruxframework.crux.core.server.http.GZIPResponseWrapper;
import org.cruxframework.crux.core.utils.StreamUtils;
/**
* Filter to serve search engines, sending static snapshots in the place of DHTML based pages.
 * See <a href="https://developers.google.com/webmasters/ajax-crawling/">Google's AJAX crawling specification</a>.
*
 * The filter also ensures that responses use gzip compression (when the client accepts it) and configures a cache for the snapshots.
*
* @author Thiago da Rosa de Bustamante
*
*/
public final class CrawlingFilter implements Filter
{
	private static final String ACCEPT_ENCODING = "accept-encoding";
	private static final int EXPIRES_DELTA = 86400000; // Cache lifetime in milliseconds (one day)
	private static final Log logger = LogFactory.getLog(CrawlingFilter.class);

	private FilterConfig config;
	// Snapshot fragment used when "_escaped_fragment_" is present but empty.
	private String defaultSnapshot;
	// Optional classpath folder prepended to every snapshot path (no leading "/").
	private String baseFolder;

	/**
	 * Serves a static snapshot when the request carries the
	 * {@code _escaped_fragment_} parameter (crawler request); otherwise stamps
	 * cache headers and delegates to the rest of the chain. Outside production
	 * mode the request is passed through untouched.
	 *
	 * @param req   incoming request (expected to be an {@link HttpServletRequest})
	 * @param res   outgoing response (expected to be an {@link HttpServletResponse})
	 * @param chain remainder of the filter chain
	 * @throws IOException      if writing the response fails
	 * @throws ServletException if the downstream chain fails
	 */
	@Override
	public void doFilter(ServletRequest req, ServletResponse res, FilterChain chain) throws IOException, ServletException
	{
		if (!Environment.isProduction())
		{
			chain.doFilter(req, res);
			return;
		}
		HttpServletRequest request = (HttpServletRequest) req;
		HttpServletResponse response;
		String ae = request.getHeader(ACCEPT_ENCODING);
		boolean gzipped = false;
		if (ae != null && ae.indexOf("gzip") != -1)
		{
			// Client accepts gzip: wrap the response so the body is compressed.
			response = new GZIPResponseWrapper((HttpServletResponse) res);
			gzipped = true;
		}
		else
		{
			response = (HttpServletResponse) res;
		}
		response.setContentType("text/html");
		response.setCharacterEncoding("UTF-8");

		if (serveSnapshot(request, response))
		{
			// NOTE(review): on this path the GZIPResponseWrapper (when gzipped) is
			// not explicitly finished; presumably closing the output stream inside
			// StreamUtils.write flushes it — confirm against GZIPResponseWrapper.
			return;
		}

		// Normal (non-crawler) request: configure client-side caching and proceed.
		long current = System.currentTimeMillis();
		long expires = current + EXPIRES_DELTA;
		response.addDateHeader("Expires", expires);
		response.addDateHeader("Last-Modified", current);
		response.addHeader("Cache-Control", "public, max-age="+(EXPIRES_DELTA/1000));// seconds
		chain.doFilter(request, response);
		if (gzipped)
		{
			((GZIPResponseWrapper)response).finishResponse();
		}
	}

	/**
	 * Attempts to serve a pre-rendered snapshot for a crawler request.
	 *
	 * @param request  current request
	 * @param response response to write the snapshot to
	 * @return {@code true} if a snapshot was written (request fully handled),
	 *         {@code false} if processing should continue down the chain
	 * @throws IOException if the response output stream cannot be obtained
	 */
	private boolean serveSnapshot(HttpServletRequest request, HttpServletResponse response) throws IOException
	{
		String escapedFragmentEncoded = request.getParameter("_escaped_fragment_");
		if (escapedFragmentEncoded == null)
		{
			return false; // Not a crawler request.
		}
		if (logger.isInfoEnabled())
		{
			logger.info("A Snapshot for an application page was requested.");
		}
		if (escapedFragmentEncoded.length() == 0 && defaultSnapshot != null && defaultSnapshot.length() > 0)
		{
			escapedFragmentEncoded = defaultSnapshot;
		}
		String escapedFragment = URLDecoder.decode(escapedFragmentEncoded, "UTF-8");
		String page = getRequestedPage(request);
		String pagePath = CrawlingUtils.getStaticPageFor(page, escapedFragment);
		if (pagePath == null || pagePath.length() == 0)
		{
			logger.error("A snapshot was requested, but it is not possible to realize the target page.");
			return false;
		}
		if (StringUtils.isNotBlank(baseFolder))
		{
			pagePath = baseFolder + "/" + pagePath;
		}
		InputStream in = Thread.currentThread().getContextClassLoader().getResourceAsStream(pagePath);
		if (in == null)
		{
			logger.error("snapshot for requested page ["+pagePath+"] not found.");
			return false;
		}
		try
		{
			StreamUtils.write(in, response.getOutputStream(), true);
			if (logger.isInfoEnabled())
			{
				logger.info("Snapshot for page ["+pagePath+"] was sent.");
			}
			return true;
		}
		catch (IOException e)
		{
			logger.error("Error reading requested page ["+pagePath+"].", e);
			return false;
		}
		finally
		{
			// Ensure the snapshot stream is released even when write() fails
			// before closing it (StreamUtils.write may already have closed it;
			// a second close on an InputStream is harmless).
			try
			{
				in.close();
			}
			catch (IOException ignored)
			{
				// Best effort: nothing useful to do if close fails.
			}
		}
	}

	/**
	 * Resolves the application page targeted by the request, relative to the
	 * web context. Only plain {@code .html} pages are considered; GWT artifacts
	 * ({@code *hosted.html}, {@code *cache.html}) are rejected.
	 *
	 * @param req current request
	 * @return the context-relative page path (no leading {@code /}), or
	 *         {@code null} when the request does not target a servable page
	 */
	protected String getRequestedPage(ServletRequest req)
	{
		HttpServletRequest request = (HttpServletRequest) req;
		String result = request.getPathInfo();
		if (result == null)
		{
			result = request.getRequestURI();
		}
		if (result != null && result.length() > 0)
		{
			if (result.endsWith(".html") && !result.endsWith("hosted.html") && !result.endsWith("cache.html"))
			{
				if (result.startsWith("/"))
				{
					result = result.substring(1);
				}
				String contextPath = config.getServletContext().getContextPath();
				if (contextPath != null && contextPath.startsWith("/"))
				{
					contextPath = contextPath.substring(1);
				}
				if (StringUtils.isNotBlank(contextPath) && result.startsWith(contextPath))
				{
					result = StringUtils.removeStart(result, contextPath);
				}
				if (result.startsWith("/"))
				{
					result = result.substring(1);
				}
			}
			else
			{
				result = null;
			}
		}
		else
		{
			result = null;
		}
		return result;
	}

	/**
	 * Strips {@code prefix} (ignoring a leading {@code /} on the prefix) from
	 * the start of {@code input}, and then any remaining leading {@code /}.
	 *
	 * @param input  string to trim; must not be {@code null}
	 * @param prefix prefix to remove; must not be {@code null}
	 * @return {@code input} without the prefix and without a leading slash
	 */
	protected String removeStringPrefix(String input, String prefix)
	{
		if (prefix.startsWith("/"))
		{
			prefix = prefix.substring(1);
		}
		if (input.startsWith(prefix))
		{
			input = input.substring(prefix.length());
		}
		if (input.startsWith("/"))
		{
			input = input.substring(1);
		}
		return input;
	}

	/**
	 * Reads filter configuration: the default snapshot fragment and the base
	 * classpath folder holding the snapshots.
	 *
	 * @param config filter configuration supplied by the container
	 */
	@Override
	public void init(FilterConfig config) throws ServletException
	{
		this.config = config;
		// Init-param key keeps its historical spelling ("defaultSnaphot") for
		// backward compatibility with existing web.xml deployments.
		this.defaultSnapshot = config.getInitParameter("defaultSnaphot");
		this.baseFolder = config.getInitParameter("baseFolder");
		if (baseFolder != null && baseFolder.startsWith("/"))
		{
			baseFolder = baseFolder.substring(1);
		}
	}

	@Override
	public void destroy()
	{
		// No resources to release.
	}
}