/* vim: set ts=2 et sw=2 cindent fo=qroca: */
package com.globant.katari.core.web;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.PrintWriter;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import javax.servlet.Filter;
import javax.servlet.FilterChain;
import javax.servlet.FilterConfig;
import javax.servlet.ServletException;
import javax.servlet.ServletRequest;
import javax.servlet.ServletResponse;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.LineIterator;
import org.apache.commons.lang.StringEscapeUtils;
import org.apache.commons.lang.Validate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.tidy.Tidy;
import org.w3c.tidy.TidyMessage;
import org.w3c.tidy.TidyMessageListener;
import org.w3c.tidy.Report;
/** Filter that passes all generated html through jtidy, an html validator.
*/
public class HtmlValidationFilter implements Filter {
/** The class logger.
 *
 * It is never reassigned, so it is declared final.
 */
private static final Logger log =
    LoggerFactory.getLogger(HtmlValidationFilter.class);

/** Defines if the filter performs validation or not.
 *
 * The filter is enabled by default.
 */
private boolean enabled = true;

/** The list of url patterns to ignore.
 *
 * Each entry is a regular expression that is matched against the whole
 * request uri; a request whose uri matches any of them is not validated.
 * Some frameworks or custom pages can conflict with validation. By default
 * it is an empty list. It is never null.
 */
private List<String> ignoredUrlpatterns = Collections.emptyList();

/** The list of element attribute patterns to ignore.
 *
 * Each entry is a regular expression that is matched against the quoted
 * attribute name in jtidy's 'unknown attribute' messages. Some frameworks or
 * custom pages may need to introduce non valid markup (for example, data-
 * attributes). By default it is an empty list. It is never null.
 */
private List<String> ignoredAttributePatterns = Collections.emptyList();
/** Response wrapper that keeps the generated output in memory, so it can be
 * inspected before deciding what to send to the client.
 */
private static class ResponseWrapper extends ServletOutputInterceptor {

  /** In-memory buffer handed to the interceptor as its output stream.
   *
   * It is never null.
   */
  private ByteArrayOutputStream output = new ByteArrayOutputStream();

  /** Wraps the given response.
   *
   * @param response the response to wrap.
   */
  public ResponseWrapper(final HttpServletResponse response) {
    super(response, false);
  }

  /** Gives a copy of the bytes captured so far.
   *
   * @return the captured response bytes, never null.
   */
  public byte[] toByteArray() {
    return output.toByteArray();
  }

  /** Renders the captured output so that it can be embedded in an html page.
   *
   * Every line is html-escaped, prefixed with its 1-based line number and a
   * colon, and followed by a br tag. The bytes are decoded with the
   * character encoding of the response.
   *
   * @return the formatted content, never null.
   */
  public String getFormattedContent() {
    StringBuilder html = new StringBuilder(output.size());

    LineIterator lineIterator;
    try {
      lineIterator = IOUtils.lineIterator(
          new ByteArrayInputStream(output.toByteArray()),
          getCharacterEncoding());
    } catch (IOException e) {
      throw new RuntimeException("Error reading output", e);
    }

    try {
      for (int number = 1; lineIterator.hasNext(); number++) {
        String escaped = StringEscapeUtils.escapeHtml(lineIterator.nextLine());
        html.append(number).append(": ").append(escaped).append("<br>");
      }
    } finally {
      LineIterator.closeQuietly(lineIterator);
    }
    return html.toString();
  }

  /** {@inheritDoc}
   *
   * Hands out the in-memory buffer of this wrapper.
   */
  protected OutputStream createOutputStream() {
    return output;
  }
}
/** Collects the messages that jtidy reports while parsing a document,
 * dropping 'unknown attribute' messages for attributes that the filter was
 * configured to ignore.
 */
private static final class ErrorListener implements TidyMessageListener {

  /** The list of messages received from the validator that were not skipped.
   *
   * It is never null.
   */
  private final List<TidyMessage> errors = new LinkedList<TidyMessage>();

  /** A list of attribute regular expressions to ignore.
   *
   * It is never null.
   */
  private final List<String> ignoredAttributePatterns;

  /** Builds an ErrorListener.
   *
   * An 'unknown attribute' message whose quoted attribute name matches one
   * of the given regular expressions is not recorded as an error.
   *
   * @param theIgnoredAttributepatterns a list of regular expressions to
   * match against the attributes to ignore. It cannot be null.
   */
  private ErrorListener(final List<String> theIgnoredAttributepatterns) {
    Validate.notNull(theIgnoredAttributepatterns,
        "The ignored attribute patterns cannot be null.");
    ignoredAttributePatterns = theIgnoredAttributepatterns;
  }

  /** Called by tidy when a warning or error occurs.
   *
   * It skips 'unknown attribute' messages whose text contains, in double
   * quotes, something that matches one of the ignoredAttributePatterns. Every
   * other message is recorded.
   *
   * @param message The error/warning message. It cannot be null.
   */
  public void messageReceived(final TidyMessage message) {
    log.trace("Entering messageReceived()");
    Validate.notNull(message, "The message cannot be null.");

    // Only unknown attribute messages can be skipped, so the patterns are not
    // even evaluated for any other kind of message.
    if (message.getErrorCode() == Report.UNKNOWN_ATTRIBUTE) {
      String text = message.getMessage();
      for (String pattern : ignoredAttributePatterns) {
        log.debug("Checking if attribute in {} matches {}.", text, pattern);
        // The attribute name appears between double quotes in the message.
        if (text.matches(".*\"" + pattern + "\".*")) {
          // Just skip the tidy error.
          log.trace("Leaving messageReceived() - skipped attribute");
          return;
        }
      }
    }

    errors.add(message);
    log.trace("Leaving messageReceived()");
  }

  /** Indicates if the filter found validation errors.
   *
   * @return true if there were validation errors.
   */
  public boolean hasErrors() {
    return !errors.isEmpty();
  }

  /** Formats all the errors received into an html fragment.
   *
   * Each recorded message produces one entry, terminated by a br tag and a
   * carriage return, of the form:
   *
   * line [line] column [column] - [level]([error code]): [message]
   *
   * where the message text is html-escaped.
   *
   * It must only be called if there were errors, ie, hasErrors returns
   * true.
   *
   * @return The errors as a string, never returns null.
   *
   * @throws IllegalArgumentException if no error was recorded.
   */
  public String getErrorMessage() {
    Validate.notEmpty(errors);
    StringBuilder formatted = new StringBuilder();
    for (TidyMessage message : errors) {
      formatted.append("line ").append(message.getLine());
      formatted.append(" column ").append(message.getColumn());
      formatted.append(" - ").append(message.getLevel());
      formatted.append("(").append(message.getErrorCode());
      formatted.append("): ");
      formatted.append(StringEscapeUtils.escapeHtml(message.getMessage()));
      formatted.append("<br>\r");
    }
    return formatted.toString();
  }
}
/** {@inheritDoc}
 *
 * This filter reads nothing from the filter configuration: the call is only
 * traced in the log.
 *
 * @param filterConfig the filter configuration, not used.
 */
public void init(final FilterConfig filterConfig) throws ServletException {
log.trace("Entering init");
// Do nothing.
log.trace("Leaving init");
}
/** {@inheritDoc}
 *
 * Captures the response generated by the rest of the chain and, if its
 * content type starts with text/html, validates it with jtidy. When jtidy
 * reports problems, the client receives a page with status 500 that lists
 * them together with the generated html. Otherwise the captured response is
 * sent to the client as is.
 *
 * The request goes through the chain untouched if the filter is disabled or
 * if the request uri matches one of the ignored url patterns.
 *
 * @throws ServletException if the request or the response are not http
 * ones.
 */
public void doFilter(final ServletRequest request, final ServletResponse
    response, final FilterChain chain) throws IOException,
    ServletException {
  log.trace("Entering doFilter.");

  if (!(response instanceof HttpServletResponse)
      || !(request instanceof HttpServletRequest)) {
    throw new ServletException(
        "This filter can only be applied to http requests.");
  }
  HttpServletResponse httpResponse = (HttpServletResponse) response;
  String requestUri = ((HttpServletRequest) request).getRequestURI();

  // Stops at the first pattern that matches the whole request uri.
  boolean ignored = false;
  Iterator<String> iterator = ignoredUrlpatterns.iterator();
  while (iterator.hasNext() && !ignored) {
    String regexp = iterator.next();
    log.debug("Checking if {} matches {}.", requestUri, regexp);
    ignored = requestUri.matches(regexp);
  }

  if (enabled && !ignored) {
    log.debug("Checking {} for validation errors.", requestUri);
    ResponseWrapper wrapper = new ResponseWrapper(httpResponse);
    chain.doFilter(request, wrapper);
    wrapper.flushBuffer();

    // Copy the captured bytes only once; they are used both for validation
    // and for the final output.
    byte[] content = wrapper.toByteArray();

    String contentType = httpResponse.getContentType();
    if (contentType != null && contentType.startsWith("text/html")) {
      Tidy tidy = new Tidy();
      tidy.setQuiet(true);
      // Set the error output and ignore it, the messages are obtained
      // through the listener.
      tidy.setErrout(new PrintWriter(new ByteArrayOutputStream()));
      ErrorListener errors = new ErrorListener(ignoredAttributePatterns);
      tidy.setMessageListener(errors);
      // We ignore the output, only the reported messages matter.
      tidy.parse(new ByteArrayInputStream(content),
          new ByteArrayOutputStream());

      if (errors.hasErrors()) {
        // jtidy found an error. Log it with the page to make it easier to
        // trace.
        //
        // The request uri comes from the client, so it is escaped before
        // being written to the html page.
        //
        // TODO This is using a non localized string conversion.
        String message = "There where validation errors for "
            + StringEscapeUtils.escapeHtml(requestUri) + ":<br>\r"
            + errors.getErrorMessage() + "<br>\r"
            + "The html output was:<br>\r<pre>"
            + wrapper.getFormattedContent()
            + "</pre>";
        log.debug(message);
        httpResponse.setStatus(HttpServletResponse.SC_INTERNAL_SERVER_ERROR);
        PrintWriter out = httpResponse.getWriter();
        out.print("<html><head><title>Validation error</title></head>");
        out.print("<body style='font-family: monospace;'>");
        out.print(message);
        out.print("</body></html>");
      } else {
        // No error, send the response to the client.
        log.debug("No errors found.");
        response.getOutputStream().write(content);
      }
    } else {
      // Unknown content type, send the response to the client.
      log.debug("Skipping validation because it is not text/html");
      response.getOutputStream().write(content);
    }
  } else {
    chain.doFilter(request, response);
  }
  log.trace("Leaving doFilter.");
}
/** Called by the container when the filter is about to be destroyed.
 *
 * This filter has nothing to release here: the call is only traced in the
 * log.
 */
public void destroy() {
log.trace("Entering destroy");
// Do nothing.
log.trace("Leaving destroy");
}
/** Turns the html validation on or off.
 *
 * @param isEnabled true to validate the generated html, false to let every
 * response through without validation.
 */
public void setEnabled(final boolean isEnabled) {
  this.enabled = isEnabled;
}
/** Tells whether this filter validates the generated html.
 *
 * @return true if html validation is enabled, false otherwise.
 */
public boolean isEnabled() {
  return this.enabled;
}
/** Sets the patterns of the urls that must not be validated.
 *
 * The list is stored as received, it is not copied.
 *
 * @param theIgnoredUrlpatterns the list of url patterns, it cannot be null.
 */
public void setIgnoredUrlpatterns(final List<String> theIgnoredUrlpatterns) {
  Validate.notNull(theIgnoredUrlpatterns, "The pattern list cannot be null.");
  this.ignoredUrlpatterns = theIgnoredUrlpatterns;
}
/** Sets the patterns of the attributes that must not be reported as
 * validation errors.
 *
 * The list is stored as received, it is not copied.
 *
 * @param patterns the list of attribute patterns, it cannot be null.
 */
public void setIgnoredAttributePatterns(final List<String> patterns) {
  Validate.notNull(patterns, "The pattern list cannot be null.");
  this.ignoredAttributePatterns = patterns;
}
}