/*
* Copyright (c) 2007-2015 Mozilla Foundation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
package nu.validator.servlet;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.StringReader;
import java.io.Writer;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import nu.validator.gnu.xml.aelfred2.SAXDriver;
import nu.validator.htmlparser.common.Heuristics;
import nu.validator.htmlparser.common.XmlViolationPolicy;
import nu.validator.io.BoundedInputStream;
import nu.validator.io.StreamBoundException;
import nu.validator.xml.ContentTypeParser;
import nu.validator.xml.NullEntityResolver;
import nu.validator.xml.PrudentHttpEntityResolver;
import nu.validator.xml.TypedInputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import io.mola.galimatias.URL;
import io.mola.galimatias.GalimatiasParseException;
public class ParseTreePrinter {
private static final String FORM_HTML = "<!DOCTYPE html><title>Parse Tree Dump</title><form><p><input type='url' name='doc' id='doc' pattern='(?:https?://.+)?'> <input name='submit' value='Print Tree' type='submit' id='submit'></form><hr><form><p><select id=parser name=parser><option value=xml>XML; don\u2019t load external entities</option><option value=html5 selected>HTML5</option></select><p><textarea name=content rows=20 cols=72></textarea> <input name='submit' value='Print Tree' type='submit' id='submit'></form>";
private static final long SIZE_LIMIT = Integer.parseInt(System.getProperty(
"nu.validator.servlet.max-file-size", "2097152"));
private final HttpServletRequest request;
private final HttpServletResponse response;
/**
* @param request
* @param response
*/
public ParseTreePrinter(final HttpServletRequest request,
final HttpServletResponse response) {
this.request = request;
this.response = response;
}
private String scrubUrl(String urlStr) {
if (urlStr == null) {
return null;
}
try {
return URL.parse(urlStr).toString();
} catch (GalimatiasParseException e) {
return null;
}
}
public void service() throws IOException {
request.setCharacterEncoding("utf-8");
String content = null;
String document = scrubUrl(request.getParameter("doc"));
document = ("".equals(document)) ? null : document;
try (Writer writer = new OutputStreamWriter(response.getOutputStream(), "UTF-8")) {
if (document == null && methodIsGet() && (content = request.getParameter("content")) == null) {
response.setContentType("text/html; charset=utf-8");
writer.write(FORM_HTML);
writer.flush();
return;
}
response.setContentType("text/plain; charset=utf-8");
try {
PrudentHttpEntityResolver entityResolver = new PrudentHttpEntityResolver(
2048 * 1024, false, null);
entityResolver.setAllowGenericXml(false);
entityResolver.setAcceptAllKnownXmlTypes(false);
entityResolver.setAllowHtml(true);
entityResolver.setAllowXhtml(true);
TypedInputSource documentInput;
if (methodIsGet()) {
if (content == null) {
documentInput = (TypedInputSource) entityResolver.resolveEntity(
null, document);
} else {
documentInput = new TypedInputSource(new StringReader(content));
if ("xml".equals(request.getParameter("parser"))) {
documentInput.setType("application/xhtml+xml");
} else {
documentInput.setType("text/html");
}
}
} else { // POST
String postContentType = request.getContentType();
if (postContentType == null) {
response.sendError(HttpServletResponse.SC_BAD_REQUEST,
"Content-Type missing");
return;
} else if (postContentType.trim().toLowerCase().startsWith(
"application/x-www-form-urlencoded")) {
response.sendError(
HttpServletResponse.SC_UNSUPPORTED_MEDIA_TYPE,
"application/x-www-form-urlencoded not supported. Please use multipart/form-data.");
return;
}
long len = request.getContentLength();
if (len > SIZE_LIMIT) {
throw new StreamBoundException("Resource size exceeds limit.");
}
ContentTypeParser contentTypeParser = new ContentTypeParser(null, false);
contentTypeParser.setAllowGenericXml(false);
contentTypeParser.setAcceptAllKnownXmlTypes(false);
contentTypeParser.setAllowHtml(true);
contentTypeParser.setAllowXhtml(true);
documentInput = contentTypeParser.buildTypedInputSource(document,
null, postContentType);
documentInput.setByteStream(len < 0 ? new BoundedInputStream(
request.getInputStream(), SIZE_LIMIT, document)
: request.getInputStream());
documentInput.setSystemId(request.getHeader("Content-Location"));
}
String type = documentInput.getType();
XMLReader parser;
if ("text/html".equals(type) || "text/html-sandboxed".equals(type)) {
writer.write("HTML parser\n\n#document\n");
parser = new nu.validator.htmlparser.sax.HtmlParser();
parser.setProperty("http://validator.nu/properties/heuristics", Heuristics.ALL);
parser.setProperty("http://validator.nu/properties/xml-policy", XmlViolationPolicy.ALLOW);
} else if ("application/xhtml+xml".equals(type)) {
writer.write("XML parser\n\n#document\n");
parser = new SAXDriver();
parser.setFeature(
"http://xml.org/sax/features/external-general-entities",
false);
parser.setFeature(
"http://xml.org/sax/features/external-parameter-entities",
false);
parser.setEntityResolver(new NullEntityResolver());
} else {
writer.write("Unsupported content type.\n");
writer.flush();
return;
}
TreeDumpContentHandler treeDumpContentHandler = new TreeDumpContentHandler(writer, false);
ListErrorHandler listErrorHandler = new ListErrorHandler();
parser.setContentHandler(treeDumpContentHandler);
parser.setProperty("http://xml.org/sax/properties/lexical-handler", treeDumpContentHandler);
parser.setErrorHandler(listErrorHandler);
parser.parse(documentInput);
writer.write("#errors\n");
for (String err : listErrorHandler.getErrors()) {
writer.write(err);
writer.write('\n');
}
} catch (SAXException e) {
writer.write("SAXException:\n");
writer.write(e.getMessage());
writer.write("\n");
} catch (IOException e) {
writer.write("IOException:\n");
writer.write(e.getMessage());
writer.write("\n");
} finally {
writer.flush();
}
}
}
private boolean methodIsGet() {
return "GET".equals(request.getMethod())
|| "HEAD".equals(request.getMethod());
}
}