/* * Copyright (c) 2017 Mozilla Foundation * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ package nu.validator.client; import java.io.BufferedOutputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.InputStream; import java.nio.charset.StandardCharsets; import java.nio.file.Files; import java.nio.file.Path; import java.util.concurrent.atomic.AtomicBoolean; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import nu.validator.htmlparser.sax.XmlSerializer; import nu.validator.json.Serializer; import nu.validator.messages.GnuMessageEmitter; import nu.validator.messages.JsonMessageEmitter; import nu.validator.messages.MessageEmitter; import nu.validator.messages.MessageEmitterAdapter; import nu.validator.messages.TextMessageEmitter; import nu.validator.messages.XmlMessageEmitter; import nu.validator.servlet.imagereview.ImageCollector; import nu.validator.validation.SimpleDocumentValidator; import nu.validator.xml.SystemErrErrorHandler; /** * Nu validator client for HTML validation from within another * application. */ public class EmbeddedValidator { public static final String SCHEMA_URL = "http://s.validator.nu/html5-rdfalite.rnc"; public static enum OutputFormat { TEXT, XML, JSON, GNU } private boolean asciiQuotes = false; private boolean detectLanguages = false; private boolean forceHTML = false; private int lineOffset = 0; private boolean loadEntities = false; private boolean noStream = false; private OutputFormat outputFormat = OutputFormat.JSON; private String schemaUrl = SCHEMA_URL; /** * Validate the file at the given path * * @param path * a valid {@link Path} to a readable file * @return validation output {@link String} * @throws IllegalStateException * @throws IOException * @throws SAXException */ public String validate(Path path) throws IOException, SAXException { try (OneOffValidator validator = new OneOffValidator(asciiQuotes, detectLanguages, forceHTML, lineOffset, loadEntities, noStream, outputFormat, schemaUrl)) { return validator.validate(path); } } /** * Validate the input source * * @param in * a valid {@link InputStream} to a readable file * @return validation output {@link String} * @throws IllegalStateException * @throws IOException * @throws SAXException */ public String validate(InputStream in) throws IOException, SAXException { try (OneOffValidator validator = new OneOffValidator(asciiQuotes, detectLanguages, forceHTML, lineOffset, loadEntities, noStream, outputFormat, schemaUrl)) { return validator.validate(in); } } public OutputFormat getOutputFormat() { return outputFormat; } /** * Set the output format for the validation * * @param outputFormat * {@link OutputFormat}, not {@code null} * @throws IllegalArgumentException * if argument is {@code null} */ public void setOutputFormat(OutputFormat outputFormat) { if (outputFormat == null) { throw new IllegalArgumentException("outputFormat can not be null"); } this.outputFormat = outputFormat; } public boolean isLoadEntities() { return loadEntities; } /** * @param loadEntities * {@code true} to have XML parser load remote DTDs, etc */ public void setLoadEntities(boolean loadEntities) { this.loadEntities = loadEntities; } public boolean isNoLangDetect() { return detectLanguages; } /** * @param detectLanguages * {@code true} to enable language detection, {@code false} to * disable language detection */ public void setNoLangDetect(boolean noLangDetect) { this.detectLanguages = noLangDetect; } public boolean isNoStream() { return noStream; } /** * @param noStream * if {@code true}, HTML parser will buffer instead of streaming */ public void setNoStream(boolean noStream) { this.noStream = noStream; } public boolean isForceHTML() { return forceHTML; } /** * @param forceHTML * if {@code true}, input will be validated as HTML regardless of * its actual document type */ public void setForceHTML(boolean forceHTML) { this.forceHTML = forceHTML; } public boolean isAsciiQuotes() { return asciiQuotes; } /** * @param asciiQuotes * {@code true} if curly quotes ({@code '\u2018'} & * {@code '\u2019'}) in emitted warnings and errors should be * replaced with ascii quotes */ public void setAsciiQuotes(boolean asciiQuotes) { this.asciiQuotes = asciiQuotes; } public int getLineOffset() { return lineOffset; } /** * @param lineOffset * offset to add or subtract from the line number in emitted * warnings and errors */ public void setLineOffset(int lineOffset) { this.lineOffset = lineOffset; } public String getSchemaUrl() { return schemaUrl; } /** * Default value is {@value #SCHEMA_URL} * * @param schemaUrl * url to the required schema */ public void setSchemaUrl(String schemaUrl) { if (schemaUrl != null && !schemaUrl.startsWith("http:")) { throw new IllegalArgumentException("schemaUrl should be a URL"); } this.schemaUrl = schemaUrl; } /** * Self-contained, single use class for encapsulated building of an embedded * validator. */ private class OneOffValidator implements AutoCloseable { private static final String MSG_SUCCESS = "Document checking completed. No errors found."; private static final String MSG_FAIL = "Document checking completed."; private static final String EXTENSION_ERROR = "File was not checked. Files must have .html, .xhtml, .htm, or .xht extensions."; private final AtomicBoolean used = new AtomicBoolean(false); private final ByteArrayOutputStream out = new ByteArrayOutputStream(); private final BufferedOutputStream bufOut = new BufferedOutputStream(out); private final SimpleDocumentValidator validator; private final MessageEmitterAdapter errorHandler; private final boolean forceHtml; private OneOffValidator(boolean asciiQuotes, boolean detectLanguages, boolean forceHtml, int lineOffset, boolean loadEntities, boolean noStream, OutputFormat outputFormat, String schemaUrl) throws SAXException { this.validator = new SimpleDocumentValidator(true, false, !detectLanguages); this.errorHandler = newErrorHandler(lineOffset, asciiQuotes, outputFormat); this.forceHtml = forceHtml; try { this.validator.setUpMainSchema(schemaUrl == null ? SCHEMA_URL : schemaUrl, new SystemErrErrorHandler()); } catch (Exception e) { throw new IllegalStateException(e); } this.validator.setUpValidatorAndParsers(errorHandler, noStream, loadEntities); } public MessageEmitterAdapter newErrorHandler(int lineOffset, boolean asciiQuotes, OutputFormat outputFormat) throws SAXException { boolean showSource = true; boolean batchMode = true; MessageEmitterAdapter adapter = new MessageEmitterAdapter(null, this.validator.getSourceCode(), showSource, new ImageCollector(this.validator.getSourceCode()), lineOffset, batchMode, newEmitter(asciiQuotes, outputFormat)); adapter.setErrorsOnly(false); adapter.setHtml(true); adapter.start(null); return adapter; } private MessageEmitter newEmitter(boolean asciiQuotes, OutputFormat outputFormat) { switch (outputFormat) { case TEXT: return new TextMessageEmitter(this.out, asciiQuotes); case GNU: return new GnuMessageEmitter(this.out, asciiQuotes); case JSON: return new JsonMessageEmitter(new Serializer(this.out), null); case XML: return new XmlMessageEmitter(new XmlSerializer(this.out)); default: throw new UnsupportedOperationException("OutputFormat " + outputFormat + " not supported"); } } private String validate(Path path) throws IOException, SAXException { if (!used.compareAndSet(false, true)) { throw new IllegalStateException("OneOffValidator instances are not reusable"); } try { if (Files.notExists(path) || !Files.isReadable(path)) { errorHandler.warning(new SAXParseException( "File not found.", null, path.toString(), -1, -1)); } else if (isXhtml(path.toFile())) { if (forceHtml) { validator.checkHtmlFile(path.toFile(), true); } else { validator.checkXmlFile(path.toFile()); } } else if (isHtml(path.toFile())) { validator.checkHtmlFile(path.toFile(), true); } else { errorHandler.warning(new SAXParseException(EXTENSION_ERROR, null, path.toString(), -1, -1)); } } catch (SAXException e) { errorHandler.warning(new SAXParseException(e.getMessage(), null, path.toString(), -1, -1)); } errorHandler.end(MSG_SUCCESS, MSG_FAIL, ""); return new String(out.toByteArray(), StandardCharsets.UTF_8); } private boolean isXhtml(File file) { String name = file.getName(); return name.endsWith(".xhtml") || name.endsWith(".xht"); } private boolean isHtml(File file) { String name = file.getName(); return name.endsWith(".html") || name.endsWith(".htm"); } private String validate(InputStream in) throws IOException, SAXException { if (!used.compareAndSet(false, true)) { throw new IllegalStateException("OneOffValidator instances are not reusable"); } validator.checkHtmlInputSource(new InputSource(in)); errorHandler.end(MSG_SUCCESS, MSG_FAIL, ""); return new String(out.toByteArray(), StandardCharsets.UTF_8); } @Override public void close() { try { bufOut.close(); } catch (IOException e) { } try { out.close(); } catch (IOException e) { } } } }