/* * GeoTools - The Open Source Java GIS Toolkit * http://geotools.org * * (C) 2010-2011, Open Source Geospatial Foundation (OSGeo) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; * version 2.1 of the License. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. */ package org.geotools.xml; import java.io.ByteArrayInputStream; import java.io.IOException; import java.io.InputStream; import java.io.UnsupportedEncodingException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import javax.xml.parsers.SAXParser; import javax.xml.parsers.SAXParserFactory; import org.geotools.xml.resolver.SchemaCatalog; import org.geotools.xml.resolver.SchemaResolver; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.SAXParseException; import org.xml.sax.XMLReader; import org.xml.sax.ext.EntityResolver2; /** * A class to perform XML schema validation against schemas found using an {@link SchemaResolver} * . * * @author Ben Caradoc-Davies (CSIRO Earth Science and Resource Engineering) * * * * @source $URL$ */ public class AppSchemaValidator { /** * Pattern matching a string that starts with an XML declaration with an encoding, with a single * group that contains the encoding. */ private static final Pattern XML_ENCODING_PATTERN = Pattern .compile("<\\?xml.*?encoding=[\"'](.+?)[\"'].*?\\?>.*"); /** * The resolver used to find XML schemas. */ private final SchemaResolver resolver; /** * Failures found during parsing of an XML instance document. */ private final List<String> failures = new ArrayList<String>(); /** * Are validation warnings considered failures? The default is true. */ private boolean failOnWarning = true; /** * Construct an {@link AppSchemaValidator} that performs schema validation against schemas found * on the classpath using the convention described in * {@link SchemaResolver#getSimpleHttpResourcePath(java.net.URI)}. */ private AppSchemaValidator() { this(new SchemaResolver()); } /** * Construct an {@link AppSchemaValidator} that performs schema validation against schemas found * using an {@link SchemaResolver}. * * @param resolver * resolver used to locate XML schemas */ private AppSchemaValidator(SchemaResolver resolver) { this.resolver = resolver; } /** * Construct an {@link AppSchemaValidator} that performs schema validation against schemas found * using an {@link SchemaResolver} with a {@link SchemaCatalog}. * * @param catalog * SchemaCatalog */ private AppSchemaValidator(SchemaCatalog catalog) { this(new SchemaResolver(catalog)); } /** * Return the list of failures found during parsing. */ public List<String> getFailures() { return Collections.unmodifiableList(failures); } /** * Are validation warnings considered failures? */ public boolean isFailOnWarning() { return failOnWarning; } /** * Should validation warnings be considered failures? */ public void setFailOnWarning(boolean failOnWarning) { this.failOnWarning = failOnWarning; } /** * Parse an XML instance document read from an {@link InputStream}, recording any validation * failures failures. * * @param input * stream from which XML instance document is read */ public void parse(InputStream input) { SAXParserFactory parserFactory = SAXParserFactory.newInstance(); parserFactory.setNamespaceAware(true); parserFactory.setValidating(true); XMLReader xmlReader; try { SAXParser parser = parserFactory.newSAXParser(); // Validation is against XML Schema parser.setProperty("http://java.sun.com/xml/jaxp/properties/schemaLanguage", "http://www.w3.org/2001/XMLSchema"); xmlReader = parser.getXMLReader(); } catch (Exception e) { throw new RuntimeException(e); } xmlReader.setEntityResolver(new AppSchemaEntityResolver()); // We principally care about the failures themselves, but it is also possible to install a // ContentHandler to output annotated XML that identifies the precise location of failures. // That can be done with a serializer that implements both ContentHandler and ErrorHandler. // It should be installed here (and used for the error handler): // parser.setContentHandler(contentHandler); xmlReader.setErrorHandler(new AppSchemaValidatorErrorHandler()); try { xmlReader.parse(new InputSource(input)); } catch (RuntimeException e) { // Avoid gratuitous exception chaining. // Resolver failures pass through this block. throw e; } catch (Exception e) { throw new RuntimeException(e); } } /** * Throw a {@link RuntimeException} if the validator has found any failures. The exception * detail contains the failure messages. */ public void checkForFailures() { if (failures.size() > 0) { throw new RuntimeException(buildFailureMessage()); } } /** * Build an exception detail message that contains all the validation failure messages. */ private String buildFailureMessage() { String newline = System.getProperty("line.separator"); StringBuilder builder = new StringBuilder(); builder.append("Schema validation failures: " + failures.size()); for (String failure : failures) { builder.append(newline); builder.append(failure); } return builder.toString(); } /** * Construct an {@link AppSchemaValidator} that performs schema validation against schemas found * on the classpath using the convention described in * {@link SchemaResolver#getSimpleHttpResourcePath(java.net.URI)}. */ public static AppSchemaValidator buildValidator() { return new AppSchemaValidator(); } /** * Construct an {@link AppSchemaValidator} that performs schema validation against schemas found * using an {@link SchemaResolver}. * * @param resolver * the resolver used to find schemas */ public static AppSchemaValidator buildValidator(SchemaResolver resolver) { return new AppSchemaValidator(resolver); } /** * Construct an {@link AppSchemaValidator} that performs schema validation against schemas found * using an {@link SchemaResolver} with a {@link SchemaCatalog}. * * @param catalog * SchemaCatalog */ public static AppSchemaValidator buildValidator(SchemaCatalog catalog) { return new AppSchemaValidator(catalog); } /** * * Perform schema validation of an XML instance document read from a classpath resource against * schemas found on the classpath using the convention described in * {@link SchemaResolver#getSimpleHttpResourcePath(java.net.URI)}. * * <p> * * If validation fails, a {@link RuntimeException} is thrown containing details of all failures. * * @param name * resource name of XML instance document * @param catalog * SchemaCatalog to aide local schema resolution or null */ public static void validateResource(String name, SchemaCatalog catalog) { InputStream input = null; try { input = AppSchemaValidator.class.getResourceAsStream(name); validate(input, catalog); } finally { if (input != null) { try { input.close(); } catch (IOException e) { // we tried } } } } /** * * Perform schema validation of an XML instance document in a string against schemas found on * the classpath using the convention described in * {@link SchemaResolver#getSimpleHttpResourcePath(java.net.URI)}. * * <p> * * If validation fails, a {@link RuntimeException} is thrown containing details of all failures. * * @param xml * string containing XML instance document * @param catalog * SchemaCatalog to aide local schema resolution or null */ public static void validate(String xml, SchemaCatalog catalog) { byte[] bytes = null; String encoding = getEncoding(xml); if (encoding != null) { try { bytes = xml.getBytes(encoding); } catch (UnsupportedEncodingException e) { // ignore, handled below } } if (bytes == null) { // no encoding in declaration or unsupported encoding // fall back to platform default bytes = xml.getBytes(); } InputStream input = null; try { input = new ByteArrayInputStream(bytes); validate(input, catalog); } finally { if (input != null) { try { input.close(); } catch (IOException e) { // we tried } } } } /** * Return the encoding from the XML declaration in an XML document, if present, or null if not * found. * * @param xml * string containing an XML document * @return declared encoding or null if not present */ static String getEncoding(String xml) { Matcher m = XML_ENCODING_PATTERN.matcher(xml); if (m.matches()) { return m.group(1); } else { return null; } } /** * * Perform schema validation of an XML instance document read from an input stream against * schemas found on the classpath using the convention described in * {@link SchemaResolver#getSimpleHttpResourcePath(java.net.URI)}. * * <p> * * If validation fails, a {@link RuntimeException} is thrown containing details of all failures. * * @param input * stream providing XML instance document * @param catalog * SchemaCatalog file to aide local schema resolution or null */ public static void validate(InputStream input, SchemaCatalog catalog) { AppSchemaValidator validator = buildValidator(catalog); validator.parse(input); validator.checkForFailures(); } /** * An {@link EntityResolver2} that uses the enclosing instance's {@link SchemaResolver} to look up XML entities (that is, XML schemas). */ private class AppSchemaEntityResolver implements EntityResolver2 { /** * Always throws {@link UnsupportedOperationException}. The {@link EntityResolver2} interface must be used so that relative URLs are resolved * correctly. If this method is called, it means that the parser is probably misconfigured. * * @see org.xml.sax.EntityResolver#resolveEntity(java.lang.String, java.lang.String) */ @Override public InputSource resolveEntity(String publicId, String systemId) throws SAXException, IOException { throw new UnsupportedOperationException( "Misconfigured parser: EntityResolver2 interface must be used " + "so that relative URLs are resolved correctly"); }; /** * Always returns null to indicate that there is no external subset. * * @see org.xml.sax.ext.EntityResolver2#getExternalSubset(java.lang.String, java.lang.String) */ @Override public InputSource getExternalSubset(String name, String baseURI) { return null; } /** * Return an {@link InputSource} for the resolved schema location. Note that the {@link EntityResolver2} interface must be used because * baseURI is needed to resolve relative URIs. The resolver uses baseURI to find the original unresolved context (which it has stored); this * is then used to construct the unresolved URI of the schema. In the case of downloaded schemas, the original URI is used to download the * schema into the cache; the resolved URI is the location of the cached schema. * * @see org.xml.sax.ext.EntityResolver2#resolveEntity(java.lang.String, java.lang.String, java.lang.String, java.lang.String) */ @Override public InputSource resolveEntity(String name, String publicId, String baseURI, String systemId) throws SAXException, IOException { return new InputSource(resolver.resolve(systemId, baseURI)); } } /** * An {@link ErrorHandler} that appends validation failure messages to the failure list in the * enclosing instance. */ private class AppSchemaValidatorErrorHandler implements ErrorHandler { /** * @see org.xml.sax.ErrorHandler#error(org.xml.sax.SAXParseException) */ public void error(SAXParseException exception) throws SAXException { failures.add("ERROR: " + exception.getMessage()); } /** * @see org.xml.sax.ErrorHandler#fatalError(org.xml.sax.SAXParseException) */ public void fatalError(SAXParseException exception) throws SAXException { failures.add("FATAL ERROR: " + exception.getMessage()); } /** * @see org.xml.sax.ErrorHandler#warning(org.xml.sax.SAXParseException) */ public void warning(SAXParseException exception) throws SAXException { if (failOnWarning) { failures.add("WARNING: " + exception.getMessage()); } } } }