/** * Copyright (c) Codice Foundation * <p> * This is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser * General Public License as published by the Free Software Foundation, either version 3 of the * License, or any later version. * <p> * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. A copy of the GNU Lesser General Public License * is distributed along with this program and can be found at * <http://www.gnu.org/licenses/lgpl.html>. */ package ddf.services.schematron; import java.io.File; import java.io.StringReader; import java.net.URL; import java.nio.file.Paths; import java.util.ArrayList; import java.util.List; import java.util.Optional; import java.util.Set; import java.util.Vector; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.Future; import java.util.concurrent.FutureTask; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import javax.xml.parsers.ParserConfigurationException; import javax.xml.transform.ErrorListener; import javax.xml.transform.Source; import javax.xml.transform.Templates; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerException; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMResult; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.sax.SAXSource; import javax.xml.transform.stream.StreamSource; import org.apache.commons.lang.StringUtils; import org.codice.ddf.platform.util.XMLUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import org.xml.sax.helpers.XMLFilterImpl; import org.xml.sax.helpers.XMLReaderFactory; import com.google.common.collect.ImmutableSet; import ddf.catalog.data.Metacard; import ddf.catalog.util.Describable; import ddf.catalog.validation.MetacardValidator; import ddf.catalog.validation.ReportingMetacardValidator; import ddf.catalog.validation.ValidationException; import ddf.catalog.validation.impl.ValidationExceptionImpl; import ddf.catalog.validation.impl.report.MetacardValidationReportImpl; import ddf.catalog.validation.impl.violation.ValidationViolationImpl; import ddf.catalog.validation.report.MetacardValidationReport; import ddf.catalog.validation.violation.ValidationViolation; import net.sf.saxon.Configuration; import net.sf.saxon.TransformerFactoryImpl; /** * This pre-ingest service provides validation of an ingested XML document against a Schematron * schema file. * <p> * When this service is instantiated at deployment time to the OSGi container it goes through 3 * different preprocessing stages on the Schematron schema file. (These steps are required by the * ISO Schematron implementation) * <ol> * <li>1. Preprocess the Schematron schema with iso_dsdl_include.xsl. This is a macro processor to * assemble the schema from various parts.</li> * <li>2. Preprocess the output from stage 1 with iso_abstract_expand.xsl. This is a macro processor * to convert abstract patterns to real patterns.</li> * <li>3. Compile the Schematron schema into an XSLT script. This will use iso_svrl_for_xslt2.xsl * (which in turn invokes iso_schematron_skeleton_for_saxon.xsl)</li> * </ol> * <p> * When XML documents are ingested, this service will run the XSLT generated by stage 3 against the * XML document, validating it against the "compiled" Schematron schema file. * <p> * This service is using the SVRL script, hence the output of the validation will be an * SVRL-formatted XML document. * * @author rodgersh * @see <a href="http://www.schematron.com">Schematron</a> */ public class SchematronValidationService implements MetacardValidator, Describable, ReportingMetacardValidator { public static final String DEFAULT_THREAD_POOL_SIZE = "16"; private static final String SCHEMATRON_BASE_FOLDER = Paths.get(System.getProperty("ddf.home"), "schematron") .toString(); private static final Logger LOGGER = LoggerFactory.getLogger(SchematronValidationService.class); private TransformerFactory transformerFactory; private Vector<String> warnings; private int priority = 10; private SchematronReport schematronReport; private List<String> schematronFileNames; private boolean suppressWarnings = false; private String namespace; private String id; private ExecutorService pool = getThreadPool(); private List<Future<Templates>> validators = new ArrayList<>(); private static ExecutorService getThreadPool() throws NumberFormatException { Integer threadPoolSize = Integer.parseInt(System.getProperty( "org.codice.ddf.system.threadPoolSize", DEFAULT_THREAD_POOL_SIZE)); return Executors.newFixedThreadPool(threadPoolSize); } /** * Replace tabs, literal carriage returns, and newlines with a single whitespace * * @param input * @return */ static String sanitize(final String input) { return input.replaceAll("[\t \r\n]+", " ") .trim(); } public void init() throws SchematronInitializationException { if (transformerFactory == null) { transformerFactory = TransformerFactory.newInstance(TransformerFactoryImpl.class.getName(), SchematronValidationService.class.getClassLoader()); } // DDF-855: set ErrorListener to catch any warnings/errors during loading of the // ruleset file and log (vs. Saxon default of writing to console) the warnings/errors Configuration config = ((TransformerFactoryImpl) transformerFactory).getConfiguration(); config.setErrorListener(new SaxonErrorListener(schematronFileNames)); updateValidators(); } private void updateValidators() throws SchematronInitializationException { validators.clear(); for (String schematronFileName : schematronFileNames) { FutureTask<Templates> task = new FutureTask<Templates>(() -> { return compileSchematronRules(schematronFileName); }); validators.add(task); pool.submit(task); } } private Templates compileSchematronRules(String schematronFileName) throws SchematronInitializationException { Templates template; File schematronFile = new File(schematronFileName); if (!schematronFile.exists()) { throw new SchematronInitializationException( "Could not locate schematron file " + schematronFileName); } try { URL schUrl = schematronFile.toURI() .toURL(); Source schSource = new StreamSource(schUrl.toString()); // Stage 1: Perform inclusion expansion on Schematron schema file DOMResult stage1Result = performStage(schSource, getClass().getClassLoader() .getResource("iso-schematron/iso_dsdl_include.xsl")); DOMSource stage1Output = new DOMSource(stage1Result.getNode()); // Stage 2: Perform abstract expansion on output file from Stage 1 DOMResult stage2Result = performStage(stage1Output, getClass().getClassLoader() .getResource("iso-schematron/iso_abstract_expand.xsl")); DOMSource stage2Output = new DOMSource(stage2Result.getNode()); // Stage 3: Compile the .sch rules that have been prepocessed by Stages 1 and 2 (i.e., // the output of Stage 2) DOMResult stage3Result = performStage(stage2Output, getClass().getClassLoader() .getResource("iso-schematron/iso_svrl_for_xslt2.xsl")); DOMSource stage3Output = new DOMSource(stage3Result.getNode()); // Setting the system ID let's us resolve relative paths in the schematron files. // We need the URL string so that the string is properly formatted (e.g. space = %20). stage3Output.setSystemId(schUrl.toString()); template = transformerFactory.newTemplates(stage3Output); } catch (Exception e) { throw new SchematronInitializationException( "Error trying to create SchematronValidationService using sch file " + schematronFileName, e); } return template; } private DOMResult performStage(Source input, URL preprocessorUrl) throws TransformerException, ParserConfigurationException, SchematronInitializationException { Source preprocessorSource = new StreamSource(preprocessorUrl.toString()); // Initialize container for warnings we may receive during transformation of input warnings = new Vector<>(); Transformer transformer = transformerFactory.newTransformer(preprocessorSource); // Setup an error listener to catch warnings and errors generated during transformation transformer.setErrorListener(new Listener()); // Transform the input using the preprocessor's transformer, capturing the output in a DOM DOMResult domResult = new DOMResult(); transformer.transform(input, domResult); return domResult; } public void setSuppressWarnings(boolean suppressWarnings) { this.suppressWarnings = suppressWarnings; } public void setSchematronFileNames(List<String> schematronFileNames) throws SchematronInitializationException { this.schematronFileNames = new ArrayList<>(); for (String filename : schematronFileNames) { String fullpath = Paths.get(filename) .toString(); if (!Paths.get(filename) .isAbsolute()) { fullpath = Paths.get(SCHEMATRON_BASE_FOLDER, fullpath) .toString(); } this.schematronFileNames.add(fullpath); } if (transformerFactory != null) { updateValidators(); } } public void setNamespace(String namespace) { this.namespace = namespace; } public void setPriority(int priority) { this.priority = priority; // 1 is the highest priority, 100 the lowest if (this.priority > 100) { this.priority = 100; } else if (this.priority < 1) { this.priority = 1; } } @Override public void validate(Metacard metacard) throws ValidationException { MetacardValidationReport report = generateReport(metacard); List<String> errors = new ArrayList<>(); List<String> warnings = new ArrayList<>(); report.getMetacardValidationViolations() .forEach(violation -> { if (violation.getSeverity() == ValidationViolation.Severity.ERROR) { errors.add(violation.getMessage()); } else { warnings.add(violation.getMessage()); } }); SchematronValidationException exception = new SchematronValidationException( "Schematron validation failed", errors, warnings); if (!errors.isEmpty()) { throw exception; } if (!suppressWarnings && !warnings.isEmpty()) { throw exception; } } private MetacardValidationReport generateReport(Metacard metacard) throws ValidationExceptionImpl { MetacardValidationReportImpl report = new MetacardValidationReportImpl(); Set<String> attributes = ImmutableSet.of("metadata"); String metadata = metacard.getMetadata(); boolean canBeValidated = !(StringUtils.isEmpty(metadata) || (namespace != null && !namespace.equals(XMLUtils.getRootNamespace(metadata)))); if (canBeValidated) { try { for (Future<Templates> validator : validators) { schematronReport = generateReport(metadata, validator.get(10, TimeUnit.MINUTES)); schematronReport.getErrors() .forEach(errorMsg -> report.addMetacardViolation(new ValidationViolationImpl( attributes, sanitize(errorMsg), ValidationViolation.Severity.ERROR))); schematronReport.getWarnings() .forEach(warningMsg -> report.addMetacardViolation(new ValidationViolationImpl( attributes, sanitize(warningMsg), ValidationViolation.Severity.WARNING))); } } catch (TimeoutException | ExecutionException | InterruptedException e) { throw new ValidationExceptionImpl(e); } } return report; } private SchematronReport generateReport(String metadata, Templates validator) throws SchematronValidationException { XMLReader xmlReader = null; try { XMLReader xmlParser = XMLReaderFactory.createXMLReader(); xmlParser.setFeature("http://xml.org/sax/features/external-general-entities", false); xmlParser.setFeature("http://xml.org/sax/features/external-parameter-entities", false); xmlParser.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); xmlReader = new XMLFilterImpl(xmlParser); } catch (SAXException e) { throw new SchematronValidationException(e); } SchematronReport report; try { Transformer transformer = validator.newTransformer(); DOMResult schematronResult = new DOMResult(); transformer.transform(new SAXSource(xmlReader, new InputSource(new StringReader(metadata))), schematronResult); report = new SvrlReport(schematronResult); } catch (TransformerException e) { throw new SchematronValidationException( "Could not setup validator to perform validation.", e); } return report; } @Override public String getVersion() { return null; } @Override public String getId() { return id; } public void setId(String id) { this.id = id; } @Override public String getTitle() { return null; } @Override public String getDescription() { return null; } @Override public String getOrganization() { return null; } @Override public Optional<MetacardValidationReport> validateMetacard(Metacard metacard) { try { return Optional.of(generateReport(metacard)); } catch (ValidationExceptionImpl e) { LOGGER.warn("Exception validating metacard ID {}", metacard.getId(), e); return Optional.empty(); } } /** * The Listener class which catches Saxon configuration errors. * <p> * DDF-855: These warnings and errors are logged so that they are * not displayed on the console. */ private static class SaxonErrorListener implements ErrorListener { private List<String> schematronFileNames; public SaxonErrorListener(List<String> schematronFileNames) { this.schematronFileNames = schematronFileNames; } @Override public void warning(TransformerException e) throws TransformerException { LOGGER.debug("Transformer warning: '{}' on file: {}", e.getMessage(), this.schematronFileNames); LOGGER.debug("Saxon exception", e); } @Override public void error(TransformerException e) throws TransformerException { LOGGER.debug("Transformer warning: '{}' on file: {}", e.getMessage(), this.schematronFileNames); LOGGER.debug("Saxon exception", e); } @Override public void fatalError(TransformerException e) throws TransformerException { LOGGER.info("Transformer error: (Schematron file = {}):", this.schematronFileNames, e); } } /** * The Listener class which catches xsl:messages during the transformation/stages of the * Schematron schema. */ private class Listener implements ErrorListener { public void warning(TransformerException e) throws TransformerException { warnings.add(e.getMessage()); } public void error(TransformerException e) throws TransformerException { throw e; } public void fatalError(TransformerException e) throws TransformerException { throw e; } } }