/** * ************************************************************************** * * Contributor(s): * C. Heazel (WiSC): Added Fortify adjudication changes * *************************************************************************** */ package com.occamlab.te.parsers; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStream; import java.io.PipedInputStream; import java.io.PipedOutputStream; import java.io.PrintWriter; import java.net.URL; import java.net.URLConnection; import java.net.URLDecoder; import java.util.logging.Level; import java.util.logging.Logger; import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.transform.Transformer; import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; import javax.xml.XMLConstants; // Addition for Fortify modifications import org.w3c.dom.Document; import org.w3c.dom.Element; import org.w3c.dom.NodeList; import org.xml.sax.ErrorHandler; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import com.occamlab.te.ErrorHandlerImpl; import com.thaiopensource.util.PropertyMap; import com.thaiopensource.util.PropertyMapBuilder; import com.thaiopensource.validate.SchemaReader; import com.thaiopensource.validate.SchemaReaderLoader; import com.thaiopensource.validate.ValidateProperty; import com.thaiopensource.validate.ValidationDriver; import com.thaiopensource.validate.prop.schematron.SchematronProperty; /** * Validates the given XML resource against the rules specified in a Schematron * (v1.5) file. Used in conjunction with standard XML Schema validator to * provide more thorough validation coverage. * * Diagnostic messages will be included if any are defined. * */ public class SchematronValidatingParser { private static final Logger LOGR = Logger .getLogger(SchematronValidatingParser.class.getName()); private PropertyMapBuilder configPropBuilder = null; private String schemaLocation = null; private File schemaFile = null; private String phase = null; private String type = null; private PrintWriter outputLogger = null; /** Namespace URI for the Schematron assertion language (v 1.5). */ public static final String SCHEMATRON_NS_URI = "http://www.ascc.net/xml/schematron"; /** Default constructor required for init */ public SchematronValidatingParser() { } /** Overloaded constructor required for init */ public SchematronValidatingParser(Document schema_link) throws Exception { getFileType(schema_link.getDocumentElement()); } /** * Parses the parser element to get the schematron file location and type of * resource (from ctl file). * * @param schema_links * Gets the location of the schema (and type of resource) and * saves to global parameter * @return The type of resource (URL, File, Resource) */ public String getFileType(Element schema_links) throws Exception { Document d = schema_links.getOwnerDocument(); NodeList nodes = d.getElementsByTagNameNS( "http://www.occamlab.com/te/parsers", "schema"); String localType = null; for (int i = 0; i < nodes.getLength(); i++) { Element e = (Element) nodes.item(i); localType = e.getAttribute("type"); this.type = e.getAttribute("type"); this.phase = e.getAttribute("phase"); this.schemaLocation = e.getTextContent().trim(); } return localType; } /** * Converts an org.w3c.dom.Document element to an java.io.InputStream. * * @param edoc * The org.w3c.dom.Document to be converted * @return The InputStream value of the passed doument */ public InputStream DocumentToInputStream(org.w3c.dom.Document edoc) throws IOException { final org.w3c.dom.Document doc = edoc; final PipedOutputStream pos = new PipedOutputStream(); PipedInputStream pis = new PipedInputStream(); pis.connect(pos); (new Thread(new Runnable() { public void run() { // Use the Transformer.transform() method to save the Document // to a StreamResult try { TransformerFactory tFactory = TransformerFactory.newInstance(); // Fortify Mod: prevent external entity injection tFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true); Transformer transformer = tFactory.newTransformer(); transformer.setOutputProperty("encoding", "ISO-8859-1"); transformer.setOutputProperty("indent", "yes"); transformer.transform(new DOMSource(doc), new StreamResult( pos)); } catch (Exception _ex) { throw new RuntimeException( "Failed to tranform org.w3c.dom.Document to PipedOutputStream", _ex); } finally { try { pos.close(); } catch (IOException e) { } } } }, "MyClassName.convert(org.w3c.dom.Document edoc)")).start(); return pis; } /** * Checks the given schematron phase for the XML file and returns the * validation status. * * @param doc * The XML file to validate (Document) * @param schemaFile * The string path to the schematron file to use * @param phase * The string phase name (contained in schematron file) * @return Whether there were validation errors or not (boolean) */ public boolean checkSchematronRules(Document doc, String schemaFile, String phase) throws Exception { boolean isValid = false; if (doc == null || doc.getDocumentElement() == null) return isValid; try { ClassLoader loader = this.getClass().getClassLoader(); URL url = loader.getResource(schemaFile); this.schemaFile = new File( URLDecoder.decode(url.getFile(), "UTF-8")); } catch (Exception e) { assert false : "Entity body not found. " + e.toString(); } this.phase = phase; Document returnDoc = parse(doc, null, null); if (returnDoc != null) { isValid = true; } return isValid; } /** * Checks the given schematron phase for the XML file and returns the * validation status (takes schematron file, not string location). New and * ADVANCED! (team engine can't work with overloaded methods :P) * * @param doc * The XML file to validate (Document) * @param schemaFile * The file object of the schematron file to validate with * @param phase * The string phase name (contained in schematron file) * @return Whether there were validation errors or not (boolean) */ public boolean checkSchematronRulesAdv(InputSource inputDoc, File schemaFile, String phase) throws Exception { boolean isValid = false; if (inputDoc == null) return isValid; this.schemaFile = schemaFile; this.phase = phase; Document returnDoc = parse(inputDoc, null, null); if (returnDoc != null) { isValid = true; } return isValid; } /** * Runs the schematron file against the input source. */ public boolean executeSchematronDriver(InputSource inputDoc, File schemaFile, String phase) { boolean isValid = false; ValidationDriver driver = createSchematronDriver(phase); assert null != driver : "Unable to create Schematron ValidationDriver"; InputSource is = null; try { FileInputStream fis = new FileInputStream(schemaFile); is = new InputSource(fis); } catch (Exception e) { e.printStackTrace(); } try { if (driver.loadSchema(is)) { isValid = driver.validate(inputDoc); } else { assert false : ("Failed to load Schematron schema: " + schemaFile + "\nIs the schema valid? Is the phase defined?"); } } catch (SAXException e) { assert false : e.toString(); } catch (IOException e) { assert false : e.toString(); } return isValid; } /** * Sets up the schematron reader with all the necessary parameters. Calls * initSchematronReader() to do further setup of the validation driver. * * @param phase * The string phase name (contained in schematron file) * @return The ValidationDriver to use in validating the XML document */ ValidationDriver createSchematronDriver(String phase) { SchemaReaderLoader loader = new SchemaReaderLoader(); SchemaReader schReader = loader.createSchemaReader(SCHEMATRON_NS_URI); this.configPropBuilder = new PropertyMapBuilder(); SchematronProperty.DIAGNOSE.add(this.configPropBuilder); if (this.outputLogger == null) { this.outputLogger = new PrintWriter(System.out); } if (null != phase && !phase.isEmpty()) { this.configPropBuilder.put(SchematronProperty.PHASE, phase); } ErrorHandler eh = new ErrorHandlerImpl("Schematron", outputLogger); this.configPropBuilder.put(ValidateProperty.ERROR_HANDLER, eh); ValidationDriver validator = new ValidationDriver( this.configPropBuilder.toPropertyMap(), schReader); return validator; } /** * Parses and validates a resource obtained by dereferencing a URI. * * @param uc * A URLConnection to access an XML resource. * @param instruction * An element containing parser instructions. * @param logger * The PrintWriter used for logging errors. * @return A Document node if parsing succeeds, or {@code null} if it fails. * @throws Exception */ public Document parse(URLConnection uc, Element instruction, PrintWriter logger) throws Exception { return parse(uc.getInputStream(), instruction, logger); } Document parse(InputStream is, Element instruction, PrintWriter logger) throws Exception { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); // Fortify Mod: Disable entity expansion to foil External Entity Injections dbf.setExpandEntityReferences(false); DocumentBuilder db = dbf.newDocumentBuilder(); Document doc = null; try { doc = db.parse(is); } catch (Exception e) { logger.println(e.getMessage()); } return parse(doc, instruction, logger); } Document parse(InputSource is, Element instruction, PrintWriter logger) throws Exception { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); // Fortify Mod: Disable entity expansion to foil External Entity Injections dbf.setExpandEntityReferences(false); DocumentBuilder db = dbf.newDocumentBuilder(); Document doc = null; try { doc = db.parse(is); } catch (Exception e) { logger.println(e.getMessage()); } return parse(doc, instruction, logger); } /** * Checks the given Document against a Schematron schema. A schema reference * is conveyed by a DOM Element node as indicated below. * * <pre> * {@code * <tep:schemas xmlns:tep="http://www.occamlab.com/te/parsers"> * <tep:schema type="resource" phase="#ALL">/class/path/schema1.sch</tep:schema> * </tep:schemas> * } * </pre> * * @param doc * The document to be validated. * @param instruction * An Element containing schema information. * @param logger * A Writer used for logging error messages. * @return The valid document, or {@code null} if any errors were detected. * @throws Exception */ Document parse(Document doc, Element instruction, PrintWriter logger) throws Exception { this.outputLogger = logger; if (instruction != null) { getFileType(instruction); if (type.equals("url")) { URL schemaURL = new URL(this.schemaLocation); this.schemaFile = new File(schemaURL.toURI()); } else if (type.equals("file")) { this.schemaFile = new File(this.schemaLocation); } else if (type.equals("resource")) { URL url = this.getClass().getResource(this.schemaLocation); this.schemaFile = new File(URLDecoder.decode(url.getFile(), "UTF-8")); } } boolean isValid = false; if (doc != null) { InputSource xmlInputSource = null; try { InputStream inputStream = DocumentToInputStream(doc); xmlInputSource = new InputSource(inputStream); } catch (IOException e) { e.printStackTrace(); } isValid = executeSchematronDriver(xmlInputSource, this.schemaFile, this.phase); } if (!isValid) { return null; } else { return doc; } } /** * Checks the content of an XML entity against the applicable rules defined * in a Schematron schema. The designated phase identifies the active * patterns (rule sets); if not specified, the default phase is executed. * * @param xmlEntity * A DOM Document representing the XML entity to validate. * @param schemaRef * A (classpath) reference to a Schematron 1.5 schema. * @param phase * The phase to execute. * @return A NodeList containing validation errors (it may be empty). */ public NodeList validate(Document xmlEntity, String schemaRef, String phase) { if (xmlEntity == null || xmlEntity.getDocumentElement() == null) throw new IllegalArgumentException("No XML entity supplied (null)."); InputSource xmlInputSource = null; try { InputStream inputStream = DocumentToInputStream(xmlEntity); xmlInputSource = new InputSource(inputStream); } catch (IOException e) { throw new RuntimeException(e); } PropertyMapBuilder builder = new PropertyMapBuilder(); SchematronProperty.DIAGNOSE.add(builder); if (null != phase && !phase.isEmpty()) { builder.put(SchematronProperty.PHASE, phase); } XmlErrorHandler errHandler = new XmlErrorHandler(); builder.put(ValidateProperty.ERROR_HANDLER, errHandler); ValidationDriver driver = createDriver(builder.toPropertyMap()); InputStream schStream = this.getClass().getResourceAsStream(schemaRef); try { InputSource input = new InputSource(schStream); try { boolean loaded = driver.loadSchema(input); if (!loaded) { throw new Exception("Failed to load schema at " + schemaRef + "\nIs the schema valid? Is the phase defined?"); } } finally { schStream.close(); } driver.validate(xmlInputSource); } catch (Exception e) { throw new RuntimeException("Schematron validation failed.", e); } NodeList errList = errHandler.toNodeList(); if (LOGR.isLoggable(Level.FINER)) { LOGR.finer(String.format( "Found %d Schematron rule violation(s):\n %s", errList.getLength(), errHandler.toString())); } return errList; } /** * Creates and initializes a ValidationDriver to perform Schematron * validation. A schema must be loaded before an instance can be validated. * * @param configProps * A PropertyMap containing properties to configure schema * construction and validation behavior; it typically includes * {@code SchematronProperty} and {@code ValidationProperty} * items. * @return A ValidationDriver that is ready to load a Schematron schema. */ ValidationDriver createDriver(PropertyMap configProps) { SchemaReaderLoader loader = new SchemaReaderLoader(); SchemaReader schReader = loader.createSchemaReader(SCHEMATRON_NS_URI); ValidationDriver validator = new ValidationDriver(configProps, schReader); return validator; } }