/**
* **************************************************************************
*
* Contributor(s):
* C. Heazel (WiSC): Added Fortify adjudication changes
*
***************************************************************************
*/
package com.occamlab.te.parsers;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.CharArrayReader;
import java.io.CharArrayWriter;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.PrintWriter;
import java.net.URI;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Result;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;
import org.w3c.dom.Document;
import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.ErrorHandler;
import org.xml.sax.SAXException;
import com.occamlab.te.ErrorHandlerImpl;
import com.occamlab.te.util.DomUtils;
import com.occamlab.te.util.URLConnectionUtils;
/**
 * Validates an XML resource against a set of W3C XML Schema or DTD schemas.
 *
 * <p>
 * Schema references can be supplied when the parser is constructed (they are
 * then applied to every validation) and/or with each call through a
 * configuration node. A reference is held as a {@link File}, a {@link URL},
 * or a {@code char[]} containing a serialized, embedded {@code xsd:schema}.
 * </p>
 *
 * <p>
 * The JAXP factories are created lazily by the first constructor call and
 * kept in static fields; that initialization is not synchronized.
 * </p>
 */
public class XMLValidatingParser {

    /** Namespace of the {@code schema} and {@code dtd} configuration elements. */
    private static final String PARSERS_NS = "http://www.occamlab.com/te/parsers";

    // Shared JAXP factories, created on first construction.
    static SchemaFactory SF = null;
    static TransformerFactory TF = null;
    static DocumentBuilderFactory nonValidatingDBF = null;
    static DocumentBuilderFactory schemaValidatingDBF = null;
    static DocumentBuilderFactory dtdValidatingDBF = null;

    // Schema references supplied at construction time.
    ArrayList<Object> schemaList = new ArrayList<Object>();
    ArrayList<Object> dtdList = new ArrayList<Object>();

    private static final Logger jlogger = Logger
            .getLogger("com.occamlab.te.parsers.XMLValidatingParser");

    /**
     * Collects the schema references of one kind from a configuration
     * document.
     *
     * @param schemaLinks
     *            The configuration document to search.
     * @param schemas
     *            The list that receives the references (File or URL objects).
     * @param schemaType
     *            Local name of the reference elements to look for
     *            ("schema" or "dtd").
     * @throws Exception
     *             If a reference has an unknown type, a classpath resource
     *             cannot be found, or a URL is malformed.
     */
    private void loadSchemaList(Document schemaLinks,
            ArrayList<Object> schemas, String schemaType) throws Exception {
        NodeList nodes = schemaLinks.getElementsByTagNameNS(PARSERS_NS,
                schemaType);
        for (int i = 0; i < nodes.getLength(); i++) {
            Element e = (Element) nodes.item(i);
            String type = e.getAttribute("type");
            // Surrounding whitespace (e.g. from pretty-printed configuration)
            // is never part of the location.
            String location = e.getTextContent().trim();
            Object schema;
            // URL, File, or Resource
            if (type.equals("url")) {
                schema = new URL(location);
            } else if (type.equals("file")) {
                schema = new File(location);
            } else if (type.equals("resource")) {
                URL url = getClass().getClassLoader().getResource(location);
                if (url == null) {
                    String msg = "Can't find schema resource on classpath at "
                            + location;
                    jlogger.warning(msg);
                    throw new Exception(msg);
                }
                schema = toSchemaReference(url);
            } else {
                throw new Exception("Unknown schema resource type " + type);
            }
            jlogger.finer("Adding schema reference " + schema.toString());
            schemas.add(schema);
        }
    }

    /**
     * Converts the URL of a classpath resource into a schema reference.
     *
     * @param url
     *            The resource location returned by the class loader.
     * @return A File for "file" URLs (with URL escapes such as %20 decoded),
     *         otherwise the URL itself so that resources packaged in an
     *         archive remain addressable.
     */
    private static Object toSchemaReference(URL url) {
        if (!"file".equalsIgnoreCase(url.getProtocol())) {
            return url;
        }
        try {
            return new File(url.toURI());
        } catch (java.net.URISyntaxException | IllegalArgumentException ex) {
            // Not a well-formed hierarchical file URI; use the raw path.
            return new File(url.getFile());
        }
    }

    /**
     * Adds all schema and DTD references found in the given configuration
     * node to the supplied lists.
     *
     * <p>
     * Note that the search covers the whole document that owns
     * {@code schemaLinks}, not just the subtree below it. Embedded
     * {@code xsd:schema} elements are serialized and added to
     * {@code schemas} as {@code char[]}.
     * </p>
     *
     * @param schemaLinks
     *            A Document or a node within one; if {@code null} nothing is
     *            added.
     * @param schemas
     *            Receives XML Schema references.
     * @param dtds
     *            Receives DTD references.
     * @throws Exception
     *             If a reference cannot be resolved or an embedded schema
     *             cannot be serialized.
     */
    private void loadSchemaLists(Node schemaLinks, ArrayList<Object> schemas,
            ArrayList<Object> dtds) throws Exception {
        if (null == schemaLinks) {
            return;
        }
        jlogger.finer("Received schemaLinks\n"
                + DomUtils.serializeNode(schemaLinks));
        Document configDoc = (schemaLinks instanceof Document)
                ? (Document) schemaLinks
                : schemaLinks.getOwnerDocument();
        loadSchemaList(configDoc, schemas, "schema");
        loadSchemaList(configDoc, dtds, "dtd");
        // If instruction body is an embedded xsd:schema, add it to the
        // ArrayList
        NodeList nodes = configDoc.getElementsByTagNameNS(
                XMLConstants.W3C_XML_SCHEMA_NS_URI, "schema");
        if (nodes.getLength() == 0) {
            return;
        }
        Transformer serializer = TF.newTransformer();
        for (int i = 0; i < nodes.getLength(); i++) {
            CharArrayWriter caw = new CharArrayWriter();
            serializer.transform(new DOMSource(nodes.item(i)),
                    new StreamResult(caw));
            schemas.add(caw.toCharArray());
        }
    }

    /**
     * Puts a system property back to the value it had before it was
     * temporarily overridden.
     *
     * @param name
     *            The property name.
     * @param oldValue
     *            The previous value, or {@code null} if it was not set.
     */
    private static void restoreSystemProperty(String name, String oldValue) {
        if (oldValue == null) {
            System.clearProperty(name);
        } else {
            System.setProperty(name, oldValue);
        }
    }

    /**
     * Creates a parser without any preconfigured schema references. The
     * first invocation also creates the shared JAXP factories; the Xerces
     * implementations are selected by temporarily overriding the relevant
     * system properties, which are restored afterwards even if factory
     * creation fails.
     */
    public XMLValidatingParser() {
        if (SF == null) {
            String propertyName = "javax.xml.validation.SchemaFactory:"
                    + XMLConstants.W3C_XML_SCHEMA_NS_URI;
            String oldValue = System.getProperty(propertyName);
            System.setProperty(propertyName,
                    "org.apache.xerces.jaxp.validation.XMLSchemaFactory");
            try {
                SchemaFactory sf = SchemaFactory
                        .newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
                try {
                    sf.setFeature(
                            "http://apache.org/xml/features/validation/schema-full-checking",
                            false);
                } catch (Exception e) {
                    jlogger.warning("Unable to set feature '*/schema-full-checking'");
                }
                SF = sf;
            } finally {
                restoreSystemProperty(propertyName, oldValue);
            }
        }
        if (nonValidatingDBF == null) {
            String propertyName = "javax.xml.parsers.DocumentBuilderFactory";
            String oldValue = System.getProperty(propertyName);
            System.setProperty(propertyName,
                    "org.apache.xerces.jaxp.DocumentBuilderFactoryImpl");
            try {
                DocumentBuilderFactory nonValidating = DocumentBuilderFactory
                        .newInstance();
                // Fortify Mod: Disable entity expansion to foil External Entity Injections
                nonValidating.setExpandEntityReferences(false);
                nonValidating.setNamespaceAware(true);

                DocumentBuilderFactory schemaValidating = DocumentBuilderFactory
                        .newInstance();
                schemaValidating.setNamespaceAware(true);
                schemaValidating.setValidating(true);
                schemaValidating.setAttribute(
                        "http://java.sun.com/xml/jaxp/properties/schemaLanguage",
                        "http://www.w3.org/2001/XMLSchema");

                DocumentBuilderFactory dtdValidating = DocumentBuilderFactory
                        .newInstance();
                dtdValidating.setNamespaceAware(true);
                dtdValidating.setValidating(true);
                // Fortify Mod: Disable entity expansion to foil External Entity Injections
                dtdValidating.setExpandEntityReferences(false);

                // Publish only after all three were configured. The guard
                // field is assigned last so that a failure above does not
                // leave the factories half-initialized.
                schemaValidatingDBF = schemaValidating;
                dtdValidatingDBF = dtdValidating;
                nonValidatingDBF = nonValidating;
            } finally {
                restoreSystemProperty(propertyName, oldValue);
            }
        }
        if (TF == null) {
            // Fortify Mod: prevent external entity injection
            // includes try block to capture exceptions to setFeature.
            TransformerFactory tf = TransformerFactory.newInstance();
            try {
                tf.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
            } catch (Exception e) {
                jlogger.warning("Failed to secure Transformer");
            }
            TF = tf;
        }
    }

    /**
     * Creates a parser with a default set of schema references that is
     * applied to every validation.
     *
     * @param schema_links
     *            A document containing schema references; may be
     *            {@code null}.
     * @throws Exception
     *             If a schema reference cannot be resolved.
     */
    public XMLValidatingParser(Document schema_links) throws Exception {
        this();
        if (null != schema_links) {
            loadSchemaLists(schema_links, this.schemaList, this.dtdList);
        }
    }

    /**
     * Attempts to parse a resource read using the given connection to a URL.
     *
     * @param uc
     *            A connection for reading from some URL.
     * @param instruction
     *            An Element node (ctlp:XMLValidatingParser) containing
     *            instructions, usually schema references.
     * @param logger
     *            A log writer.
     * @return A Document, or {@code null} if validation reported errors or
     *         warnings that are not ignored.
     * @throws NullPointerException
     *             If {@code uc} is null.
     * @throws RuntimeException
     *             If the resource cannot be read or parsed; the underlying
     *             exception is attached as the cause.
     */
    public Document parse(URLConnection uc, Element instruction,
            PrintWriter logger) {
        if (null == uc) {
            throw new NullPointerException(
                    "Unable to parse resource: URLConnection is null.");
        }
        jlogger.fine("Received URLConnection object for " + uc.getURL());
        try (InputStream inStream = URLConnectionUtils.getInputStream(uc)) {
            return parse(inStream, instruction, logger);
        } catch (Exception e) {
            throw new RuntimeException(String.format(
                    "Failed to parse resource from %s \n %s", uc.getURL(),
                    e.getMessage()), e);
        }
    }

    /**
     * Parses and validates an XML resource using the given schema references.
     *
     * @param input
     *            The XML input to parse and validate. It must be either an
     *            InputStream or a Document object. An InputStream is closed
     *            after it has been read.
     * @param parserConfig
     *            An Element
     *            ({http://www.occamlab.com/te/parsers}XMLValidatingParser)
     *            containing configuration info. If it is {@code null} or empty
     *            validation will be performed by using location hints in the
     *            input document. Its {@code ignoreErrors} (default false) and
     *            {@code ignoreWarnings} (default true) attributes control
     *            whether a document is still returned when problems are
     *            found.
     * @param logger
     *            The PrintWriter to log all results to
     * @return {@code null} If any non-ignorable errors or warnings occurred;
     *         otherwise the resulting Document.
     * @throws NullPointerException
     *             If {@code input} is null.
     * @throws IllegalArgumentException
     *             If {@code input} is neither an InputStream nor a Document.
     * @throws RuntimeException
     *             If the input stream cannot be parsed; the parser exception
     *             is attached as the cause.
     * @throws Exception
     *             If schema references cannot be loaded or validation cannot
     *             be carried out.
     */
    Document parse(Object input, Element parserConfig, PrintWriter logger)
            throws Exception {
        if (null == input) {
            throw new NullPointerException("XML input is null.");
        }
        jlogger.finer("Received XML resource of type "
                + input.getClass().getName());
        ArrayList<Object> schemas = new ArrayList<Object>(this.schemaList);
        ArrayList<Object> dtds = new ArrayList<Object>(this.dtdList);
        loadSchemaLists(parserConfig, schemas, dtds);
        Document resultDoc = null;
        ErrorHandlerImpl errHandler = new ErrorHandlerImpl("Parsing", logger);
        if (input instanceof InputStream) {
            DocumentBuilder db = nonValidatingDBF.newDocumentBuilder();
            db.setErrorHandler(errHandler);
            Exception parseFailure = null;
            try (InputStream xmlInput = (InputStream) input) {
                resultDoc = db.parse(xmlInput);
            } catch (Exception e) {
                jlogger.log(Level.INFO, "Error parsing InputStream", e);
                parseFailure = e;
            }
            if (null == resultDoc) {
                // Keep the original failure so callers can see why parsing
                // went wrong.
                throw new RuntimeException("Failed to parse input: "
                        + input.getClass().getName(), parseFailure);
            }
        } else if (input instanceof Document) {
            resultDoc = (Document) input;
        } else {
            throw new IllegalArgumentException(
                    "XML input must be an InputStream or a Document object.");
        }
        errHandler.setRole("Validation");
        if (null == resultDoc.getDoctype() && dtds.isEmpty()) {
            validateAgainstXMLSchemaList(resultDoc, schemas, errHandler);
        } else {
            validateAgainstDTDList(resultDoc, dtds, errHandler);
        }
        int errorCount = errHandler.getErrorCount();
        int warningCount = errHandler.getWarningCount();
        if (errorCount > 0 || warningCount > 0) {
            StringBuilder msg = new StringBuilder();
            if (errorCount > 0) {
                msg.append(errorCount).append(" validation error")
                        .append(errorCount == 1 ? "" : "s");
                if (warningCount > 0) {
                    msg.append(" and ");
                }
            }
            if (warningCount > 0) {
                msg.append(warningCount).append(" warning")
                        .append(warningCount == 1 ? "" : "s");
            }
            msg.append(" detected.");
            if (null != logger) {
                logger.println(msg.toString());
            }
        }
        if (errorCount > 0) {
            // Errors are fatal unless ignoreErrors="true" is given.
            String ignoreErrors = (null != parserConfig) ? parserConfig
                    .getAttribute("ignoreErrors") : "false";
            if (!Boolean.parseBoolean(ignoreErrors)) {
                resultDoc = null;
            }
        }
        if (warningCount > 0) {
            // Warnings are ignored unless the attribute is present and is
            // not "true".
            String ignoreWarnings = (null != parserConfig) ? parserConfig
                    .getAttribute("ignoreWarnings") : "true";
            if (ignoreWarnings.length() > 0
                    && !Boolean.parseBoolean(ignoreWarnings)) {
                resultDoc = null;
            }
        }
        return resultDoc;
    }

    /**
     * A method to validate a pool of schemas outside of the request element.
     * The validation summary is written to standard output.
     *
     * @param doc
     *            The document to validate.
     * @param instruction
     *            The xml encapsulated schema information (file locations);
     *            if {@code null}, only preconfigured schemas or location
     *            hints in the document are used.
     * @return false if there were errors (or {@code doc} is null or empty),
     *         true if none.
     * @throws Exception
     *             If schema references cannot be loaded or validation cannot
     *             be carried out.
     */
    public boolean checkXMLRules(Document doc, Document instruction)
            throws Exception {
        if (doc == null || doc.getDocumentElement() == null) {
            return false;
        }
        Element config = (null != instruction) ? instruction
                .getDocumentElement() : null;
        PrintWriter logger = new PrintWriter(System.out);
        try {
            return parse(doc, config, logger) != null;
        } finally {
            // The writer does not auto-flush; flush (but do not close
            // System.out) so the messages are not lost.
            logger.flush();
        }
    }

    /**
     * Validates the given document against the schema references supplied in
     * the accompanying instruction document.
     *
     * @param doc
     *            The document to be validated.
     * @param instruction
     *            A document containing schema references; may be null, in which
     *            case embedded schema references will be used instead.
     * @return A list of Element nodes ({@code <error>}) containing error
     *         messages.
     * @throws Exception
     *             If any error occurs.
     */
    public NodeList validate(Document doc, Document instruction)
            throws Exception {
        return schemaValidation(doc, instruction).toNodeList();
    }

    /**
     * Validates the given document like
     * {@link #validate(Document, Document)} but returns the outcome as a
     * single element.
     *
     * @param doc
     *            The document to be validated.
     * @param instruction
     *            A document containing schema references; may be null.
     * @return The root element produced by the error handler.
     * @throws Exception
     *             If any error occurs.
     */
    public Element validateSingleResult(Document doc, Document instruction)
            throws Exception {
        return schemaValidation(doc, instruction).toRootElement();
    }

    /**
     * Validates a document against the preconfigured schema references plus
     * those found in the instruction document, and returns the handler that
     * collected the results.
     *
     * @param doc
     *            The document to be validated.
     * @param instruction
     *            A document containing schema references; may be null.
     * @return The error handler that received the validation errors.
     * @throws NullPointerException
     *             If {@code doc} is null or has no document element.
     * @throws Exception
     *             If any other error occurs.
     */
    XmlErrorHandler schemaValidation(Document doc, Document instruction)
            throws Exception {
        if (doc == null || doc.getDocumentElement() == null) {
            throw new NullPointerException("Input document is null.");
        }
        ArrayList<Object> schemas = new ArrayList<Object>(schemaList);
        ArrayList<Object> dtds = new ArrayList<Object>(dtdList);
        loadSchemaLists(instruction, schemas, dtds);
        XmlErrorHandler errHandler = new XmlErrorHandler();
        if (null == doc.getDoctype() && dtds.isEmpty()) {
            validateAgainstXMLSchemaList(doc, schemas, errHandler);
        } else {
            validateAgainstDTDList(doc, dtds, errHandler);
        }
        return errHandler;
    }

    /**
     * Validates an XML resource against a list of XML Schemas. Validation
     * errors are reported to the given handler.
     *
     * @param doc
     *            The input Document node.
     * @param xsdList
     *            A list of XML schema references. If the list is {@code null}
     *            or empty, validation will be performed by using location hints
     *            found in the input document.
     * @param errHandler
     *            An ErrorHandler that collects validation errors.
     * @throws SAXException
     *             If a schema cannot be read for some reason.
     * @throws IOException
     *             If an I/O error occurs.
     * @throws IllegalArgumentException
     *             If the list contains an unsupported kind of reference.
     */
    void validateAgainstXMLSchemaList(Document doc, ArrayList<Object> xsdList,
            ErrorHandler errHandler) throws SAXException, IOException {
        jlogger.finer("Validating XML resource from " + doc.getDocumentURI());
        Schema schema;
        if (null == xsdList || xsdList.isEmpty()) {
            // No explicit schemas: rely on location hints in the instance.
            schema = SF.newSchema();
        } else {
            Source[] schemaSources = new Source[xsdList.size()];
            for (int i = 0; i < schemaSources.length; i++) {
                Object ref = xsdList.get(i);
                if (ref instanceof File) {
                    schemaSources[i] = new StreamSource((File) ref);
                } else if (ref instanceof URL) {
                    schemaSources[i] = new StreamSource(ref.toString());
                } else if (ref instanceof char[]) {
                    schemaSources[i] = new StreamSource(new CharArrayReader(
                            (char[]) ref));
                } else {
                    throw new IllegalArgumentException(
                            "Unknown schema reference: " + ref);
                }
            }
            schema = SF.newSchema(schemaSources);
        }
        Validator validator = schema.newValidator();
        validator.setErrorHandler(errHandler);
        validator.validate(new DOMSource(doc, doc.getBaseURI()));
    }

    /**
     * Validates an XML resource against a list of DTD schemas or as indicated
     * by a DOCTYPE declaration. Validation errors are reported to the given
     * handler. If no DTD list is provided the external schema reference in the
     * DOCTYPE declaration is used (Note: an internal subset is ignored). When
     * there is neither a DTD list nor a DOCTYPE with a system identifier,
     * nothing is validated.
     *
     * @param doc
     *            The input Document.
     * @param dtdList
     *            A list of DTD schema references (may be null or empty).
     * @param errHandler
     *            An ErrorHandler that collects validation errors.
     * @throws Exception
     *             If any errors occur while attempting to validate the
     *             document.
     */
    void validateAgainstDTDList(Document doc, ArrayList<Object> dtdList,
            ErrorHandler errHandler) throws Exception {
        jlogger.finer("Validating XML resource from " + doc.getDocumentURI());
        DocumentBuilder db = dtdValidatingDBF.newDocumentBuilder();
        db.setErrorHandler(errHandler);
        // The shared factory was already secured against external entity
        // injection (Fortify Mod) when it was created by the constructor.
        Transformer copier = TF.newTransformer();
        ByteArrayOutputStream content = new ByteArrayOutputStream();
        Result copy = new StreamResult(content);
        if (null == dtdList || dtdList.isEmpty()) {
            DocumentType doctype = doc.getDoctype();
            if (null == doctype) {
                return;
            }
            String declaredSystemId = doctype.getSystemId();
            if (null == declaredSystemId) {
                // DOCTYPE with only an internal subset: there is no external
                // DTD to validate against.
                jlogger.warning("DOCTYPE declaration has no system identifier; skipping DTD validation");
                return;
            }
            URI systemId = URI.create(declaredSystemId);
            if (!systemId.isAbsolute() && null != doc.getBaseURI()) {
                systemId = URI.create(doc.getBaseURI()).resolve(systemId);
            }
            copier.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM,
                    systemId.toString());
            copier.transform(new DOMSource(doc), copy);
            db.parse(new ByteArrayInputStream(content.toByteArray()));
        } else {
            // Re-serialize the document once per DTD, each time with a
            // DOCTYPE pointing at that DTD, and parse it with validation on.
            for (Object dtdRef : dtdList) {
                content.reset();
                copier.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM,
                        dtdRef.toString());
                copier.transform(new DOMSource(doc), copy);
                db.parse(new ByteArrayInputStream(content.toByteArray()));
            }
        }
    }
}