/*-
* Copyright © 2009 Diamond Light Source Ltd.
*
* This file is part of GDA.
*
* GDA is free software: you can redistribute it and/or modify it under the
* terms of the GNU General Public License version 3 as published by the Free
* Software Foundation.
*
* GDA is distributed in the hope that it will be useful, but WITHOUT ANY
* WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
* details.
*
* You should have received a copy of the GNU General Public License along
* with GDA. If not, see <http://www.gnu.org/licenses/>.
*/
package uk.ac.gda.util.beans.xml;
import java.io.BufferedReader;
import java.io.CharArrayReader;
import java.io.CharArrayWriter;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.UnsupportedEncodingException;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.xerces.parsers.SAXParser;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.helpers.DefaultHandler;
/**
* XML instance file validation.
* <p>
* This class enables validation of an XML instance file, via JAXP or Xerces2.
* <p>
* Castor can do both high-level Java Bean validation (in setters/getters) and parser-level validation against a schema
* (using Xerces2). However this validation only occurs during (un)marshalling.
* <p>
* We also want to be able to validate an XML instance file against a schema, without having to do it during the
* unmarshalling process.
*/
public class XMLObjectConfigFileValidator {

    private static final Logger logger = LoggerFactory.getLogger(XMLObjectConfigFileValidator.class);

    private static final String SAX_REPORT_VALIDATION_ERRORS = "http://xml.org/sax/features/validation";
    private static final String XERCES_REPORT_SCHEMA_ERRORS = "http://apache.org/xml/features/validation/schema";
    private static final String XERCES_FULL_SCHEMA_GRAMMAR_CHECKING = "http://apache.org/xml/features/validation/schema-full-checking";
    // private static final String XERCES_SCHEMA_WITH_NAMESPACE =
    // "http://apache.org/xml/properties/schema/external-schemaLocation";
    private static final String XERCES_SCHEMA_WITHOUT_NAMESPACE = "http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation";

    private static final String JAXP_DOCBUILDER_FACTORY = "javax.xml.parsers.DocumentBuilderFactory";
    private static final String JAXP_DOCBUILDER_FACTORY_XERCES_IMPL = "org.apache.xerces.jaxp.DocumentBuilderFactoryImpl";
    private static final String JAXP_PROPS_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage";
    private static final String JAXP_PROPS_SCHEMA_SOURCE = "http://java.sun.com/xml/jaxp/properties/schemaSource";
    private static final String W3_SCHEMA_LANGUAGE_URI = "http://www.w3.org/2001/XMLSchema";
    // private static final String W3_SCHEMA_LANGUAGE_URL =
    // "http://www.w3.org/2001/XMLSchema.xsd";

    /** Encoding used to decode a byte stream when the InputSource does not declare one. */
    private static final String DEFAULT_ENCODING = "UTF-8";

    /*
     * NOTE: the three fields below are overwritten by every validateFile()/validateSource() call and then read by the
     * private validate methods, so a single instance must not be used by several threads at the same time.
     */

    /**
     * URL pathname to XML schema XSD file
     */
    private String SchemaUrl = null;

    /**
     * URL pathname to XML instance file
     */
    private String xmlInstanceDocumentURL = null;

    /**
     * SAX InputSource to XML instance file
     */
    private InputSource xmlInstanceDocumentSource = null;

    /**
     * Error handler that records problems reported by the parser instead of acting on them immediately. It can be
     * registered with a Xerces SAX parser or a JAXP DocumentBuilder.
     * <p>
     * Warnings, errors and fatal errors are all recorded in the same slot, so the most recently reported problem is
     * the one rethrown by {@link #throwIfRequired()}. Note that warnings are therefore treated as validation failures.
     */
    private static class Validator extends DefaultHandler {

        private SAXParseException saxParseException = null;

        @Override
        public void error(SAXParseException exception) throws SAXException {
            saxParseException = exception;
        }

        @Override
        public void fatalError(SAXParseException exception) throws SAXException {
            saxParseException = exception;
        }

        @Override
        public void warning(SAXParseException exception) throws SAXException {
            saxParseException = exception;
        }

        /**
         * Rethrows the last problem recorded during parsing, if there was one.
         *
         * @throws SAXParseException
         *             the last warning, error or fatal error reported to this handler
         */
        public void throwIfRequired() throws SAXParseException {
            if (saxParseException != null) {
                throw saxParseException;
            }
        }
    }

    /**
     * Uses Xerces to validate an XML document against a specified XML schema. Should be able to pass in
     * "file://d:/gda/dev/blah.xml" type of URL.
     * <p>
     * Parses {@link #xmlInstanceDocumentSource} if it is set, otherwise {@link #xmlInstanceDocumentURL}.
     *
     * @return true if the document validated successfully; a failure is reported by an exception, never by false
     * @throws Exception
     *             if the parser cannot be configured, the document cannot be read, or a validation problem was
     *             reported
     */
    private boolean xerces2Validate() throws Exception {
        SAXParser parser = new SAXParser();

        // enable reporting of validation errors - using schema or DTD
        parser.setFeature(SAX_REPORT_VALIDATION_ERRORS, true);
        // report validation errors against a schema
        parser.setFeature(XERCES_REPORT_SCHEMA_ERRORS, true);
        // enable full schema, grammar-constraint checking
        parser.setFeature(XERCES_FULL_SCHEMA_GRAMMAR_CHECKING, true);

        // Specify a validation schema for the parser to use.
        // N.B. parser is not required to locate any schema specified here.
        parser.setProperty(
                // schema with namespace - may supply a list to SchemaUrl
                // XERCES_SCHEMA_WITH_NAMESPACE,
                // schema without a namespace
                XERCES_SCHEMA_WITHOUT_NAMESPACE, SchemaUrl);

        // README - could use this to locate schema so GDASchema.xsd can be
        // validated on loadup
        // parser.setProperty(XERCES_SCHEMA_WITH_NAMESPACE,
        // W3_SCHEMA_LANGUAGE_URI + " " +
        // W3_SCHEMA_LANGUAGE_URL);

        // Register our own handler so that problems reported while parsing
        // are recorded and can be rethrown once parsing has finished.
        Validator handler = new Validator();
        parser.setErrorHandler(handler);

        if (xmlInstanceDocumentSource != null) {
            parser.parse(xmlInstanceDocumentSource);
        } else {
            parser.parse(xmlInstanceDocumentURL);
        }

        handler.throwIfRequired();
        return true;
    }

    /**
     * Uses JAXP to validate an XML document against a specified XML schema.
     * <p>
     * Parses {@link #xmlInstanceDocumentSource} if it is set, otherwise {@link #xmlInstanceDocumentURL}.
     * <p>
     * Side effect: sets the JVM-wide system property {@code javax.xml.parsers.DocumentBuilderFactory} to the Xerces
     * implementation class name.
     *
     * @return true if the document validated successfully; a failure is reported by an exception, never by false
     * @throws Exception
     *             if the parser cannot be configured, the document cannot be read, or a validation problem was
     *             reported
     */
    private boolean jaxpValidate() throws Exception {
        System.setProperty(JAXP_DOCBUILDER_FACTORY, JAXP_DOCBUILDER_FACTORY_XERCES_IMPL);
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();

        // do this if XML doc has a namespace
        // factory.setNamespaceAware(true);

        // make parser validating
        factory.setValidating(true);
        factory.setAttribute(JAXP_PROPS_SCHEMA_LANGUAGE, W3_SCHEMA_LANGUAGE_URI);
        factory.setAttribute(JAXP_PROPS_SCHEMA_SOURCE, SchemaUrl);

        // README - could use this to locate schema, so GDASchema.xsd can be
        // validated on loadup
        // factory.setAttribute(XERCES_SCHEMA_WITH_NAMESPACE,
        // W3_SCHEMA_LANGUAGE_URI + " " +
        // W3_SCHEMA_LANGUAGE_URL);

        DocumentBuilder builder = factory.newDocumentBuilder();

        // Register our own handler so that problems reported while parsing
        // are recorded and can be rethrown once parsing has finished.
        Validator handler = new Validator();
        builder.setErrorHandler(handler);

        // parse XML instance document using DocumentBuilder parser.
        if (xmlInstanceDocumentSource != null) {
            builder.parse(xmlInstanceDocumentSource);
        } else {
            builder.parse(xmlInstanceDocumentURL);
        }

        handler.throwIfRequired();
        // Reaching this point means no problem was reported. This previously
        // returned false, which made every JAXP validation look like a failure.
        return true;
    }

    /**
     * Validate an XML document file against a specified XML schema. Caller can select between JAXP and Xerces to do the
     * validation. Should be able to pass in "file://d:/gda/dev/blah.xml" type of URL.
     * <p>
     *
     * @param SchemaUrl
     *            URL pathname to XML schema XSD file
     * @param xmlInstanceDocumentURL
     *            URL pathname to XML instance file
     * @param useXercesValidation
     *            true to use Xerces, false to use JAXP
     * @return true if the document validated successfully. A validation failure is reported by an exception.
     * @throws Exception
     *             if the document cannot be parsed or does not validate
     */
    public boolean validateFile(String SchemaUrl, String xmlInstanceDocumentURL, boolean useXercesValidation) throws Exception {
        this.SchemaUrl = getPath(SchemaUrl);
        this.xmlInstanceDocumentURL = getPath(xmlInstanceDocumentURL);
        this.xmlInstanceDocumentSource = null;

        // perform validation using requested method
        if (useXercesValidation) {
            return this.xerces2Validate();
        }
        return this.jaxpValidate();
    }

    /**
     * Normalises a file name or URL. Values containing ":" or starting with "/" are returned unchanged; anything else
     * is treated as a relative file name and converted to an absolute path.
     *
     * @param fileOrUrl
     *            file name or URL, may be null
     * @return the normalised value, or null if the argument was null
     */
    private String getPath(String fileOrUrl) {
        if (fileOrUrl == null) {
            return null;
        }
        if (fileOrUrl.indexOf(":") > -1) {
            return fileOrUrl;
        }
        if (fileOrUrl.startsWith("/")) {
            return fileOrUrl;
        }
        return new File(fileOrUrl).getAbsolutePath();
    }

    /**
     * Read in all data from InputSource, storing it in a CharArrayWriter, so we can create multiple instances of
     * InputSource from it. Helper method used by validateSource()
     * <p>
     * The character stream is used when the source has one. Otherwise the byte stream is decoded with the encoding
     * declared on the source, falling back to UTF-8 when none is declared. The stream that was read is always closed.
     *
     * @param source
     *            InputSource to read data from
     * @return CharArrayWriter containing copy of data read in from source, never null
     * @throws IllegalArgumentException
     *             if source is null or has neither a character stream nor a byte stream
     * @throws IOException
     *             if the declared encoding is not supported or the data cannot be read
     */
    private CharArrayWriter getCharArrayWriterFromInputSource(InputSource source) throws IOException {
        if (source == null) {
            throw new IllegalArgumentException("XML instance InputSource must not be null");
        }

        final Reader characterStream = source.getCharacterStream();
        final Reader in;
        if (characterStream != null) {
            in = new BufferedReader(characterStream);
        } else {
            final InputStream byteStream = source.getByteStream();
            if (byteStream == null) {
                throw new IllegalArgumentException(
                        "XML instance InputSource has neither a character stream nor a byte stream");
            }
            final String declaredEncoding = source.getEncoding();
            final String encoding = declaredEncoding != null ? declaredEncoding : DEFAULT_ENCODING;
            in = new BufferedReader(new InputStreamReader(byteStream, encoding));
        }

        try {
            final CharArrayWriter out = new CharArrayWriter();
            final char[] buffer = new char[4096];
            int count;
            while ((count = in.read(buffer)) != -1) {
                out.write(buffer, 0, count);
            }
            return out;
        } catch (IOException e) {
            // Propagate rather than returning null, which previously caused a
            // NullPointerException in the caller.
            logger.error("Cannot read file", e);
            throw e;
        } finally {
            // Must close stream or file system errors can occur.
            try {
                in.close();
            } catch (IOException e) {
                logger.error("Cannot close stream", e);
            }
        }
    }

    /**
     * Validate an XML document SAX InputSource against a specified XML schema. Caller can select between JAXP and
     * Xerces to do the validation. Should be able to pass in "file://d:/gda/dev/blah.xml" type of URL.
     * <p>
     * The supplied source is read completely (and its stream closed) before validation. Since an InputSource cannot
     * be re-read, a new character array InputSource containing the same data is returned for the caller to use.
     * <p>
     *
     * @param SchemaUrl
     *            URL pathname to XML schema XSD file
     * @param xmlInstanceDocumentSource
     *            SAX InputSource to XML instance file; must provide a character stream or a byte stream
     * @param useXercesValidation
     *            true to use Xerces, false to use JAXP
     * @return a useable InputSource containing the same data as the input, if validation passed
     * @throws Exception
     *             if the source cannot be read, cannot be parsed, or does not validate
     */
    public InputSource validateSource(String SchemaUrl,
            InputSource xmlInstanceDocumentSource,
            boolean useXercesValidation) throws Exception {
        // README - InputSource could be based on a Reader (characters) or
        // InputStream (bytes) - both are handled by the helper below.
        // Read in all data from InputSource, storing it in a CharArrayWriter,
        // so we can create multiple instances of InputSource from it.
        CharArrayWriter data = getCharArrayWriterFromInputSource(xmlInstanceDocumentSource);

        return validateSource(SchemaUrl, data.toCharArray(), useXercesValidation);
    }

    /**
     * Validate XML held in a character array against a specified XML schema.
     *
     * @param SchemaUrl
     *            URL pathname to XML schema XSD file; used as given
     * @param xmlCharacters
     *            the complete XML instance document
     * @param useXercesValidation
     *            true to use Xerces, false to use JAXP
     * @return a new, unread InputSource over xmlCharacters if validation passed
     * @throws Exception
     *             if the document cannot be parsed or does not validate
     */
    public InputSource validateSource(String SchemaUrl,
            char[] xmlCharacters,
            boolean useXercesValidation) throws Exception {
        this.SchemaUrl = SchemaUrl;
        this.xmlInstanceDocumentURL = null;
        this.xmlInstanceDocumentSource = new InputSource(new CharArrayReader(xmlCharacters));

        // perform validation using requested method
        final boolean valid = useXercesValidation ? this.xerces2Validate() : this.jaxpValidate();
        if (!valid) {
            return null;
        }

        // The InputSource handed to the parser has been consumed and cannot be
        // reused, so give the caller a fresh one over the same characters.
        return new InputSource(new CharArrayReader(xmlCharacters));
    }
}