/**
* JHOVE2 - Next-generation architecture for format-aware characterization
* <p>
* Copyright (c) 2009 by The Regents of the University of California, Ithaka
* Harbors, Inc., and The Board of Trustees of the Leland Stanford Junior
* University. All rights reserved.
* </p>
* <p>
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* </p>
* <ul>
* <li>Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.</li>
* <li>Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.</li>
* <li>Neither the name of the University of California/California Digital
* Library, Ithaka Harbors/Portico, or Stanford University, nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.</li>
* </ul>
* <p>
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* </p>
*/
package org.jhove2.module.format.xml;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.apache.xerces.util.XMLCatalogResolver;
import org.jhove2.annotation.ReportableProperty;
import org.jhove2.core.JHOVE2;
import org.jhove2.core.JHOVE2Exception;
import org.jhove2.core.Message;
import org.jhove2.core.Message.Context;
import org.jhove2.core.Message.Severity;
import org.jhove2.core.io.Input;
import org.jhove2.core.reportable.AbstractReportable;
import org.jhove2.core.source.FileSource;
import org.jhove2.core.source.Source;
import org.jhove2.core.source.URLSource;
import org.jhove2.module.format.Validator.Validity;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLReaderFactory;
import com.sleepycat.persist.model.NotPersistent;
import com.sleepycat.persist.model.Persistent;
/**
 * This class provides a wrapper for the methods used to create and initialize
 * the SAX2 parser used to characterize an XML instance.
 * <p>
 * The parser class name, the SAX features, and the XML catalog settings are
 * plain bean properties populated through the setters below; the
 * {@link XMLReader} itself is created lazily by {@link #getXmlReader()}.
 * </p>
 *
 * @author rnanders
 *
 * @see <a href="http://www.saxproject.org/">official website for SAX</a>
 * @see <a href="https://jaxp.dev.java.net/">Sun's JAXP website</a>
 * @see <a href="http://xerces.apache.org/xerces2-j/">Xerces2 Java Parser</a>
 */
@Persistent
public class SaxParser
    extends AbstractReportable
{
    /** The XmlModule object that is invoking the parser. */
    @NotPersistent
    protected XmlModule xmlModule;

    /**
     * The XMLReader object (the actual parser) created and initialized in this
     * class.
     */
    @NotPersistent
    protected XMLReader xmlReader;

    /** The explicit class name of the SAX driver being used. */
    protected String parser;

    /**
     * The set of SAX features (toggles) that have been set to fine tune the
     * behavior of the parser. May be <code>null</code> if none were configured.
     */
    protected Map<String, String> features;

    /**
     * The set of SAX properties (such as event handlers) that have been
     * registered with the parser.
     */
    @NotPersistent
    protected Map<String, Object> properties = new HashMap<String, Object>();

    /**
     * "String-ified" version of SAX properties registered with parser
     */
    protected List<String> saxProperties;

    /** If true, use XML Catalog files and external entity lookup */
    protected boolean useXmlCatalog;

    /** The object that does entity resolution. */
    @NotPersistent
    XMLCatalogResolver resolver;

    /**
     * An ordered array of absolute URIs for the catalog files to be used by
     * the external entity resolver
     */
    protected String[] xmlCatalogList;

    /** Creates an unconfigured instance; settings are supplied via setters. */
    public SaxParser(){
        super();
    }

    /**
     * The name of the SAX driver class to be used for parsing can optionally be
     * set in the Spring config file that instantiates this class.
     *
     * @param parser
     *            class name of the specific SAX driver being used
     */
    public void setParser(String parser) {
        this.parser = parser;
    }

    /**
     * Gets the parser's class name.
     *
     * @return the parser's class name
     */
    @ReportableProperty(order = 1, value = "Java class used to parse the XML")
    public String getParser() {
        return parser;
    }

    /**
     * The set of SAX features that specify the behavior of the parser can
     * optionally be set in the Spring config file that instantiates this class.
     *
     * @param features
     *            the set of (feature name, boolean value) pairs that will
     *            fine-tune parser behavior
     */
    public void setFeatures(Map<String, String> features) {
        this.features = features;
    }

    /**
     * Gets the list of SAX features that are currently in effect.
     *
     * @return a list of strings that specify the (feature name, boolean value)
     *         settings; empty if no features have been configured
     */
    @ReportableProperty(order = 2, value = "SAX Parser Feature Settings")
    public List<String> getSaxFeatures() {
        ArrayList<String> list = new ArrayList<String>();
        /* features are optional, so there may be nothing to report */
        if (features == null) {
            return list;
        }
        for (Entry<String, String> entry : features.entrySet()) {
            list.add(entry.getKey() + " = " + entry.getValue());
        }
        return list;
    }

    /**
     * Gets the list of SAX properties that are currently in effect.
     *
     * @return a list of strings that specify the (property name, value or
     *         class name) settings; this is only populated once
     *         {@link #getXmlReader()} has initialized the parser
     */
    @ReportableProperty(order = 3, value = "SAX Parser Property Settings")
    public List<String> getSaxProperties() {
        return this.saxProperties;
    }

    /**
     * If set true, use XML Catalog files and external entity lookup
     *
     * @param useXmlCatalog
     *            true to enable catalog-based entity resolution
     */
    public void setUseXmlCatalog(boolean useXmlCatalog) {
        this.useXmlCatalog = useXmlCatalog;
    }

    /**
     * Setter for the array of catalog files to be used by the external entity
     * resolver
     *
     * @param xmlCatalogList
     *            ordered array of catalog URIs
     */
    public void setXmlCatalogList(String[] xmlCatalogList) {
        this.xmlCatalogList = xmlCatalogList;
    }

    /**
     * Gets the list of XML Catalogs that were used for entity resolution
     *
     * @return a list of XML Catalogs that were used for entity resolution, or
     *         <code>null</code> if catalogs are disabled or none were
     *         configured
     */
    @ReportableProperty(order = 4, value = "XML Catalogs used for resolving entities")
    public List<String> getXmlCatalogs() {
        /*
         * Same condition as specifyXmlCatalog(): a catalog is only in use when
         * it is both enabled and actually configured.
         */
        if (useXmlCatalog && (xmlCatalogList != null)) {
            return Arrays.asList(xmlCatalogList);
        }
        return null;
    }

    /**
     * Sets a pointer to the XmlModule that has invoked this class.
     *
     * @param xmlModule
     *            the XmlModule that has invoked this class
     */
    protected void setXmlModule(XmlModule xmlModule) {
        this.xmlModule = xmlModule;
    }

    /**
     * Creates and initializes the SAX2 XMLReader object (the actual parser).
     * The reader is built on the first call and reused afterwards.
     *
     * @return the SAX2 XMLReader object
     * @throws JHOVE2Exception
     *             if the SAX parser could not be created
     */
    protected XMLReader getXmlReader() throws JHOVE2Exception {
        if (xmlReader == null) {
            createXmlReader();
            specifyXmlReaderFeatures();
            specifyXmlReaderHandlers();
            specifyXmlReaderHandlers2(); // updates properties
            specifyXmlCatalog(); // updates properties
            specifyXmlReaderProperties();// updates properties
            /*
             * now update the "String" version of the properties list that is
             * returned as a Reportable object
             */
            this.setSaxProperties(describeProperties());
        }
        return xmlReader;
    }

    /**
     * Builds the reportable form of the registered SAX properties: String
     * values are shown as-is, any other value is shown by its class name.
     *
     * @return one "name = value" line per registered property
     */
    private ArrayList<String> describeProperties() {
        ArrayList<String> list = new ArrayList<String>();
        for (Entry<String, Object> entry : properties.entrySet()) {
            Object value = entry.getValue();
            if (value instanceof String) {
                list.add(entry.getKey() + " = " + value.toString());
            }
            else {
                list.add(entry.getKey() + " = " + value.getClass().getName());
            }
        }
        return list;
    }

    /**
     * Stores the "String-ified" SAX property list.
     *
     * @param list
     *            the reportable property descriptions
     */
    protected void setSaxProperties(ArrayList<String> list) {
        this.saxProperties = list;
    }

    /**
     * Creates the SAX2 XMLReader object.
     *
     * @throws JHOVE2Exception
     *             if the SAX parser could not be created
     */
    private void createXmlReader() throws JHOVE2Exception {
        try {
            if (parser != null) {
                /*
                 * SAX Parser class name has been specified in the Spring config
                 * file
                 */
                xmlReader = XMLReaderFactory.createXMLReader(parser);
            }
            else {
                /*
                 * SAX Parser class name will be determined by
                 * XMLReaderFactory, e.g. from the org.xml.sax.driver system
                 * property or a META-INF value from a jar file in the
                 * classpath
                 */
                xmlReader = XMLReaderFactory.createXMLReader();
                parser = xmlReader.getClass().getName();
            }
        }
        catch (SAXException e) {
            throw new JHOVE2Exception("Could not create a SAX parser", e);
        }
    }

    /**
     * Initialize SAX2 XMLReader features. A feature the parser rejects has its
     * configured value replaced by an explanatory message, which is what
     * {@link #getSaxFeatures()} subsequently reports.
     */
    private void specifyXmlReaderFeatures() {
        if (features != null) {
            for (Entry<String, String> entry : features.entrySet()) {
                try {
                    xmlReader.setFeature(entry.getKey(), Boolean
                            .parseBoolean(entry.getValue()));
                }
                catch (SAXNotRecognizedException e) {
                    entry.setValue("Feature not recognized by parser");
                }
                catch (SAXNotSupportedException e) {
                    entry.setValue("Feature not supported by parser");
                }
            }
        }
    }

    /**
     * Test whether a given feature is set in the parser.
     *
     * @param featureName
     *            the name of the SAX feature to query
     * @return the feature's current value, or false if it could not be
     *         queried for any reason
     */
    protected boolean hasFeature(String featureName) {
        try {
            return xmlReader.getFeature(featureName);
        }
        catch (Exception e) {
            /*
             * Deliberately best-effort: an unknown or unsupported feature, or
             * a reader that has not been created yet, is reported as "not set".
             */
            return false;
        }
    }

    /**
     * Initialize core event handlers.
     */
    private void specifyXmlReaderHandlers() {
        xmlReader.setContentHandler(new SaxParserContentHandler(xmlReader,
                xmlModule));
        xmlReader.setDTDHandler(new SaxParserDtdHandler(xmlModule));
        xmlReader.setErrorHandler(new SaxParserErrorHandler(xmlModule));
    }

    /**
     * Initialize event handlers unique to SAX2. The handlers are only queued
     * in the properties map here; they are registered with the parser by
     * {@link #specifyXmlReaderProperties()}.
     */
    private void specifyXmlReaderHandlers2() {
        properties.put("http://xml.org/sax/properties/declaration-handler",
                new SaxParserDeclHandler(xmlModule));
        properties.put("http://xml.org/sax/properties/lexical-handler",
                new SaxParserLexicalHandler(xmlModule));
    }

    /**
     * Initialize XML Catalog Resolver.
     *
     * @see <a href="http://xerces.apache.org/xerces2-j/javadocs/xerces2/org/apache/xerces/util/XMLCatalogResolver.html"
     *      >XMLCatalogResolver</a>
     */
    private void specifyXmlCatalog() {
        if (useXmlCatalog && (xmlCatalogList != null)) {
            // Create catalog resolver.
            XMLCatalogResolver catalogResolver = new XMLCatalogResolver();
            this.resolver = catalogResolver;
            // Set public identifier matches are preferred to system identifier matches
            catalogResolver.setPreferPublic(true);
            // catalog list should be set from the Spring config file
            catalogResolver.setCatalogList(xmlCatalogList);
            // Queue the resolver for registration on the parser.
            properties.put("http://apache.org/xml/properties/internal/entity-resolver", catalogResolver);
        }
    }

    /**
     * Set parser properties (callback objects) using the map previously
     * filled. A property the parser rejects has its value replaced by an
     * explanatory message so the failure shows up in the reported property
     * list.
     */
    private void specifyXmlReaderProperties() {
        for (Entry<String, Object> entry : properties.entrySet()) {
            try {
                xmlReader.setProperty(entry.getKey(), entry.getValue());
            }
            catch (SAXNotRecognizedException e) {
                entry.setValue("Property not recognized by parser");
            }
            catch (SAXNotSupportedException e) {
                entry.setValue("Property not supported by parser");
            }
        }
    }

    /**
     * Parses the source unit with the SAX2 parser and records the outcome on
     * the invoking XmlModule: a clean parse marks the instance well-formed, a
     * SAXException marks it neither well-formed nor valid, and an IOException
     * raised while parsing leaves both undetermined. In the two failure cases
     * an error message is added to the module's SAX parser messages.
     *
     * @param jhove2
     *            JHOVE2 framework object, used for message configuration
     * @param source
     *            the source unit holding the XML instance
     * @param input
     *            the input for the source unit (not used by this method)
     * @throws JHOVE2Exception
     *             if the SAX parser could not be created
     * @throws IOException
     *             if the source stream cannot be opened or closed
     */
    protected void parse(JHOVE2 jhove2, Source source, Input input)
        throws JHOVE2Exception, IOException
    {
        /* The XMLReader does the parsing of the XML */
        XMLReader reader = getXmlReader();
        InputStream stream = source.getInputStream();
        /*
         * Everything after the stream is opened runs under the finally clause
         * so that the stream is released even if preparing the InputSource
         * fails.
         */
        try {
            /* Create the InputSource object containing the XML entity to be parsed */
            InputSource saxInputSource = new InputSource(stream);
            /* Provide the BASE path of the source file, in case relative paths need to be resolved */
            if (source instanceof URLSource) {
                saxInputSource.setSystemId(((URLSource) source).getSourceName());
            }
            else {
                File sourceFile = source.getFile();
                if (sourceFile != null) {
                    saxInputSource.setSystemId(sourceFile.getAbsolutePath());
                }
            }
            /* Here's where the SAX parsing takes place */
            try {
                reader.parse(saxInputSource);
                xmlModule.wellFormed = Validity.True;
            }
            catch (SAXException e) {
                recordParseFailure(jhove2, Validity.False,
                        "org.jhove2.module.format.xml.XmlModule.saxParserMessage",
                        e);
            }
            catch (IOException e) {
                recordParseFailure(jhove2, Validity.Undetermined,
                        "org.jhove2.module.format.xml.XmlModule.entityReferenceNotResolved",
                        e);
            }
        }
        finally {
            if (stream != null) {
                stream.close();
            }
        }
    }

    /**
     * Records a failed parse on the invoking XmlModule: sets both the
     * well-formedness and validity status and appends an ERROR message whose
     * single argument is the exception's message text.
     *
     * @param jhove2
     *            JHOVE2 framework object, used for message configuration
     * @param outcome
     *            the status to assign to both wellFormed and validity
     * @param messageCode
     *            the message key identifying the kind of failure
     * @param cause
     *            the exception raised by the parser
     * @throws JHOVE2Exception
     *             if the message cannot be created
     */
    private void recordParseFailure(JHOVE2 jhove2, Validity outcome,
            String messageCode, Exception cause)
        throws JHOVE2Exception
    {
        xmlModule.wellFormed = outcome;
        xmlModule.validity = outcome;
        Object[] messageArgs = new Object[]{cause.getMessage()};
        xmlModule.saxParserMessages.add(new Message(Severity.ERROR,
                Context.OBJECT,
                messageCode,
                messageArgs, jhove2.getConfigInfo()));
    }
}