/* The contents of this file are subject to the license and copyright terms
* detailed in the license directory at the root of the source tree (also
* available online at http://fedora-commons.org/license/).
*/
package org.fcrepo.server.validation;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
import javax.xml.XMLConstants;
import javax.xml.validation.SchemaFactory;
import org.fcrepo.common.Constants;
import org.fcrepo.server.errors.GeneralException;
import org.fcrepo.server.errors.ObjectValidityException;
import org.fcrepo.server.errors.ServerException;
import org.fcrepo.server.storage.types.Validation;
import org.fcrepo.utilities.FileUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
/**
* The implementation of the digital object validation module (see
* DOValidator.class and DOValidatorModule.class). The validator operates on
* digital object XML files encoded in one of the Fedora-supported encoding
* formats (i.e., FOXML, Fedora METS, and possibly others in the future). The
* following types of validation can be run:
*
* <pre>
* 0=VALIDATE_ALL : All validation will be done.
* 1=VALIDATE_XML_SCHEMA : the digital object will be validated against
* the appropriate XML Schema. An ObjectValidityException
* will be thrown if the object fails the schema test.
* 2=VALIDATE_SCHEMATRON : the digital object will be validated
* against a set of rules expressed by a Schematron schema.
* These rules are beyond what can be expressed in XML Schema.
* The Schematron schema expresses rules for different phases
* of the object. There are rules appropriate to a digital
* object when it is first ingested into the repository
* (ingest phase). There are additional rules that must be met
* before a digital object is considered valid for permanent
* storage in the repository (completed phase). These rules
* pertain to aspects of the object that are system assigned,
* such as created dates and state codes.
* An ObjectValidityException will be thrown if the object fails
* the Fedora rules test.
* </pre>
*
* @author Sandy Payette
* @version $Id$
*/
public class DOValidatorImpl
implements DOValidator {
private static final Logger logger =
LoggerFactory.getLogger(DOValidatorImpl.class);
protected static boolean debug = true;
/** Configuration variable: tempdir is a working area for validation */
protected static String tempDir = null;
/**
* Configuration variable: xmlSchemaPath is the location of the XML Schema.
*/
protected static String xmlSchemaPath = null;
/**
* Configuration variable: schematronPreprocessorPath is the Schematron
* stylesheet that is used to transform a Schematron schema into a
* validating stylesheet based on the rules in the schema.
*/
protected static String schematronPreprocessorPath = null;
/**
* Configuration variable: schematronSchemaPath is the Schematron schema
* that expresses Fedora-specific validation rules. It is transformed into a
* validating stylesheet by the Schematron preprocessing stylesheet.
*/
protected static String schematronSchemaPath = null;
/**
* Map of XML Schemas configured with the Fedora Repository. key = format
* uri value = schema file path
*/
private final Map<String, DOValidatorXMLSchema> m_xmlSchemaMap;
/**
* Map of Schematron rule schemas configured with the Fedora Repository. key =
* format uri value = schema file path
*/
private final Map<String, String> m_ruleSchemaMap;
private final File m_tempDir;
private final String m_absoluteTempPath;
/**
* <p>
* Constructs a new DOValidatorImpl to support all forms of digital object
* validation, using specified values for configuration values.
* </p>
* <p>
* Any parameter may be given as null, in which case the default value is
* assumed.
* </p>
*
* @param tempDir
* Working area for validation, default is <i>temp/</i>
* @param xmlSchemaMap
* Location of XML Schemas (W3 Schema) configured with Fedora (see
* Fedora.fcfg). Current options are <i>xsd/foxml1-1.xsd</i> for
* FOXML or <i>xsd/mets-fedora-ext1-1.xsd</i> for METS (Fedora
* extension)
* @param schematronPreprocessorPath
* Location of the Schematron pre-processing stylesheet configured
* with Fedora.
* @param ruleSchemaMap
* Location of rule schemas (Schematron), configured with Fedora (see
* Fedora.fcfg). Current options are <i>schematron/foxmlRules1-0.xml</i>
* for FOXML or <i>schematron/metsExtRules1-0.xml</i> for METS
* @throws ServerException
* If construction fails for any reason.
*/
public DOValidatorImpl(String tempDir,
Map<String, String> xmlSchemaMap,
String schematronPreprocessorPath,
Map<String, String> ruleSchemaMap)
throws ServerException {
logger.debug("VALIDATE: Initializing object validation...");
m_xmlSchemaMap = new HashMap<String, DOValidatorXMLSchema>(xmlSchemaMap.size());
SchemaFactory schemaFactory =
SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);
for (Entry<String,String> entry: xmlSchemaMap.entrySet()) {
try {
m_xmlSchemaMap.put(
entry.getKey(),
new DOValidatorXMLSchema(
schemaFactory.newSchema(new File(entry.getValue()))));
} catch (SAXException e) {
throw new GeneralException("Cannot read or create schema at " +
entry.getValue(),e);
}
}
m_ruleSchemaMap = ruleSchemaMap;
if (tempDir == null) {
throw new ObjectValidityException("[DOValidatorImpl] ERROR in constructor: "
+ "tempDir is null.");
}
if (schematronPreprocessorPath == null) {
throw new ObjectValidityException("[DOValidatorImpl] ERROR in constructor. "
+ "schematronPreprocessorPath is null.");
}
m_tempDir = new File(tempDir);
if (!m_tempDir.exists() && !m_tempDir.mkdirs()) {
throw new GeneralException("Cannot read or create tempDir at " +
tempDir);
}
m_absoluteTempPath = m_tempDir.getAbsolutePath();
DOValidatorImpl.tempDir = tempDir;
DOValidatorImpl.schematronPreprocessorPath = schematronPreprocessorPath;
}
/**
* <p>
* Validates a digital object.
* </p>
*
* @param objectAsStream
* The digital object provided as a stream.
* @param format
* The format URI of the object serialization.
* @param validationType
* The level of validation to perform on the digital object. This is
* an integer from 0-2 with the following meanings: 0 = VALIDATE_ALL
* (do all validation levels) 1 = VALIDATE_XML_SCHEMA (perform only
* XML Schema validation) 2 = VALIDATE_SCHEMATRON (perform only
* Schematron Rules validation)
* @param phase
* The stage in the workflow for which the validation should be
* contextualized. "ingest" = the object is encoded for ingest into
* the repository "store" = the object is encoded with all final
* assignments so that it is appropriate for storage as the
* authoritative serialization of the object.
* @throws ObjectValidityException
* If validation fails for any reason.
* @throws GeneralException
* If validation fails for any reason.
*/
public void validate(InputStream objectAsStream,
String format,
int validationType,
String phase) throws ObjectValidityException,
GeneralException {
if (validationType == VALIDATE_NONE) return;
checkFormat(format);
switch (validationType) {
case VALIDATE_NONE:
break;
case VALIDATE_ALL:
try {
// FIXME We need to use the object Inputstream twice, once for XML
// Schema validation and once for Schematron validation.
// We may want to consider implementing some form of a rewindable
// InputStream. For now, I will just write the object InputStream to
// disk so I can read it multiple times.
if (logger.isDebugEnabled()) {
logger.debug(
"Validating streams against schema and schematron" +
" requires caching tempfiles to disk; consider" +
"calling validations seperately with a buffered" +
"InputStream"
);
}
File objectAsFile = streamtoFile(objectAsStream);
validate(objectAsFile, format, validationType, phase);
} catch (IOException ioe) {
throw new ObjectValidityException("[DOValidatorImpl]: "
+ "ERROR in validate(InputStream objectAsStream...). " + ioe.getMessage());
}
break;
case VALIDATE_XML_SCHEMA:
validateXMLSchema(objectAsStream, m_xmlSchemaMap.get(format));
break;
case VALIDATE_SCHEMATRON:
validateByRules(objectAsStream,
m_ruleSchemaMap.get(format),
schematronPreprocessorPath,
phase);
break;
default:
String msg = "VALIDATE: ERROR - missing or invalid validationType";
logger.error(msg);
throw new GeneralException("[DOValidatorImpl] " + msg + ":"
+ validationType);
}
return;
}
/**
* <p>
* Validates a digital object.
* </p>
*
* @param objectAsFile
* The digital object provided as a file.
* @param validationType
* The level of validation to perform on the digital object. This is
* an integer from 0-2 with the following meanings: 0 = VALIDATE_ALL
* (do all validation levels) 1 = VALIDATE_XML_SCHEMA (perform only
* XML Schema validation) 2 = VALIDATE_SCHEMATRON (perform only
* Schematron Rules validation)
* @param phase
* The stage in the work flow for which the validation should be
* contextualized. "ingest" = the object is in the submission format
* for the ingest phase "store" = the object is in the authoritative
* format for the final storage phase
* @throws ObjectValidityException
* If validation fails for any reason.
* @throws GeneralException
* If validation fails for any reason.
*/
public void validate(File objectAsFile,
String format,
int validationType,
String phase) throws ObjectValidityException,
GeneralException {
logger.debug("VALIDATE: Initiating validation: phase={} format={}",
phase, format);
if (validationType == VALIDATE_NONE) return;
checkFormat(format);
if (format.equals(Constants.ATOM_ZIP1_1.uri)) {
// If the object serialization is a Zip file with an atom
// manifest, extract the manifest for validation.
try {
File manifest = null;
ZipInputStream zip = new ZipInputStream(new FileInputStream(objectAsFile));
ZipEntry entry;
while ((entry = zip.getNextEntry()) != null) {
if (entry.getName().equals("atommanifest.xml")) {
manifest = streamtoFile(zip);
break;
}
}
zip.close();
objectAsFile = manifest;
} catch(IOException e) {
throw new GeneralException(e.getMessage(), e);
}
}
try {
FileInputStream objectAsStream = new FileInputStream(objectAsFile);
if (validationType == VALIDATE_ALL) {
validateByRules(objectAsStream,
m_ruleSchemaMap.get(format),
schematronPreprocessorPath,
phase);
validateXMLSchema(new FileInputStream(objectAsFile),
m_xmlSchemaMap.get(format));
} else if (validationType == VALIDATE_XML_SCHEMA) {
validateXMLSchema(objectAsStream, m_xmlSchemaMap.get(format));
} else if (validationType == VALIDATE_SCHEMATRON) {
validateByRules(objectAsStream,
m_ruleSchemaMap.get(format),
schematronPreprocessorPath,
phase);
} else {
String msg = "VALIDATE: ERROR - missing or invalid validationType";
logger.error(msg);
throw new GeneralException("[DOValidatorImpl] " + msg + ":"
+ validationType);
}
} catch (IOException ioe) {
logger.error("VALIDATE: ERROR - failed validations.", ioe);
throw new ObjectValidityException("[DOValidatorImpl]: validate(File input...). "
+ ioe.getMessage());
} finally {
cleanUp(objectAsFile);
}
}
private void checkFormat(String format) throws ObjectValidityException {
if (!m_xmlSchemaMap.containsKey(format)) {
Validation validation = new Validation("unknown");
String problem = "Unsupported format: ".concat(format);
validation.setObjectProblems(
Collections.singletonList(problem));
throw new ObjectValidityException(problem, validation);
}
}
/**
* Do XML Schema validation on the Fedora object.
*
* @param objectAsFile
* The digital object provided as a file.
* @throws ObjectValidityException
* If validation fails for any reason.
* @throws GeneralException
* If validation fails for any reason.
*/
private void validateXMLSchema(InputStream objectAsStream, DOValidatorXMLSchema xsv)
throws ObjectValidityException, GeneralException {
try {
xsv.validate(objectAsStream);
} catch (ObjectValidityException e) {
logger.error("VALIDATE: ERROR - failed XML Schema validation.", e);
throw e;
} catch (Exception e) {
logger.error("VALIDATE: ERROR - failed XML Schema validation.", e);
throw new ObjectValidityException("[DOValidatorImpl]: validateXMLSchema. "
+ e.getMessage());
}
logger.debug("VALIDATE: SUCCESS - passed XML Schema validation.");
}
/**
* Do Schematron rules validation on the Fedora object. Schematron
* validation tests the object against a set of rules expressed using XPATH
* in a Schematron schema. These test for things that are beyond what can be
* expressed using XML Schema.
*
* @param objectAsFile
* The digital object provided as a file.
* @param schemaPath
* Location of the Schematron rules file.
* @param preprocessorPath
* Location of Schematron preprocessing stylesheet
* @param phase
* The workflow phase (ingest, store) for the object.
* @throws ObjectValidityException
* If validation fails for any reason.
* @throws GeneralException
* If validation fails for any reason.
*/
private void validateByRules(InputStream objectAsStream,
String ruleSchemaPath,
String preprocessorPath,
String phase) throws ObjectValidityException,
GeneralException {
try {
DOValidatorSchematron schtron =
new DOValidatorSchematron(ruleSchemaPath,
preprocessorPath,
phase);
schtron.validate(objectAsStream);
} catch (ObjectValidityException e) {
logger.error("VALIDATE: ERROR - failed Schematron rules validation.",
e);
throw e;
} catch (Exception e) {
logger.error("VALIDATE: ERROR - failed Schematron rules validation.",
e);
throw new ObjectValidityException("[DOValidatorImpl]: "
+ "failed Schematron rules validation. " + e.getMessage());
}
logger.debug("VALIDATE: SUCCESS - passed Schematron rules validation.");
}
private File streamtoFile(InputStream objectAsStream)
throws IOException {
File objectAsFile = null;
try {
objectAsFile = File.createTempFile("validation", "tmp", m_tempDir);
FileOutputStream fos = new FileOutputStream(objectAsFile);
FileUtils.copy(objectAsStream, fos);
return objectAsFile;
} catch (IOException e) {
if (objectAsFile != null && objectAsFile.exists()) {
objectAsFile.delete();
}
throw e;
}
}
// Distinguish temporary object files from real object files
// that were passed in for validation. This is a bit ugly as it stands,
// but it should only blow away files in the temp directory.
private void cleanUp(File f) {
if (f != null && f.getParentFile() != null) {
if (m_absoluteTempPath.equalsIgnoreCase(f
.getParentFile().getAbsolutePath())) {
f.delete();
}
}
}
}