/* The contents of this file are subject to the license and copyright terms * detailed in the license directory at the root of the source tree (also * available online at http://fedora-commons.org/license/). */ package fedora.server.validation; import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import java.util.Map; import java.util.zip.ZipEntry; import java.util.zip.ZipInputStream; import org.apache.log4j.Logger; import fedora.common.Constants; import fedora.server.errors.GeneralException; import fedora.server.errors.ObjectValidityException; import fedora.server.errors.ServerException; import fedora.utilities.FileUtils; /** * The implementation of the digital object validation module (see * DOValidator.class and DOValidatorModule.class). The validator operates on * digital object XML files encoded in one of the Fedora-supported encoding * formats (i.e., FOXML, Fedora METS, and possibly others in the future). The * following types of validation can be run: * * <pre> * 0=VALDIATE_ALL : All validation will be done. * 1=VALIDATE_XML_SCHEMA : the digital object will be validated against * the the appropriate XML Schema. An ObjectValidityException * will be thrown if the object fails the schema test. * 2=VALIDATE_SCHEMATRON : the digital object will be validated * against a set of rules expressed by a Schematron schema. * These rules are beyond what can be expressed in XML Schema. * The Schematron schema expresses rules for different phases * of the object. There are rules appropriate to a digital * object when it is first ingested into the repository * (ingest phase). There are additional rules that must be met * before a digital object is considered valid for permanent * storage in the repository (completed phase). These rules * pertain to aspects of the object that are system assigned, * such as created dates and state codes. * An ObjectValidityException will be thrown if the object fails * the Fedora rules test. * </pre> * * @author Sandy Payette * @version $Id$ */ public class DOValidatorImpl implements DOValidator { /** Logger for this class. */ private static final Logger LOG = Logger.getLogger(DOValidatorImpl.class.getName()); protected static boolean debug = true; public static final int VALIDATE_ALL = 0; public static final int VALIDATE_XML_SCHEMA = 1; public static final int VALIDATE_SCHEMATRON = 2; /** Configuration variable: tempdir is a working area for validation */ protected static String tempDir = null; /** * Configuration variable: xmlSchemaPath is the location of the XML Schema. */ protected static String xmlSchemaPath = null; /** * Configuration variable: schematronPreprocessorPath is the Schematron * stylesheet that is used to transform a Schematron schema into a * validating stylesheet based on the rules in the schema. */ protected static String schematronPreprocessorPath = null; /** * Configuration variable: schematronSchemaPath is the Schematron schema * that expresses Fedora-specific validation rules. It is transformed into a * validating stylesheet by the Schematron preprocessing stylesheet. */ protected static String schematronSchemaPath = null; /** * Map of XML Schemas configured with the Fedora Repository. key = format * uri value = schema file path */ private final Map<String, String> m_xmlSchemaMap; /** * Map of Schematron rule schemas configured with the Fedora Repository. key = * format uri value = schema file path */ private final Map<String, String> m_ruleSchemaMap; /** * <p> * Constructs a new DOValidatorImpl to support all forms of digital object * validation, using specified values for configuration values. * </p> * <p> * Any parameter may be given as null, in which case the default value is * assumed. * </p> * * @param tempDir * Working area for validation, default is <i>temp/</i> * @param xmlSchemaMap * Location of XML Schemas (W3 Schema) configured with Fedora (see * Fedora.fcfg). Current options are <i>xsd/foxml1-1.xsd</i> for * FOXML or <i>xsd/mets-fedora-ext1-1.xsd</i> for METS (Fedora * extension) * @param schematronPreprocessorPath * Location of the Schematron pre-processing stylesheet configured * with Fedora.</i> * @param ruleSchemaMap * Location of rule schemas (Schematron), configured with Fedora (see * Fedora.fcfg). Current options are <i>schematron/foxmlRules1-0.xml</i> * for FOXML or <i>schematron/metsExtRules1-0.xml</i> for METS * @throws ServerException * If construction fails for any reason. */ public DOValidatorImpl(String tempDir, Map<String, String> xmlSchemaMap, String schematronPreprocessorPath, Map<String, String> ruleSchemaMap) throws ServerException { LOG.debug("VALIDATE: Initializing object validation..."); m_xmlSchemaMap = xmlSchemaMap; m_ruleSchemaMap = ruleSchemaMap; if (tempDir == null) { throw new ObjectValidityException("[DOValidatorImpl] ERROR in constructor: " + "tempDir is null."); } if (schematronPreprocessorPath == null) { throw new ObjectValidityException("[DOValidatorImpl] ERROR in constructor. " + "schematronPreprocessorPath is null."); } DOValidatorImpl.tempDir = tempDir; DOValidatorImpl.schematronPreprocessorPath = schematronPreprocessorPath; } /** * <p> * Validates a digital object. * </p> * * @param objectAsStream * The digital object provided as a stream. * @param format * The format URI of the object serialization. * @param validationType * The level of validation to perform on the digital object. This is * an integer from 0-2 with the following meanings: 0 = VALIDATE_ALL * (do all validation levels) 1 = VALIDATE_XML_SCHEMA (perform only * XML Schema validation) 2 = VALIDATE_SCHEMATRON (perform only * Schematron Rules validation) * @param phase * The stage in the workflow for which the validation should be * contextualized. "ingest" = the object is encoded for ingest into * the repository "store" = the object is encoded with all final * assignments so that it is appropriate for storage as the * authoritative serialization of the object. * @throws ObjectValidityException * If validation fails for any reason. * @throws GeneralException * If validation fails for any reason. */ public void validate(InputStream objectAsStream, String format, int validationType, String phase) throws ObjectValidityException { checkFormat(format); // FIXME We need to use the object Inputstream twice, once for XML // Schema validation and once for Schematron validation. // We may want to consider implementing some form of a rewindable // InputStream. For now, I will just write the object InputStream to // disk so I can read it multiple times. try { File objectAsFile = streamtoFile(tempDir, objectAsStream); validate(objectAsFile, format, validationType, phase); } catch (ObjectValidityException e) { throw e; } catch (Exception e) { throw new ObjectValidityException("[DOValidatorImpl]: " + "ERROR in validate objectAsStream. " + e.getMessage()); } } /** * <p> * Validates a digital object. * </p> * * @param objectAsFile * The digital object provided as a file. * @param validationType * The level of validation to perform on the digital object. This is * an integer from 0-2 with the following meanings: 0 = VALIDATE_ALL * (do all validation levels) 1 = VALIDATE_XML_SCHEMA (perform only * XML Schema validation) 2 = VALIDATE_SCHEMATRON (perform only * Schematron Rules validation) * @param phase * The stage in the work flow for which the validation should be * contextualized. "ingest" = the object is in the submission format * for the ingest phase "store" = the object is in the authoritative * format for the final storage phase * @throws ObjectValidityException * If validation fails for any reason. * @throws GeneralException * If validation fails for any reason. */ public void validate(File objectAsFile, String format, int validationType, String phase) throws ObjectValidityException, GeneralException { LOG.debug("Validation phase=" + phase + " format=" + format); LOG.debug("VALIDATE: Initiating validation: " + " phase=" + phase + " format=" + format); checkFormat(format); if (format.equals(Constants.ATOM_ZIP1_1.uri)) { // If the object serialization is a Zip file with an atom // manifest, extract the manifest for validation. try { File manifest = null; ZipInputStream zip = new ZipInputStream(new FileInputStream(objectAsFile)); ZipEntry entry; while ((entry = zip.getNextEntry()) != null) { if (entry.getName().equals("atommanifest.xml")) { manifest = streamtoFile(tempDir, zip); break; } } zip.close(); objectAsFile = manifest; } catch(IOException e) { throw new GeneralException(e.getMessage(), e); } } if (validationType == VALIDATE_ALL) { validateByRules(objectAsFile, m_ruleSchemaMap.get(format), schematronPreprocessorPath, phase); validateXMLSchema(objectAsFile, m_xmlSchemaMap.get(format)); } else if (validationType == VALIDATE_XML_SCHEMA) { validateXMLSchema(objectAsFile, m_xmlSchemaMap.get(format)); } else if (validationType == VALIDATE_SCHEMATRON) { validateByRules(objectAsFile, m_ruleSchemaMap.get(format), schematronPreprocessorPath, phase); } else { String msg = "VALIDATE: ERROR - missing or invalid validationType"; LOG.error(msg); cleanUp(objectAsFile); throw new GeneralException("[DOValidatorImpl] " + msg + ":" + validationType); } cleanUp(objectAsFile); } private void checkFormat(String format) throws ObjectValidityException { if (!m_xmlSchemaMap.containsKey(format)) { throw new ObjectValidityException("Unsupported format: " + format); } } /** * Do XML Schema validation on the Fedora object. * * @param objectAsFile * The digital object provided as a file. * @throws ObjectValidityException * If validation fails for any reason. * @throws GeneralException * If validation fails for any reason. */ private void validateXMLSchema(File objectAsFile, String xmlSchemaPath) throws ObjectValidityException, GeneralException { try { DOValidatorXMLSchema xsv = new DOValidatorXMLSchema(xmlSchemaPath); xsv.validate(objectAsFile); } catch (ObjectValidityException e) { LOG.error("VALIDATE: ERROR - failed XML Schema validation.", e); cleanUp(objectAsFile); throw e; } catch (Exception e) { LOG.error("VALIDATE: ERROR - failed XML Schema validation.", e); cleanUp(objectAsFile); throw new ObjectValidityException("[DOValidatorImpl]: validateXMLSchema. " + e.getMessage()); } LOG.debug("VALIDATE: SUCCESS - passed XML Schema validation."); } /** * Do Schematron rules validation on the Fedora object. Schematron * validation tests the object against a set of rules expressed using XPATH * in a Schematron schema. These test for things that are beyond what can be * expressed using XML Schema. * * @param objectAsFile * The digital object provided as a file. * @param schemaPath * Location of the Schematron rules file. * @param preprocessorPath * Location of Schematron preprocessing stylesheet * @param phase * The workflow phase (ingest, store) for the object. * @throws ObjectValidityException * If validation fails for any reason. * @throws GeneralException * If validation fails for any reason. */ private void validateByRules(File objectAsFile, String ruleSchemaPath, String preprocessorPath, String phase) throws ObjectValidityException, GeneralException { try { DOValidatorSchematron schtron = new DOValidatorSchematron(ruleSchemaPath, preprocessorPath, phase); schtron.validate(objectAsFile); } catch (ObjectValidityException e) { LOG.error("VALIDATE: ERROR - failed Schematron rules validation.", e); cleanUp(objectAsFile); throw e; } catch (Exception e) { LOG.error("VALIDATE: ERROR - failed Schematron rules validation.", e); cleanUp(objectAsFile); throw new ObjectValidityException("[DOValidatorImpl]: " + "failed Schematron rules validation. " + e.getMessage()); } LOG.debug("VALIDATE: SUCCESS - passed Schematron rules validation."); } private File streamtoFile(String dirname, InputStream objectAsStream) throws IOException { File objectAsFile = null; try { File tempDir = new File(dirname); File fileLocation = null; if (tempDir.exists() || tempDir.mkdirs()) { fileLocation = File.createTempFile("validation", "tmp", tempDir); FileOutputStream fos = new FileOutputStream(fileLocation); if (FileUtils.copy(objectAsStream, fos)) { objectAsFile = fileLocation; } } } catch (IOException e) { if (objectAsFile.exists()) { objectAsFile.delete(); } throw e; } return objectAsFile; } // Distinguish temporary object files from real object files // that were passed in for validation. This is a bit ugly as it stands, // but it should only blow away files in the temp directory. private void cleanUp(File f) { if (f.getParentFile() != null) { if ((new File(tempDir)).getAbsolutePath().equalsIgnoreCase(f .getParentFile().getAbsolutePath())) { f.delete(); } } } }