/**
* JHOVE2 - Next-generation architecture for format-aware characterization
* <p>
* Copyright (c) 2010 by The Regents of the University of California, Ithaka
* Harbors, Inc., and The Board of Trustees of the Leland Stanford Junior
* University. All rights reserved.
* </p>
* <p>
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* </p>
* <ul>
* <li>Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.</li>
* <li>Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.</li>
* <li>Neither the name of the University of California/California Digital
* Library, Ithaka Harbors/Portico, or Stanford University, nor the names of its
* contributors may be used to endorse or promote products derived from this
* software without specific prior written permission.</li>
* </ul>
* <p>
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
* </p>
*/
package org.jhove2.module.format.sgml;
import java.io.EOFException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.jhove2.annotation.ReportableProperty;
import org.jhove2.core.JHOVE2;
import org.jhove2.core.JHOVE2Exception;
import org.jhove2.core.Message;
import org.jhove2.core.Message.Context;
import org.jhove2.core.Message.Severity;
import org.jhove2.core.format.Format;
import org.jhove2.core.io.Input;
import org.jhove2.core.source.Source;
import org.jhove2.module.format.BaseFormatModule;
import org.jhove2.module.format.Validator;
import org.jhove2.persist.FormatModuleAccessor;
import com.sleepycat.persist.model.NotPersistent;
import com.sleepycat.persist.model.Persistent;
/**
* JHOVE2 SGML module. This module will parse and validate an SGML document instance,
* and capture selected characterization information.
*
* @author smorrissey
*/
@Persistent
public class SgmlModule
extends BaseFormatModule
implements Validator
{
/** Directory module version identifier. */
public static final String VERSION = "2.0.0";
/** Directory module release date. */
public static final String RELEASE = "2010-09-10";
/** Directory module rights statement. */
public static final String RIGHTS = "Copyright 2010 by Ithaka Harbors, Inc."
+ "Available under the terms of the BSD license.";
/** Module validation coverage. */
public static final Coverage COVERAGE = Coverage.Inclusive;
/** SGML validation status. */
protected Validity validity;
/** parser directive -- should sgmlnorm be run in order to extract doctype statement; default is false */
protected boolean shouldFindDoctype;
/** Parser engine for parsing SGML files and extracting significant properties */
@NotPersistent
protected SgmlParser sgmlParser;
/** Container for SGML document properties extracted by parser */
protected SgmlDocumentProperties documentProperties;
/** Error messages generated by SgmlParser */
protected List<Message> SgmlParserErrorMessages = new ArrayList<Message>();
/** Warning messages generated by SgmlParser */
protected List<Message> SgmlParserWarningMessages = new ArrayList<Message>();
/**
* Instantiates a new SgmlModule instance.
*
* @param format
* the Format object
*/
public SgmlModule(Format format,
FormatModuleAccessor formatModuleAccessor) {
super(VERSION, RELEASE, RIGHTS, format, formatModuleAccessor);
this.validity = Validity.Undetermined;
}
/** Instantiate a new <code>SgmlModule</code>. */
public SgmlModule() {
this(null, null);
}
/** Parse the format.
* @param jhove2 JHOVE2 framework
* @param source Source unit
* @param input Source input
* @return Number of bytes consumed
*/
@Override
public long parse(JHOVE2 jhove2, Source source, Input input)
throws EOFException, IOException, JHOVE2Exception
{
this.documentProperties = sgmlParser.parseFile(this, jhove2, source);
if (this.documentProperties != null){
if (this.isShouldFindDoctype()){
sgmlParser.determineDoctype(this, jhove2, source);
}
}
this.sgmlParser.cleanUp();
this.sgmlParser = null;
return 0;
}
/** Validate the SGML source unit.
* @param jhove2 JHOVE2 framework
* @param source SGML source unit
* @param input SGML source input
* @see org.jhove2.module.format.Validator#validate(org.jhove2.core.JHOVE2, org.jhove2.core.source.Source, org.jhove2.core.io.Input)
* There are no profiles of the SGML format; this method will return the validation status of the SGML document
*/
@Override
public Validity validate(JHOVE2 jhove2, Source source, Input input)
throws JHOVE2Exception
{
if (this.getDocumentProperties() != null){
if (this.getDocumentProperties().getParseErrors() != null &&
this.getDocumentProperties().getParseErrors().size() > 0){
this.validity = Validity.Undetermined;
Message message = new Message(
Severity.ERROR,
Context.PROCESS,
"org.jhove2.module.format.sgml.SgmlModule.OpenSpParseErrorsDetected",
jhove2.getConfigInfo());
this.getSgmlParserErrorMessages().add(message);
}
else {
if (this.getSgmlParserErrorMessages() != null &&
this.getSgmlParserErrorMessages().size()>0){
this.validity = Validity.Undetermined;
}
else if (this.getDocumentProperties().isSgmlValid()){
this.validity = Validity.True;
}
else {
this.validity = Validity.False;
}
}
}
return this.validity;
}
/** Get validation coverage.
* @see org.jhove2.module.format.Validator#getCoverage()
*/
@Override
public Coverage getCoverage() {
return COVERAGE;
}
/** Get validation status.
* @see org.jhove2.module.format.Validator#isValid()
*/
@Override
public Validity isValid() {
return this.validity;
}
/**
* @return the sgmlParser
*/
public SgmlParser getSgmlParser() {
return this.sgmlParser;
}
/**
* @param sgmlParser the sgmlParser to set
*/
public void setSgmlParser(SgmlParser sgmlParser) {
this.sgmlParser = sgmlParser;
}
/**
* @return the findDoctype
*/
@ReportableProperty(order = 25, value = "Parser setting: Run normalizer to construct DOCTYPE statement")
public boolean isShouldFindDoctype() {
return shouldFindDoctype;
}
/**
* @param findDoctype the findDoctype to set
*/
public void setShouldFindDoctype(boolean findDoctype) {
this.shouldFindDoctype = findDoctype;
}
/**
* @return the documentProperties
*/
@ReportableProperty(order = 26, value = "SGML document properties")
public SgmlDocumentProperties getDocumentProperties() {
return documentProperties;
}
/**
* @param documentProperties the documentProperties to set
*/
public void setDocumentProperties(SgmlDocumentProperties documentProperties) {
this.documentProperties = documentProperties;
}
/**
* @return the sgmlParserErrorMessages
*/
@ReportableProperty(order = 28, value = "SgmlParser Error Messages")
public List<Message> getSgmlParserErrorMessages() {
return SgmlParserErrorMessages;
}
/**
* @return the sgmlParserWarningMessages
*/
@ReportableProperty(order = 28, value = "SgmlParser Warning Messages")
public List<Message> getSgmlParserWarningMessages() {
return SgmlParserWarningMessages;
}
/**
* @param sgmlParserErrorMessages the sgmlParserErrorMessages to set
*/
public void setSgmlParserErrorMessages(List<Message> sgmlParserErrorMessages) {
SgmlParserErrorMessages = sgmlParserErrorMessages;
}
/**
* @param sgmlParserWarningMessages the sgmlParserWarningMessages to set
*/
public void setSgmlParserWarningMessages(List<Message> sgmlParserWarningMessages) {
SgmlParserWarningMessages = sgmlParserWarningMessages;
}
}