/* The contents of this file are subject to the license and copyright terms
* detailed in the license directory at the root of the source tree (also
* available online at http://fedora-commons.org/license/).
*/
package fedora.server.validation;
import java.io.InputStream;
import java.net.URI;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.TimeZone;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.apache.axis.types.NCName;
import org.apache.log4j.Logger;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
import fedora.common.Constants;
import fedora.common.FaultException;
import fedora.common.PID;
import fedora.server.errors.ValidationException;
/**
* Validates the RDF/XML content of the RELS-EXT and RELS-INT datastreams.
* <p>
* The following restrictions are enforced:
* <ul>
* <li>The RDF must follow a prescribed RDF/XML authoring style where there is
* exactly ONE (RELS-EXT) or one or more (RELS-INT) subjects, each encoded as
* an RDF <code>&lt;Description&gt;</code> element whose RDF <code>about</code>
* attribute contains either a digital object URI (RELS-EXT) or a datastream
* URI (RELS-INT). The sub-elements are the relationship properties of the
* subject. Each relationship may refer to any resource (identified by URI) via
* an RDF <code>resource</code> attribute, or to a literal. Relationship
* assertions can be from the default Fedora relationship ontology, or from
* other namespaces.
* <ul>
* <li>RELS-EXT example:
*
* <pre>&lt;rdf:Description about="info:fedora/demo:5"&gt;
* &lt;fedora:isMemberOfCollection resource="info:fedora/demo:100"/&gt;
* &lt;nsdl:isAugmentedBy resource="info:fedora/demo:333"/&gt;
* &lt;example:source resource="http://example.org/bsmith/article1.html"/&gt;
* &lt;example:primaryAuthor&gt;Bob Smith&lt;/example:primaryAuthor&gt;
* &lt;/rdf:Description&gt;
* </pre>
* </li>
* <li>RELS-INT example:
*
* <pre>&lt;rdf:Description about="info:fedora/demo:5/DS1"&gt;
* &lt;nsdl:isAugmentedBy resource="info:fedora/demo:333"/&gt;
* &lt;example:source resource="http://example.org/bsmith/article1.html"/&gt;
* &lt;example:primaryAuthor&gt;Bob Smith&lt;/example:primaryAuthor&gt;
* &lt;/rdf:Description&gt;
* </pre>
* </li>
* </ul>
* </li>
* <li>For RELS-EXT, there must be only ONE <code>&lt;rdf:Description&gt;</code> element.</li>
* <li>There must be NO nesting of assertions. In terms of XML depth, the RDF
* root element is considered depth of 0. Then, the
* <code>&lt;rdf:Description&gt;</code> element must be at depth of 1, and the
* relationship properties must exist at depth of 2. That's it.</li>
* <li>For RELS-EXT, the RDF <code>about</code> attribute of the RDF
* <code>&lt;Description&gt;</code> must be the URI of the digital object in
* which the RELS-EXT datastream resides. This means that all relationships are
* FROM "this" object to other objects.</li>
* <li>For RELS-INT, the RDF <code>about</code> attribute(s) of the RDF
* <code>&lt;Description&gt;</code> element(s) must be valid URIs of datastreams
* for the digital object in which the RELS-INT datastream resides. The
* datastreams do not actually have to exist, but these URIs must be
* syntactically valid. This means that all relationships are FROM datastreams
* in "this" object to other objects.</li>
* <li>If the target of the statement is a resource (identified by a URI), the
* RDF <code>resource</code> attribute must specify a syntactically valid,
* absolute URI.</li>
* <li>For RELS-EXT, there must NOT be any assertion of properties from the DC
* namespace.</li>
* <li>There must NOT be any assertions of properties from the Fedora object
* properties namespaces (model and view), with the following exceptions.
* For RELS-EXT only:
*
* <pre>
* fedora-model:hasService
* fedora-model:hasModel
* fedora-model:isDeploymentOf
* fedora-model:isContractorOf
* </pre>
*
* For RELS-INT only, the single fedora-model predicate identified by
* <code>MODEL.DOWNLOAD_FILENAME</code> is allowed.
* <p>
* These assertions are allowed in the respective datastream, but all
* others from the <code>fedora-model</code> and <code>fedora-view</code>
* namespaces are inferred from values expressed elsewhere in the
* digital object, and we do not want duplication.
* </li>
* </ul>
*
* @author Sandy Payette
* @author Eddie Shin
* @author Chris Wilper
* @author Stephen Bayliss
*/
public class RelsValidator
        extends DefaultHandler
        implements Constants {

    /** Logger for this class. */
    private static final Logger LOG =
            Logger.getLogger(RelsValidator.class.getName());

    private static final String RELS_EXT = "RELS-EXT";

    private static final String RELS_INT = "RELS-INT";

    /**
     * Parser features that are switched off so that the content being
     * validated cannot make the parser resolve external entities or fetch an
     * external DTD.
     */
    private static final String[] EXTERNAL_ENTITY_FEATURES =
            {
                    "http://xml.org/sax/features/external-general-entities",
                    "http://xml.org/sax/features/external-parameter-entities",
                    "http://apache.org/xml/features/nonvalidating/load-external-dtd"};

    // ------------------------------------------------------------------
    // Per-validation state. All of it is (re)initialised by validate() and
    // mutated by the SAX callbacks, so one instance must not be used for
    // two validations at the same time.
    // ------------------------------------------------------------------

    /** URI of the digital object that owns the datastream being validated. */
    private String m_doURI;

    /** ID of the datastream being validated (RELS-EXT or RELS-INT). */
    private String m_dsId;

    /** Whether the RDF root element has been seen yet. */
    private boolean m_rootRDFFound;

    /** Total number of Description elements encountered so far. */
    private int m_descriptionCount;

    /** Whether the parser is currently inside a Description element. */
    private boolean m_withinDescription;

    /** Current element depth below the RDF root (root itself is 0). */
    private int m_depth;

    /** Datatype URI of the literal currently being read, or null if untyped. */
    private String m_literalType;

    /** Text of the literal currently being read, or null if not in one. */
    private StringBuilder m_literalValue;

    /** SAX parser reused for every validation performed by this instance. */
    private final SAXParser m_parser;

    /**
     * Creates a validator backed by a namespace-aware SAX parser.
     * <p>
     * Resolution of external entities and external DTDs is disabled where the
     * parser supports it, because the content handed to
     * {@link #validate(PID, String, InputStream)} is not trusted.
     *
     * @throws FaultException
     *         if the parser cannot be created
     */
    public RelsValidator() {
        try {
            SAXParserFactory spf = SAXParserFactory.newInstance();
            spf.setNamespaceAware(true);
            for (int i = 0; i < EXTERNAL_ENTITY_FEATURES.length; i++) {
                disableFeature(spf, EXTERNAL_ENTITY_FEATURES[i]);
            }
            m_parser = spf.newSAXParser();
        } catch (Exception wontHappen) {
            throw new FaultException(wontHappen);
        }
    }

    /**
     * Turns off a single parser feature, logging (rather than failing) when
     * the underlying parser does not know or support that feature.
     *
     * @param spf
     *        the factory to configure
     * @param feature
     *        the feature URI to disable
     */
    private static void disableFeature(SAXParserFactory spf, String feature) {
        try {
            spf.setFeature(feature, false);
        } catch (SAXException e) {
            LOG.warn("XML parser does not support disabling feature "
                    + feature, e);
        } catch (javax.xml.parsers.ParserConfigurationException e) {
            LOG.warn("XML parser does not support disabling feature "
                    + feature, e);
        }
    }

    /**
     * Validates the given RELS-EXT or RELS-INT content.
     *
     * @param pid
     *        the PID of the digital object the datastream belongs to
     * @param dsId
     *        the datastream ID; must be RELS-EXT or RELS-INT
     * @param content
     *        the RDF/XML to validate
     * @throws ValidationException
     *         if the datastream ID is not recognised, the content cannot be
     *         parsed, or any of the restrictions enforced by this class is
     *         violated
     */
    public void validate(PID pid, String dsId, InputStream content)
            throws ValidationException {
        try {
            resetState();
            m_doURI = pid.toURI();
            if (!RELS_EXT.equals(dsId) && !RELS_INT.equals(dsId)) {
                throw new ValidationException("Relationships datastream ID must be RELS-EXT or RELS-INT ("
                        + dsId + ")");
            }
            m_dsId = dsId;
            m_parser.parse(content, this);
        } catch (Exception e) {
            throw new ValidationException(dsId + " validation failed: "
                    + e.getMessage(), e);
        }
    }

    /**
     * Clears everything a previous validation may have left behind. The
     * literal fields matter in particular: a parse that aborted in the middle
     * of a typed literal would otherwise leak that literal into the next run.
     */
    private void resetState() {
        m_rootRDFFound = false;
        m_descriptionCount = 0;
        m_withinDescription = false;
        m_depth = 0;
        m_literalType = null;
        m_literalValue = null;
    }

    /**
     * Dispatches each start tag to the check that applies to its position:
     * the RDF root, a Description element, or a relationship property.
     * <p>
     * Only the first element may act as the RDF root. A later element named
     * RDF is handled like any other element so that it takes part in depth
     * tracking.
     *
     * @throws SAXException
     *         if the element violates one of the structural restrictions
     */
    @Override
    public void startElement(String nsURI,
                             String localName,
                             String qName,
                             Attributes a) throws SAXException {
        if (!m_rootRDFFound) {
            if (isRdfElement(nsURI, localName, "RDF")) {
                m_rootRDFFound = true;
                return;
            }
            throw new SAXException("RelsExtValidator:"
                    + " The 'RDF' root element was not found "
                    + " in the " + m_dsId + " datastream.\n"
                    + " Relationship metadata must be encoded using RDF/XML.");
        }

        if (isRdfElement(nsURI, localName, "Description")) {
            startDescription(qName, a);
        } else if (m_withinDescription) {
            startProperty(nsURI, localName, qName, a);
        } else {
            throw new SAXException("RelsExtValidator:"
                    + " Invalid element " + localName
                    + " found in the " + m_dsId + " datastream.\n"
                    + " Relationship assertions must be built"
                    + " upon an RDF <Description> element.");
        }
    }

    /**
     * Tells whether the element is in the RDF namespace and has the given
     * local name (compared case-insensitively).
     */
    private boolean isRdfElement(String nsURI, String localName, String name) {
        return nsURI.equals(RDF.uri) && localName.equalsIgnoreCase(name);
    }

    /**
     * Handles the start of a Description element: rejects nesting, enforces
     * the single-Description rule for RELS-EXT and checks the 'about' URI.
     */
    private void startDescription(String qName, Attributes a)
            throws SAXException {
        if (m_withinDescription) {
            throw new SAXException("RelsExtValidator:"
                    + " RDF <Description> elements may not be nested"
                    + " in the " + m_dsId + " datastream.");
        }
        m_withinDescription = true;
        m_descriptionCount++;
        m_depth++;
        if ((m_descriptionCount > 1) && m_dsId.equals(RELS_EXT)) {
            throw new SAXException("RelsExtValidator:"
                    + " Only ONE RDF <Description> element is allowed"
                    + " in the RELS-EXT datastream.");
        }
        checkDepth(m_depth, qName);
        checkAboutURI(grab(a, RDF.uri, "about"));
    }

    /**
     * Handles the start of a relationship property inside a Description:
     * checks depth and predicate, then either validates the target resource
     * URI or prepares to collect a (possibly typed) literal value.
     */
    private void startProperty(String nsURI,
                               String localName,
                               String qName,
                               Attributes a) throws SAXException {
        m_depth++;
        checkDepth(m_depth, qName);
        checkBadAssertion(nsURI, localName, qName);

        String resourceURI = grab(a, RDF.uri, "resource");
        if (resourceURI.length() > 0) {
            checkResourceURI(resourceURI, qName);
            m_literalType = null;
            m_literalValue = null;
            return;
        }

        // Not a resource: the fedora-model predicates permitted in RELS-EXT
        // (hasService, hasModel, isContractorOf, isDeploymentOf) may never
        // have a literal as their target.
        if (nsURI.equals(MODEL.uri) && isRelsExtModelPredicate(localName)) {
            throw new SAXException("RelsExtValidator: "
                    + "Target of " + qName + " statement "
                    + "MUST be an rdf:resource");
        }

        String datatypeURI = grab(a, RDF.uri, "datatype");
        m_literalType = datatypeURI.length() == 0 ? null : datatypeURI;
        m_literalValue = new StringBuilder();
    }

    /**
     * Tells whether the local name is one of the fedora-model predicates
     * that RELS-EXT is allowed to assert.
     */
    private boolean isRelsExtModelPredicate(String localName) {
        return localName.equals(MODEL.HAS_SERVICE.localName)
                || localName.equals(MODEL.HAS_MODEL.localName)
                || localName.equals(MODEL.IS_CONTRACTOR_OF.localName)
                || localName.equals(MODEL.IS_DEPLOYMENT_OF.localName);
    }

    /**
     * Accumulates character data while a literal-valued property is open.
     */
    @Override
    public void characters(char[] ch, int start, int length) {
        if (m_literalValue != null) {
            m_literalValue.append(ch, start, length);
        }
    }

    /**
     * Unwinds the depth counter, leaves the Description when its end tag is
     * reached, and validates the collected literal if it carried a datatype.
     *
     * @throws SAXException
     *         if a typed literal does not match its declared datatype
     */
    @Override
    public void endElement(String nsURI, String localName, String qName)
            throws SAXException {
        // The root element never incremented the counter, so never let its
        // end tag push the depth below zero.
        if (m_depth > 0) {
            m_depth--;
        }
        if (isRdfElement(nsURI, localName, "Description")) {
            m_withinDescription = false;
        }
        if (m_literalType != null && m_literalValue != null) {
            checkTypedValue(m_literalType, m_literalValue.toString(), qName);
        }
        m_literalType = null;
        m_literalValue = null;
    }

    /**
     * Looks up an attribute, first by namespace and local name and then, if
     * that yields nothing, by its plain name.
     *
     * @param a
     *        the attributes of the current element
     * @param namespace
     *        the namespace URI to try first
     * @param elementName
     *        the attribute's local (or unqualified) name
     * @return the attribute value, or the empty string if it is absent, so
     *         that callers never have to deal with null
     */
    private static String grab(Attributes a,
                               String namespace,
                               String elementName) {
        String value = a.getValue(namespace, elementName);
        if (value == null) {
            value = a.getValue(elementName);
        }
        return value == null ? "" : value;
    }

    /**
     * checkDepth: checks that there is NO nesting of relationship assertions.
     * In terms of XML depth, the RDF root element is considered depth of 0.
     * Then, the RDF {@code <Description>} must be at depth of 1, and the
     * relationship properties must exist at depth of 2. That's it.
     *
     * @param depth
     *        the depth of the XML element being evaluated
     * @param qName
     *        the name of the relationship property being evaluated
     * @throws SAXException
     *         if the depth exceeds 2
     */
    private void checkDepth(int depth, String qName) throws SAXException {
        if (depth > 2) {
            throw new SAXException("RelsExtValidator:"
                    + " The " + m_dsId + " datastream has improper"
                    + " nesting in its relationship assertions.\n"
                    + " (The XML depth is " + depth
                    + " which must not exceed a depth of 2.\n"
                    + " The root <RDF> element should be level 0,"
                    + " the <Description> element should be level 1,"
                    + " and relationship elements should be level 2.)");
        }
    }

    /**
     * checkBadAssertion: checks that the predicate's namespace is allowed in
     * the datastream being validated. The DC namespaces are rejected in
     * RELS-EXT; the fedora-view namespace is always rejected; from the
     * fedora-model namespace only hasService, hasModel, isDeploymentOf and
     * isContractorOf are accepted in RELS-EXT, and only the predicate
     * identified by {@code MODEL.DOWNLOAD_FILENAME} in RELS-INT.
     *
     * @param nsURI
     *        the namespace URI of the predicate being evaluated
     * @param localName
     *        the local name of the predicate being evaluated
     * @param qName
     *        the qualified name of the predicate being evaluated
     * @throws SAXException
     *         if the predicate is not allowed
     */
    private void checkBadAssertion(String nsURI, String localName, String qName)
            throws SAXException {
        if (m_dsId.equals(RELS_EXT)
                && (nsURI.equals(DC.uri) || nsURI.equals(OAI_DC.uri))) {
            throw new SAXException("RelsExtValidator:"
                    + " The RELS-EXT datastream has improper"
                    + " relationship assertion: " + qName + ".\n"
                    + " No Dublin Core assertions allowed"
                    + " in Fedora relationship metadata.");
        }

        if (nsURI.equals(MODEL.uri)) {
            boolean badInRelsInt =
                    m_dsId.equals(RELS_INT)
                            && !localName
                                    .equals(MODEL.DOWNLOAD_FILENAME.localName);
            boolean badInRelsExt =
                    m_dsId.equals(RELS_EXT)
                            && !isRelsExtModelPredicate(localName);
            if (badInRelsInt || badInRelsExt) {
                throw new SAXException("RelsExtValidator:"
                        + " Disallowed predicate in " + m_dsId + ": "
                        + qName
                        + "\n"
                        + " The only predicates from the fedora-model namespace"
                        + " allowed in RELS-EXT are "
                        + MODEL.HAS_SERVICE.localName + ", "
                        + MODEL.IS_CONTRACTOR_OF.localName + ", "
                        + MODEL.HAS_MODEL.localName + ", "
                        + MODEL.IS_DEPLOYMENT_OF.localName
                        + ". The only predicate allowed "
                        + "in RELS-INT is "
                        + MODEL.DOWNLOAD_FILENAME.localName + ".");
            }
        } else if (nsURI.equals(VIEW.uri)) {
            throw new SAXException("RelsExtValidator:"
                    + " Disallowed predicate in " + m_dsId + ": " + qName
                    + "\n"
                    + " The fedora-view namespace is reserved by Fedora.");
        }
    }

    /**
     * checkAboutURI: ensures that the RDF {@code <Description>} is about the
     * right subject. For RELS-EXT that is the digital object containing the
     * datastream; for RELS-INT it is a datastream of that object, i.e. the
     * object URI followed by "/" and a syntactically valid datastream ID.
     *
     * @param aboutURI
     *        the URI value of the RDF 'about' attribute
     * @throws SAXException
     *         if the URI does not identify an acceptable subject
     */
    private void checkAboutURI(String aboutURI) throws SAXException {
        if (m_dsId.equals(RELS_EXT)) {
            if (!m_doURI.equals(aboutURI)) {
                throw new SAXException("RelsExtValidator:"
                        + " The RELS-EXT datastream refers to"
                        + " an improper URI in the 'about' attribute of the"
                        + " RDF <Description> element.\n"
                        + " The URI must be that of the digital object"
                        + " in which the RELS-EXT datastream resides" + " ("
                        + m_doURI + ").");
            }
            return;
        }

        if (m_dsId.equals(RELS_INT)) {
            String prefix = m_doURI + "/";
            if (!aboutURI.startsWith(prefix)) {
                throw new SAXException("RelsExtValidator:"
                        + " The RELS-INT datastream refers to"
                        + " an improper URI in the 'about' attribute of the"
                        + " RDF <Description> element.\n"
                        + " The URI must be a datastream in the digital object"
                        + " in which the RELS-INT datastream resides" + " ("
                        + m_doURI + ", " + aboutURI + ").");
            }
            // Strip only the leading object URI; anything after it, including
            // a repeated copy of the prefix, is part of the candidate ID and
            // must itself pass the checks below.
            String dsId = aboutURI.substring(prefix.length());
            // datastream ID must be an XML NCName, implemented using axis NCName class
            if (dsId.length() > ValidationConstants.DATASTREAM_ID_MAXLEN
                    || dsId.length() < 1 || !NCName.isValid(dsId)) {
                throw new SAXException("RelsExtValidator:"
                        + " The RELS-INT datastream refers to"
                        + " an improper URI in the 'about' attribute of the"
                        + " RDF <Description> element.\n" + " " + dsId
                        + " is not a valid datastream ID");
            }
        }
    }

    /**
     * checkResourceURI: ensures that the target resource is a syntactically
     * valid, absolute URI.
     *
     * @param resourceURI
     *        the URI value of the RDF 'resource' attribute
     * @param relName
     *        the name of the relationship property being evaluated
     * @throws SAXException
     *         if the value is not a URI or is not absolute
     */
    private void checkResourceURI(String resourceURI, String relName)
            throws SAXException {
        URI uri;
        try {
            uri = new URI(resourceURI);
        } catch (java.net.URISyntaxException e) {
            throw new SAXException("RelsExtValidator:"
                    + "Error in relationship '" + relName + "'."
                    + " The RDF 'resource' is not a valid URI.", e);
        }
        if (!uri.isAbsolute()) {
            throw new SAXException("RelsValidator:" + "Error in relationship '"
                    + relName + "'."
                    + " The specified RDF 'resource' is not an absolute URI.");
        }
    }

    /**
     * checkTypedValue: ensures that the datatype of a literal is one of the
     * supported types (int, long, float, double, dateTime) and that the
     * literal is a valid value for that type.
     *
     * @param datatypeURI
     *        the URI value of the RDF 'datatype' attribute
     * @param value
     *        the value
     * @param relName
     *        the name of the property being evaluated
     * @throws SAXException
     *         if the datatype is unsupported or the value does not match it
     */
    private void checkTypedValue(String datatypeURI,
                                 String value,
                                 String relName) throws SAXException {
        if (datatypeURI.equals(RDF_XSD.INT.uri)) {
            try {
                Integer.parseInt(value);
            } catch (NumberFormatException e) {
                throw invalidValue(relName, "int", e);
            }
        } else if (datatypeURI.equals(RDF_XSD.LONG.uri)) {
            try {
                Long.parseLong(value);
            } catch (NumberFormatException e) {
                throw invalidValue(relName, "long", e);
            }
        } else if (datatypeURI.equals(RDF_XSD.FLOAT.uri)) {
            try {
                Float.parseFloat(value);
            } catch (NumberFormatException e) {
                throw invalidValue(relName, "float", e);
            }
        } else if (datatypeURI.equals(RDF_XSD.DOUBLE.uri)) {
            try {
                Double.parseDouble(value);
            } catch (NumberFormatException e) {
                throw invalidValue(relName, "double", e);
            }
        } else if (datatypeURI.equals(RDF_XSD.DATE_TIME.uri)) {
            if (!isValidDateTime(value)) {
                throw new SAXException("RelsExtValidator:"
                        + " The value specified for " + relName
                        + " is not a valid 'dateTime' value.\n"
                        + "The following dateTime formats are allowed:\n"
                        + " yyyy-MM-ddTHH:mm:ss\n"
                        + " yyyy-MM-ddTHH:mm:ss.S\n"
                        + " yyyy-MM-ddTHH:mm:ss.SS\n"
                        + " yyyy-MM-ddTHH:mm:ss.SSS\n"
                        + " yyyy-MM-ddTHH:mm:ss.SSSZ");
            }
        } else {
            throw new SAXException("RelsExtValidator:"
                    + " Error in relationship '"
                    + relName
                    + "'.\n"
                    + " The " + m_dsId + " datastream does not support the specified"
                    + " datatype.\n"
                    + "If specified, the RDF 'datatype' must be the URI of one of\n"
                    + "the following W3C XML Schema data types: int, long, float,\n"
                    + "double, or dateTime");
        }
    }

    /**
     * Builds the exception reported when a numeric literal cannot be parsed
     * as its declared type.
     */
    private static SAXException invalidValue(String relName,
                                             String typeName,
                                             Exception cause) {
        return new SAXException("RelsExtValidator:"
                + " The value specified for " + relName
                + " is not a valid '" + typeName + "' value", cause);
    }

    /**
     * Tells whether the given string is a valid lexical representation of a
     * dateTime value. Passing this test will ensure successful indexing later.
     * <p>
     * The string must have one of the accepted lengths (seconds precision or
     * one to three fractional digits, optionally followed by 'Z'; a leading
     * '-' is not counted) and must then parse with the matching pattern.
     */
    private static boolean isValidDateTime(String lex) {
        int length = lex.startsWith("-") ? lex.length() - 1 : lex.length();
        String pattern = dateTimePattern(length, lex.endsWith("Z"));
        if (pattern == null) {
            LOG.warn("Not a valid dateTime: " + lex);
            return false;
        }

        // NOTE(review): the format is left lenient, so out-of-range fields
        // (e.g. month 13) roll over instead of being rejected. Switching to
        // strict parsing would also reject 24:00:00 -- confirm what the
        // indexer expects before tightening this.
        SimpleDateFormat format = new SimpleDateFormat(pattern);
        format.setTimeZone(TimeZone.getTimeZone("UTC"));
        try {
            format.parse(lex);
        } catch (ParseException e) {
            LOG.warn("Not a valid dateTime: " + lex);
            return false;
        }
        if (LOG.isTraceEnabled()) {
            LOG.trace("Validated dateTime: " + lex);
        }
        return true;
    }

    /**
     * Chooses the date pattern that corresponds to a lexical length.
     *
     * @param length
     *        the length of the value, not counting a leading '-'
     * @param zulu
     *        whether the value ends with 'Z'
     * @return the pattern to parse with, or null if no accepted format has
     *         that length
     */
    private static String dateTimePattern(int length, boolean zulu) {
        // Characters beyond the seconds-precision form: 0 means no fraction,
        // 2..4 mean a '.' followed by 1..3 fractional digits.
        int extra = length - (zulu ? 20 : 19);
        String pattern = "yyyy-MM-dd'T'HH:mm:ss";
        if (extra >= 2 && extra <= 4) {
            pattern = pattern + "." + "SSS".substring(0, extra - 1);
        } else if (extra != 0) {
            return null;
        }
        return zulu ? pattern + "'Z'" : pattern;
    }
}