/* The contents of this file are subject to the license and copyright terms
* detailed in the license directory at the root of the source tree (also
* available online at http://fedora-commons.org/license/).
*/
package fedora.server.storage.translation;
import fedora.common.Constants;
import fedora.common.Models;
import static fedora.common.Models.CONTENT_MODEL_3_0;
import static fedora.common.Models.FEDORA_OBJECT_3_0;
import static fedora.common.Models.SERVICE_DEFINITION_3_0;
import static fedora.common.Models.SERVICE_DEPLOYMENT_3_0;
import fedora.common.rdf.RDFName;
import fedora.common.xml.namespace.QName;
import fedora.server.Server;
import fedora.server.config.ServerConfiguration;
import fedora.server.errors.ObjectIntegrityException;
import fedora.server.errors.StreamIOException;
import fedora.server.storage.types.AuditRecord;
import fedora.server.storage.types.Datastream;
import fedora.server.storage.types.DatastreamXMLMetadata;
import fedora.server.storage.types.DigitalObject;
import fedora.server.storage.types.Disseminator;
import fedora.server.utilities.DateUtility;
import fedora.server.utilities.StreamUtility;
import org.apache.log4j.Logger;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.Characters;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Pattern;
/**
* Utility methods for usage by digital object serializers and deserializers.
* This class provides methods for detecting various forms of relative
* repository URLs, which are URLs that point to the hostname and port of the
* local repository. Methods will detect these kinds of URLS in datastream
* location fields and in special cases of inline XML. Methods are available to
* convert these URLS back and forth from relative URL syntax, to Fedora's
* internal local URL syntax, and to absolute URL sytnax. This utility class
* defines different "translation contexts" and the format of these relative
* URLs will be set appropriately to the context. Currently defined translation
* contexts are: 0=Deserialize XML into java object appropriate for in-memory
* usage 1=Serialize java object to XML appropriate for "public" export
* (absolute URLs) 2=Serialize java object to XML appropriate for move/migrate
* to another repository 3=Serialize java object to XML appropriate for internal
* storage</b> </p> The public "normalize*" methods in this class should be
* called to make the right decisions about what conversions should occur for
* what contexts. Other utility methods set default values for datastreams and
* disseminators.
*
* @author Sandy Payette
* @version $Id$
*/
@SuppressWarnings("deprecation")
public abstract class DOTranslationUtility
implements Constants {
/** Logger for this class. */
private static final Logger LOG =
Logger.getLogger(DOTranslationUtility.class.getName());
/**
* DESERIALIZE_INSTANCE: Deserialize XML into a java object appropriate for
* in-memory usage. This will make the value of relative repository URLs
* appropriate for instantiations of the digital object in memory. For
* External (E) and Redirected (R) datastreams, any URLs that are relative
* to the local repository are converted to absolute URLs using the
* currently configured hostname:port of the repository. To do this, the
* dsLocation is searched for instances the Fedora local URL string
* ("http://local.fedora.server") which is the way Fedora internally keeps
* track of instances of relative repository URLs. For Managed Content (M)
* datastreams, the internal identifiers are instantiated as is. Also,
* certain reserved inline XML datastreams (WSDL and SERVICE_PROFILE) are
* searched for relative repository URLs and they are made absolute.
*/
public static final int DESERIALIZE_INSTANCE = 0;
/**
* SERIALIZE_EXPORT_PUBLIC: Serialize digital object to XML appropriate for
* "public" external use. This is context is appropriate when the exporting
* repository will continue to exist and will continue to support callback
* URLs for datastream content and disseminations. This gives a "public"
* export of an object in which all relative repository URLs AND internal
* identifiers are converted to absolute callback URLs. For External (E) and
* Redirected (R) datastreams, any URLs that are relative to the local
* repository are converted to absolute URLs using the currently configured
* hostname:port of the repository. For Managed Content (M) datastreams, the
* internal identifiers in dsLocation are converted to default dissemination
* URLs so they can serve as callbacks to the repository to obtain the
* internally managed content. Also, selected inline XML datastreams (i.e.,
* WSDL and SERVICE_PROFILE) are searched for relative repository URLs and
* they are made absolute.
*/
public static final int SERIALIZE_EXPORT_PUBLIC = 1;
/**
* SERIALIZE_EXPORT_MIGRATE: Serialize digital object to XML in a manner
* appropriate for migrating or moving objects from one repository to
* another. This context is appropriate when the local repository will NOT
* be available after objects have been migrated to a new repository. For
* External (E) and Redirected (R)datastreams, any URLs that are relative to
* the local repository will be expressed with the Fedora local URL syntax
* (which consists of the string "local.fedora.server" standing in place of
* the actual "hostname:port"). This enables a new repository to ingest the
* serialization and maintain the relative nature of the URLs (they will
* become relative to the *new* repository. Also, for Managed Content (M)
* datastreams, the internal identifiers in dsLocation are converted to
* default dissemination URLs. This enables the new repository to callback
* to the old repository to obtain the content bytestream to be stored in
* the new repository. Also, within selected inline XML datastreams (i.e.,
* WSDL and SERVICE_PROFILE) any URLs that are relative to the local
* repository will also be expressed with the Fedora local URL syntax.
*/
public static final int SERIALIZE_EXPORT_MIGRATE = 2;
/**
* SERIALIZE_STORAGE_INTERNAL: Serialize java object to XML appropriate for
* persistent storage in the repository, ensuring that any URLs that are
* relative to the local repository are stored with the Fedora local URL
* syntax. The Fedora local URL syntax consists of the string
* "local.fedora.server" standing in place of the actual "hostname:port" on
* the URL). Managed Content (M) datastreams are stored with internal
* identifiers in dsLocation. Also, within selected inline XML datastreams
* (i.e., WSDL and SERVICE_PROFILE) any URLs that are relative to the local
* repository will also be stored with the Fedora local URL syntax. Note
* that a view of the storage serialization can be obtained via the
* getObjectXML method of API-M.
*/
public static final int SERIALIZE_STORAGE_INTERNAL = 3;
/**
* SERIALIZE_EXPORT_ARCHIVE: Serialize digital object to XML in a manner
* appropriate for creating a stand alone archive of objects from a
* repository that will NOT be available after objects have been exported.
* For External (E) and Redirected (R)datastreams, any URLs that are
* relative to the local repository will be expressed with the Fedora local
* URL syntax (which consists of the string "local.fedora.server" standing
* in place of the actual "hostname:port"). This enables a new repository to
* ingest the serialization and maintain the relative nature of the URLs
* (they will become relative to the *new* repository. Also, for Managed
* Content (M) datastreams, the internal identifiers in dsLocation are
* converted to default dissemination URLs, and the contents of the URL's
* are included inline via base-64 encoding. This enables the new repository
* recreate the content bytestream to be stored in the new repository, when
* the original repository is no longer available. Also, within selected
* inline XML datastreams (i.e., WSDL and SERVICE_PROFILE) any URLs that are
* relative to the local repository will also be expressed with the Fedora
* local URL syntax.
*/
public static final int SERIALIZE_EXPORT_ARCHIVE = 4;
/**
* Deserialize or Serialize as is. This context doesn't attempt to do any
* conversion of URLs.
*/
public static final int AS_IS = 5;
// Fedora URL LOCALIZATION Pattern:
// Pattern that is used as the internal replacement syntax for URLs that
// refer back to the local repository. This pattern virtualized the
// repository server address, so that if the host:port of the repository is
// changed, objects that have URLs that refer to the local repository won't break.
private static final Pattern s_fedoraLocalPattern =
Pattern.compile("http://local.fedora.server/");
// Fedora Application Context Localization pattern
// Specifically refers to the current fedora application context (host:port/context)
private static final Pattern s_fedoraLocalAppContextPattern =
Pattern.compile("http://local.fedora.server/fedora/");
// PATTERN FOR DEPRECATED METHOD (getItem of the Default Disseminator), for example:
public static Pattern s_getItemPattern =
Pattern.compile("/fedora-system:3/getItem\\?itemID=");
// ABSOLUTE REPOSITORY URL Patterns:
// Patterns of how the protocol and repository server address may be encoded
// in a URL that points back to the local repository.
private static Pattern s_concreteLocalUrl;
private static Pattern s_concreteLocalUrlAppContext;
private static Pattern s_concreteLocalUrlNoPort;
private static Pattern s_concreteLocalUrlAppContextNoPort;
// CALLBACK DISSEMINATION URL Pattern (for M datastreams in export files):
// Pattern of how protocol, repository server address, and path is encoded
// for a callback dissemination URL to the local repository.
// This is used for encoding datastream location URLs for Managed Content
// datastreams inside an export file. Internal Fedora identifiers for
// the Managed Content datastreams are replaced with public callback URLS.
private static String s_localDissemUrlStart; // "http://hostname:port/fedora/get/"
// The actual host and port of the Fedora repository server
private static String s_hostInfo = null;
// Host, port, and Fedora context
private static String s_hostContextInfo;
private static boolean m_serverOnPort80 = false;
private static boolean m_serverOnRedirectPort443 = false;
private static XMLInputFactory m_xmlInputFactory =
XMLInputFactory.newInstance();
// initialize static class with stuff that's used by all DO Serializerers
static {
// get host port from system properties (for testing without server instance)
String fedoraServerHost = System.getProperty("fedora.hostname");
String fedoraServerPort = System.getProperty("fedora.port");
String fedoraServerPortSSL = System.getProperty("fedoraRedirectPort");
String fedoraAppServerContext =
System.getProperty("fedora.appServerContext");
if (fedoraServerPort != null) {
if (fedoraServerPort.equals("80")) {
m_serverOnPort80 = true;
}
}
if (fedoraServerPortSSL != null) {
if (fedoraServerPortSSL.equals("443")) {
m_serverOnRedirectPort443 = true;
}
}
// otherwise, get host port from the server instance if they are null
if (fedoraServerHost == null || fedoraServerPort == null
|| fedoraAppServerContext == null) {
// if fedoraServerHost or fedoraServerPort system properties
// are not defined, read them from server configuration
ServerConfiguration config = Server.getConfig();
fedoraServerHost =
config.getParameter("fedoraServerHost").getValue();
fedoraServerPort =
config.getParameter("fedoraServerPort").getValue();
fedoraAppServerContext =
config.getParameter("fedoraAppServerContext").getValue();
fedoraServerPortSSL =
config.getParameter("fedoraRedirectPort").getValue();
if (fedoraServerPort.equals("80")) {
m_serverOnPort80 = true;
}
if (fedoraServerPortSSL.equals("443")) {
m_serverOnRedirectPort443 = true;
}
}
// set the currently configured host:port of the repository
s_hostInfo = "http://" + fedoraServerHost;
if (!fedoraServerPort.equals("80") && !fedoraServerPort.equals("443")) {
s_hostInfo = s_hostInfo + ":" + fedoraServerPort;
}
s_hostInfo = s_hostInfo + "/";
s_hostContextInfo = s_hostInfo + fedoraAppServerContext + "/";
// compile the pattern for public dissemination URLs at local server
s_localDissemUrlStart = s_hostInfo + fedoraAppServerContext + "/get/";
s_concreteLocalUrl =
Pattern.compile("https?://(localhost|" + fedoraServerHost
+ "):" + fedoraServerPort + "/");
s_concreteLocalUrlAppContext =
Pattern.compile("https?://(localhost|" + fedoraServerHost
+ "):" + fedoraServerPort + "/("
+ fedoraAppServerContext + "|fedora)/");
s_concreteLocalUrlNoPort =
Pattern.compile("https?://(localhost|" + fedoraServerHost
+ ")/");
s_concreteLocalUrlAppContextNoPort =
Pattern.compile("https?://(localhost|" + fedoraServerHost
+ ")/(" + fedoraAppServerContext + "|fedora)/");
}
/**
* Make URLs that are relative to the local Fedora repository ABSOLUTE URLs.
* First, see if any URLs are expressed in relative URL syntax (beginning
* with "fedora/get" or "fedora/search") and convert these to the special
* Fedora local URL syntax ("http://local.fedora.server/..."). Then look for
* all URLs that contain the special Fedora local URL syntax and replace
* instances of this string with the actual host:port configured for the
* repository. This ensures that all forms of relative repository URLs are
* converted to proper absolute URLs that reference the hostname:port of the
* local Fedora repository. Examples:
* "http://local.fedora.server/fedora/get/demo:1/DS1" is converted to
* "http://myrepo.com:8080/fedora/get/demo:1/DS1" "fedora/get/demo:1/DS1" is
* converted to "http://myrepo.com:8080/fedora/get/demo:1/DS1"
* "http://local.fedora.server/fedora/get/demo:1/sdef:1/getFoo?in="
* http://local.fedora.server/fedora/get/demo:2/DC" is converted to
* "http://myrepo.com:8080/fedora/get/demo:1/sdef:1/getFoo?in="
* http://myrepo.com:8080/fedora/get/demo:2/DC"
*
* @param xmlContent
* @return String with all relative repository URLs and Fedora local URLs
* converted to absolute URL syntax.
*/
public static String makeAbsoluteURLs(String input) {
String output = input;
// First pass: convert fedora app context URLs via variable substitution
output =
s_fedoraLocalAppContextPattern.matcher(output)
.replaceAll(s_hostContextInfo);
// Second pass: convert non-fedora-app-context URLs via variable substitution
output = s_fedoraLocalPattern.matcher(output).replaceAll(s_hostInfo);
LOG.debug("makeAbsoluteURLs: input=" + input + ", output=" + output);
return output;
}
/**
* Detect all forms of URLs that point to the local Fedora repository and
* make sure they are encoded in the special Fedora local URL syntax
* (http://local.fedora.server/..."). First, look for relative URLs that
* begin with "fedora/get" or "fedora/search" replaces instances of these
* string patterns with the special Fedora relative URL syntax. Then, look
* for absolute URLs that have a host:port equal to the host:port currently
* configured for the Fedora repository and replace host:port with the
* special string. The special Fedora relative URL string provides a
* consistent unique string be easily searched for and either converted back
* to an absolute URL or a relative URL to the repository. Examples:
* "http://myrepo.com:8080/fedora/get/demo:1/DS1" is converted to
* "http://local.fedora.server/fedora/get/demo:1/DS1"
* "https://myrepo.com:8443/fedora/get/demo:1/sdef:1/getFoo?in="
* http://myrepo.com:8080/fedora/get/demo:2/DC" is converted to
* "http://local.fedora.server/fedora/get/demo:1/sdef:1/getFoo?in="
* http://local.fedora.server/fedora/get/demo:2/DC"
* "http://myrepo.com:8080/saxon..." (internal service in sDep WSDL) is
* converted to "http://local.fedora.server/saxon..."
*
* @param input
* @return String with all forms of relative repository URLs converted to
* the Fedora local URL syntax.
*/
public static String makeFedoraLocalURLs(String input) {
String output = input;
// Detect any absolute URLs that refer to the local repository
// and convert them to the Fedora LOCALIZATION URL syntax
// (i.e., "http://local.fedora.server/...")\
if (m_serverOnPort80 || m_serverOnRedirectPort443) {
output = s_concreteLocalUrlAppContextNoPort.matcher(output)
.replaceAll(s_fedoraLocalAppContextPattern.pattern());
output = s_concreteLocalUrlNoPort.matcher(output)
.replaceAll(s_fedoraLocalPattern.pattern());
} else {
output = s_concreteLocalUrlAppContext.matcher(output)
.replaceAll(s_fedoraLocalAppContextPattern.pattern());
output = s_concreteLocalUrl.matcher(output)
.replaceAll(s_fedoraLocalPattern.pattern());
}
LOG.debug("makeFedoraLocalURLs: input=" + input + ", output=" + output);
return output;
}
/**
* Utility method to detect instances of of dsLocation URLs that use a
* deprecated default disseminator method
* (/fedora/get/{PID}/fedora-system:3/getItem?itemID={DSID} and replace it
* with the new API-A-LITE syntax for getting a datastream
* (/fedora/get/{PID}/{DSID}
*
* @param input
* @return
*/
private static String convertGetItemURLs(String input) {
String output = input;
// Detect the old default disseminator syntax for getting datastreams
// (i.e., getItem), and replace with new API-A-LITE syntax.
output = s_getItemPattern.matcher(input).replaceAll("/");
LOG.debug("convertGetItemURLs: input=" + input + ", output=" + output);
return output;
}
/*
* Utility method to normalize the value of datastream location depending on
* the translation context. This is mainly to deal with External (E) and
* Redirected (R) datastream locations that are self-referential to the
* local repository (i.e., relative repository URLs) and with Managed
* Content (M) datastreams whose location is an internal identifier. @param
* PID The PID of the object that contains the datastream @param ds The
* datastream whose location is to be processed @param transContext Integer
* value indicating the serialization or deserialization context. Valid
* values are defined as constants in
* fedora.server.storage.translation.DOTranslationUtility:
* 0=DOTranslationUtility.DESERIALIZE_INSTANCE
* 1=DOTranslationUtility.SERIALIZE_EXPORT_PUBLIC
* 2=DOTranslationUtility.SERIALIZE_EXPORT_MIGRATE
* 3=DOTranslationUtility.SERIALIZE_STORAGE_INTERNAL
* 2=DOTranslationUtility.SERIALIZE_EXPORT_ARCHIVE @return
*/
public static Datastream normalizeDSLocationURLs(String PID,
Datastream origDS,
int transContext) {
Datastream ds = origDS.copy();
if (transContext == AS_IS) {
return ds;
}
if (transContext == DOTranslationUtility.DESERIALIZE_INSTANCE) {
if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
// MAKE ABSOLUTE REPO URLs
ds.DSLocation = makeAbsoluteURLs(ds.DSLocation);
}
} else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_PUBLIC) {
if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
// MAKE ABSOLUTE REPO URLs
ds.DSLocation = makeAbsoluteURLs(ds.DSLocation);
} else if (ds.DSControlGrp.equals("M")) {
//if (!ds.DSLocation.startsWith("http://localhost:8080/fedora-demo")) {
// MAKE DISSEMINATION URLs
if (ds.DSCreateDT == null) {
ds.DSLocation =
s_localDissemUrlStart + PID + "/" + ds.DatastreamID;
} else {
ds.DSLocation =
s_localDissemUrlStart
+ PID
+ "/"
+ ds.DatastreamID
+ "/"
+ DateUtility
.convertDateToString(ds.DSCreateDT);
}
//}
}
} else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_MIGRATE) {
if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
// MAKE FEDORA LOCAL REPO URLs
ds.DSLocation = makeFedoraLocalURLs(ds.DSLocation);
} else if (ds.DSControlGrp.equals("M")) {
// MAKE DISSEMINATION URLs
if (ds.DSCreateDT == null) {
ds.DSLocation =
s_localDissemUrlStart + PID + "/" + ds.DatastreamID;
} else {
ds.DSLocation =
s_localDissemUrlStart
+ PID
+ "/"
+ ds.DatastreamID
+ "/"
+ DateUtility
.convertDateToString(ds.DSCreateDT);
}
}
} else if (transContext == DOTranslationUtility.SERIALIZE_STORAGE_INTERNAL) {
//String relativeLoc=ds.DSLocation;
if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
// MAKE FEDORA LOCAL REPO URLs
ds.DSLocation = makeFedoraLocalURLs(ds.DSLocation);
} else if (ds.DSControlGrp.equals("M")) {
// MAKE INTERNAL IDENTIFIERS (PID+DSID+DSVersionID)
ds.DSLocation =
PID + "+" + ds.DatastreamID + "+" + ds.DSVersionID;
}
} else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_ARCHIVE) {
if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
// MAKE FEDORA LOCAL REPO URLs
ds.DSLocation = makeFedoraLocalURLs(ds.DSLocation);
} else if (ds.DSControlGrp.equals("M")) {
// MAKE DISSEMINATION URLs
if (ds.DSCreateDT == null) {
ds.DSLocation =
s_localDissemUrlStart + PID + "/" + ds.DatastreamID;
} else {
ds.DSLocation =
s_localDissemUrlStart
+ PID
+ "/"
+ ds.DatastreamID
+ "/"
+ DateUtility
.convertDateToString(ds.DSCreateDT);
}
}
}
// In any event, look for the deprecated getItem method of the default disseminator
// (i.e., "/fedora-system:3/getItem?itemID=") and convert to new API-A-LITE syntax.
if (ds.DSControlGrp.equals("E") || ds.DSControlGrp.equals("R")) {
ds.DSLocation = convertGetItemURLs(ds.DSLocation);
}
return ds;
}
/**
* Utility method to normalize a chunk of inline XML depending on the
* translation context. This is mainly to deal with certain inline XML
* datastreams found in Service Deployment objects that may contain a
* service URL that references the host:port of the local Fedora server.
* This method will usually only ever be called to check WSDL and
* SERVICE_PROFILE inline XML datastream, but is of general utility for
* dealing with any datastreams that may contain URLs that reference the
* local Fedora server. However, it this method should be used sparingly,
* and only on inline XML datastreams where the impact of the conversions is
* well understood.
*
* @param xml
* a chunk of XML that's contents of an inline XML datastream
* @param transContext
* Integer value indicating the serialization or deserialization
* context. Valid values are defined as constants in
* fedora.server.storage.translation.DOTranslationUtility:
* 0=DOTranslationUtility.DESERIALIZE_INSTANCE
* 1=DOTranslationUtility.SERIALIZE_EXPORT_PUBLIC
* 2=DOTranslationUtility.SERIALIZE_EXPORT_MIGRATE
* 3=DOTranslationUtility.SERIALIZE_STORAGE_INTERNAL
* 4=DOTranslationUtility.SERIALIZE_EXPORT_ARCHIVE
* @return the inline XML contents with appropriate conversions.
*/
public static String normalizeInlineXML(String xml, int transContext) {
if (transContext == AS_IS) {
return xml;
}
if (transContext == DOTranslationUtility.DESERIALIZE_INSTANCE) {
// MAKE ABSOLUTE REPO URLs
return makeAbsoluteURLs(xml);
} else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_PUBLIC) {
// MAKE ABSOLUTE REPO URLs
return makeAbsoluteURLs(xml);
} else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_MIGRATE) {
// MAKE FEDORA LOCAL REPO URLs
return makeFedoraLocalURLs(xml);
} else if (transContext == DOTranslationUtility.SERIALIZE_STORAGE_INTERNAL) {
// MAKE FEDORA LOCAL REPO URLs
return makeFedoraLocalURLs(xml);
} else if (transContext == DOTranslationUtility.SERIALIZE_EXPORT_ARCHIVE) {
// MAKE FEDORA LOCAL REPO URLs
return makeFedoraLocalURLs(xml);
}
return xml;
}
/**
* Check for null values in attributes and set them to empty string so
* 'null' does not appear in XML attribute values. This helps in XML
* validation of required attributes. If 'null' is the attribute value then
* validation would incorrectly consider in a valid non-empty value. Also,
* we set some other default values here.
*
* @param ds
* The Datastream object to work on.
* @return The Datastream value with default set.
* @throws ObjectIntegrityException
*/
public static Datastream setDatastreamDefaults(Datastream ds)
throws ObjectIntegrityException {
if ((ds.DSMIME == null || ds.DSMIME.equals(""))
&& ds.DSControlGrp.equalsIgnoreCase("X")) {
ds.DSMIME = "text/xml";
}
if (ds.DSState == null || ds.DSState.equals("")) {
ds.DSState = "A";
}
// For METS backward compatibility
if (ds.DSInfoType == null || ds.DSInfoType.equals("")
|| ds.DSInfoType.equalsIgnoreCase("OTHER")) {
ds.DSInfoType = "UNSPECIFIED";
}
// LOOK! For METS backward compatibility:
// If we have a METS MDClass value, and DSFormatURI isn't already
// assigned, preserve MDClass and MDType in a DSFormatURI.
// Note that the system is taking over the DSFormatURI in this case.
// Therefore, if a client subsequently modifies the DSFormatURI
// this METS legacy informatin will be lost, in which case the inline
// datastream will default to amdSec/techMD in a subsequent METS export.
if (ds.DSControlGrp.equalsIgnoreCase("X")) {
if (((DatastreamXMLMetadata) ds).DSMDClass != 0
&& ds.DSFormatURI == null) {
String mdClassName = "";
String mdType = ds.DSInfoType;
String otherType = "";
if (((DatastreamXMLMetadata) ds).DSMDClass == 1) {
mdClassName = "techMD";
} else if (((DatastreamXMLMetadata) ds).DSMDClass == 2) {
mdClassName = "sourceMD";
} else if (((DatastreamXMLMetadata) ds).DSMDClass == 3) {
mdClassName = "rightsMD";
} else if (((DatastreamXMLMetadata) ds).DSMDClass == 4) {
mdClassName = "digiprovMD";
} else if (((DatastreamXMLMetadata) ds).DSMDClass == 5) {
mdClassName = "descMD";
}
if (!mdType.equals("MARC") && !mdType.equals("EAD")
&& !mdType.equals("DC") && !mdType.equals("NISOIMG")
&& !mdType.equals("LC-AV") && !mdType.equals("VRA")
&& !mdType.equals("TEIHDR") && !mdType.equals("DDI")
&& !mdType.equals("FGDC")) {
mdType = "OTHER";
otherType = ds.DSInfoType;
}
ds.DSFormatURI =
"info:fedora/fedora-system:format/xml.mets."
+ mdClassName + "." + mdType + "." + otherType;
}
}
return ds;
}
/**
* Appends XML to a PrintWriter. Essentially, just appends all text content
* of the inputStream, trimming any leading and trailing whitespace. It does
* his in a streaming fashion, with resource consumption entirely comprised
* of fixed internal buffers.
*
* @param in
* InputStreaming containing serialized XML.
* @param writer
* PrintWriter to write XML content to.
* @param encoding
* Character set encoding.
*/
protected static void appendXMLStream(InputStream in,
PrintWriter writer,
String encoding)
throws ObjectIntegrityException, UnsupportedEncodingException,
StreamIOException {
if (in == null) {
throw new ObjectIntegrityException("Object's inline xml "
+ "stream cannot be null.");
}
try {
InputStreamReader chars =
new InputStreamReader(in, Charset.forName(encoding));
/* Content buffer */
char[] charBuf = new char[4096];
/* Beginning/ending whitespace buffer */
char[] wsBuf = new char[4096];
int len;
int start;
int end;
int wsLen = 0;
boolean atBeginning = true;
while ((len = chars.read(charBuf)) != -1) {
start = 0;
end = len - 1;
/* Strip out any leading whitespace */
if (atBeginning) {
while (start < len) {
if (charBuf[start] > 0x20) break;
start++;
}
if (start < len) atBeginning = false;
}
/*
* Hold aside any whitespace at the end of the current chunk. If
* we make it to the next chunk, then append our whitespace to
* the buffer. Using this methodology, we may "trim" at most
* {buffer length} characters from the end.
*/
if (wsLen > 0) {
/* Commit previous ending whitespace */
writer.write(wsBuf, 0, wsLen);
wsLen = 0;
}
while (end > start) {
/* Buffer current ending whitespace */
if (charBuf[end] > 0x20) break;
wsBuf[wsLen] = charBuf[end];
wsLen++;
end--;
}
if (start < len) {
writer.write(charBuf, start, end + 1 - start);
}
}
} catch (UnsupportedEncodingException uee) {
throw uee;
} catch (IOException ioe) {
throw new StreamIOException("Error reading from inline xml datastream.");
} finally {
try {
in.close();
} catch (IOException closeProb) {
throw new StreamIOException("Error closing read stream.");
}
}
}
/*
* Certain serviceDeployment datastreams require special processing to
* fix/complete URLs and do variable substitution (such as replacing
* 'local.fedora.server' with fedora's baseURL)
*/
public static void normalizeDatastreams(DigitalObject obj,
int transContext,
String characterEncoding)
throws UnsupportedEncodingException {
if (transContext == AS_IS) {
return;
}
if (obj.hasContentModel( Models.SERVICE_DEPLOYMENT_3_0)) {
Iterator<String> datastreams = obj.datastreamIdIterator();
while (datastreams.hasNext()) {
String dsid = datastreams.next();
if (dsid.equals("WSDL") || dsid.equals("SERVICE-PROFILE")) {
for (Datastream d : obj.datastreams(dsid)) {
if (!(d instanceof DatastreamXMLMetadata)) {
LOG
.warn(obj.getPid()
+ " : Refusing to normalize URLs in datastream "
+ dsid
+ " because it is not inline XML");
continue;
}
DatastreamXMLMetadata xd = (DatastreamXMLMetadata) d;
LOG.debug(obj.getPid() + " : normalising URLs in "
+ dsid);
xd.xmlContent =
DOTranslationUtility
.normalizeInlineXML(new String(xd.xmlContent,
"UTF-8"),
transContext)
.getBytes(characterEncoding);
}
}
}
}
}
@Deprecated
public static Disseminator setDisseminatorDefaults(Disseminator diss)
throws ObjectIntegrityException {
// Until future when we implement selective versioning,
// set default to true.
diss.dissVersionable = true;
if (diss.dissState == null || diss.dissState.equals("")) {
diss.dissState = "A";
}
return diss;
}
protected static String oneString(String[] idList) {
StringBuffer out = new StringBuffer();
for (int i = 0; i < idList.length; i++) {
if (i > 0) {
out.append(' ');
}
out.append(idList[i]);
}
return out.toString();
}
/** Reads the state attribute from a DigitalObject.
* <p>
* Null or empty strings are interpteted as "Active".
* </p>
* @param obj Object that potentially contains object state data.
* @return String containing full state value (Active, Inactive, or Deleted)
* @throws ObjectIntegrityException thrown when the state cannot be parsed.
*/
public static String getStateAttribute(DigitalObject obj) throws ObjectIntegrityException {
if (obj.getState() == null || obj.getState().equals("")) {
return MODEL.ACTIVE.localName;
} else {
switch (obj.getState().charAt(0)) {
case 'D':
return MODEL.DELETED.localName;
case 'I':
return MODEL.INACTIVE.localName;
case 'A':
return MODEL.ACTIVE.localName;
default:
throw new ObjectIntegrityException("Could not determine "
+ "state attribute from '"
+ obj.getState() + "'");
}
}
}
/** Parse and read the object state value from raw text.
* <p>
* Reads a text representation of object state, and returns a "state code"
* abbreviation corresponding to that state. Null or empty values are interpreted
* as "Active".
* </p>
*
* XXX: It might clearer to nix state codes altogether and just use the full value
*
* @param rawValue Raw string to parse. May be null
* @return String containing the state code (A, D, or I)
* @throws ParseException thrown when state value cannot be determined
*/
public static String readStateAttribute(String rawValue) throws ParseException {
if (MODEL.DELETED.looselyMatches(rawValue, true)) {
return "D";
} else if (MODEL.INACTIVE.looselyMatches(rawValue, true)) {
return "I";
} else if (MODEL.ACTIVE.looselyMatches(rawValue, true)
|| rawValue == null
|| rawValue.equals("")) {
return "A";
} else {
throw new ParseException("Could not interpret state value of '"
+ rawValue + "'", 0);
}
}
public static RDFName getTypeAttribute(DigitalObject obj)
throws ObjectIntegrityException {
if (obj.hasContentModel(SERVICE_DEFINITION_3_0)) {
return MODEL.BDEF_OBJECT;
}
if (obj.hasContentModel(SERVICE_DEPLOYMENT_3_0)) {
return MODEL.BMECH_OBJECT;
}
if (obj.hasContentModel( CONTENT_MODEL_3_0)) {
// FOXML 1.0 doesn't support this type; down-convert
return MODEL.DATA_OBJECT;
}
if (obj.hasContentModel( FEDORA_OBJECT_3_0)) {
return MODEL.DATA_OBJECT;
}
return null;
}
/**
* The audit record is created by the system, so programmatic validation
* here is o.k. Normally, validation takes place via XML Schema and
* Schematron.
*
* @param audit
* @throws ObjectIntegrityException
*/
protected static void validateAudit(AuditRecord audit)
throws ObjectIntegrityException {
if (audit.id == null || audit.id.equals("")) {
throw new ObjectIntegrityException("Audit record must have id.");
}
if (audit.date == null || audit.date.equals("")) {
throw new ObjectIntegrityException("Audit record must have date.");
}
if (audit.processType == null || audit.processType.equals("")) {
throw new ObjectIntegrityException("Audit record must have processType.");
}
if (audit.action == null || audit.action.equals("")) {
throw new ObjectIntegrityException("Audit record must have action.");
}
if (audit.componentID == null) {
audit.componentID = ""; // for backwards compatibility, no error on null
// throw new ObjectIntegrityException("Audit record must have componentID.");
}
if (audit.responsibility == null || audit.responsibility.equals("")) {
throw new ObjectIntegrityException("Audit record must have responsibility.");
}
}
protected static String getAuditTrail(DigitalObject obj)
throws ObjectIntegrityException {
StringWriter buf = new StringWriter();
appendAuditTrail(obj, new PrintWriter(buf));
return buf.toString();
}
protected static void appendAuditTrail(DigitalObject obj, PrintWriter writer)
throws ObjectIntegrityException {
appendOpenElement(writer, AUDIT.AUDIT_TRAIL, true);
for (AuditRecord audit : obj.getAuditRecords()) {
DOTranslationUtility.validateAudit(audit);
appendOpenElement(writer, AUDIT.RECORD, AUDIT.ID, audit.id);
appendFullElement(writer,
AUDIT.PROCESS,
AUDIT.TYPE,
audit.processType);
appendFullElement(writer, AUDIT.ACTION, audit.action);
appendFullElement(writer, AUDIT.COMPONENT_ID, audit.componentID);
appendFullElement(writer,
AUDIT.RESPONSIBILITY,
audit.responsibility);
appendFullElement(writer, AUDIT.DATE, DateUtility
.convertDateToString(audit.date));
appendFullElement(writer, AUDIT.JUSTIFICATION, audit.justification);
appendCloseElement(writer, AUDIT.RECORD);
}
appendCloseElement(writer, AUDIT.AUDIT_TRAIL);
}
protected static List<AuditRecord> getAuditRecords(XMLEventReader reader)
throws XMLStreamException {
List<AuditRecord> records = new ArrayList<AuditRecord>();
String inElement = null;
while (reader.hasNext()) {
XMLEvent event = reader.nextEvent();
if (event.isStartElement()) {
StartElement element = (StartElement) event;
inElement = element.getName().getLocalPart();
if (inElement.equals(AUDIT.RECORD.localName)) {
AuditRecord record = new AuditRecord();
java.util.Iterator<?> it = element.getAttributes();
while (it.hasNext()) {
Attribute attr = (Attribute) it.next();
if (attr.getName().getLocalPart()
.equals(AUDIT.ID.localName)) {
record.id = attr.getValue();
}
}
records.add(record);
} else if (inElement.equals(AUDIT.PROCESS.localName)) {
java.util.Iterator<?> it = element.getAttributes();
while (it.hasNext()) {
Attribute attr = (Attribute) it.next();
if (attr.getName().getLocalPart()
.equals(AUDIT.TYPE.localName)) {
records.get(records.size() - 1).processType =
attr.getValue();
}
}
}
}
if (event.isEndElement()) {
inElement = "";
}
if (event.isCharacters()) {
Characters characters = (Characters) event;
if (!records.isEmpty()) {
AuditRecord record = records.get(records.size() - 1);
if (inElement.equals(AUDIT.ACTION.localName)) {
record.action = characters.getData();
} else if (inElement.equals(AUDIT.COMPONENT_ID.localName)) {
record.componentID = characters.getData();
} else if (inElement.equals(AUDIT.DATE.localName)) {
record.date =
DateUtility.convertStringToDate(characters
.getData());
} else if (inElement.equals(AUDIT.JUSTIFICATION.localName)) {
record.justification = characters.getData();
} else if (inElement.equals(AUDIT.RESPONSIBILITY.localName)) {
record.responsibility = characters.getData();
}
}
}
}
return records;
}
/**
* Parse an audit:auditTrail and return a list of AuditRecords.
*
* @since 3.0
* @param auditTrail
* @return
* @throws XMLStreamException
*/
protected static List<AuditRecord> getAuditRecords(InputStream auditTrail)
throws XMLStreamException {
XMLEventReader eventReader;
synchronized (m_xmlInputFactory) {
eventReader = m_xmlInputFactory.createXMLEventReader(auditTrail);
}
List<AuditRecord> records = getAuditRecords(eventReader);
eventReader.close();
return records;
}
protected static List<AuditRecord> getAuditRecords(Reader auditTrail)
throws XMLStreamException {
XMLEventReader eventReader;
synchronized (m_xmlInputFactory) {
eventReader = m_xmlInputFactory.createXMLEventReader(auditTrail);
}
List<AuditRecord> records = getAuditRecords(eventReader);
eventReader.close();
return records;
}
private static void appendOpenElement(PrintWriter writer,
QName element,
boolean declareNamespace) {
writer.print("<");
writer.print(element.qName);
if (declareNamespace) {
writer.print(" xmlns:");
writer.print(element.namespace.prefix);
writer.print("=\"");
writer.print(element.namespace.uri);
writer.print("\"");
}
writer.print(">\n");
}
private static void appendOpenElement(PrintWriter writer,
QName element,
QName attribute,
String attributeContent) {
writer.print("<");
writer.print(element.qName);
writer.print(" ");
writer.print(attribute.localName);
writer.print("=\"");
writer.print(StreamUtility.enc(attributeContent));
writer.print("\">\n");
}
private static void appendCloseElement(PrintWriter writer, QName element) {
writer.print("</");
writer.print(element.qName);
writer.print(">\n");
}
private static void appendFullElement(PrintWriter writer,
QName element,
QName attribute,
String attributeContent) {
writer.print("<");
writer.print(element.qName);
writer.print(" ");
writer.print(attribute.localName);
writer.print("=\"");
writer.print(StreamUtility.enc(attributeContent));
writer.print("\"/>\n");
}
private static void appendFullElement(PrintWriter writer,
QName element,
String elementContent) {
writer.print("<");
writer.print(element.qName);
writer.print(">");
writer.print(StreamUtility.enc(elementContent));
writer.print("</");
writer.print(element.qName);
writer.print(">\n");
}
}