/*
* OAIDCCrosswalk.java
*
* Version: $Revision: 4386 $
*
* Date: $Date: 2009-10-06 20:00:23 +0000 (Tue, 06 Oct 2009) $
*
* Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
* Institute of Technology. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are
* met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* - Neither the name of the Hewlett-Packard Company nor the name of the
* Massachusetts Institute of Technology nor the names of their
* contributors may be used to endorse or promote products derived from
* this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
* TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
* USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
* DAMAGE.
*/
package org.dspace.app.oai;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.sql.SQLException;
import org.dspace.app.util.MetadataExposure;
import org.dspace.content.DCValue;
import org.dspace.content.Item;
import org.dspace.content.crosswalk.IConverter;
import org.dspace.search.HarvestedItemInfo;
import org.dspace.core.ConfigurationManager;
import org.dspace.core.PluginManager;
import org.dspace.core.LogManager;
import org.apache.log4j.Logger;
import ORG.oclc.oai.server.crosswalk.Crosswalk;
import ORG.oclc.oai.server.verb.CannotDisseminateFormatException;
/**
 * OAI_DC crosswalk implementation driven by the <code>oaidc.properties</code>
 * file. Each property key names an item metadata field (in the string form
 * passed to <code>Item.getMetadata(String)</code>) and each property value
 * names the oai_dc element the field is written to. Mappings whose target is
 * not one of the fifteen oai_dc elements are never emitted. A key may end
 * with a converter name in parentheses; in that case the named
 * {@link org.dspace.content.crosswalk.IConverter} plugin is applied to every
 * value of that field before it is disseminated in OAI_DC.
 *
 * @author Robert Tansley
 * @author Andrea Bollini
 * @version $Revision: 4386 $
 */
public class OAIDCCrosswalk extends Crosswalk
{
    // Matches either a run of characters that are not legal in XML 1.0
    // (legal: TAB, LF, CR, U+0020-U+D7FF, U+E000-U+FFFD, U+10000-U+10FFFF)
    // or a single markup character that must be escaped (&, < or >).
    //
    // A regex unicode escape takes exactly four hex digits, so the
    // supplementary range cannot be written that way. Its two end points
    // are therefore placed in the pattern as real surrogate pairs (note the
    // single backslash); java.util.regex reads each pair as one code point.
    private static final Pattern invalidXmlPattern = Pattern
            .compile("([^\\t\\n\\r\\u0020-\\ud7ff\\ue000-\\ufffd\ud800\udc00-\udbff\udfff]+|[&<>])");

    // Pattern to extract the converter name, if any, from a configured key:
    // group 1 is the text between the last '(' and the closing ')'.
    private static final Pattern converterPattern = Pattern.compile(".*\\((.*)\\)");

    /** The oai_dc elements, in the order in which they are written out. */
    private static final String[] oaidcElement = new String[] { "title",
            "creator", "subject", "description", "publisher", "contributor",
            "date", "type", "format", "identifier", "source", "language",
            "relation", "coverage", "rights" };

    /** Location of config file */
    private static final String configFilePath = ConfigurationManager
            .getProperty("dspace.dir")
            + File.separator
            + "config"
            + File.separator
            + "crosswalks"
            + File.separator + "oaidc.properties";

    /** log4j logger */
    private static final Logger log = Logger.getLogger(OAIDCCrosswalk.class);

    /**
     * Maps an oai_dc element name to the configured keys (metadata field,
     * optionally followed by "(converterName)") that feed it.
     */
    private static final Map<String, Set<String>> config = new HashMap<String, Set<String>>();

    static
    {
        // Read in configuration
        Properties crosswalkProps = new Properties();
        FileInputStream fis = null;
        try
        {
            fis = new FileInputStream(configFilePath);
            crosswalkProps.load(fis);
        }
        catch (IOException e)
        {
            throw new IllegalArgumentException(
                    "Wrong configuration for OAI_DC", e);
        }
        finally
        {
            if (fis != null)
            {
                try
                {
                    fis.close();
                }
                catch (IOException ioe)
                {
                    // Best effort: the properties are already loaded.
                    log.error("Unable to close " + configFilePath, ioe);
                }
            }
        }

        // Invert the file: it is written as "field = element", but lookups
        // at dissemination time go from element to fields.
        for (Object key : crosswalkProps.keySet())
        {
            String mdString = (String) key;
            String oaiElement = crosswalkProps.getProperty(mdString);
            if (oaiElement == null)
            {
                continue;
            }

            // Properties.load keeps trailing whitespace on values; trim it
            // so that "title " is still recognised as "title".
            oaiElement = oaiElement.trim();
            if (oaiElement.length() == 0)
            {
                continue;
            }

            Set<String> fields = config.get(oaiElement);
            if (fields == null)
            {
                fields = new HashSet<String>();
                config.put(oaiElement, fields);
            }
            fields.add(mdString);
        }
    }

    /**
     * Creates the crosswalk, passing the oai_dc namespace and schema
     * location to the superclass.
     *
     * @param properties
     *            not used by this implementation
     */
    public OAIDCCrosswalk(Properties properties)
    {
        super("http://www.openarchives.org/OAI/2.0/oai_dc/ "
                + "http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
    }

    /**
     * Reports whether this crosswalk can be applied to the given item.
     *
     * @param nativeItem
     *            the item to check (ignored)
     * @return always <code>true</code>
     */
    public boolean isAvailableFor(Object nativeItem)
    {
        // We have DC for everything
        return true;
    }

    /**
     * Builds the <code>oai_dc:dc</code> XML fragment for an item.
     * <p>
     * For every oai_dc element, in the fixed element order, the values of
     * all metadata fields configured for that element are written as
     * <code>dc:</code> elements. Fields reported as hidden by
     * {@link MetadataExposure} and <code>null</code> values are skipped.
     * Values are passed through the configured converter (if one was found),
     * trimmed and XML-escaped.
     *
     * @param nativeItem
     *            the item to disseminate; must be a {@link HarvestedItemInfo}
     * @return the oai_dc XML fragment as a string
     * @throws CannotDisseminateFormatException
     *             if the hidden-field check fails with an SQL error
     */
    public String createMetadata(Object nativeItem)
            throws CannotDisseminateFormatException
    {
        HarvestedItemInfo itemInfo = (HarvestedItemInfo) nativeItem;
        Item item = itemInfo.item;

        StringBuffer metadata = new StringBuffer();
        metadata
                .append(
                        "<oai_dc:dc xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" ")
                .append("xmlns:dc=\"http://purl.org/dc/elements/1.1/\" ")
                .append(
                        "xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" ")
                .append(
                        "xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd\">");

        for (String element : oaidcElement)
        {
            Set<String> itemMetadata = config.get(element);
            if (itemMetadata == null || itemMetadata.isEmpty())
            {
                continue;
            }

            for (String mdString : itemMetadata)
            {
                String converterName = null;
                IConverter converter = null;
                Matcher converterMatcher = converterPattern.matcher(mdString);
                if (converterMatcher.matches())
                {
                    converterName = converterMatcher.group(1);
                    converter = (IConverter) PluginManager.getNamedPlugin(
                            IConverter.class, converterName);
                    if (converter == null)
                    {
                        // Not fatal: the values are emitted unconverted.
                        log.warn(LogManager.getHeader(null,
                                "createMetadata",
                                "no converter plugin found with name "
                                        + converterName + " for metadata "
                                        + mdString));
                    }
                }

                // Strip the "(converterName)" suffix to obtain the plain
                // field name. The replacement is literal, so converter names
                // containing regex metacharacters are handled correctly.
                String fieldName = mdString;
                if (converterName != null)
                {
                    fieldName = mdString.replace("(" + converterName + ")", "");
                }
                DCValue[] dcValues = item.getMetadata(fieldName);

                try
                {
                    for (DCValue dcValue : dcValues)
                    {
                        if (MetadataExposure.isHidden(itemInfo.context,
                                dcValue.schema, dcValue.element,
                                dcValue.qualifier))
                        {
                            continue;
                        }

                        String value = dcValue.value;
                        if (converter != null)
                        {
                            value = converter.makeConversion(value);
                        }

                        if (value != null)
                        {
                            metadata.append("<dc:").append(element).append(">")
                                    .append(escapeXml(value))
                                    .append("</dc:").append(element)
                                    .append(">");
                        }
                    }
                }
                catch (SQLException e)
                {
                    throw new CannotDisseminateFormatException(e.toString());
                }
            }
        }

        metadata.append("</oai_dc:dc>");
        return metadata.toString();
    }

    /**
     * Trims a value and makes it safe to use as XML element content.
     * The characters {@code &}, {@code <} and {@code >} are replaced by
     * their predefined entity references, and every run of characters that
     * is not legal in XML 1.0 is replaced by a single space.
     *
     * @param value
     *            the raw metadata value; must not be <code>null</code>
     * @return the trimmed, escaped value
     */
    private static String escapeXml(String value)
    {
        String trimmed = value.trim();
        // Matcher.appendReplacement requires a StringBuffer on older JDKs.
        StringBuffer escaped = new StringBuffer(trimmed.length());
        Matcher xmlMatcher = invalidXmlPattern.matcher(trimmed);
        while (xmlMatcher.find())
        {
            // The match is either exactly one markup character or a run of
            // characters that are not allowed in XML at all.
            String group = xmlMatcher.group();
            String replacement;
            if ("&".equals(group))
            {
                replacement = "&amp;";
            }
            else if ("<".equals(group))
            {
                replacement = "&lt;";
            }
            else if (">".equals(group))
            {
                replacement = "&gt;";
            }
            else
            {
                replacement = " ";
            }
            xmlMatcher.appendReplacement(escaped, replacement);
        }
        // add bit of the string after the final match
        xmlMatcher.appendTail(escaped);
        return escaped.toString();
    }
}