/*
 * OAIDCCrosswalk.java
 *
 * Version: $Revision: 3705 $
 *
 * Date: $Date: 2009-04-11 18:02:24 +0100 (Sat, 11 Apr 2009) $
 *
 * Copyright (c) 2002-2005, Hewlett-Packard Company and Massachusetts
 * Institute of Technology. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above copyright
 * notice, this list of conditions and the following disclaimer in the
 * documentation and/or other materials provided with the distribution.
 *
 * - Neither the name of the Hewlett-Packard Company nor the name of the
 * Massachusetts Institute of Technology nor the names of their
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR
 * TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE
 * USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH
 * DAMAGE.
 */
package org.dspace.app.oai;

import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.dspace.content.DCValue;
import org.dspace.content.Item;
import org.dspace.search.HarvestedItemInfo;

import ORG.oclc.oai.server.crosswalk.Crosswalk;
import ORG.oclc.oai.server.verb.CannotDisseminateFormatException;

/**
 * An OAICat Crosswalk implementation that extracts Qualified Dublin Core from
 * DSpace items into the oai_dc format.
 * <p>
 * Every Dublin Core value of the item except <code>description.provenance</code>
 * is written as a <code>dc:element</code> or <code>dc:element.qualifier</code>
 * child of a single <code>oai_dc:dc</code> root element.
 *
 * @author Robert Tansley
 *
 * @author cgormle1
 * Based this class on OAIDCCrosswalk and made changes to output qualified dc
 * @version $Revision: 3705 $
 */
public class OAIQualifiedDCCrosswalk extends Crosswalk
{
    /**
     * Matches everything that must be rewritten before a value can be placed
     * in XML character data: either a run of characters outside the XML 1.0
     * Char production, or a single markup character (&amp;, &lt; or &gt;).
     * <p>
     * The BMP ranges are written as regex escapes. The supplementary range
     * U+10000 to U+10FFFF is placed in the character class as literal
     * surrogate pairs (via Java string escapes), because a regex backslash-u
     * escape only consumes four hex digits and therefore cannot name a code
     * point above U+FFFF. Pattern reads each literal pair as one code point.
     */
    private static final Pattern invalidXmlPattern = Pattern.compile(
            "([^\\t\\n\\r\\u0020-\\ud7ff\\ue000-\\ufffd"
            + "\ud800\udc00-\udbff\udfff"
            + "]+|[&<>])");

    // Pattern to test for only true dc elements.
    // NOTE(review): not referenced anywhere in this class; kept unchanged.
    private static final Pattern dcElementPattern = Pattern
            .compile("(^(title|creator|subject|description|"
                    + "publisher|contributor|date|type|"
                    + "format|identifier|source|language|"
                    + "relation|coverage|rights)$)");

    /**
     * Creates the crosswalk and registers the oai_dc schema location with the
     * superclass.
     *
     * @param properties
     *            OAICat configuration; not read by this implementation
     */
    public OAIQualifiedDCCrosswalk(Properties properties)
    {
        super("http://www.openarchives.org/OAI/2.0/oai_dc/ "
                + "http://www.openarchives.org/OAI/2.0/oai_dc.xsd");
    }

    /**
     * Reports whether this crosswalk can render the given item.
     *
     * @param nativeItem
     *            the item to test; ignored
     * @return always <code>true</code>
     */
    public boolean isAvailableFor(Object nativeItem)
    {
        // We have DC for everything
        return true;
    }

    /**
     * Builds the <code>oai_dc:dc</code> XML fragment for an item.
     *
     * @param nativeItem
     *            must be a {@link HarvestedItemInfo}; its <code>item</code>
     *            field supplies the Dublin Core values
     * @return the serialized metadata element
     * @throws CannotDisseminateFormatException
     *             declared by the crosswalk contract; not thrown directly by
     *             this method
     */
    public String createMetadata(Object nativeItem)
            throws CannotDisseminateFormatException
    {
        Item item = ((HarvestedItemInfo) nativeItem).item;

        // Get all the DC
        DCValue[] allDC = item.getDC(Item.ANY, Item.ANY, Item.ANY);

        StringBuilder metadata = new StringBuilder();

        metadata
                .append("<oai_dc:dc xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" ")
                .append("xmlns:dc=\"http://purl.org/dc/elements/1.1/\" ")
                .append("xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" ")
                .append("xsi:schemaLocation=\"http://www.openarchives.org/OAI/2.0/oai_dc/ http://www.openarchives.org/OAI/2.0/oai_dc.xsd\">");

        for (int i = 0; i < allDC.length; i++)
        {
            String element = allDC[i].element;
            String qualifier = allDC[i].qualifier;
            String value = allDC[i].value;

            // Do not include description.provenance
            if ("description".equals(element)
                    && "provenance".equals(qualifier))
            {
                continue;
            }

            // Fields without a value produce no element at all
            if (value == null)
            {
                continue;
            }

            // contributor.author exposed as 'creator'
            // NOTE(review): the qualifier is left untouched, so the emitted
            // tag is dc:creator.author - confirm that this is intended.
            if ("contributor".equals(element) && "author".equals(qualifier))
            {
                element = "creator";
            }

            // The same name is used for the opening and the closing tag
            String tagName = (qualifier == null) ? element : element + "."
                    + qualifier;

            metadata.append("<dc:").append(tagName).append(">")
                    .append(escapeXml(value))
                    .append("</dc:").append(tagName).append(">");
        }

        metadata.append("</oai_dc:dc>");

        return metadata.toString();
    }

    /**
     * Trims a value and makes it safe for use as XML character data.
     * <p>
     * The markup characters &amp;, &lt; and &gt; are replaced by their
     * predefined entity references; each run of characters that XML 1.0 does
     * not allow is replaced by a single space.
     *
     * @param value
     *            the raw metadata value, never <code>null</code>
     * @return the trimmed, escaped value
     */
    private static String escapeXml(String value)
    {
        // Matcher.appendReplacement requires a StringBuffer on older JDKs
        StringBuffer escaped = new StringBuffer(value.length());
        Matcher xmlMatcher = invalidXmlPattern.matcher(value.trim());

        while (xmlMatcher.find())
        {
            String group = xmlMatcher.group();
            String replacement;

            // group is either one markup character or a run of characters
            // that are not allowed in XML
            if ("&".equals(group))
            {
                replacement = "&amp;";
            }
            else if ("<".equals(group))
            {
                replacement = "&lt;";
            }
            else if (">".equals(group))
            {
                replacement = "&gt;";
            }
            else
            {
                replacement = " ";
            }

            xmlMatcher.appendReplacement(escaped, replacement);
        }

        // add bit of the string after the final match
        xmlMatcher.appendTail(escaped);

        return escaped.toString();
    }
}