/*~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * * University Of Edinburgh (EDINA) * Scotland * * * File Name : MetadataFormat.java * Author : gwaller * Approver : Gareth Waller * * Notes : * * *~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ * * HISTORY * ------- * * $LastChangedRevision$ * $LastChangedDate$ * $LastChangedBy$ */ package uk.ac.jorum.packager; import java.util.ArrayList; import java.util.List; import org.apache.log4j.Logger; import org.dspace.content.crosswalk.DisseminationCrosswalk; import org.dspace.content.crosswalk.MetadataValidationException; import org.jdom.Attribute; import org.jdom.Comment; import org.jdom.Document; import org.jdom.Element; import org.jdom.IllegalAddException; import org.jdom.JDOMException; import org.jdom.Namespace; import org.jdom.ProcessingInstruction; import org.jdom.Text; import org.jdom.output.Format; import org.jdom.output.XMLOutputter; import org.jdom.xpath.XPath; import uk.ac.jorum.utils.ExceptionLogger; import uk.ac.jorum.utils.URLChecker; /** * @author gwaller * */ public class MetadataFormat { public final static MetadataFormat IMSMDV1P2 = new MetadataFormat( "IMSMD", "lom", "http://www.imsglobal.org/xsd/imsmd_v1p2", "//jfoo:rights/jfoo:description/jfoo:langstring", "rights/description/langstring", "//jfoo:classification/jfoo:taxonpath/jfoo:taxon/jfoo:entry/jfoo:langstring", "//jfoo:technical/jfoo:location" ); public final static MetadataFormat LOM = new MetadataFormat( "LOM", "lom", "http://ltsc.ieee.org/xsd/LOM", "//jfoo:rights/jfoo:description/jfoo:string", "rights/description/string", "//jfoo:classification[jfoo:purpose/jfoo:value=\"discipline\"]/jfoo:taxonPath/jfoo:taxon/jfoo:entry/jfoo:string", "//jfoo:technical/jfoo:location" ); public final static MetadataFormat DC = new MetadataFormat( "DC", null, "http://purl.org/dc/elements/1.1/", "//jfoo:rights", "rights", "//jfoo:subject", "//jfoo:identifier"); public final static MetadataFormat QDC = new MetadataFormat( "QDC", "qualifieddc", "http://purl.org/dc/terms/", "//jbar:rights[@xsi:type=\"dcterms:URI\"]", "rights", "//jfoo:subject", "//jfoo:identifier"); public static MetadataFormat FORMATS[] = {IMSMDV1P2, LOM, DC, QDC}; /** log4j category */ private static Logger log = Logger.getLogger(MetadataFormat.class); /** This is the prefix that will be used for elements belongin to the namespace of the metadata format e.g refer to * jfoo:rights for DC would mean the rights element belonging to the namespace http://purl.org/dc/elements/1.1/ */ public final static String JORUM_NAMESPACE_PREFIX = "jfoo"; public static final String PI_HREF_ATTR = "href"; private String dspaceConfigStr; private String namespaceURI; private String licenceXpathExpression; private String classificationXpath; private String licenceElementsToCreateFromRootMetadataElement; private String rootMetadataElement; // GWaller 6/5/10 IssueID#263 Support for web links not in a manifest resource element private String webLinkIdentifierXpath; /** * Construxctor for a MetadataFormat instance * @param dspaceConfigStr * @param rootMetadataElementName set to null if a "root" node for the metadata block is not required e.g. in the case of DC. Otherwise set to * the name of the element which should be the root of the metadata XML block e.g. "lom" for LOM metadata * @param namespaceURI * @param licenceXpathExpression * @param licenceElementsToCreateFromRootMetadataElement * @param classificationXpath */ private MetadataFormat(String dspaceConfigStr, String rootMetadataElementName, String namespaceURI, String licenceXpathExpression, String licenceElementsToCreateFromRootMetadataElement, String classificationXpath, String webLinkIdentifierXpath) { this.dspaceConfigStr = dspaceConfigStr; this.namespaceURI = namespaceURI; this.licenceXpathExpression = licenceXpathExpression; this.classificationXpath = classificationXpath; this.licenceElementsToCreateFromRootMetadataElement = licenceElementsToCreateFromRootMetadataElement; this.rootMetadataElement = rootMetadataElementName; // GWaller 6/5/10 IssueID#263 Support for web links not in a manifest resource element this.webLinkIdentifierXpath = webLinkIdentifierXpath; } public XPath getXpathInstanceWithNamespace(String expr) throws JDOMException { XPath xpathIns = XPath.newInstance(expr); xpathIns.addNamespace(JORUM_NAMESPACE_PREFIX, this.namespaceURI); xpathIns.addNamespace(DisseminationCrosswalk.XSI_NS); return xpathIns; } /** * @return the dspaceConfigStr */ public String getDspaceConfigStr() { return dspaceConfigStr; } /** * @return the namespaceURI */ public String getNamespaceURI() { return namespaceURI; } public boolean isFormat(Element elem) { return elem.getNamespaceURI().compareToIgnoreCase(this.namespaceURI) == 0; } /** * @return the licenceXpathExpression */ public String getLicenceXpathExpression() { return licenceXpathExpression; } /** * @return the licenceElementsToCreateFromRootMetadataElement */ public String getLicenceElementsToCreateFromRootMetadataElement() { return licenceElementsToCreateFromRootMetadataElement; } /** * @return the classificationXpath */ public String getClassificationXpath() { return classificationXpath; } /** * @return the rootMetadataElement */ public String getRootMetadataElement() { return rootMetadataElement; } private String stringFromXpathResult(Object xpathResult) { String result = null; if (xpathResult instanceof Element) { // Element slected via xpath - crudely just get the text below this // node. To be more accurate, the xpath should select an attribute // or TEXT node result = ((Element) xpathResult).getTextTrim(); } else if (xpathResult instanceof Attribute) { // More accurate Xpath - simply return the value result = ((Attribute) xpathResult).getValue(); } else if (xpathResult instanceof Text) { // Will also catch CDATA // (subclass of Text) // More accurate Xpath - simply return the text with whitespace // trimmed result = ((Text) xpathResult).getTextTrim(); } else if (xpathResult instanceof Comment) { // More accurate Xpath - simply return the text result = ((Comment) xpathResult).getText(); } else if (xpathResult instanceof ProcessingInstruction) { // Support a PI with a href attribute pointing to the licence result = ((ProcessingInstruction) xpathResult).getPseudoAttributeValue(PI_HREF_ATTR); } else { // Fall back - just call toString result = xpathResult.toString(); } return result; } // START GWaller 02/02/09 IssueID #175 Added methods to deal with licence // manipulation inside packages private void setStringInXpathResult(Object xpathResult, String value) throws MetadataValidationException { if (xpathResult instanceof Element) { // Element selected via xpath ((Element) xpathResult).setText(value); } else if (xpathResult instanceof Attribute) { // More accurate Xpath selecting an attribute ((Attribute) xpathResult).setValue(value); } else if (xpathResult instanceof Text) { // Will also catch CDATA // (subclass of Text) // More accurate Xpath - simply set the text with the value supplied ((Text) xpathResult).setText(value); } else if (xpathResult instanceof Comment) { // More accurate Xpath - comment selected ((Comment) xpathResult).setText(value); } else if (xpathResult instanceof ProcessingInstruction) { // Support a PI by using the href attribute ((ProcessingInstruction) xpathResult).setPseudoAttribute(PI_HREF_ATTR, value); } else { // Unsupported JDOM element selected - need to throw an exception throw new MetadataValidationException("Attempting to set value (" + value + ") of an unsupported JDOM Element in manifest. Element is: " + xpathResult.toString()); } } public void setLicenceText(Document manifest, String licenceText, String rootMdElementXpath, String metadataElementPrefix) throws MetadataValidationException { try { Object licenceNode = getXpathInstanceWithNamespace(this.licenceXpathExpression).selectSingleNode(manifest); if (licenceNode == null) { // Need to create it! // Get the root metadata element to add to Object rootMDElement = getXpathInstanceWithNamespace(rootMdElementXpath).selectSingleNode(manifest); if (rootMDElement != null && rootMDElement instanceof Element) { Element root = (Element) rootMDElement; Namespace ns = Namespace.getNamespace(metadataElementPrefix, this.namespaceURI); try { manifest.getRootElement().addNamespaceDeclaration(ns); } catch (IllegalAddException e) { // thrown if a prefix clash happens - don't need to // worry. This just means the namespace was there // already! } String[] childrenToCreate = (this.getLicenceElementsToCreateFromRootMetadataElement()).split("/"); for (int i = 0; i < childrenToCreate.length; i++) { Element child = root.getChild(childrenToCreate[i], ns); if (child == null) { // Child not found - need to create it child = new Element(childrenToCreate[i], ns); root.addContent(child); } // We no should have a valid child pointer and this will // be come the root so we can create the other children // off it root = child; } // root should now point to the last element which should // contain the licence. Set the licenceNode pointer and let // setStringInXpathResult so the work of setting the licence // in the element licenceNode = root; } else { throw new MetadataValidationException("Could not find root metadata element using XPath: " + rootMdElementXpath); } } // Now set the licence text in the relevant node this.setStringInXpathResult(licenceNode, licenceText); } catch (JDOMException e) { ExceptionLogger.logException(log, e); // Wrap the exception in a MEtadataValidationException and throw throw new MetadataValidationException(e); } } // END GWaller 02/02/09 IssueID #175 Added methods to deal with licence // manipulation inside packages // GWaller 19/2/10 IssueID #199 Changed param to Object - could be an Element or Document public String getTextByXPath(Object rootForXpath, String xpathExperssion) throws MetadataValidationException { String licence = null; Object context = rootForXpath; log.debug("getTextByXPath: xpathExperssion= " + xpathExperssion); if (xpathExperssion == null) { return null; } if (rootForXpath != null && rootForXpath instanceof Element){ XMLOutputter outputPretty = new XMLOutputter(Format.getPrettyFormat()); log.debug("getTextByXPath: Running xpath expression using the following Element as context :"); log.debug(outputPretty.outputString((Element)rootForXpath)); context = new Document((Element)((Element)rootForXpath).clone()); } try { XPath xpathIns = getXpathInstanceWithNamespace(xpathExperssion); if(this.dspaceConfigStr.equals("QDC")){ // bit of a hack to add elements namespace for QDC metadata // Note that this namespace not declared in constructor as wouldn't be able // to differentiate from normal DC, so http://purl.org/dc/terms/ declared there. xpathIns.addNamespace("jbar","http://purl.org/dc/elements/1.1/"); } Object licenceValue = xpathIns.selectSingleNode(context); // Now check the object returned - can be Element, Attribute, Text, // CDATA, Comment, ProcessingInstruction, // Boolean, Double, String, or null if no item was selected if (licenceValue != null) { licence = stringFromXpathResult(licenceValue); } } catch (JDOMException e) { ExceptionLogger.logException(log, e); // Wrap the exception in a MEtadataValidationException and throw throw new MetadataValidationException(e); } log.debug("getTextByXPath: result = <" + licence + ">"); return licence; } public ArrayList<String> getAllTextByXPath(Document manifest, String xpathExperssion) throws MetadataValidationException { ArrayList<String> values = new ArrayList<String>(); log.debug("getAllTextByXPath: xpathExperssion= " + xpathExperssion); if (xpathExperssion == null) { return null; } try { List<Object> valueList = getXpathInstanceWithNamespace(xpathExperssion).selectNodes(manifest); // Now check the object returned - can be Element, Attribute, Text, // CDATA, Comment, ProcessingInstruction, // Boolean, Double, String, or null if no item was selected if (valueList != null && valueList.size() > 0) { for (Object o : valueList) { values.add(stringFromXpathResult(o)); } } } catch (JDOMException e) { ExceptionLogger.logException(log, e); // Wrap the exception in a MEtadataValidationException and throw throw new MetadataValidationException(e); } log.debug("getAllTextByXPath: result has " + values.size() + " entries"); return values; } // GWaller 19/2/10 IssueID #199 Changed param to Object - could be an Element or Document public String getLicenceText(Object rootForXpath) throws MetadataValidationException { return this.getTextByXPath(rootForXpath, this.licenceXpathExpression); } public String[] geClassificationText(Document manifest) throws MetadataValidationException { ArrayList<String> values = this.getAllTextByXPath(manifest, this.classificationXpath); return values.toArray(new String[values.size()]); } // GWaller 6/5/10 IssueID#263 Support for web links not in a manifest resource element public String[] geWebLinksFromMetadata(Document manifest) throws MetadataValidationException { ArrayList<String> values = this.getAllTextByXPath(manifest, this.webLinkIdentifierXpath); // Iterate across the possible list of links and check it is a real URL ArrayList<String> checkedLinks = new ArrayList<String>(); for (String v:values){ String trimmed = v.trim(); if (URLChecker.isURL(trimmed) > 0){ checkedLinks.add(trimmed); } } return checkedLinks.toArray(new String[checkedLinks.size()]); } }