package org.bbaw.wsp.cms.mdsystem.metadata.general.extractor;
import java.io.File;
import java.util.HashMap;
import java.util.List;
import javax.xml.xpath.XPath;
import net.sf.saxon.om.ValueRepresentation;
import net.sf.saxon.s9api.DocumentBuilder;
import net.sf.saxon.s9api.Processor;
import net.sf.saxon.s9api.SaxonApiException;
import net.sf.saxon.s9api.XPathCompiler;
import net.sf.saxon.s9api.XPathExecutable;
import net.sf.saxon.s9api.XPathSelector;
import net.sf.saxon.s9api.XdmItem;
import net.sf.saxon.s9api.XdmNode;
import net.sf.saxon.s9api.XdmValue;
import net.sf.saxon.trans.XPathException;
import org.bbaw.wsp.cms.mdsystem.metadata.general.MetadataParserHelper;
import de.mpg.mpiwg.berlin.mpdl.exception.ApplicationException;
/**
* This is the API class for all metadata parsers.
* @author Sascha Feldmann (wsp-shk1)
* @date 25.10.2012
*
*/
public abstract class MetadataExtractor {
/**
* The uri to be read by the saxon compiler.
*/
protected String uri;
protected XPathCompiler xPathCompiler;
protected XdmNode contextItem;
/**
* Create a new {@link MetadataExtractor} instance.
* @param uri - the URI of the xml document to be parsed.
* @throws ApplicationException if the ressource cannot be validated by Saxon.
* @throws IllegalArgumentException
* if the uri is null, empty or doesn't refer to an existing
* file.
*/
public MetadataExtractor(final String uri, final HashMap<String, String> namespaces) throws ApplicationException {
if (uri == null || uri.isEmpty()) {
throw new IllegalArgumentException(
"The value for the parameter uri in the constructor of ModsMetadataParser mustn't be empty.");
}
this.uri = uri;
// define the Saxon processor
Processor processor = new Processor(false);
xPathCompiler = processor.newXPathCompiler();
// declare each namespace
for (String namespace : namespaces.keySet()) {
xPathCompiler.declareNamespace(namespace, namespaces.get(namespace));
}
DocumentBuilder builder = processor.newDocumentBuilder();
try {
contextItem = builder.build(new File(uri));
} catch (SaxonApiException e) {
throw new ApplicationException("Error while trying to access file using Saxon: "+uri);
}
}
/**
* Compile and execute and XPath query.
* @param query - the {@link XPath} expression.
* @param moreNodes - set true, if you want to fetch more nodes in a {@link List} of {@link String}.
* @return an {@link Object} (String if moreNodes is false, String[] if moreNodes is set true)
*/
protected Object buildXPath(final String query, final boolean moreNodes) {
try {
XPathExecutable x = xPathCompiler.compile(query);
XPathSelector selector = x.load();
selector.setContextItem(this.contextItem);
XdmValue value = selector.evaluate();
if (moreNodes) {
String[] list = new String[value.size()];
int i = 0;
for (XdmItem xdmItem : value) {
list[i] = xdmItem.toString();
++i;
}
// Replace attribute chars
return MetadataParserHelper.removeAttributeChars(list);
}
ValueRepresentation rep = value.getUnderlyingValue();
// Replace attribute chars
return MetadataParserHelper.removeAttributeChars(rep.getStringValue());
} catch (SaxonApiException e) {
e.printStackTrace();
return null;
} catch (XPathException e) {
e.printStackTrace();
return null;
}
}
}