package edu.harvard.iq.dataverse.metadataimport;
import edu.harvard.iq.dataverse.ControlledVocabularyValue;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.DatasetFieldCompoundValue;
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetFieldValue;
import edu.harvard.iq.dataverse.ForeignMetadataFieldMapping;
import edu.harvard.iq.dataverse.ForeignMetadataFormatMapping;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.logging.Logger;
import java.io.StringReader;
import javax.ejb.Stateless;
import javax.inject.Named;
import javax.ejb.EJB;
import javax.ejb.EJBException;
import javax.persistence.EntityManager;
import javax.persistence.NoResultException;
import javax.persistence.PersistenceContext;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.stream.XMLInputFactory;
/**
* Draft/prototype XML import service for DVN 4.0.
*
* @author Leonid Andreev
*/
@Stateless
@Named
public class ForeignMetadataImportServiceBean {
/** Logger for this bean, named after the bean's canonical class name. */
private static final Logger logger = Logger.getLogger(ForeignMetadataImportServiceBean.class.getCanonicalName());
/** Injected service; used below to look up foreign field mappings and dataset field types by name. */
@EJB
DatasetFieldServiceBean datasetfieldService;
/** Entity manager injected for the "VDCNet-ejbPU" persistence unit; used below to run the format-mapping named query. */
@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;
/**
 * Looks up the foreign metadata format mapping stored under the given name.
 *
 * Executes the "ForeignMetadataFormatMapping.findByName" named query with
 * its "name" parameter bound to {@code name}.
 *
 * @param name the format name to search for
 * @return the single matching mapping, or {@code null} if the query finds none
 */
ForeignMetadataFormatMapping findFormatMappingByName (String name) {
    ForeignMetadataFormatMapping formatMapping;
    try {
        formatMapping = em
                .createNamedQuery("ForeignMetadataFormatMapping.findByName", ForeignMetadataFormatMapping.class)
                .setParameter("name", name)
                .getSingleResult();
    } catch (NoResultException noSuchMapping) {
        // An unknown format is reported to the caller as null rather than as an exception.
        formatMapping = null;
    }
    return formatMapping;
}
/**
 * Imports metadata from an XML document supplied as a string.
 *
 * The format mapping registered under {@code foreignFormat} is looked up,
 * a StAX reader is opened over the text, and the stream is handed to
 * {@code processXML}, which fills in {@code datasetVersion}.
 *
 * @param xmlToParse     the XML document text; must not be {@code null}
 * @param foreignFormat  name of the foreign metadata format mapping to apply
 * @param datasetVersion the dataset version that receives the imported values
 * @throws EJBException if the format is unknown, {@code xmlToParse} is
 *                      {@code null}, or the XML cannot be parsed
 */
public void importXML(String xmlToParse, String foreignFormat, DatasetVersion datasetVersion) {
    ForeignMetadataFormatMapping mappingSupported = findFormatMappingByName(foreignFormat);
    if (mappingSupported == null) {
        throw new EJBException("Unknown/unsupported foreign metadata format "+foreignFormat);
    }
    if (xmlToParse == null) {
        throw new EJBException("No XML supplied to import (foreign metadata format "+foreignFormat+")");
    }

    XMLStreamReader xmlr = null;
    try {
        XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
        // Imported metadata is untrusted input: switch off DTD processing and
        // external entity resolution to rule out XXE attacks.
        xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        xmlFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

        xmlr = xmlFactory.createXMLStreamReader(new StringReader(xmlToParse));
        processXML(xmlr, mappingSupported, datasetVersion);
    } catch (XMLStreamException ex) {
        // Quote at most the first 64 characters of the input; an unconditional
        // substring(0, 64) would itself throw on shorter documents and hide
        // the real parse error.
        String excerpt = xmlToParse.length() > 64
                ? xmlToParse.substring(0, 64) + "..."
                : xmlToParse;
        throw new EJBException("ERROR occurred while parsing XML fragment ("+excerpt+"); ", ex);
    } finally {
        if (xmlr != null) {
            try {
                xmlr.close();
            } catch (XMLStreamException ex) {
                // Nothing useful can be done at this point; record it and move on.
                logger.fine("Failed to close the XML stream reader: " + ex.getMessage());
            }
        }
    }
}
/**
 * Imports metadata from an XML file.
 *
 * The format mapping registered under {@code foreignFormat} is looked up,
 * a StAX reader is opened over the file, and the stream is handed to
 * {@code processXML}, which fills in {@code datasetVersion}.
 *
 * @param xmlFile        the XML file to read; must not be {@code null}
 * @param foreignFormat  name of the foreign metadata format mapping to apply
 * @param datasetVersion the dataset version that receives the imported values
 * @throws EJBException if the format is unknown, {@code xmlFile} is
 *                      {@code null} or cannot be opened, or the XML cannot
 *                      be parsed
 */
public void importXML(File xmlFile, String foreignFormat, DatasetVersion datasetVersion) {
    // look up the foreign metadata mapping for this format:
    ForeignMetadataFormatMapping mappingSupported = findFormatMappingByName(foreignFormat);
    if (mappingSupported == null) {
        throw new EJBException("Unknown/unsupported foreign metadata format "+foreignFormat);
    }
    if (xmlFile == null) {
        throw new EJBException("No XML file supplied to import (foreign metadata format "+foreignFormat+")");
    }

    FileInputStream in = null;
    XMLStreamReader xmlr = null;
    try {
        in = new FileInputStream(xmlFile);

        XMLInputFactory xmlFactory = XMLInputFactory.newInstance();
        // Imported metadata is untrusted input: switch off DTD processing and
        // external entity resolution to rule out XXE attacks.
        xmlFactory.setProperty(XMLInputFactory.SUPPORT_DTD, Boolean.FALSE);
        xmlFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, Boolean.FALSE);

        xmlr = xmlFactory.createXMLStreamReader(in);
        processXML(xmlr, mappingSupported, datasetVersion);
    } catch (FileNotFoundException ex) {
        // Name the file and keep the cause so the failure can be diagnosed.
        throw new EJBException("ERROR occurred while importing XML: file not found ("+xmlFile.getAbsolutePath()+"); ", ex);
    } catch (XMLStreamException ex) {
        throw new EJBException("ERROR occurred while parsing XML (file "+xmlFile.getAbsolutePath()+"); ", ex);
    } finally {
        // Closing the reader and closing the underlying file stream are done
        // as two separate steps, so both are attempted.
        if (xmlr != null) {
            try {
                xmlr.close();
            } catch (XMLStreamException ex) {
                logger.fine("Failed to close the XML stream reader: " + ex.getMessage());
            }
        }
        if (in != null) {
            try {
                in.close();
            } catch (IOException ex) {
                logger.fine("Failed to close " + xmlFile.getAbsolutePath() + ": " + ex.getMessage());
            }
        }
    }
}
/**
 * Entry point for parsing an opened XML stream.
 *
 * Advances the reader to the first tag, verifies that it is the start
 * element named by the format mapping, and then hands the stream to
 * {@code processXMLElement} with the root path ":".
 *
 * @param xmlr                 the XML stream reader, positioned before the root element
 * @param foreignFormatMapping the format mapping being applied
 * @param datasetVersion       the dataset version that receives the imported values
 * @throws XMLStreamException if the reader fails, or the first tag is not
 *                            the expected start element
 * @throws EJBException       if the mapping defines no start element
 */
private void processXML(XMLStreamReader xmlr, ForeignMetadataFormatMapping foreignFormatMapping, DatasetVersion datasetVersion) throws XMLStreamException {
    // Move to the first tag of the document.
    xmlr.nextTag();

    final String rootElementName = foreignFormatMapping.getStartElement();
    if (rootElementName == null) {
        // TODO:
        // add support for parsing the body regardless of the start element.
        // June 20 2014 -- L.A.
        throw new EJBException("No support for format mappings without start element defined (yet)");
    }

    xmlr.require(XMLStreamConstants.START_ELEMENT, null, rootElementName);
    processXMLElement(xmlr, ":", rootElementName, foreignFormatMapping, datasetVersion);
}
/**
 * Processes the content of one XML element, recursing into unmapped children.
 *
 * Events are read until the end tag whose local name equals
 * {@code openingTag} (or the end of the document) is reached. For every
 * start element the field mapping for {@code currentPath + elementName} is
 * looked up:
 * <ul>
 * <li>if a mapping exists, its attribute child mappings and then its own
 *     text payload are turned into dataset field values;</li>
 * <li>otherwise the element is descended into, with the path extended by
 *     {@code elementName + ":"}.</li>
 * </ul>
 *
 * NOTE(review): a mapped element that has no dataset field name of its own
 * is neither consumed nor descended into here, so its children are looked up
 * relative to {@code currentPath} -- confirm this is intended.
 *
 * @param xmlr                 the XML stream reader, positioned on the start tag of the element
 * @param currentPath          ":"-terminated path of the element being processed
 * @param openingTag           local name of the element being processed
 * @param foreignFormatMapping the format mapping being applied
 * @param datasetVersion       the dataset version that receives the imported values
 * @throws XMLStreamException if the reader fails
 * @throws EJBException       if a mapping refers to a dataset field that does not exist
 */
private void processXMLElement(XMLStreamReader xmlr, String currentPath, String openingTag, ForeignMetadataFormatMapping foreignFormatMapping, DatasetVersion datasetVersion) throws XMLStreamException {
    logger.fine("entering processXMLElement; ("+currentPath+")");

    for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) {
        if (event == XMLStreamConstants.END_ELEMENT) {
            if (xmlr.getLocalName().equals(openingTag)) {
                // Reached the end of the element this call is responsible for.
                return;
            }
            continue;
        }
        if (event != XMLStreamConstants.START_ELEMENT) {
            continue;
        }

        String currentElement = xmlr.getLocalName();
        String elementPath = currentPath + currentElement;
        ForeignMetadataFieldMapping mappingDefined = datasetfieldService.findFieldMapping(foreignFormatMapping.getName(), elementPath);

        if (mappingDefined == null) {
            // recursively, process the xml stream further down:
            processXMLElement(xmlr, elementPath + ":", currentElement, foreignFormatMapping, datasetVersion);
            continue;
        }

        // Attributes must be read first, while the reader is still on the start tag.
        DatasetFieldCompoundValue cachedCompoundValue = importMappedAttributes(xmlr, mappingDefined, elementPath, datasetVersion);
        importMappedElementText(xmlr, mappingDefined, cachedCompoundValue, datasetVersion);
    }
}

/**
 * Creates dataset field values for the attributes of the current start
 * element that are listed as attribute child mappings of {@code elementMapping}.
 *
 * A failure while creating an individual value is logged and that attribute
 * is skipped.
 *
 * @return the compound value returned by the last successful value creation,
 *         or {@code null} if there was none
 * @throws EJBException if an attribute mapping names a dataset field that does not exist
 */
private DatasetFieldCompoundValue importMappedAttributes(XMLStreamReader xmlr, ForeignMetadataFieldMapping elementMapping, String elementPath, DatasetVersion datasetVersion) {
    DatasetFieldCompoundValue cachedCompoundValue = null;

    for (ForeignMetadataFieldMapping childMapping : elementMapping.getChildFieldMappings()) {
        if (!childMapping.isAttribute()) {
            continue;
        }
        String attributeName = childMapping.getForeignFieldXPath();
        String attributeValue = xmlr.getAttributeValue(null, attributeName);
        if (attributeValue == null) {
            // The attribute is not present on this element.
            continue;
        }

        String mappedFieldName = childMapping.getDatasetfieldName();
        logger.fine("looking up dataset field "+mappedFieldName);
        DatasetFieldType mappedFieldType = datasetfieldService.findByNameOpt(mappedFieldName);
        if (mappedFieldType == null) {
            throw new EJBException ("Bad foreign metadata field mapping: no such DatasetField "+mappedFieldName+"!");
        }

        try {
            cachedCompoundValue = createDatasetFieldValue(mappedFieldType, cachedCompoundValue, attributeValue, datasetVersion);
        } catch (Exception ex) {
            // Best effort: skip this attribute, but keep the cause in the log.
            logger.log(java.util.logging.Level.WARNING,
                    "Caught unknown exception when processing attribute "+elementPath+"{"+attributeName+"} (skipping);",
                    ex);
        }
    }
    return cachedCompoundValue;
}

/**
 * Creates a dataset field value from the text payload of the current element,
 * if {@code elementMapping} names a dataset field for it. When it does not,
 * the reader is left untouched.
 *
 * @param cachedCompoundValue compound value produced while importing the
 *                            element's attributes (may be {@code null});
 *                            passed on to the value creation
 * @throws XMLStreamException if the element text cannot be read
 * @throws EJBException       if the mapping names a dataset field that does not exist
 */
private void importMappedElementText(XMLStreamReader xmlr, ForeignMetadataFieldMapping elementMapping, DatasetFieldCompoundValue cachedCompoundValue, DatasetVersion datasetVersion) throws XMLStreamException {
    String dataverseFieldName = elementMapping.getDatasetfieldName();
    if (dataverseFieldName == null || dataverseFieldName.equals("")) {
        return;
    }

    DatasetFieldType dataverseFieldType = datasetfieldService.findByNameOpt(dataverseFieldName);
    if (dataverseFieldType == null) {
        throw new EJBException ("Bad foreign metadata field mapping: no such DatasetField "+dataverseFieldName+"!");
    }

    String elementTextPayload = parseText(xmlr);
    createDatasetFieldValue(dataverseFieldType, cachedCompoundValue, elementTextPayload, datasetVersion);
}
/**
 * Stores one imported text value in the dataset version.
 *
 * Only primitive field types are handled:
 * <ul>
 * <li>a primitive without a parent gets a new value added to the matching
 *     top-level dataset field (created if absent); controlled vocabulary
 *     fields are not supported yet and their values are skipped;</li>
 * <li>a primitive with a parent becomes a new child field of a compound
 *     value -- {@code savedCompoundValue} if it belongs to the right parent
 *     field type, otherwise a newly created one.</li>
 * </ul>
 *
 * @param dsft               type of the dataset field the value belongs to
 * @param savedCompoundValue compound value created by an earlier call that
 *                           may be reused, or {@code null}
 * @param elementText        the text value to store
 * @param datasetVersion     the dataset version being populated
 * @return the compound value the new child field was attached to, or
 *         {@code null} if no compound value was involved
 */
private DatasetFieldCompoundValue createDatasetFieldValue(DatasetFieldType dsft, DatasetFieldCompoundValue savedCompoundValue, String elementText, DatasetVersion datasetVersion) {
    if (!dsft.isPrimitive()) {
        // Non-primitive types carry no text value of their own; nothing to do.
        return null;
    }

    if (!dsft.isHasParent()) {
        // simple primitive:
        DatasetField dsf = findOrCreateDatasetField(datasetVersion, dsft);
        String dsfName = dsft.getName();

        if (dsft.isControlledVocabulary()) {
            // TODO: controlled vocabulary entries are not supported yet.
            // Intended approach (L.A.): look for the entry of
            // dsft.getControlledVocabularyValues() whose getStrValue() equals
            // elementText and, if found, add it to
            // dsf.getControlledVocabularyValues().
            logger.warning("Controlled vocabulary values are not supported yet; skipping value for field " + dsfName);
        } else {
            logger.fine("Creating a new value for field " + dsfName + ": " + elementText);
            DatasetFieldValue newDsfv = new DatasetFieldValue(dsf);
            newDsfv.setValue(elementText);
            dsf.getDatasetFieldValues().add(newDsfv);
        }
        // No compound values had to be created; returning null:
        return null;
    }

    // a primitive that is part of a compound value:
    // first, let's create the field and the value, for the
    // primitive node itself:
    DatasetField childField = new DatasetField();
    childField.setDatasetFieldType(dsft);
    DatasetFieldValue childValue = new DatasetFieldValue(childField);
    childValue.setValue(elementText);
    childField.getDatasetFieldValues().add(childValue);

    DatasetFieldType parentFieldType = dsft.getParentDatasetFieldType();
    if (parentFieldType == null) {
        logger.severe("Child field type with no parent field type defined!");
        // we could throw an exception and exit... but maybe we
        // could just skip this field and try to continue - ?
        return null;
    }

    // see if a compound value of the right type has already been
    // created and passed to us:
    DatasetFieldCompoundValue parentCompoundValue = null;
    if (savedCompoundValue != null
            && parentFieldType.equals(savedCompoundValue.getParentDatasetField().getDatasetFieldType())) {
        parentCompoundValue = savedCompoundValue;
    }

    // if not, create a new one:
    if (parentCompoundValue == null) {
        // and to do that, we need to find or create the "parent"
        // dataset field for this compound value:
        // (I put quotes around "parent", because I really feel it
        // is a misnomer, and that the relationship between the compound value
        // and the corresponding dataset field should be called
        // "CompoundDatasetField", not "ParentDatasetField") (discuss?)
        DatasetField parentField = findOrCreateDatasetField(datasetVersion, parentFieldType);

        parentCompoundValue = new DatasetFieldCompoundValue();
        parentCompoundValue.setParentDatasetField(parentField);
        parentField.getDatasetFieldCompoundValues().add(parentCompoundValue);
    }

    childField.setParentDatasetFieldCompoundValue(parentCompoundValue);
    parentCompoundValue.getChildDatasetFields().add(childField);
    return parentCompoundValue;
}

/**
 * Returns the dataset field of the given type from the version's flat field
 * list, creating the field and adding it to the version if none exists.
 */
private DatasetField findOrCreateDatasetField(DatasetVersion datasetVersion, DatasetFieldType fieldType) {
    DatasetField field = null;
    // No early exit: if several fields share the type, the last one is used.
    for (DatasetField existingDsf : datasetVersion.getFlatDatasetFields()) {
        if (existingDsf.getDatasetFieldType().equals(fieldType)) {
            field = existingDsf;
        }
    }
    // if doesn't exist, create a new one:
    if (field == null) {
        field = new DatasetField();
        field.setDatasetFieldType(fieldType);
        datasetVersion.getDatasetFields().add(field);
        field.setDatasetVersion(datasetVersion);
    }
    return field;
}
/**
 * Reads the text content of the current element with scrubbing enabled.
 *
 * @param xmlr the XML stream reader, positioned on the element's start tag
 * @return the scrubbed element text
 * @throws XMLStreamException if the element text cannot be read
 */
private String parseText(XMLStreamReader xmlr) throws XMLStreamException {
    final boolean scrubText = true;
    return parseText(xmlr, scrubText);
}
/**
 * Reads the text content of the current element.
 *
 * @param xmlr      the XML stream reader, positioned on the element's start tag
 * @param scrubText when {@code true}, the text is trimmed and every newline
 *                  character is replaced with a space
 * @return the element text, scrubbed if requested
 * @throws XMLStreamException if the element text cannot be read
 */
private String parseText(XMLStreamReader xmlr, boolean scrubText) throws XMLStreamException {
    // TODO:
    // In 3.* we had to provide our own getElementText method, because
    // at that point xmlr.getElementText() was found to be buggy.
    // Investigate if that's still needed! -- See comments in the
    // DDIServiceBean in 3.6 for details.
    // -- L.A. June 23 2014
    final String rawText = xmlr.getElementText();
    if (!scrubText) {
        return rawText;
    }
    return rawText.trim().replace('\n', ' ');
}
}