package edu.harvard.iq.dataverse.api.imports;
import com.google.gson.Gson;
import edu.harvard.iq.dataverse.DatasetFieldCompoundValue;
import edu.harvard.iq.dataverse.DatasetFieldConstant;
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.ForeignMetadataFieldMapping;
import edu.harvard.iq.dataverse.ForeignMetadataFormatMapping;
import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
import edu.harvard.iq.dataverse.api.dto.*;
import edu.harvard.iq.dataverse.api.dto.FieldDTO;
import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.StringUtil;
import edu.harvard.iq.dataverse.util.json.JsonParseException;
import edu.harvard.iq.dataverse.util.json.JsonParser;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.util.*;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.ejb.EJBException;
import javax.ejb.Stateless;
import javax.inject.Named;
import javax.json.Json;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.json.JsonObject;
import javax.json.JsonReader;
import javax.persistence.EntityManager;
import javax.persistence.NoResultException;
import javax.persistence.PersistenceContext;
import javax.xml.stream.XMLInputFactory;
/**
*
* @author ellenk
* @author Leonid Andreev
* @author Bob Treacy
*/
@Stateless
@Named
public class ImportGenericServiceBean {
// Class-wide java.util.logging logger, named after this class's canonical name.
private static final Logger logger = Logger.getLogger(ImportGenericServiceBean.class.getCanonicalName());
// Injected dataset field service; processXMLElement uses it for field-mapping and field-type lookups.
@EJB
DatasetFieldServiceBean datasetfieldService;
// Second injection of the same service type; this is the reference handed to JsonParser in importXML.
// NOTE(review): looks redundant with datasetfieldService above -- confirm before consolidating.
@EJB
DatasetFieldServiceBean datasetFieldSvc;
// Injected metadata block service; handed to JsonParser in importXML.
@EJB
MetadataBlockServiceBean blockService;
// Injected settings service; handed to JsonParser in importXML.
@EJB
SettingsServiceBean settingsService;
// Entity manager used by findFormatMappingByName to run its named query.
@PersistenceContext(unitName = "VDCNet-ejbPU")
private EntityManager em;
// Format name under which processOAIDCxml looks up the Dublin Core mapping.
// NOTE(review): public, static and not final, so any caller can reassign it -- confirm whether that is intended.
public static String DCTERMS = "http://purl.org/dc/terms/";
/**
 * Looks up a foreign metadata format mapping by name.
 *
 * @param name value bound to the "name" parameter of the
 *             "ForeignMetadataFormatMapping.findByName" named query
 * @return the single matching mapping, or {@code null} when the query has no result
 */
public ForeignMetadataFormatMapping findFormatMappingByName (String name) {
    ForeignMetadataFormatMapping found;
    try {
        found = em.createNamedQuery("ForeignMetadataFormatMapping.findByName", ForeignMetadataFormatMapping.class)
                .setParameter("name", name)
                .getSingleResult();
    } catch (NoResultException noMatch) {
        // An unknown format is reported to the caller as null rather than as an exception.
        found = null;
    }
    return found;
}
/**
 * Parses an XML string in the given foreign metadata format, converts the
 * resulting DTO to JSON and feeds that JSON to {@code JsonParser.parseDatasetVersion}
 * together with the supplied dataset version.
 *
 * @param xmlToParse     the XML document, as a string
 * @param foreignFormat  name of the foreign format mapping to use
 * @param datasetVersion the dataset version handed to the JSON parser
 * @throws EJBException if no mapping exists for {@code foreignFormat}, or if the
 *                      XML cannot be parsed (the XMLStreamException is kept as cause)
 */
public void importXML(String xmlToParse, String foreignFormat, DatasetVersion datasetVersion) {
    ForeignMetadataFormatMapping mappingSupported = findFormatMappingByName (foreignFormat);
    if (mappingSupported == null) {
        throw new EJBException("Unknown/unsupported foreign metadata format "+foreignFormat);
    }
    XMLStreamReader xmlr = null;
    try {
        // NOTE(review): the factory is used with its default settings; if the XML comes from an
        // untrusted source, consider disabling DTD/external-entity support.
        XMLInputFactory xmlFactory = javax.xml.stream.XMLInputFactory.newInstance();
        xmlr = xmlFactory.createXMLStreamReader(new StringReader(xmlToParse));
        DatasetDTO datasetDTO = processXML(xmlr, mappingSupported);
        Gson gson = new Gson();
        String json = gson.toJson(datasetDTO.getDatasetVersion());
        logger.fine(json);
        JsonReader jsonReader = Json.createReader(new StringReader(json));
        JsonObject obj = jsonReader.readObject();
        // The parser receives the target datasetVersion; its return value is not needed here.
        new JsonParser(datasetFieldSvc, blockService, settingsService).parseDatasetVersion(obj, datasetVersion);
    } catch (XMLStreamException ex) {
        // Quote at most the first 64 characters: an unconditional substring(0, 64) throws
        // StringIndexOutOfBoundsException on shorter input and would hide the real parse error.
        String fragment = xmlToParse.substring(0, Math.min(64, xmlToParse.length()));
        throw new EJBException("ERROR occurred while parsing XML fragment ("+fragment+"...); ", ex);
    } catch (JsonParseException ex) {
        // Kept as best-effort, as before: the failure is logged and the method returns normally.
        logger.log(Level.SEVERE, "Failed to parse the JSON generated from the imported XML", ex);
    } finally {
        try {
            if (xmlr != null) { xmlr.close(); }
        } catch (XMLStreamException ignored) {
            // nothing useful can be done if closing the reader fails
        }
    }
}
/**
 * File-based variant of the XML import: parses the file in the given foreign
 * metadata format, converts the resulting DTO to JSON and feeds that JSON to
 * {@code JsonParser.parseDatasetVersion} together with the supplied dataset version.
 *
 * @param xmlFile        file containing the XML document
 * @param foreignFormat  name of the foreign format mapping to use
 * @param datasetVersion the dataset version handed to the JSON parser
 * @throws EJBException if no mapping exists for {@code foreignFormat}, if the file
 *                      cannot be opened, or if the XML cannot be parsed; the
 *                      underlying exception is kept as the cause
 */
public void importXML(File xmlFile, String foreignFormat, DatasetVersion datasetVersion) {
    // look up the foreign metadata mapping for this format:
    ForeignMetadataFormatMapping mappingSupported = findFormatMappingByName (foreignFormat);
    if (mappingSupported == null) {
        throw new EJBException("Unknown/unsupported foreign metadata format "+foreignFormat);
    }
    FileInputStream in = null;
    XMLStreamReader xmlr = null;
    try {
        in = new FileInputStream(xmlFile);
        XMLInputFactory xmlFactory = javax.xml.stream.XMLInputFactory.newInstance();
        xmlr = xmlFactory.createXMLStreamReader(in);
        DatasetDTO datasetDTO = processXML(xmlr, mappingSupported);
        Gson gson = new Gson();
        String json = gson.toJson(datasetDTO.getDatasetVersion());
        logger.info("Json:\n"+json);
        JsonReader jsonReader = Json.createReader(new StringReader(json));
        JsonObject obj = jsonReader.readObject();
        // The parser receives the target datasetVersion; its return value is not needed here.
        new JsonParser(datasetFieldSvc, blockService, settingsService).parseDatasetVersion(obj, datasetVersion);
    } catch (FileNotFoundException ex) {
        // Keep the original exception as the cause so the failing path is not lost.
        throw new EJBException("ERROR occurred in mapDDI: File Not Found!", ex);
    } catch (XMLStreamException ex) {
        throw new EJBException("ERROR occurred while parsing XML (file "+xmlFile.getAbsolutePath()+"); ", ex);
    } catch (JsonParseException ex) {
        // Kept as best-effort, as before: the failure is logged and the method returns normally.
        logger.log(Level.SEVERE, "Failed to parse the JSON generated from the imported XML", ex);
    } finally {
        try {
            if (xmlr != null) { xmlr.close(); }
        } catch (XMLStreamException ignored) {
            // nothing useful can be done if closing the reader fails
        }
        // Closing the XMLStreamReader does not close the underlying stream, so close it explicitly.
        try {
            if (in != null) { in.close();}
        } catch (IOException ignored) {
            // nothing useful can be done if closing the stream fails
        }
    }
}
/**
 * Converts the XML read from {@code xmlr} into a freshly initialized DatasetDTO,
 * using the field mappings of the given foreign format.
 *
 * @param xmlr                 stream reader positioned before the root element
 * @param foreignFormatMapping format mapping; its start element must be defined
 * @return the populated DTO
 * @throws XMLStreamException if reading fails or the first non-comment event is not
 *                            the start element named by the mapping
 * @throws EJBException       if the mapping defines no start element
 */
public DatasetDTO processXML( XMLStreamReader xmlr, ForeignMetadataFormatMapping foreignFormatMapping) throws XMLStreamException {
    DatasetDTO result = this.initializeDataset();

    // skip pre root comments: advance until the current event is no longer a COMMENT
    int firstEvent;
    do {
        firstEvent = xmlr.next();
    } while (firstEvent == XMLStreamConstants.COMMENT);

    String rootTag = foreignFormatMapping.getStartElement();
    if (rootTag == null) {
        // TODO:
        // add support for parsing the body regardless of the start element.
        // June 20 2014 -- L.A.
        throw new EJBException("No support for format mappings without start element defined (yet)");
    }
    xmlr.require(XMLStreamConstants.START_ELEMENT, null, rootTag);

    processXMLElement(xmlr, ":", rootTag, foreignFormatMapping, result);
    return result;
}
/**
 * Helper method for importing harvested Dublin Core xml.
 * Dublin Core is considered a mandatory, built in metadata format mapping.
 * It is distributed as required content, in reference_data.sql.
 * Note that arbitrary formatting tags are supported for the outer xml
 * wrapper. -- L.A. 4.5
 *
 * @param DcXmlToParse the OAI_DC record, as a string
 * @return a DTO whose version state is RELEASED and whose protocol, authority and
 *         identifier were derived from the record's "other id"
 * @throws EJBException if the Dublin Core mapping is missing, the XML cannot be
 *                      parsed, or no doi/hdl global identifier can be derived
 * @throws XMLStreamException declared for compatibility; parse failures are
 *                            rethrown as EJBException by this method
 */
public DatasetDTO processOAIDCxml(String DcXmlToParse) throws XMLStreamException {
    // look up DC metadata mapping:
    ForeignMetadataFormatMapping dublinCoreMapping = findFormatMappingByName(DCTERMS);
    if (dublinCoreMapping == null) {
        throw new EJBException("Failed to find metadata mapping for " + DCTERMS);
    }

    DatasetDTO datasetDTO = this.initializeDataset();
    XMLStreamReader xmlr = null;
    try {
        XMLInputFactory xmlFactory = javax.xml.stream.XMLInputFactory.newInstance();
        xmlr = xmlFactory.createXMLStreamReader(new StringReader(DcXmlToParse));
        xmlr.nextTag();
        xmlr.require(XMLStreamConstants.START_ELEMENT, null, OAI_DC_OPENING_TAG);
        processXMLElement(xmlr, ":", OAI_DC_OPENING_TAG, dublinCoreMapping, datasetDTO);
    } catch (XMLStreamException ex) {
        // Quote at most the first 64 characters: an unconditional substring(0, 64) throws
        // on shorter input and would hide the real parse error.
        String fragment = DcXmlToParse.substring(0, Math.min(64, DcXmlToParse.length()));
        throw new EJBException("ERROR occurred while parsing XML fragment (" + fragment + "...); ", ex);
    } finally {
        // The reader was previously never closed.
        try {
            if (xmlr != null) { xmlr.close(); }
        } catch (XMLStreamException ignored) {
            // nothing useful can be done if closing the reader fails
        }
    }

    datasetDTO.getDatasetVersion().setVersionState(DatasetVersion.VersionState.RELEASED);

    // Our DC import handles the contents of the dc:identifier field
    // as an "other id". In the context of OAI harvesting, we expect
    // the identifier to be a global id, so we need to rearrange that:
    String identifier = getOtherIdFromDTO(datasetDTO.getDatasetVersion());
    logger.fine("Imported identifier: "+identifier);

    // getOtherIdFromDTO returns null when the record has no other id; treat that as
    // "no global identifier" instead of letting the parsing step hit a NullPointerException.
    String globalIdentifier = (identifier == null) ? null : reassignIdentifierAsGlobalId(identifier, datasetDTO);
    logger.fine("Detected global identifier: "+globalIdentifier);

    if (globalIdentifier == null) {
        throw new EJBException("Failed to find a global identifier in the OAI_DC XML record.");
    }
    return datasetDTO;
}
/**
 * Walks the XML stream from the current position until the end tag matching
 * {@code openingTag} (or the end of the document), adding a field to the DTO for
 * every element that has a field mapping and recursing into elements that do not.
 *
 * For a mapped element whose dataset field type is compound, mapped XML attributes
 * are collected first; if any were found they become one compound value, otherwise
 * the element text is used. For a non-compound type the element text is stored as
 * a vocabulary or primitive value and passed through makeDTO.
 *
 * @param xmlr                 reader positioned inside the element named {@code openingTag}
 * @param currentPath          ':'-separated path of the enclosing elements, ending in ':'
 * @param openingTag           local name whose end tag terminates this call
 * @param foreignFormatMapping format mapping whose name is used for field-mapping lookups
 * @param datasetDTO           DTO receiving the fields
 * @throws XMLStreamException if reading the stream fails
 * @throws EJBException       if a mapping names a dataset field type that does not exist
 */
private void processXMLElement(XMLStreamReader xmlr, String currentPath, String openingTag, ForeignMetadataFormatMapping foreignFormatMapping, DatasetDTO datasetDTO) throws XMLStreamException {
    logger.fine("entering processXMLElement; ("+currentPath+")");

    for (int event = xmlr.next(); event != XMLStreamConstants.END_DOCUMENT; event = xmlr.next()) {
        if (event == XMLStreamConstants.END_ELEMENT) {
            if (xmlr.getLocalName().equals(openingTag)) return;
            continue;
        }
        if (event != XMLStreamConstants.START_ELEMENT) {
            continue;
        }

        String currentElement = xmlr.getLocalName();
        ForeignMetadataFieldMapping mappingDefined = datasetfieldService.findFieldMapping(foreignFormatMapping.getName(), currentPath+currentElement);

        if (mappingDefined == null) {
            // recursively, process the xml stream further down:
            processXMLElement(xmlr, currentPath+currentElement+":", currentElement, foreignFormatMapping, datasetDTO);
            continue;
        }

        String dataverseFieldName = mappingDefined.getDatasetfieldName();
        DatasetFieldType mappingDefinedFieldType = datasetfieldService.findByNameOpt(dataverseFieldName);
        // Check before the first dereference; previously a missing field type caused a
        // NullPointerException and the descriptive error below could never be reached.
        if (mappingDefinedFieldType == null) {
            throw new EJBException("Bad foreign metadata field mapping: no such DatasetField " + dataverseFieldName + "!");
        }

        if (mappingDefinedFieldType.isCompound()) {
            // Process attributes, if any are defined in the mapping:
            HashSet<FieldDTO> set = collectMappedAttributes(xmlr, mappingDefined, currentPath + currentElement);
            if (!set.isEmpty()) {
                List<HashSet<FieldDTO>> compoundField = new ArrayList<>();
                compoundField.add(set);
                MetadataBlockDTO citationBlock = datasetDTO.getDatasetVersion().getMetadataBlocks().get(mappingDefinedFieldType.getMetadataBlock().getName());
                citationBlock.addField(FieldDTO.createMultipleCompoundFieldDTO(dataverseFieldName, compoundField));
            } else {
                // No mapped attributes present: fall back to the element's text content.
                FieldDTO value;
                if (mappingDefinedFieldType.isAllowMultiples()) {
                    List<String> values = new ArrayList<String>();
                    values.add(parseText(xmlr));
                    value = FieldDTO.createMultiplePrimitiveFieldDTO(dataverseFieldName, values);
                } else {
                    value = FieldDTO.createPrimitiveFieldDTO(dataverseFieldName, parseText(xmlr));
                }
                value = makeDTO(mappingDefinedFieldType, value, dataverseFieldName);
                MetadataBlockDTO citationBlock = datasetDTO.getDatasetVersion().getMetadataBlocks().get(mappingDefinedFieldType.getMetadataBlock().getName());
                citationBlock.addField(value);
            }
        } else if (dataverseFieldName != null && !dataverseFieldName.equals("")) {
            // Process the payload of this XML element. The field type looked up above is
            // reused; the earlier code repeated the same lookup by the same name.
            FieldDTO value;
            if (mappingDefinedFieldType.isControlledVocabulary()) {
                value = FieldDTO.createVocabFieldDTO(dataverseFieldName, parseText(xmlr));
            } else {
                value = FieldDTO.createPrimitiveFieldDTO(dataverseFieldName, parseText(xmlr));
            }
            value = makeDTO(mappingDefinedFieldType, value, dataverseFieldName);
            MetadataBlockDTO citationBlock = datasetDTO.getDatasetVersion().getMetadataBlocks().get(mappingDefinedFieldType.getMetadataBlock().getName());
            citationBlock.addField(value);
        }
    }
}

// Collects, for the element the reader is currently on, one primitive FieldDTO per
// attribute-type child mapping whose attribute is present on that element.
private HashSet<FieldDTO> collectMappedAttributes(XMLStreamReader xmlr, ForeignMetadataFieldMapping mappingDefined, String elementPath) {
    HashSet<FieldDTO> set = new HashSet<>();
    for (ForeignMetadataFieldMapping childMapping : mappingDefined.getChildFieldMappings()) {
        if (!childMapping.isAttribute()) {
            continue;
        }
        String attributeName = childMapping.getForeignFieldXPath();
        String attributeValue = xmlr.getAttributeValue(null, attributeName);
        if (attributeValue == null) {
            continue;
        }
        String mappedFieldName = childMapping.getDatasetfieldName();
        logger.fine("looking up dataset field " + mappedFieldName);
        DatasetFieldType mappedFieldType = datasetfieldService.findByNameOpt(mappedFieldName);
        if (mappedFieldType == null) {
            throw new EJBException("Bad foreign metadata field mapping: no such DatasetField " + mappedFieldName + "!");
        }
        try {
            // NOTE(review): the value is stored under the XML attribute name, not under
            // mappedFieldName -- confirm that this is intended.
            addToSet(set, attributeName, attributeValue);
        } catch (Exception ex) {
            logger.warning("Caught unknown exception when processing attribute " + elementPath + "{" + attributeName + "} (skipping);");
        }
    }
    return set;
}
/**
 * Re-wraps a field value according to the properties of its dataset field type.
 *
 * For a type that allows multiples, the value becomes a multiple compound,
 * multiple vocabulary or multiple primitive field, and is then wrapped in its
 * parent when the type is a child of a parent that allows multiples. For a
 * single-valued compound type the value is wrapped in a compound field.
 * Finally the parent wrap is applied (again) for child types.
 *
 * @param dataverseFieldType type describing the field
 * @param value              the value to wrap
 * @param dataverseFieldName name given to the wrapping field
 * @return the wrapped value
 */
private FieldDTO makeDTO(DatasetFieldType dataverseFieldType, FieldDTO value, String dataverseFieldName) {
    FieldDTO wrapped = value;
    if (!dataverseFieldType.isAllowMultiples()) {
        if (dataverseFieldType.isCompound()) {
            wrapped = FieldDTO.createCompoundFieldDTO(dataverseFieldName, wrapped);
        }
    } else {
        if (dataverseFieldType.isCompound()) {
            wrapped = FieldDTO.createMultipleCompoundFieldDTO(dataverseFieldName, wrapped);
        } else if (dataverseFieldType.isControlledVocabulary()) {
            wrapped = FieldDTO.createMultipleVocabFieldDTO(dataverseFieldName, Arrays.asList(wrapped.getSinglePrimitive()));
        } else {
            wrapped = FieldDTO.createMultiplePrimitiveFieldDTO(dataverseFieldName, Arrays.asList(wrapped.getSinglePrimitive()));
        }
        wrapped = wrapInMultiValuedParent(dataverseFieldType, wrapped);
    }

    // TODO:
    // it looks like the code below has already been executed, in one of the
    // if () blocks above... is this ok to be doing it again?? -- L.A. 4.5
    return wrapInMultiValuedParent(dataverseFieldType, wrapped);
}

// Wraps the value in a multiple compound field named after the parent type, but only
// when the given type is a child and its parent allows multiples; otherwise returns it as is.
private FieldDTO wrapInMultiValuedParent(DatasetFieldType dataverseFieldType, FieldDTO value) {
    if (!dataverseFieldType.isChild()) {
        return value;
    }
    DatasetFieldType parentDatasetFieldType = dataverseFieldType.getParentDatasetFieldType();
    if (!parentDatasetFieldType.isAllowMultiples()) {
        return value;
    }
    return FieldDTO.createMultipleCompoundFieldDTO(parentDatasetFieldType.getName(), value);
}
/**
 * Finds the first non-empty "other id" value in the citation block of the given
 * dataset version DTO.
 *
 * @param datasetVersionDTO DTO whose metadata blocks are searched
 * @return the other id value, or {@code null} if the citation block holds none
 */
private String getOtherIdFromDTO(DatasetVersionDTO datasetVersionDTO) {
    for (Map.Entry<String, MetadataBlockDTO> blockEntry : datasetVersionDTO.getMetadataBlocks().entrySet()) {
        if (!"citation".equals(blockEntry.getKey())) {
            continue;
        }
        for (FieldDTO field : blockEntry.getValue().getFields()) {
            if (!DatasetFieldConstant.otherId.equals(field.getTypeName())) {
                continue;
            }
            String candidate = "";
            for (HashSet<FieldDTO> compoundValue : field.getMultipleCompound()) {
                // Within one compound value the last matching sub-field wins.
                for (FieldDTO subField : compoundValue) {
                    if (DatasetFieldConstant.otherIdValue.equals(subField.getTypeName())) {
                        candidate = subField.getSinglePrimitive();
                    }
                }
                if (!candidate.isEmpty()) {
                    return candidate;
                }
            }
        }
    }
    return null;
}
/**
 * Splits an identifier of the form {@code protocol:authority/identifier} and
 * stores the parts (plus "/" as the DOI separator) on the given DTO.
 *
 * The protocol is the text before the first ':' and must be "doi" or "hdl"; the
 * authority is the text between that ':' and the last '/'; the identifier is
 * the text after the last '/'.
 *
 * @param identifierString the identifier to parse; may be {@code null}
 * @param datasetDTO       DTO that receives protocol, authority and identifier
 * @return the reassembled global id, or {@code null} (after logging a warning and
 *         without touching the DTO) when the string is null or cannot be parsed
 */
private String reassignIdentifierAsGlobalId(String identifierString, DatasetDTO datasetDTO) {
    // A record without an other id yields a null identifier; report it like any other
    // unparseable identifier instead of failing with a NullPointerException.
    if (identifierString == null) {
        logger.warning("Error parsing identifier: no identifier supplied");
        return null;
    }

    int index1 = identifierString.indexOf(':');
    if (index1==-1) {
        logger.warning("Error parsing identifier: " + identifierString + ". ':' not found in string");
        return null;
    }

    String protocol = identifierString.substring(0, index1);
    if (!NAMING_PROTOCOL_DOI.equals(protocol) && !NAMING_PROTOCOL_HANDLE.equals(protocol)) {
        logger.warning("Unsupported protocol: "+identifierString);
        return null;
    }

    // The protocol contains no '/', so a '/' that is found always lies after the ':'.
    int index2 = identifierString.lastIndexOf('/');
    if (index2 == -1) {
        logger.warning("Error parsing identifier: " + identifierString + ". Second separator not found in string");
        return null;
    }

    String authority = identifierString.substring(index1+1, index2);
    String identifier = identifierString.substring(index2+1);

    datasetDTO.setProtocol(protocol);
    datasetDTO.setDoiSeparator("/");
    datasetDTO.setAuthority(authority);
    datasetDTO.setIdentifier(identifier);

    // reassemble and return:
    return protocol + ":" + authority + "/" + identifier;
}
// Public string constants.
// Within the visible part of this class only OAI_DC_OPENING_TAG (processOAIDCxml),
// SOURCE_DVN_3_0 (parseTextNew) and NOTE_TYPE_REPLICATION_FOR (parseDVNCitation) are
// referenced; the others are presumably used by other classes -- verify before removing any.

// Root element names expected for the supported Dublin Core flavours.
public static final String OAI_DC_OPENING_TAG = "dc";
public static final String DCTERMS_OPENING_TAG = "dcterms";
// Value of the "source" attribute that marks a DVN 3.0 citation element.
public static final String SOURCE_DVN_3_0 = "DVN_3_0";
// Persistent identifier protocol prefixes and agency names.
public static final String NAMING_PROTOCOL_HANDLE = "hdl";
public static final String NAMING_PROTOCOL_DOI = "doi";
public static final String AGENCY_HANDLE = "handle";
public static final String AGENCY_DOI = "DOI";
public static final String REPLICATION_FOR_TYPE = "replicationFor";
// Variable-related keywords.
public static final String VAR_WEIGHTED = "wgtd";
public static final String VAR_INTERVAL_CONTIN = "contin";
public static final String VAR_INTERVAL_DISCRETE = "discrete";
public static final String CAT_STAT_TYPE_FREQUENCY = "freq";
public static final String VAR_FORMAT_TYPE_NUMERIC = "numeric";
public static final String VAR_FORMAT_SCHEMA_ISO = "ISO";
// Event keywords.
public static final String EVENT_START = "start";
public static final String EVENT_END = "end";
public static final String EVENT_SINGLE = "single";
// Level keywords.
public static final String LEVEL_DVN = "dvn";
public static final String LEVEL_DV = "dv";
public static final String LEVEL_STUDY = "study";
public static final String LEVEL_FILE = "file";
public static final String LEVEL_VARIABLE = "variable";
public static final String LEVEL_CATEGORY = "category";
// Note type/subject pairs.
public static final String NOTE_TYPE_UNF = "VDC:UNF";
public static final String NOTE_SUBJECT_UNF = "Universal Numeric Fingerprint";
public static final String NOTE_TYPE_TERMS_OF_USE = "DVN:TOU";
public static final String NOTE_SUBJECT_TERMS_OF_USE = "Terms Of Use";
public static final String NOTE_TYPE_CITATION = "DVN:CITATION";
public static final String NOTE_SUBJECT_CITATION = "Citation";
public static final String NOTE_TYPE_VERSION_NOTE = "DVN:VERSION_NOTE";
public static final String NOTE_SUBJECT_VERSION_NOTE= "Version Note";
public static final String NOTE_TYPE_ARCHIVE_NOTE = "DVN:ARCHIVE_NOTE";
public static final String NOTE_SUBJECT_ARCHIVE_NOTE= "Archive Note";
public static final String NOTE_TYPE_ARCHIVE_DATE = "DVN:ARCHIVE_DATE";
public static final String NOTE_SUBJECT_ARCHIVE_DATE= "Archive Date";
public static final String NOTE_TYPE_EXTENDED_METADATA = "DVN:EXTENDED_METADATA";
public static final String NOTE_TYPE_LOCKSS_CRAWL = "LOCKSS:CRAWLING";
public static final String NOTE_SUBJECT_LOCKSS_PERM = "LOCKSS Permission";
public static final String NOTE_TYPE_REPLICATION_FOR = "DVN:REPLICATION_FOR";
// Factory used by mapDCTerms(File, ...). It is only assigned by the constructor that takes
// an ImportType, so it stays null when the no-arg constructor is used.
private XMLInputFactory xmlInputFactory = null;
// Import mode read by doImport; only assigned by the constructor that takes an ImportType.
private ImportType importType;
// Import modes accepted by the ImportType constructor.
public enum ImportType{ NEW, MIGRATION, HARVEST};
/**
 * No-arg constructor. Leaves {@code importType} and {@code xmlInputFactory} unset.
 */
public ImportGenericServiceBean() {
}

/**
 * Creates an instance for the given import mode and prepares an XML input
 * factory with the "javax.xml.stream.isCoalescing" property switched on.
 *
 * @param importType the import mode to remember
 */
public ImportGenericServiceBean(ImportType importType) {
    XMLInputFactory coalescingFactory = javax.xml.stream.XMLInputFactory.newInstance();
    coalescingFactory.setProperty("javax.xml.stream.isCoalescing", java.lang.Boolean.TRUE);
    this.xmlInputFactory = coalescingFactory;
    this.importType = importType;
}
/**
 * Builds a DatasetDTO from the given XML by running it through mapDCTerms.
 *
 * @param xmlToParse the XML document, as a string
 * @return the DTO that was initialized and passed to mapDCTerms
 * @throws XMLStreamException if the XML cannot be read
 */
public DatasetDTO doImport(String xmlToParse) throws XMLStreamException {
    DatasetDTO datasetDTO = this.initializeDataset();

    // Read docDescr and studyDesc into DTO objects.
    // The returned file map is not used here.
    mapDCTerms(xmlToParse, datasetDTO);

    // Constant-first comparison: importType is null when the bean was created through the
    // no-arg constructor, and importType.equals(...) would then throw a NullPointerException.
    if (!ImportType.MIGRATION.equals(importType)) {
        //EMK TODO: Call methods for reading FileMetadata and related objects from xml, return list of FileMetadata objects.
        /*try {
        Map<String, DataTable> dataTableMap = new DataTableImportDDI().processDataDscr(xmlr);
        } catch(Exception e) {
        }*/
    }
    return datasetDTO;
}
/**
 * Runs the given XML through mapDCTerms, converts the resulting DTO to JSON and
 * feeds that JSON to {@code JsonParser.parseDatasetVersion} together with the
 * supplied dataset version. Any exception is logged and not rethrown.
 *
 * @param xmlToParse      the XML document, as a string
 * @param datasetVersion  the dataset version handed to the JSON parser
 * @param datasetFieldSvc field service handed to the JSON parser (shadows the injected field)
 * @param blockService    block service handed to the JSON parser (shadows the injected field)
 * @param settingsService settings service handed to the JSON parser (shadows the injected field)
 */
public void importDCTerms(String xmlToParse, DatasetVersion datasetVersion, DatasetFieldServiceBean datasetFieldSvc, MetadataBlockServiceBean blockService, SettingsServiceBean settingsService) {
    DatasetDTO datasetDTO = this.initializeDataset();
    try {
        // Read docDescr and studyDesc into DTO objects.
        // The returned file map is not used here.
        mapDCTerms(xmlToParse, datasetDTO);
        //
        // convert DTO to Json,
        Gson gson = new Gson();
        String json = gson.toJson(datasetDTO.getDatasetVersion());
        JsonReader jsonReader = Json.createReader(new StringReader(json));
        JsonObject obj = jsonReader.readObject();
        //and call parse Json to read it into a datasetVersion
        new JsonParser(datasetFieldSvc, blockService, settingsService).parseDatasetVersion(obj, datasetVersion);
    } catch (Exception e) {
        // EMK TODO: exception handling
        // Report through the class logger (with stack trace) rather than printStackTrace(),
        // so the failure ends up in the application log.
        logger.log(Level.SEVERE, "Failed to import DC Terms metadata", e);
    }
    //EMK TODO: Call methods for reading FileMetadata and related objects from xml, return list of FileMetadata objects.
    /*try {
    Map<String, DataTable> dataTableMap = new DataTableImportDDI().processDataDscr(xmlr);
    } catch(Exception e) {
    }*/
    // Save Dataset and DatasetVersion in database
}
/**
 * Opens a stream reader over the given XML string and hands it to processDCTerms.
 *
 * @param xmlToParse the XML document, as a string
 * @param datasetDTO DTO passed on to processDCTerms
 * @return the map that was passed to processDCTerms
 * @throws XMLStreamException if the reader cannot be created or reading fails
 */
public Map mapDCTerms(String xmlToParse, DatasetDTO datasetDTO) throws XMLStreamException {
    Map filesMap = new HashMap();
    XMLStreamReader xmlr = null;
    try {
        XMLInputFactory xmlFactory = javax.xml.stream.XMLInputFactory.newInstance();
        xmlr = xmlFactory.createXMLStreamReader(new StringReader(xmlToParse));
        processDCTerms(xmlr, datasetDTO, filesMap);
    } finally {
        // The reader was previously never closed; release it on success and on failure alike.
        try {
            if (xmlr != null) { xmlr.close(); }
        } catch (XMLStreamException ignored) {
            // nothing useful can be done if closing the reader fails
        }
    }
    return filesMap;
}
/**
 * Opens a stream reader over the given file and hands it to processDCTerms.
 *
 * @param ddiFile    file containing the XML document
 * @param datasetDTO DTO passed on to processDCTerms
 * @return the map that was passed to processDCTerms
 * @throws EJBException if the file cannot be opened or the XML cannot be read;
 *                      the underlying exception is kept as the cause
 */
public Map mapDCTerms(File ddiFile, DatasetDTO datasetDTO ) {
    // xmlInputFactory is only set by the ImportType constructor. When the bean was created
    // through the no-arg constructor, build an equivalent coalescing factory locally
    // instead of failing with a NullPointerException.
    XMLInputFactory factory = xmlInputFactory;
    if (factory == null) {
        factory = javax.xml.stream.XMLInputFactory.newInstance();
        factory.setProperty("javax.xml.stream.isCoalescing", java.lang.Boolean.TRUE);
    }

    FileInputStream in = null;
    XMLStreamReader xmlr = null;
    Map filesMap = new HashMap();
    try {
        in = new FileInputStream(ddiFile);
        xmlr = factory.createXMLStreamReader(in);
        processDCTerms( xmlr, datasetDTO , filesMap );
    } catch (FileNotFoundException ex) {
        logger.log(Level.SEVERE, null, ex);
        // Keep the original exception as the cause so the failing path is not lost.
        throw new EJBException("ERROR occurred in mapDDI: File Not Found!", ex);
    } catch (XMLStreamException ex) {
        logger.log(Level.SEVERE, null, ex);
        throw new EJBException("ERROR occurred in mapDDI.", ex);
    } finally {
        try {
            if (xmlr != null) { xmlr.close(); }
        } catch (XMLStreamException ignored) {
            // nothing useful can be done if closing the reader fails
        }
        // Closing the XMLStreamReader does not close the underlying stream, so close it explicitly.
        try {
            if (in != null) { in.close();}
        } catch (IOException ignored) {
            // nothing useful can be done if closing the stream fails
        }
    }
    return filesMap;
}
/**
 * Advances the reader to its first tag and looks up the "citation" metadata block
 * of the DTO. In its current state the method does nothing with the block and
 * never touches {@code filesMap}; the remaining logic is commented out below.
 *
 * @param xmlr       stream reader positioned before the root element
 * @param datasetDTO DTO whose "citation" block is looked up
 * @param filesMap   currently unused
 * @throws XMLStreamException if advancing to the first tag fails
 */
private void processDCTerms( XMLStreamReader xmlr, DatasetDTO datasetDTO, Map filesMap) throws XMLStreamException {
// make sure we have a codeBook
//while ( xmlr.next() == XMLStreamConstants.COMMENT ); // skip pre root comments
// Move to the first start/end tag of the document.
xmlr.nextTag();
// Looked up but not used by any live code below.
MetadataBlockDTO citationBlock = datasetDTO.getDatasetVersion().getMetadataBlocks().get("citation");
/* if (codeBookLevelId != null && !codeBookLevelId.equals("")) {
if (citationBlock.getField("otherId")==null) {
// this means no ids were found during the parsing of the
// study description section. we'll use the one we found in
// the codeBook entry:
FieldDTO otherIdValue = FieldDTO.createPrimitiveFieldDTO("otherIdValue", codeBookLevelId);
FieldDTO otherId = FieldDTO.createCompoundFieldDTO("otherId", otherIdValue);
citationBlock.getFields().add(otherId);
}
}*/
}
// EMK TODO: update unit test so this doesn't have to be public
/**
 * Creates an empty DatasetDTO with a DatasetVersionDTO that already contains the
 * "citation", "geospatial", "social_science" and "astrophysics" metadata blocks,
 * each with an empty field list.
 *
 * @return the initialized DTO
 */
public DatasetDTO initializeDataset() {
    DatasetVersionDTO versionDTO = new DatasetVersionDTO();
    DatasetDTO dataset = new DatasetDTO();
    dataset.setDatasetVersion(versionDTO);
    versionDTO.setMetadataBlocks(new HashMap<String, MetadataBlockDTO>());

    String[] blockNames = {"citation", "geospatial", "social_science", "astrophysics"};
    for (String blockName : blockNames) {
        MetadataBlockDTO emptyBlock = new MetadataBlockDTO();
        emptyBlock.setFields(new ArrayList<FieldDTO>());
        versionDTO.getMetadataBlocks().put(blockName, emptyBlock);
    }
    return dataset;
}
/**
 * Reads the text content of the current element, with scrubbing enabled.
 *
 * @param xmlr reader positioned on a START_ELEMENT
 * @return the trimmed text with newlines replaced by spaces
 * @throws XMLStreamException if the text cannot be read
 */
private String parseText(XMLStreamReader xmlr) throws XMLStreamException {
    final boolean scrub = true;
    return parseText(xmlr, scrub);
}

/**
 * Reads the text content of the current element.
 *
 * @param xmlr      reader positioned on a START_ELEMENT
 * @param scrubText when true, the text is trimmed and every newline becomes a space
 * @return the element text, scrubbed if requested
 * @throws XMLStreamException if the text cannot be read
 */
private String parseText(XMLStreamReader xmlr, boolean scrubText) throws XMLStreamException {
    String rawText = getElementText(xmlr);
    if (!scrubText) {
        return rawText;
    }
    return rawText.trim().replace('\n',' ');
}
/**
 * Returns the date of the current element: the value of its "date" attribute
 * if present, otherwise the element's scrubbed text content.
 *
 * @param xmlr   reader positioned on the element's START_ELEMENT
 * @param endTag not used by this method
 * @return the date string
 * @throws XMLStreamException if the element text has to be read and cannot be
 */
private String parseDate (XMLStreamReader xmlr, String endTag) throws XMLStreamException {
    String dateAttribute = xmlr.getAttributeValue(null, "date");
    return (dateAttribute != null) ? dateAttribute : parseText(xmlr);
}
/* We had to add this method because the reference getElementText has a bug where it
 * would append a null before the text if there was an escaped apostrophe; it appears
 * that the code finds a null ENTITY_REFERENCE in this case, which seems like a bug.
 * The workaround for the moment is to skip ENTITY_REFERENCE events instead of
 * appending their text.
 *
 * Requires the reader to be on a START_ELEMENT; concatenates the text of all
 * CHARACTERS, CDATA and SPACE events up to the next END_ELEMENT and returns it.
 * Processing instructions, comments and entity references are skipped; a nested
 * START_ELEMENT, an END_DOCUMENT or any other event raises an XMLStreamException.
 */
private String getElementText(XMLStreamReader xmlr) throws XMLStreamException {
    if (xmlr.getEventType() != XMLStreamConstants.START_ELEMENT) {
        throw new XMLStreamException("parser must be on START_ELEMENT to read next text", xmlr.getLocation());
    }
    StringBuilder text = new StringBuilder();
    for (int eventType = xmlr.next(); eventType != XMLStreamConstants.END_ELEMENT; eventType = xmlr.next()) {
        switch (eventType) {
            case XMLStreamConstants.CHARACTERS:
            case XMLStreamConstants.CDATA:
            case XMLStreamConstants.SPACE:
                text.append(xmlr.getText());
                break;
            case XMLStreamConstants.PROCESSING_INSTRUCTION:
            case XMLStreamConstants.COMMENT:
            case XMLStreamConstants.ENTITY_REFERENCE:
                // skipping
                break;
            case XMLStreamConstants.END_DOCUMENT:
                throw new XMLStreamException("unexpected end of document when reading element text content");
            case XMLStreamConstants.START_ELEMENT:
                throw new XMLStreamException("element text content may not contain START_ELEMENT", xmlr.getLocation());
            default:
                throw new XMLStreamException("Unexpected event type "+eventType, xmlr.getLocation());
        }
    }
    return text.toString();
}
/**
 * Reads an element that mixes text with optional ExtLink children.
 *
 * Text chunks are trimmed, have their newlines replaced by spaces and are joined
 * with '\n' under the key "name". Each ExtLink child contributes its URI attribute
 * under "logo" (when role is "image" or title is "logo", ignoring case) or "url".
 *
 * @param xmlr   reader positioned inside the element
 * @param endTag local name of the end tag that terminates reading
 * @return map with "name" and, when ExtLink children exist, "logo" and/or "url"
 * @throws XMLStreamException if reading the stream fails
 */
private Map<String,String> parseCompoundText (XMLStreamReader xmlr, String endTag) throws XMLStreamException {
    Map<String,String> returnMap = new HashMap<String,String>();
    String text = "";

    while (true) {
        int event = xmlr.next();
        if (event == XMLStreamConstants.CHARACTERS) {
            // Compare by content: the former reference comparison (text != "") was true for
            // any string built at run time, so a separator could be added to an empty text.
            if (!text.isEmpty()) { text += "\n";}
            text += xmlr.getText().trim().replace('\n',' ');
        } else if (event == XMLStreamConstants.START_ELEMENT) {
            if (xmlr.getLocalName().equals("ExtLink")) {
                String mapKey = ("image".equalsIgnoreCase( xmlr.getAttributeValue(null, "role") ) || "logo".equalsIgnoreCase(xmlr.getAttributeValue(null, "title")))? "logo" : "url";
                returnMap.put( mapKey, xmlr.getAttributeValue(null, "URI") );
                parseText(xmlr, "ExtLink"); // this line effectively just skips though until the end of the tag
            }
        } else if (event == XMLStreamConstants.END_ELEMENT) {
            if (xmlr.getLocalName().equals(endTag)) break;
        }
    }

    returnMap.put( "name", text );
    return returnMap;
}
/**
 * Reads marked-up text up to the given end tag via parseTextNew and returns it
 * as a String.
 *
 * @param xmlr   reader positioned inside the element
 * @param endTag local name of the end tag that terminates reading
 * @return the text produced by parseTextNew
 * @throws XMLStreamException if reading the stream fails
 * @throws ClassCastException if parseTextNew returned a Map instead of a String
 */
private String parseText(XMLStreamReader xmlr, String endTag) throws XMLStreamException {
    Object parsed = parseTextNew(xmlr, endTag);
    return (String) parsed;
}
/**
 * Reads text with inline markup up to the given end tag, converting the
 * supported child elements to HTML: p, emph, hi, ExtLink, list and citation.
 *
 * @param xmlr   reader positioned inside the element
 * @param endTag local name of the end tag that terminates reading
 * @return the Map produced by parseDVNCitation if a citation with source
 *         "DVN_3_0" was encountered, otherwise the trimmed HTML string
 * @throws XMLStreamException if reading the stream fails
 * @throws EJBException       if an unsupported child element is encountered
 */
private Object parseTextNew(XMLStreamReader xmlr, String endTag) throws XMLStreamException {
    String html = "";
    Map dvnCitation = null;
    boolean endReached = false;

    while (!endReached) {
        // Once some text exists, every further event is preceded by a newline;
        // the surplus at the end is removed by the final trim().
        if (!html.equals("")) { html += "\n";}
        int event = xmlr.next();
        if (event == XMLStreamConstants.CHARACTERS) {
            html += xmlr.getText().trim().replace('\n',' ');
        } else if (event == XMLStreamConstants.START_ELEMENT) {
            String tag = xmlr.getLocalName();
            switch (tag) {
                case "p":
                    html += "<p>" + parseText(xmlr, "p") + "</p>";
                    break;
                case "emph":
                    html += "<em>" + parseText(xmlr, "emph") + "</em>";
                    break;
                case "hi":
                    html += "<strong>" + parseText(xmlr, "hi") + "</strong>";
                    break;
                case "ExtLink":
                    // Read the attribute before consuming the element's content.
                    String uri = xmlr.getAttributeValue(null, "URI");
                    String linkText = parseText(xmlr, "ExtLink").trim();
                    html += "<a href=\"" + uri + "\">" + ( StringUtil.isEmpty(linkText) ? uri : linkText) + "</a>";
                    break;
                case "list":
                    html += parseText_list(xmlr);
                    break;
                case "citation":
                    if (SOURCE_DVN_3_0.equals(xmlr.getAttributeValue(null, "source")) ) {
                        dvnCitation = parseDVNCitation(xmlr);
                    } else {
                        html += parseText_citation(xmlr);
                    }
                    break;
                default:
                    throw new EJBException("ERROR occurred in mapDDI (parseText): tag not yet supported: <" + xmlr.getLocalName() + ">" );
            }
        } else if (event == XMLStreamConstants.END_ELEMENT) {
            endReached = xmlr.getLocalName().equals(endTag);
        }
    }

    // this is one of our new citation areas for DVN3.0
    if (dvnCitation != null) {
        return dvnCitation;
    }
    // otherwise it's a standard section and just return the String like we always did
    return html.trim();
}
/**
 * Returns the scrubbed text of the current element if its "type" attribute
 * matches the given type (ignoring case).
 *
 * @param xmlr reader positioned on the element's START_ELEMENT
 * @param type the expected note type
 * @return the element text, or {@code null} when the type does not match
 * @throws XMLStreamException if the element text cannot be read
 */
private String parseNoteByType(XMLStreamReader xmlr, String type) throws XMLStreamException {
    String actualType = xmlr.getAttributeValue(null, "type");
    return type.equalsIgnoreCase(actualType) ? parseText(xmlr) : null;
}
/**
 * Converts a list element into an HTML list: type "bulleted" becomes a ul,
 * type "ordered" becomes an ol, and each itm child becomes an li.
 *
 * @param xmlr reader positioned on the list START_ELEMENT
 * @return the HTML list markup
 * @throws XMLStreamException if reading the stream fails
 * @throws EJBException       if the list type is neither "bulleted" nor "ordered",
 *                            or if the list contains a child other than itm
 */
private String parseText_list (XMLStreamReader xmlr) throws XMLStreamException {
    // check type
    String listType = xmlr.getAttributeValue(null, "type");
    boolean bulleted = "bulleted".equals(listType);
    if (!bulleted && !"ordered".equals(listType)) {
        // this includes the default list type of "simple"
        throw new EJBException("ERROR occurred in mapDDI (parseText): ListType of types other than {bulleted, ordered} not currently supported.");
    }

    StringBuilder markup = new StringBuilder(bulleted ? "<ul>\n" : "<ol>\n");
    String closingTag = bulleted ? "</ul>" : "</ol>";

    for (;;) {
        int event = xmlr.next();
        if (event == XMLStreamConstants.END_ELEMENT) {
            if (xmlr.getLocalName().equals("list")) break;
        } else if (event == XMLStreamConstants.START_ELEMENT) {
            if (!xmlr.getLocalName().equals("itm")) {
                throw new EJBException("ERROR occurred in mapDDI (parseText): ListType does not currently supported contained LabelType.");
            }
            markup.append("<li>").append(parseText(xmlr,"itm")).append("</li>\n");
        }
    }
    return markup.append(closingTag).toString();
}
/**
 * Converts a citation element into a string: an HTML comment marker, followed by
 * the text of every titl found inside titlStmt, followed (if any holdings carry
 * a URI or text) by the comma-separated holdings in parentheses.
 *
 * @param xmlr reader positioned on the citation START_ELEMENT
 * @return the assembled citation string
 * @throws XMLStreamException if reading the stream fails
 */
private String parseText_citation (XMLStreamReader xmlr) throws XMLStreamException {
    StringBuilder citation = new StringBuilder("<!-- parsed from DDI citation title and holdings -->");
    StringBuilder holdings = new StringBuilder();
    boolean haveHoldings = false;

    for (;;) {
        int event = xmlr.next();
        if (event == XMLStreamConstants.END_ELEMENT) {
            if (xmlr.getLocalName().equals("citation")) break;
            continue;
        }
        if (event != XMLStreamConstants.START_ELEMENT) {
            continue;
        }

        String tag = xmlr.getLocalName();
        if (tag.equals("titlStmt")) {
            // Collect every titl until the matching titlStmt end tag.
            for (;;) {
                int inner = xmlr.next();
                if (inner == XMLStreamConstants.START_ELEMENT) {
                    if (xmlr.getLocalName().equals("titl")) {
                        citation.append(parseText(xmlr));
                    }
                } else if (inner == XMLStreamConstants.END_ELEMENT) {
                    if (xmlr.getLocalName().equals("titlStmt")) break;
                }
            }
        } else if (tag.equals("holdings")) {
            // Read the attribute before consuming the element's text.
            String uri = xmlr.getAttributeValue(null, "URI");
            String holdingsText = parseText(xmlr);
            boolean noUri = StringUtil.isEmpty(uri);
            boolean noText = StringUtil.isEmpty(holdingsText);
            if (noUri && noText) {
                continue;
            }
            if (haveHoldings) {
                holdings.append(", ");
            }
            haveHoldings = true;
            if (noUri) {
                holdings.append(holdingsText);
            } else {
                // link text falls back to the uri when the element has no text
                holdings.append("<a href=\"").append(uri).append("\">")
                        .append(noText ? uri : holdingsText).append("</a>");
            }
        }
    }

    if (haveHoldings) {
        citation.append(" (").append(holdings).append(")");
    }
    return citation.toString();
}
/**
 * Extracts the UNF signature from a string.
 *
 * @param unfString text that may contain "UNF:"
 * @return the substring starting at the first "UNF:", or {@code null} if the
 *         marker does not occur
 */
private String parseUNF(String unfString) {
    int markerAt = unfString.indexOf("UNF:");
    return (markerAt == -1) ? null : unfString.substring(markerAt);
}
/**
 * Reads a DVN 3.0 citation element into a map.
 *
 * Keys written: "idType" (agency attribute of IDNo), "idNumber" (text of IDNo),
 * "text" (text of biblCit), "url" (URI attribute of holdings) and
 * "replicationData" (Boolean.TRUE when a notes element has the replication-for type).
 *
 * @param xmlr reader positioned on the citation START_ELEMENT
 * @return the collected values
 * @throws XMLStreamException if reading the stream fails
 */
private Map parseDVNCitation(XMLStreamReader xmlr) throws XMLStreamException {
    Map<String, Object> returnValues = new HashMap<String, Object>();

    while (true) {
        int event = xmlr.next();
        if (event == XMLStreamConstants.START_ELEMENT) {
            if (xmlr.getLocalName().equals("IDNo")) {
                // Read the attribute before consuming the element's text.
                returnValues.put("idType", xmlr.getAttributeValue(null, "agency") );
                returnValues.put("idNumber", parseText(xmlr) );
            }
            else if (xmlr.getLocalName().equals("biblCit")) {
                returnValues.put("text", parseText(xmlr) );
            }
            else if (xmlr.getLocalName().equals("holdings")) {
                returnValues.put("url", xmlr.getAttributeValue(null, "URI") );
            }
            else if (xmlr.getLocalName().equals("notes")) {
                if (NOTE_TYPE_REPLICATION_FOR.equals(xmlr.getAttributeValue(null, "type")) ) {
                    // Boolean.TRUE replaces the deprecated "new Boolean(true)"; it is equal by value.
                    returnValues.put("replicationData", Boolean.TRUE);
                }
            }
        } else if (event == XMLStreamConstants.END_ELEMENT) {
            if (xmlr.getLocalName().equals("citation")) break;
        }
    }

    return returnValues;
}
/**
 * Adds a primitive field with the given type name and value to the set.
 * A {@code null} value is ignored.
 *
 * @param set      the set to add to
 * @param typeName type name of the new field
 * @param value    value of the new field; nothing is added when null
 */
private void addToSet(HashSet<FieldDTO> set, String typeName, String value ) {
    if (value == null) {
        return;
    }
    FieldDTO primitive = FieldDTO.createPrimitiveFieldDTO(typeName, value);
    set.add(primitive);
}
}