package edu.harvard.iq.dataverse.export.ddi; import com.google.gson.Gson; import edu.harvard.iq.dataverse.DataFile; import edu.harvard.iq.dataverse.DataTable; import edu.harvard.iq.dataverse.Dataset; import edu.harvard.iq.dataverse.DatasetFieldConstant; import edu.harvard.iq.dataverse.DatasetVersion; import edu.harvard.iq.dataverse.FileMetadata; import edu.harvard.iq.dataverse.api.dto.DataVariableDTO; import edu.harvard.iq.dataverse.api.dto.DatasetDTO; import edu.harvard.iq.dataverse.api.dto.DatasetVersionDTO; import edu.harvard.iq.dataverse.api.dto.FieldDTO; import edu.harvard.iq.dataverse.api.dto.FileDTO; import edu.harvard.iq.dataverse.api.dto.FileMetadataDTO; import edu.harvard.iq.dataverse.api.dto.MetadataBlockDTO; import edu.harvard.iq.dataverse.datavariable.DataVariable; import edu.harvard.iq.dataverse.datavariable.SummaryStatistic; import edu.harvard.iq.dataverse.datavariable.VariableCategory; import edu.harvard.iq.dataverse.datavariable.VariableRange; import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.LEVEL_FILE; import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_SUBJECT_TAG; import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_SUBJECT_UNF; import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_TYPE_TAG; import static edu.harvard.iq.dataverse.export.DDIExportServiceBean.NOTE_TYPE_UNF; import static edu.harvard.iq.dataverse.util.SystemConfig.FQDN; import static edu.harvard.iq.dataverse.util.SystemConfig.SITE_URL; import edu.harvard.iq.dataverse.util.json.JsonUtil; import edu.harvard.iq.dataverse.util.xml.XmlPrinter; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; import java.net.InetAddress; import java.net.UnknownHostException; import java.nio.file.Files; import java.nio.file.Paths; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import java.util.logging.Level; import java.util.logging.Logger; import javax.json.JsonObject; import javax.xml.stream.XMLOutputFactory; import javax.xml.stream.XMLStreamException; import javax.xml.stream.XMLStreamWriter; public class DdiExportUtil { private static final Logger logger = Logger.getLogger(DdiExportUtil.class.getCanonicalName()); public static final String NOTE_TYPE_CONTENTTYPE = "DATAVERSE:CONTENTTYPE"; public static final String NOTE_SUBJECT_CONTENTTYPE = "Content/MIME Type"; public static String datasetDtoAsJson2ddi(String datasetDtoAsJson) { logger.fine(JsonUtil.prettyPrint(datasetDtoAsJson)); Gson gson = new Gson(); DatasetDTO datasetDto = gson.fromJson(datasetDtoAsJson, DatasetDTO.class); try { return dto2ddi(datasetDto); } catch (XMLStreamException ex) { Logger.getLogger(DdiExportUtil.class.getName()).log(Level.SEVERE, null, ex); return null; } } // "short" ddi, without the "<fileDscr>" and "<dataDscr>/<var>" sections: public static void datasetJson2ddi(JsonObject datasetDtoAsJson, OutputStream outputStream) throws XMLStreamException { logger.fine(JsonUtil.prettyPrint(datasetDtoAsJson.toString())); Gson gson = new Gson(); DatasetDTO datasetDto = gson.fromJson(datasetDtoAsJson.toString(), DatasetDTO.class); dtoddi(datasetDto, outputStream); } private static String dto2ddi(DatasetDTO datasetDto) throws XMLStreamException { OutputStream outputStream = new ByteArrayOutputStream(); dtoddi(datasetDto, outputStream); String xml = outputStream.toString(); return XmlPrinter.prettyPrintXml(xml); } private static void dtoddi(DatasetDTO datasetDto, OutputStream outputStream) throws XMLStreamException { XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); xmlw.writeStartElement("codeBook"); xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", "ddi:codebook:2_5 http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd"); writeAttribute(xmlw, "version", "2.5"); createStdyDscr(xmlw, datasetDto); createOtherMats(xmlw, datasetDto.getDatasetVersion().getFiles()); xmlw.writeEndElement(); // codeBook xmlw.flush(); } // "full" ddi, with the the "<fileDscr>" and "<dataDscr>/<var>" sections: public static void datasetJson2ddi(JsonObject datasetDtoAsJson, DatasetVersion version, OutputStream outputStream) throws XMLStreamException { logger.fine(JsonUtil.prettyPrint(datasetDtoAsJson.toString())); Gson gson = new Gson(); DatasetDTO datasetDto = gson.fromJson(datasetDtoAsJson.toString(), DatasetDTO.class); XMLStreamWriter xmlw = XMLOutputFactory.newInstance().createXMLStreamWriter(outputStream); xmlw.writeStartElement("codeBook"); xmlw.writeDefaultNamespace("ddi:codebook:2_5"); xmlw.writeAttribute("xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance"); xmlw.writeAttribute("xsi:schemaLocation", "ddi:codebook:2_5 http://www.ddialliance.org/Specification/DDI-Codebook/2.5/XMLSchema/codebook.xsd"); writeAttribute(xmlw, "version", "2.5"); createStdyDscr(xmlw, datasetDto); createFileDscr(xmlw, version); createDataDscr(xmlw, version); createOtherMatsFromFileMetadatas(xmlw, version.getFileMetadatas()); xmlw.writeEndElement(); // codeBook xmlw.flush(); } /** * @todo This is just a stub, copied from DDIExportServiceBean. It should * produce valid DDI based on * http://guides.dataverse.org/en/latest/developers/tools.html#msv but it is * incomplete and will be worked on as part of * https://github.com/IQSS/dataverse/issues/2579 . We'll want to reference * the DVN 3.x code for creating a complete DDI. * * @todo Rename this from "study" to "dataset". */ private static void createStdyDscr(XMLStreamWriter xmlw, DatasetDTO datasetDto) throws XMLStreamException { DatasetVersionDTO version = datasetDto.getDatasetVersion(); String persistentProtocol = datasetDto.getProtocol(); String persistentAgency = persistentProtocol; // The "persistentAgency" tag is used for the "agency" attribute of the // <IDNo> ddi section; back in the DVN3 days we used "handle" and "DOI" // for the 2 supported protocols, respectively. For the sake of backward // compatibility, we should probably stick with these labels: (-- L.A. 4.5) if ("hdl".equals(persistentAgency)) { persistentAgency = "handle"; } else if ("doi".equals(persistentAgency)) { persistentAgency = "DOI"; } String persistentAuthority = datasetDto.getAuthority(); String persistentId = datasetDto.getIdentifier(); //docDesc Block writeDocDescElement (xmlw, datasetDto); //stdyDesc Block xmlw.writeStartElement("stdyDscr"); xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title)); writeFullElement(xmlw, "subTitl", dto2Primitive(version, DatasetFieldConstant.subTitle)); writeFullElement(xmlw, "altTitl", dto2Primitive(version, DatasetFieldConstant.alternativeTitle)); xmlw.writeStartElement("IDNo"); writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(persistentProtocol + ":" + persistentAuthority + "/" + persistentId); xmlw.writeEndElement(); // IDNo xmlw.writeEndElement(); // titlStmt writeAuthorsElement(xmlw, version); writeProducersElement(xmlw, version); xmlw.writeStartElement("distStmt"); writeFullElement(xmlw, "distrbtr", datasetDto.getPublisher()); writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); xmlw.writeEndElement(); // diststmt xmlw.writeEndElement(); // citation //End Citation Block //Start Study Info Block // Study Info xmlw.writeStartElement("stdyInfo"); writeSubjectElement(xmlw, version); //Subject and Keywords writeAbstractElement(xmlw, version); // Description writeFullElement(xmlw, "notes", dto2Primitive(version, DatasetFieldConstant.notesText)); writeSummaryDescriptionElement(xmlw, version); writeRelPublElement(xmlw, version); writeOtherIdElement(xmlw, version); writeDistributorsElement(xmlw, version); writeContactsElement(xmlw, version); writeFullElement(xmlw, "depositr", dto2Primitive(version, DatasetFieldConstant.depositor)); writeFullElement(xmlw, "depDate", dto2Primitive(version, DatasetFieldConstant.dateOfDeposit)); writeFullElementList(xmlw, "relMat", dto2PrimitiveList(version, DatasetFieldConstant.relatedMaterial)); writeFullElementList(xmlw, "relStdy", dto2PrimitiveList(version, DatasetFieldConstant.relatedDatasets)); writeFullElementList(xmlw, "othRefs", dto2PrimitiveList(version, DatasetFieldConstant.otherReferences)); writeSeriesElement(xmlw, version); writeSoftwareElement(xmlw, version); writeFullElementList(xmlw, "dataSrc", dto2PrimitiveList(version, DatasetFieldConstant.dataSources)); writeFullElement(xmlw, "srcOrig", dto2Primitive(version, DatasetFieldConstant.originOfSources)); writeFullElement(xmlw, "srcChar", dto2Primitive(version, DatasetFieldConstant.characteristicOfSources)); writeFullElement(xmlw, "srcDocu", dto2Primitive(version, DatasetFieldConstant.accessToSources)); xmlw.writeEndElement(); // stdyInfo // End Info Block //Social Science Metadata block writeMethodElement(xmlw, version); //Terms of Use and Access writeFullElement(xmlw, "useStmt", version.getTermsOfUse()); writeFullElement(xmlw, "confDec", version.getConfidentialityDeclaration()); writeFullElement(xmlw, "specPerm", version.getSpecialPermissions()); writeFullElement(xmlw, "restrctn", version.getRestrictions()); writeFullElement(xmlw, "citeReq", version.getCitationRequirements()); writeFullElement(xmlw, "deposReq", version.getDepositorRequirements()); writeFullElement(xmlw, "dataAccs", version.getTermsOfAccess()); writeFullElement(xmlw, "accsPlac", version.getDataAccessPlace()); writeFullElement(xmlw, "conditions", version.getConditions()); writeFullElement(xmlw, "disclaimer", version.getDisclaimer()); writeFullElement(xmlw, "origArch", version.getOriginalArchive()); writeFullElement(xmlw, "avlStatus", version.getAvailabilityStatus()); writeFullElement(xmlw, "contact", version.getContactForAccess()); writeFullElement(xmlw, "collSize", version.getSizeOfCollection()); writeFullElement(xmlw, "complete", version.getStudyCompletion()); xmlw.writeEndElement(); // stdyDscr } private static void writeDocDescElement (XMLStreamWriter xmlw, DatasetDTO datasetDto) throws XMLStreamException { DatasetVersionDTO version = datasetDto.getDatasetVersion(); String persistentProtocol = datasetDto.getProtocol(); String persistentAgency = persistentProtocol; // The "persistentAgency" tag is used for the "agency" attribute of the // <IDNo> ddi section; back in the DVN3 days we used "handle" and "DOI" // for the 2 supported protocols, respectively. For the sake of backward // compatibility, we should probably stick with these labels: (-- L.A. 4.5) if ("hdl".equals(persistentAgency)) { persistentAgency = "handle"; } else if ("doi".equals(persistentAgency)) { persistentAgency = "DOI"; } String persistentAuthority = datasetDto.getAuthority(); String persistentId = datasetDto.getIdentifier(); xmlw.writeStartElement("docDscr"); xmlw.writeStartElement("citation"); xmlw.writeStartElement("titlStmt"); writeFullElement(xmlw, "titl", dto2Primitive(version, DatasetFieldConstant.title)); xmlw.writeStartElement("IDNo"); writeAttribute(xmlw, "agency", persistentAgency); xmlw.writeCharacters(persistentProtocol + ":" + persistentAuthority + "/" + persistentId); xmlw.writeEndElement(); // IDNo xmlw.writeEndElement(); // titlStmt xmlw.writeStartElement("distStmt"); writeFullElement(xmlw, "distrbtr", datasetDto.getPublisher()); writeFullElement(xmlw, "distDate", datasetDto.getPublicationDate()); xmlw.writeEndElement(); // diststmt writeVersionStatement(xmlw, version); xmlw.writeStartElement("biblCit"); xmlw.writeCharacters(version.getCitation()); xmlw.writeEndElement(); // biblCit xmlw.writeEndElement(); // citation xmlw.writeEndElement(); // docDscr } private static void writeVersionStatement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException{ xmlw.writeStartElement("verStmt"); writeAttribute(xmlw,"source","DVN"); xmlw.writeStartElement("version"); writeAttribute(xmlw,"date", datasetVersionDTO.getReleaseTime().substring(0, 10)); writeAttribute(xmlw,"type", datasetVersionDTO.getVersionState().toString()); xmlw.writeCharacters(datasetVersionDTO.getVersionNumber().toString()); xmlw.writeEndElement(); // version xmlw.writeEndElement(); // verStmt } private static void writeSummaryDescriptionElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { xmlw.writeStartElement("sumDscr"); for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { Integer per = 0; Integer coll = 0; for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.timePeriodCovered.equals(fieldDTO.getTypeName())) { String dateValStart = ""; String dateValEnd = ""; for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { per++; for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.timePeriodCoveredStart.equals(next.getTypeName())) { dateValStart = next.getSinglePrimitive(); } if (DatasetFieldConstant.timePeriodCoveredEnd.equals(next.getTypeName())) { dateValEnd = next.getSinglePrimitive(); } } if (!dateValStart.isEmpty()) { writeDateElement(xmlw, "timePrd", "P"+ per.toString(), "start", dateValStart ); } if (!dateValEnd.isEmpty()) { writeDateElement(xmlw, "timePrd", "P"+ per.toString(), "end", dateValEnd ); } } } if (DatasetFieldConstant.dateOfCollection.equals(fieldDTO.getTypeName())) { String dateValStart = ""; String dateValEnd = ""; for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { coll++; for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.dateOfCollectionStart.equals(next.getTypeName())) { dateValStart = next.getSinglePrimitive(); } if (DatasetFieldConstant.dateOfCollectionEnd.equals(next.getTypeName())) { dateValEnd = next.getSinglePrimitive(); } } if (!dateValStart.isEmpty()) { writeDateElement(xmlw, "collDate", "P"+ coll.toString(), "start", dateValStart ); } if (!dateValEnd.isEmpty()) { writeDateElement(xmlw, "collDate", "P"+ coll.toString(), "end", dateValEnd ); } } } if (DatasetFieldConstant.kindOfData.equals(fieldDTO.getTypeName())) { writeMultipleElement(xmlw, "dataKind", fieldDTO); } } } if("geospatial".equals(key)){ for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.geographicCoverage.equals(fieldDTO.getTypeName())) { for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.country.equals(next.getTypeName())) { writeFullElement(xmlw, "nation", next.getSinglePrimitive()); } if (DatasetFieldConstant.city.equals(next.getTypeName())) { writeFullElement(xmlw, "geogCover", next.getSinglePrimitive()); } if (DatasetFieldConstant.state.equals(next.getTypeName())) { writeFullElement(xmlw, "geogCover", next.getSinglePrimitive()); } if (DatasetFieldConstant.otherGeographicCoverage.equals(next.getTypeName())) { writeFullElement(xmlw, "geogCover", next.getSinglePrimitive()); } } } } if (DatasetFieldConstant.geographicBoundingBox.equals(fieldDTO.getTypeName())) { for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.westLongitude.equals(next.getTypeName())) { writeFullElement(xmlw, "westBL", next.getSinglePrimitive()); } if (DatasetFieldConstant.eastLongitude.equals(next.getTypeName())) { writeFullElement(xmlw, "eastBL", next.getSinglePrimitive()); } if (DatasetFieldConstant.northLatitude.equals(next.getTypeName())) { writeFullElement(xmlw, "northBL", next.getSinglePrimitive()); } if (DatasetFieldConstant.southLatitude.equals(next.getTypeName())) { writeFullElement(xmlw, "southBL", next.getSinglePrimitive()); } } } } } writeFullElementList(xmlw, "geogUnit", dto2PrimitiveList(datasetVersionDTO, DatasetFieldConstant.geographicUnit)); } if("socialscience".equals(key)){ for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.universe.equals(fieldDTO.getTypeName())) { writeMultipleElement(xmlw, "universe", fieldDTO); } if (DatasetFieldConstant.unitOfAnalysis.equals(fieldDTO.getTypeName())) { writeMultipleElement(xmlw, "anlyUnit", fieldDTO); } } } } xmlw.writeEndElement(); //sumDscr } private static void writeMultipleElement(XMLStreamWriter xmlw, String element, FieldDTO fieldDTO) throws XMLStreamException { for (String value : fieldDTO.getMultiplePrimitive()) { writeFullElement(xmlw, element, value); } } private static void writeDateElement(XMLStreamWriter xmlw, String element, String cycle, String event, String dateIn) throws XMLStreamException { xmlw.writeStartElement(element); writeAttribute(xmlw, "cycle", cycle); writeAttribute(xmlw, "event", event); writeAttribute(xmlw, "date", dateIn); xmlw.writeCharacters(dateIn); xmlw.writeEndElement(); } private static void writeMethodElement(XMLStreamWriter xmlw , DatasetVersionDTO version) throws XMLStreamException{ xmlw.writeStartElement("method"); xmlw.writeStartElement("dataColl"); writeFullElement(xmlw, "timeMeth", dto2Primitive(version, DatasetFieldConstant.timeMethod)); writeFullElement(xmlw, "dataCollector", dto2Primitive(version, DatasetFieldConstant.dataCollector)); writeFullElement(xmlw, "collectorTraining", dto2Primitive(version, DatasetFieldConstant.collectorTraining)); writeFullElement(xmlw, "frequenc", dto2Primitive(version, DatasetFieldConstant.frequencyOfDataCollection)); writeFullElement(xmlw, "sampProc", dto2Primitive(version, DatasetFieldConstant.samplingProcedure)); writeTargetSampleElement(xmlw, version); writeFullElement(xmlw, "deviat", dto2Primitive(version, DatasetFieldConstant.deviationsFromSampleDesign)); writeFullElement(xmlw, "collMode", dto2Primitive(version, DatasetFieldConstant.collectionMode)); writeFullElement(xmlw, "resInstru", dto2Primitive(version, DatasetFieldConstant.researchInstrument)); writeFullElement(xmlw, "collSitu", dto2Primitive(version, DatasetFieldConstant.dataCollectionSituation)); writeFullElement(xmlw, "actMin", dto2Primitive(version, DatasetFieldConstant.actionsToMinimizeLoss)); writeFullElement(xmlw, "conOps", dto2Primitive(version, DatasetFieldConstant.controlOperations)); writeFullElement(xmlw, "weight", dto2Primitive(version, DatasetFieldConstant.weighting)); writeFullElement(xmlw, "cleanOps", dto2Primitive(version, DatasetFieldConstant.cleaningOperations)); xmlw.writeEndElement(); //dataColl xmlw.writeStartElement("anlyInfo"); writeFullElement(xmlw, "anylInfo", dto2Primitive(version, DatasetFieldConstant.datasetLevelErrorNotes)); writeFullElement(xmlw, "respRate", dto2Primitive(version, DatasetFieldConstant.responseRate)); writeFullElement(xmlw, "estSmpErr", dto2Primitive(version, DatasetFieldConstant.samplingErrorEstimates)); writeFullElement(xmlw, "dataAppr", dto2Primitive(version, DatasetFieldConstant.otherDataAppraisal)); xmlw.writeEndElement(); //anlyInfo writeNotesElement(xmlw, version); xmlw.writeEndElement();//method } private static void writeSubjectElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException{ //Key Words and Topic Classification xmlw.writeStartElement("subject"); for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.subject.equals(fieldDTO.getTypeName())){ for ( String subject : fieldDTO.getMultipleVocab()){ xmlw.writeStartElement("keyword"); xmlw.writeCharacters(subject); xmlw.writeEndElement(); //Keyword } } if (DatasetFieldConstant.keyword.equals(fieldDTO.getTypeName())) { for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { String keywordValue = ""; String keywordVocab = ""; String keywordURI = ""; for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.keywordValue.equals(next.getTypeName())) { keywordValue = next.getSinglePrimitive(); } if (DatasetFieldConstant.keywordVocab.equals(next.getTypeName())) { keywordVocab = next.getSinglePrimitive(); } if (DatasetFieldConstant.keywordVocabURI.equals(next.getTypeName())) { keywordURI = next.getSinglePrimitive(); } } if (!keywordValue.isEmpty()){ xmlw.writeStartElement("keyword"); if(!keywordVocab.isEmpty()){ writeAttribute(xmlw,"vocab",keywordVocab); } if(!keywordURI.isEmpty()){ writeAttribute(xmlw,"URI",keywordURI); } xmlw.writeCharacters(keywordValue); xmlw.writeEndElement(); //Keyword } } } if (DatasetFieldConstant.topicClassification.equals(fieldDTO.getTypeName())) { for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { String topicClassificationValue = ""; String topicClassificationVocab = ""; String topicClassificationURI = ""; for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.topicClassValue.equals(next.getTypeName())) { topicClassificationValue = next.getSinglePrimitive(); } if (DatasetFieldConstant.topicClassVocab.equals(next.getTypeName())) { topicClassificationVocab = next.getSinglePrimitive(); } if (DatasetFieldConstant.topicClassVocabURI.equals(next.getTypeName())) { topicClassificationURI = next.getSinglePrimitive(); } } if (!topicClassificationValue.isEmpty()){ xmlw.writeStartElement("topcClas"); if(!topicClassificationVocab.isEmpty()){ writeAttribute(xmlw,"vocab",topicClassificationVocab); } if(!topicClassificationURI.isEmpty()){ writeAttribute(xmlw,"URI",topicClassificationURI); } xmlw.writeCharacters(topicClassificationValue); xmlw.writeEndElement(); //topcClas } } } } } } xmlw.writeEndElement(); // subject } private static void writeAuthorsElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.author.equals(fieldDTO.getTypeName())) { xmlw.writeStartElement("rspStmt"); String authorName = ""; String authorAffiliation = ""; for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.authorName.equals(next.getTypeName())) { authorName = next.getSinglePrimitive(); } if (DatasetFieldConstant.authorAffiliation.equals(next.getTypeName())) { authorAffiliation = next.getSinglePrimitive(); } } if (!authorName.isEmpty()){ xmlw.writeStartElement("AuthEnty"); if(!authorAffiliation.isEmpty()){ writeAttribute(xmlw,"affiliation",authorAffiliation); } xmlw.writeCharacters(authorName); xmlw.writeEndElement(); //AuthEnty } } xmlw.writeEndElement(); //rspStmt } } } } } private static void writeContactsElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.datasetContact.equals(fieldDTO.getTypeName())) { String datasetContactName = ""; String datasetContactAffiliation = ""; String datasetContactEmail = ""; for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.datasetContactName.equals(next.getTypeName())) { datasetContactName = next.getSinglePrimitive(); } if (DatasetFieldConstant.datasetContactAffiliation.equals(next.getTypeName())) { datasetContactAffiliation = next.getSinglePrimitive(); } if (DatasetFieldConstant.datasetContactEmail.equals(next.getTypeName())) { datasetContactEmail = next.getSinglePrimitive(); } } if (!datasetContactName.isEmpty()){ xmlw.writeStartElement("contact"); if(!datasetContactAffiliation.isEmpty()){ writeAttribute(xmlw,"affiliation",datasetContactAffiliation); } if(!datasetContactEmail.isEmpty()){ writeAttribute(xmlw,"email",datasetContactEmail); } xmlw.writeCharacters(datasetContactName); xmlw.writeEndElement(); //AuthEnty } } } } } } } private static void writeProducersElement(XMLStreamWriter xmlw, DatasetVersionDTO version) throws XMLStreamException { xmlw.writeStartElement("prodStmt"); for (Map.Entry<String, MetadataBlockDTO> entry : version.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.producer.equals(fieldDTO.getTypeName())) { for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { String producerName = ""; String producerAffiliation = ""; String producerAbbreviation = ""; String producerLogo = ""; String producerURL = ""; for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.producerName.equals(next.getTypeName())) { producerName = next.getSinglePrimitive(); } if (DatasetFieldConstant.producerAffiliation.equals(next.getTypeName())) { producerAffiliation = next.getSinglePrimitive(); } if (DatasetFieldConstant.producerAbbreviation.equals(next.getTypeName())) { producerAbbreviation = next.getSinglePrimitive(); } if (DatasetFieldConstant.producerLogo.equals(next.getTypeName())) { producerLogo = next.getSinglePrimitive(); } if (DatasetFieldConstant.producerURL.equals(next.getTypeName())) { producerURL = next.getSinglePrimitive(); } } if (!producerName.isEmpty()) { xmlw.writeStartElement("producer"); if (!producerAffiliation.isEmpty()) { writeAttribute(xmlw, "affiliation", producerAffiliation); } if (!producerAbbreviation.isEmpty()) { writeAttribute(xmlw, "abbr", producerAbbreviation); } if (!producerLogo.isEmpty()) { writeAttribute(xmlw, "role", producerLogo); } if (!producerURL.isEmpty()) { writeAttribute(xmlw, "URI", producerURL); } xmlw.writeCharacters(producerName); xmlw.writeEndElement(); //AuthEnty } } } } } } writeFullElement(xmlw, "prodDate", dto2Primitive(version, DatasetFieldConstant.productionDate)); writeFullElement(xmlw, "prodPlac", dto2Primitive(version, DatasetFieldConstant.productionPlace)); writeGrantElement(xmlw, version); xmlw.writeEndElement(); //prodStmt } private static void writeDistributorsElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.distributor.equals(fieldDTO.getTypeName())) { xmlw.writeStartElement("distrbtr"); for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { String distributorName = ""; String distributorAffiliation = ""; String distributorAbbreviation = ""; String distributorURL = ""; String distributorLogoURL = ""; for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.distributorName.equals(next.getTypeName())) { distributorName = next.getSinglePrimitive(); } if (DatasetFieldConstant.distributorAffiliation.equals(next.getTypeName())) { distributorAffiliation = next.getSinglePrimitive(); } if (DatasetFieldConstant.distributorAbbreviation.equals(next.getTypeName())) { distributorAbbreviation = next.getSinglePrimitive(); } if (DatasetFieldConstant.distributorURL.equals(next.getTypeName())) { distributorURL = next.getSinglePrimitive(); } if (DatasetFieldConstant.distributorLogo.equals(next.getTypeName())) { distributorLogoURL = next.getSinglePrimitive(); } } if (!distributorName.isEmpty()) { xmlw.writeStartElement("distrbtr"); if (!distributorAffiliation.isEmpty()) { writeAttribute(xmlw, "affiliation", distributorAffiliation); } if (!distributorAbbreviation.isEmpty()) { writeAttribute(xmlw, "abbr", distributorAbbreviation); } if (!distributorURL.isEmpty()) { writeAttribute(xmlw, "URI", distributorURL); } if (!distributorLogoURL.isEmpty()) { writeAttribute(xmlw, "role", distributorLogoURL); } xmlw.writeCharacters(distributorName); xmlw.writeEndElement(); //AuthEnty } } xmlw.writeEndElement(); //rspStmt } } } } } private static void writeRelPublElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.publication.equals(fieldDTO.getTypeName())) { for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { String pubString = ""; String citation = ""; String IDType = ""; String IDNo = ""; String url = ""; for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.publicationCitation.equals(next.getTypeName())) { citation = next.getSinglePrimitive(); } if (DatasetFieldConstant.publicationIDType.equals(next.getTypeName())) { IDType = next.getSinglePrimitive(); } if (DatasetFieldConstant.publicationIDNumber.equals(next.getTypeName())) { IDNo = next.getSinglePrimitive(); } if (DatasetFieldConstant.publicationURL.equals(next.getTypeName())) { url = next.getSinglePrimitive(); } } pubString = appendCommaSeparatedValue(citation, IDType); pubString = appendCommaSeparatedValue(pubString, IDNo); pubString = appendCommaSeparatedValue(pubString, url); if (!pubString.isEmpty()){ xmlw.writeStartElement("relPubl"); xmlw.writeCharacters(pubString); xmlw.writeEndElement(); //relPubl } } } } } } } private static String appendCommaSeparatedValue(String inVal, String next) { if (!next.isEmpty()) { if (!inVal.isEmpty()) { return inVal + ", " + next; } else { return next; } } return inVal; } private static void writeAbstractElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.description.equals(fieldDTO.getTypeName())) { String descriptionText = ""; String descriptionDate = ""; for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.descriptionText.equals(next.getTypeName())) { descriptionText = next.getSinglePrimitive(); } if (DatasetFieldConstant.descriptionDate.equals(next.getTypeName())) { descriptionDate = next.getSinglePrimitive(); } } if (!descriptionText.isEmpty()){ xmlw.writeStartElement("abstract"); if(!descriptionDate.isEmpty()){ writeAttribute(xmlw,"date",descriptionDate); } xmlw.writeCharacters(descriptionText); xmlw.writeEndElement(); //abstract } } } } } } } private static void writeGrantElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.grantNumber.equals(fieldDTO.getTypeName())) { String grantNumber = ""; String grantAgency = ""; for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.grantNumberValue.equals(next.getTypeName())) { grantNumber = next.getSinglePrimitive(); } if (DatasetFieldConstant.grantNumberAgency.equals(next.getTypeName())) { grantAgency = next.getSinglePrimitive(); } } if (!grantNumber.isEmpty()){ xmlw.writeStartElement("grantNo"); if(!grantAgency.isEmpty()){ writeAttribute(xmlw,"agency",grantAgency); } xmlw.writeCharacters(grantNumber); xmlw.writeEndElement(); //grantno } } } } } } } private static void writeOtherIdElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.otherId.equals(fieldDTO.getTypeName())) { String otherId = ""; String otherIdAgency = ""; for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.otherIdValue.equals(next.getTypeName())) { otherId = next.getSinglePrimitive(); } if (DatasetFieldConstant.otherIdAgency.equals(next.getTypeName())) { otherIdAgency = next.getSinglePrimitive(); } } if (!otherId.isEmpty()){ xmlw.writeStartElement("IDNo"); if(!otherIdAgency.isEmpty()){ writeAttribute(xmlw,"agency",otherIdAgency); } xmlw.writeCharacters(otherId); xmlw.writeEndElement(); //IDNo } } } } } } } private static void writeSoftwareElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.software.equals(fieldDTO.getTypeName())) { String softwareName = ""; String softwareVersion = ""; for (HashSet<FieldDTO> foo : fieldDTO.getMultipleCompound()) { for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.softwareName.equals(next.getTypeName())) { softwareName = next.getSinglePrimitive(); } if (DatasetFieldConstant.softwareVersion.equals(next.getTypeName())) { softwareVersion = next.getSinglePrimitive(); } } if (!softwareName.isEmpty()){ xmlw.writeStartElement("software"); if(!softwareVersion.isEmpty()){ writeAttribute(xmlw,"version",softwareVersion); } xmlw.writeCharacters(softwareName); xmlw.writeEndElement(); //software } } } } } } } private static void writeSeriesElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("citation".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.series.equals(fieldDTO.getTypeName())) { xmlw.writeStartElement("serStmt"); String seriesName = ""; String seriesInformation = ""; Set<FieldDTO> foo = fieldDTO.getSingleCompound(); for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.seriesName.equals(next.getTypeName())) { seriesName = next.getSinglePrimitive(); } if (DatasetFieldConstant.seriesInformation.equals(next.getTypeName())) { seriesInformation = next.getSinglePrimitive(); } } if (!seriesName.isEmpty()){ xmlw.writeStartElement("serName"); xmlw.writeCharacters(seriesName); xmlw.writeEndElement(); //grantno } if (!seriesInformation.isEmpty()){ xmlw.writeStartElement("serInfo"); xmlw.writeCharacters(seriesInformation); xmlw.writeEndElement(); //grantno } xmlw.writeEndElement(); //serStmt } } } } } private static void writeTargetSampleElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("socialscience".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.targetSampleSize.equals(fieldDTO.getTypeName())) { String sizeFormula = ""; String actualSize = ""; Set<FieldDTO> foo = fieldDTO.getSingleCompound(); for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.targetSampleSizeFormula.equals(next.getTypeName())) { sizeFormula = next.getSinglePrimitive(); } if (DatasetFieldConstant.targetSampleActualSize.equals(next.getTypeName())) { actualSize = next.getSinglePrimitive(); } } if (!sizeFormula.isEmpty()) { xmlw.writeStartElement("sampleSizeFormula"); xmlw.writeCharacters(sizeFormula); xmlw.writeEndElement(); //sampleSizeFormula } if (!actualSize.isEmpty()) { xmlw.writeStartElement("sampleSize"); xmlw.writeCharacters(actualSize); xmlw.writeEndElement(); //sampleSize } } } } } } private static void writeNotesElement(XMLStreamWriter xmlw, DatasetVersionDTO datasetVersionDTO) throws XMLStreamException { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { String key = entry.getKey(); MetadataBlockDTO value = entry.getValue(); if ("socialscience".equals(key)) { for (FieldDTO fieldDTO : value.getFields()) { if (DatasetFieldConstant.socialScienceNotes.equals(fieldDTO.getTypeName())) { String notesText = ""; String notesType = ""; String notesSubject= ""; Set<FieldDTO> foo = fieldDTO.getSingleCompound(); for (Iterator<FieldDTO> iterator = foo.iterator(); iterator.hasNext();) { FieldDTO next = iterator.next(); if (DatasetFieldConstant.socialScienceNotesText.equals(next.getTypeName())) { notesText = next.getSinglePrimitive(); } if (DatasetFieldConstant.socialScienceNotesType.equals(next.getTypeName())) { notesType = next.getSinglePrimitive(); } if (DatasetFieldConstant.socialScienceNotesSubject.equals(next.getTypeName())) { notesSubject = next.getSinglePrimitive(); } } if (!notesText.isEmpty()) { xmlw.writeStartElement("notes"); if(!notesType.isEmpty()){ writeAttribute(xmlw,"type",notesType); } if(!notesSubject.isEmpty()){ writeAttribute(xmlw,"subject",notesSubject); } xmlw.writeCharacters(notesText); xmlw.writeEndElement(); } } } } } } // TODO: // see if there's more information that we could encode in this otherMat. // contentType? Unfs and such? (in the "short" DDI that is being used for // harvesting *all* files are encoded as otherMats; even tabular ones. private static void createOtherMats(XMLStreamWriter xmlw, List<FileDTO> fileDtos) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: String dataverseUrl = getDataverseSiteUrl(); for (FileDTO fileDTo : fileDtos) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, // tabular ones - in fileDscr sections. (fileDscr sections have special fields for numbers of variables // and observations, etc.) if (fileDTo.getDataFile().getDataTables() == null || fileDTo.getDataFile().getDataTables().isEmpty()) { xmlw.writeStartElement("otherMat"); writeAttribute(xmlw, "ID", "f" + fileDTo.getDataFile().getId()); writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileDTo.getDataFile().getId()); writeAttribute(xmlw, "level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileDTo.getDataFile().getFilename()); xmlw.writeEndElement(); // labl writeFileDescription(xmlw, fileDTo); // there's no readily available field in the othermat section // for the content type (aka mime type); so we'll store it in this // specially formatted notes section: String contentType = fileDTo.getDataFile().getContentType(); if (!StringUtilisEmpty(contentType)) { xmlw.writeStartElement("notes"); writeAttribute(xmlw, "level", LEVEL_FILE); writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(contentType); xmlw.writeEndElement(); // notes } xmlw.writeEndElement(); // otherMat } } } // An alternative version of the createOtherMats method - this one is used // when a "full" DDI is being cooked; just like the fileDscr and data/var sections methods, // it operates on the list of FileMetadata entities, not on File DTOs. This is because // DTOs do not support "tabular", variable-level metadata yet. And we need to be able to // tell if this file is in fact tabular data - so that we know if it needs an // otherMat, or a fileDscr section. // -- L.A. 4.5 private static void createOtherMatsFromFileMetadatas(XMLStreamWriter xmlw, List<FileMetadata> fileMetadatas) throws XMLStreamException { // The preferred URL for this dataverse, for cooking up the file access API links: String dataverseUrl = getDataverseSiteUrl(); for (FileMetadata fileMetadata : fileMetadatas) { // We'll continue using the scheme we've used before, in DVN2-3: non-tabular files are put into otherMat, // tabular ones - in fileDscr sections. (fileDscr sections have special fields for numbers of variables // and observations, etc.) if (fileMetadata.getDataFile() != null && !fileMetadata.getDataFile().isTabularData()) { xmlw.writeStartElement("otherMat"); writeAttribute(xmlw, "ID", "f" + fileMetadata.getDataFile().getId()); writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + fileMetadata.getDataFile().getId()); writeAttribute(xmlw, "level", "datafile"); xmlw.writeStartElement("labl"); xmlw.writeCharacters(fileMetadata.getLabel()); xmlw.writeEndElement(); // labl String description = fileMetadata.getDescription(); if (description != null) { xmlw.writeStartElement("txt"); xmlw.writeCharacters(description); xmlw.writeEndElement(); // txt } // there's no readily available field in the othermat section // for the content type (aka mime type); so we'll store it in this // specially formatted notes section: String contentType = fileMetadata.getDataFile().getContentType(); if (!StringUtilisEmpty(contentType)) { xmlw.writeStartElement("notes"); writeAttribute(xmlw, "level", LEVEL_FILE); writeAttribute(xmlw, "type", NOTE_TYPE_CONTENTTYPE); writeAttribute(xmlw, "subject", NOTE_SUBJECT_CONTENTTYPE); xmlw.writeCharacters(contentType); xmlw.writeEndElement(); // notes } xmlw.writeEndElement(); // otherMat } } } private static void writeFileDescription(XMLStreamWriter xmlw, FileDTO fileDTo) throws XMLStreamException { xmlw.writeStartElement("txt"); String description = fileDTo.getDataFile().getDescription(); if (description != null) { xmlw.writeCharacters(description); } xmlw.writeEndElement(); // txt } private static String dto2Primitive(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { MetadataBlockDTO value = entry.getValue(); for (FieldDTO fieldDTO : value.getFields()) { if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { return fieldDTO.getSinglePrimitive(); } } } return null; } private static List<String> dto2PrimitiveList(DatasetVersionDTO datasetVersionDTO, String datasetFieldTypeName) { for (Map.Entry<String, MetadataBlockDTO> entry : datasetVersionDTO.getMetadataBlocks().entrySet()) { MetadataBlockDTO value = entry.getValue(); for (FieldDTO fieldDTO : value.getFields()) { if (datasetFieldTypeName.equals(fieldDTO.getTypeName())) { return fieldDTO.getMultiplePrimitive(); } } } return null; } private static void writeFullElementList(XMLStreamWriter xmlw, String name, List<String> values) throws XMLStreamException { //For the simplest Elements we can if (values != null && !values.isEmpty()) { for (String value : values) { xmlw.writeStartElement(name); xmlw.writeCharacters(value); xmlw.writeEndElement(); // labl } } } private static void writeFullElement (XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { //For the simplest Elements we can if (!StringUtilisEmpty(value)) { xmlw.writeStartElement(name); xmlw.writeCharacters(value); xmlw.writeEndElement(); // labl } } private static void writeAttribute(XMLStreamWriter xmlw, String name, String value) throws XMLStreamException { if (!StringUtilisEmpty(value)) { xmlw.writeAttribute(name, value); } } private static boolean StringUtilisEmpty(String str) { if (str == null || str.trim().equals("")) { return true; } return false; } private static void saveJsonToDisk(String datasetVersionAsJson) throws IOException { Files.write(Paths.get("/tmp/out.json"), datasetVersionAsJson.getBytes()); } /** * The "official", designated URL of the site; * can be defined as a complete URL; or derived from the * "official" hostname. If none of these options is set, * defaults to the InetAddress.getLocalHOst() and https; */ private static String getDataverseSiteUrl() { String hostUrl = System.getProperty(SITE_URL); if (hostUrl != null && !"".equals(hostUrl)) { return hostUrl; } String hostName = System.getProperty(FQDN); if (hostName == null) { try { hostName = InetAddress.getLocalHost().getCanonicalHostName(); } catch (UnknownHostException e) { hostName = null; } } if (hostName != null) { return "https://" + hostName; } return "http://localhost:8080"; } // Methods specific to the tabular data ("<dataDscr>") section. // Note that these do NOT operate on DTO objects, but instead directly // on Dataverse DataVariable, DataTable, etc. objects. // This is because for this release (4.5) we are recycling the already available // code, and this is what we got. (We already have DTO objects for DataTable, // and DataVariable, etc., but the current version JsonPrinter.jsonAsDatasetDto() // does not produce JSON for these objects - it stops at DataFile. Eventually // we want all of our objects to be exportable as JSON, and then all the exports // can go through the same DTO state... But we don't have time for it now; // plus, the structure of file-level metadata is currently being re-designed, // so we probably should not invest any time into it right now). -- L.A. 4.5 private static void createDataDscr(XMLStreamWriter xmlw, DatasetVersion datasetVersion) throws XMLStreamException { if (datasetVersion.getFileMetadatas() == null || datasetVersion.getFileMetadatas().isEmpty()) { return; } boolean tabularData = false; // we're not writing the opening <dataDscr> tag until we find an actual // tabular datafile. for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { DataFile dataFile = fileMetadata.getDataFile(); if (dataFile != null && dataFile.isTabularData()) { if (!tabularData) { xmlw.writeStartElement("dataDscr"); tabularData = true; } List<DataVariable> vars = dataFile.getDataTable().getDataVariables(); for (DataVariable var : vars) { createVarDDI(xmlw, var); } } } if (tabularData) { xmlw.writeEndElement(); // dataDscr } } private static void createVarDDI(XMLStreamWriter xmlw, DataVariable dv) throws XMLStreamException { xmlw.writeStartElement("var"); writeAttribute(xmlw, "ID", "v" + dv.getId().toString()); writeAttribute(xmlw, "name", dv.getName()); if (dv.getNumberOfDecimalPoints() != null) { writeAttribute(xmlw, "dcml", dv.getNumberOfDecimalPoints().toString()); } if (dv.isOrderedCategorical()) { writeAttribute(xmlw, "nature", "ordinal"); } if (dv.getInterval() != null) { String interval = dv.getIntervalLabel(); if (interval != null) { writeAttribute(xmlw, "intrvl", interval); } } // location xmlw.writeEmptyElement("location"); if (dv.getFileStartPosition() != null) { writeAttribute(xmlw, "StartPos", dv.getFileStartPosition().toString()); } if (dv.getFileEndPosition() != null) { writeAttribute(xmlw, "EndPos", dv.getFileEndPosition().toString()); } if (dv.getRecordSegmentNumber() != null) { writeAttribute(xmlw, "RecSegNo", dv.getRecordSegmentNumber().toString()); } writeAttribute(xmlw, "fileid", "f" + dv.getDataTable().getDataFile().getId().toString()); // labl if (!StringUtilisEmpty(dv.getLabel())) { xmlw.writeStartElement("labl"); writeAttribute(xmlw, "level", "variable"); xmlw.writeCharacters(dv.getLabel()); xmlw.writeEndElement(); //labl } // invalrng boolean invalrngAdded = false; for (VariableRange range : dv.getInvalidRanges()) { //if (range.getBeginValueType() != null && range.getBeginValueType().getName().equals(DB_VAR_RANGE_TYPE_POINT)) { if (range.getBeginValueType() != null && range.isBeginValueTypePoint()) { if (range.getBeginValue() != null) { invalrngAdded = checkParentElement(xmlw, "invalrng", invalrngAdded); xmlw.writeEmptyElement("item"); writeAttribute(xmlw, "VALUE", range.getBeginValue()); } } else { invalrngAdded = checkParentElement(xmlw, "invalrng", invalrngAdded); xmlw.writeEmptyElement("range"); if (range.getBeginValueType() != null && range.getBeginValue() != null) { if (range.isBeginValueTypeMin()) { writeAttribute(xmlw, "min", range.getBeginValue()); } else if (range.isBeginValueTypeMinExcl()) { writeAttribute(xmlw, "minExclusive", range.getBeginValue()); } } if (range.getEndValueType() != null && range.getEndValue() != null) { if (range.isEndValueTypeMax()) { writeAttribute(xmlw, "max", range.getEndValue()); } else if (range.isEndValueTypeMaxExcl()) { writeAttribute(xmlw, "maxExclusive", range.getEndValue()); } } } } if (invalrngAdded) { xmlw.writeEndElement(); // invalrng } //universe if (!StringUtilisEmpty(dv.getUniverse())) { xmlw.writeStartElement("universe"); xmlw.writeCharacters(dv.getUniverse()); xmlw.writeEndElement(); //universe } //sum stats for (SummaryStatistic sumStat : dv.getSummaryStatistics()) { xmlw.writeStartElement("sumStat"); if (sumStat.getTypeLabel() != null) { writeAttribute(xmlw, "type", sumStat.getTypeLabel()); } else { writeAttribute(xmlw, "type", "unknown"); } xmlw.writeCharacters(sumStat.getValue()); xmlw.writeEndElement(); //sumStat } // categories for (VariableCategory cat : dv.getCategories()) { xmlw.writeStartElement("catgry"); if (cat.isMissing()) { writeAttribute(xmlw, "missing", "Y"); } // catValu xmlw.writeStartElement("catValu"); xmlw.writeCharacters(cat.getValue()); xmlw.writeEndElement(); //catValu // label if (!StringUtilisEmpty(cat.getLabel())) { xmlw.writeStartElement("labl"); writeAttribute(xmlw, "level", "category"); xmlw.writeCharacters(cat.getLabel()); xmlw.writeEndElement(); //labl } // catStat if (cat.getFrequency() != null) { xmlw.writeStartElement("catStat"); writeAttribute(xmlw, "type", "freq"); // if frequency is actually a long value, we want to write "100" instead of "100.0" if (Math.floor(cat.getFrequency()) == cat.getFrequency()) { xmlw.writeCharacters(new Long(cat.getFrequency().longValue()).toString()); } else { xmlw.writeCharacters(cat.getFrequency().toString()); } xmlw.writeEndElement(); //catStat } xmlw.writeEndElement(); //catgry } // varFormat xmlw.writeEmptyElement("varFormat"); if (dv.isTypeNumeric()) { writeAttribute(xmlw, "type", "numeric"); } else if (dv.isTypeCharacter()) { writeAttribute(xmlw, "type", "character"); } else { throw new XMLStreamException("Illegal Variable Format Type!"); } writeAttribute(xmlw, "formatname", dv.getFormat()); //experiment writeAttribute(xmlw, "schema", dv.getFormatSchema()); writeAttribute(xmlw, "category", dv.getFormatCategory()); // notes if (dv.getUnf() != null && !"".equals(dv.getUnf())) { xmlw.writeStartElement("notes"); writeAttribute(xmlw, "subject", "Universal Numeric Fingerprint"); writeAttribute(xmlw, "level", "variable"); writeAttribute(xmlw, "type", "Dataverse:UNF"); xmlw.writeCharacters(dv.getUnf()); xmlw.writeEndElement(); //notes } xmlw.writeEndElement(); //var } private static void createFileDscr(XMLStreamWriter xmlw, DatasetVersion datasetVersion) throws XMLStreamException { String dataverseUrl = getDataverseSiteUrl(); for (FileMetadata fileMetadata : datasetVersion.getFileMetadatas()) { DataFile dataFile = fileMetadata.getDataFile(); if (dataFile != null && dataFile.isTabularData()) { DataTable dt = dataFile.getDataTable(); xmlw.writeStartElement("fileDscr"); writeAttribute(xmlw, "ID", "f" + dataFile.getId()); writeAttribute(xmlw, "URI", dataverseUrl + "/api/access/datafile/" + dataFile.getId()); xmlw.writeStartElement("fileTxt"); xmlw.writeStartElement("fileName"); xmlw.writeCharacters(fileMetadata.getLabel()); xmlw.writeEndElement(); // fileName if (dt.getCaseQuantity() != null || dt.getVarQuantity() != null || dt.getRecordsPerCase() != null) { xmlw.writeStartElement("dimensns"); if (dt.getCaseQuantity() != null) { xmlw.writeStartElement("caseQnty"); xmlw.writeCharacters(dt.getCaseQuantity().toString()); xmlw.writeEndElement(); // caseQnty } if (dt.getVarQuantity() != null) { xmlw.writeStartElement("varQnty"); xmlw.writeCharacters(dt.getVarQuantity().toString()); xmlw.writeEndElement(); // varQnty } if (dt.getRecordsPerCase() != null) { xmlw.writeStartElement("recPrCas"); xmlw.writeCharacters(dt.getRecordsPerCase().toString()); xmlw.writeEndElement(); // recPrCas } xmlw.writeEndElement(); // dimensns } xmlw.writeStartElement("fileType"); xmlw.writeCharacters(dataFile.getContentType()); xmlw.writeEndElement(); // fileType xmlw.writeEndElement(); // fileTxt // various notes: // this specially formatted note section is used to store the UNF // (Universal Numeric Fingerprint) signature: if (dt.getUnf() != null && !dt.getUnf().equals("")) { xmlw.writeStartElement("notes"); writeAttribute(xmlw, "level", LEVEL_FILE); writeAttribute(xmlw, "type", NOTE_TYPE_UNF); writeAttribute(xmlw, "subject", NOTE_SUBJECT_UNF); xmlw.writeCharacters(dt.getUnf()); xmlw.writeEndElement(); // notes } if (dataFile.getTags() != null) { for (int i = 0; i < dataFile.getTags().size(); i++) { xmlw.writeStartElement("notes"); writeAttribute(xmlw, "level", LEVEL_FILE); writeAttribute(xmlw, "type", NOTE_TYPE_TAG); writeAttribute(xmlw, "subject", NOTE_SUBJECT_TAG); xmlw.writeCharacters(dataFile.getTags().get(i).getTypeLabel()); xmlw.writeEndElement(); // notes } } // TODO: add the remaining fileDscr elements! xmlw.writeEndElement(); // fileDscr } } } private static boolean checkParentElement(XMLStreamWriter xmlw, String elementName, boolean elementAdded) throws XMLStreamException { if (!elementAdded) { xmlw.writeStartElement(elementName); } return true; } }