package edu.harvard.iq.dataverse.api.imports;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import edu.harvard.iq.dataverse.DataFile;
import edu.harvard.iq.dataverse.Dataset;
import edu.harvard.iq.dataverse.DatasetField;
import edu.harvard.iq.dataverse.DatasetFieldConstant;
import edu.harvard.iq.dataverse.DatasetFieldServiceBean;
import edu.harvard.iq.dataverse.DatasetFieldType;
import edu.harvard.iq.dataverse.DatasetFieldValue;
import edu.harvard.iq.dataverse.DatasetServiceBean;
import edu.harvard.iq.dataverse.DatasetVersion;
import edu.harvard.iq.dataverse.Dataverse;
import edu.harvard.iq.dataverse.DataverseContact;
import edu.harvard.iq.dataverse.DataverseServiceBean;
import edu.harvard.iq.dataverse.EjbDataverseEngine;
import edu.harvard.iq.dataverse.ForeignMetadataFormatMapping;
import edu.harvard.iq.dataverse.MetadataBlockServiceBean;
import edu.harvard.iq.dataverse.api.dto.DatasetDTO;
import edu.harvard.iq.dataverse.api.imports.ImportUtil.ImportType;
import edu.harvard.iq.dataverse.authorization.users.AuthenticatedUser;
import edu.harvard.iq.dataverse.authorization.users.User;
import edu.harvard.iq.dataverse.engine.command.DataverseRequest;
import edu.harvard.iq.dataverse.engine.command.exception.CommandException;
import edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetCommand;
import edu.harvard.iq.dataverse.engine.command.impl.CreateDatasetVersionCommand;
import edu.harvard.iq.dataverse.engine.command.impl.CreateDataverseCommand;
import edu.harvard.iq.dataverse.engine.command.impl.DestroyDatasetCommand;
import edu.harvard.iq.dataverse.harvest.client.HarvestingClient;
import edu.harvard.iq.dataverse.search.IndexServiceBean;
import edu.harvard.iq.dataverse.settings.SettingsServiceBean;
import edu.harvard.iq.dataverse.util.json.JsonParseException;
import edu.harvard.iq.dataverse.util.json.JsonParser;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringReader;
import java.nio.file.Files;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.logging.Formatter;
import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.logging.Logger;
import javax.ejb.EJB;
import javax.ejb.EJBException;
import javax.ejb.Stateless;
import javax.ejb.TransactionAttribute;
import javax.ejb.TransactionAttributeType;
import static javax.ejb.TransactionAttributeType.REQUIRES_NEW;
import javax.json.Json;
import javax.json.JsonObject;
import javax.json.JsonObjectBuilder;
import javax.json.JsonReader;
import javax.persistence.EntityManager;
import javax.persistence.PersistenceContext;
import javax.servlet.http.HttpServletRequest;
import javax.validation.ConstraintViolation;
import javax.validation.ConstraintViolationException;
import javax.validation.Validation;
import javax.validation.Validator;
import javax.validation.ValidatorFactory;
import javax.ws.rs.core.Context;
import javax.xml.stream.XMLStreamException;
import org.apache.commons.lang.StringUtils;
/**
 * Service bean that handles batch and harvested imports of datasets and
 * dataverses, converting DDI / Dublin Core XML (or native Dataverse JSON)
 * metadata into Dataset objects and persisting them via the command engine.
 *
 * @author ellenk
 * TODO: Why does this bean not extend AbstractApiBean?
 */
@Stateless
public class ImportServiceBean {
@PersistenceContext(unitName="VDCNet-ejbPU")
private EntityManager em;
private static final Logger logger = Logger.getLogger(ImportServiceBean.class.getCanonicalName());
@EJB
protected EjbDataverseEngine engineSvc;
@EJB
DatasetServiceBean datasetService;
@EJB
DataverseServiceBean dataverseService;
@EJB
DatasetFieldServiceBean datasetfieldService;
@EJB
MetadataBlockServiceBean metadataBlockService;
@EJB
SettingsServiceBean settingsService;
@EJB
ImportDDIServiceBean importDDIService;
@EJB
ImportGenericServiceBean importGenericService;
@EJB
IndexServiceBean indexService;
    /**
     * This is just a convenience method, for testing migration. It creates a
     * dummy dataverse with the directory name as the dataverse name and alias.
     *
     * @param dvName name (and alias) for the new dataverse
     * @param dataverseRequest the request under which the create command runs
     * @return the newly created Dataverse
     * @throws ImportException if the create command fails
     */
@TransactionAttribute(REQUIRES_NEW)
public Dataverse createDataverse(String dvName, DataverseRequest dataverseRequest) throws ImportException {
Dataverse d = new Dataverse();
Dataverse root = dataverseService.findByAlias("root");
d.setOwner(root);
d.setAlias(dvName);
d.setName(dvName);
d.setAffiliation("affiliation");
d.setPermissionRoot(false);
d.setDescription("description");
d.setDataverseType(Dataverse.DataverseType.RESEARCHERS);
DataverseContact dc = new DataverseContact();
dc.setContactEmail("pete@mailinator.com");
ArrayList<DataverseContact> dcList = new ArrayList<>();
dcList.add(dc);
d.setDataverseContacts(dcList);
try {
d = engineSvc.submit(new CreateDataverseCommand(d, dataverseRequest, null, null));
} catch (EJBException ex) {
Throwable cause = ex;
StringBuilder sb = new StringBuilder();
sb.append("Error creating dataverse.");
while (cause.getCause() != null) {
cause = cause.getCause();
if (cause instanceof ConstraintViolationException) {
ConstraintViolationException constraintViolationException = (ConstraintViolationException) cause;
for (ConstraintViolation<?> violation : constraintViolationException.getConstraintViolations()) {
sb.append(" Invalid value: <<<").append(violation.getInvalidValue()).append(">>> for ").append(violation.getPropertyPath()).append(" at ").append(violation.getLeafBean()).append(" - ").append(violation.getMessage());
}
}
}
logger.log(Level.SEVERE, sb.toString());
System.out.println("Error creating dataverse: " + sb.toString());
throw new ImportException(sb.toString());
} catch (Exception e) {
throw new ImportException(e.getMessage());
}
return d;
}
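    /**
     * Reads a DDI XML file from disk and imports it into the given dataverse
     * via {@link #doImport}. Import failures are reported in the returned
     * JSON object and, when a validation log is supplied, recorded there as
     * well.
     */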
@TransactionAttribute(REQUIRES_NEW)
public JsonObjectBuilder handleFile(DataverseRequest dataverseRequest, Dataverse owner, File file, ImportType importType, PrintWriter validationLog, PrintWriter cleanupLog) throws ImportException, IOException {
System.out.println("handling file: " + file.getAbsolutePath());
String ddiXMLToParse;
try {
ddiXMLToParse = new String(Files.readAllBytes(file.toPath()));
            JsonObjectBuilder status = doImport(dataverseRequest, owner, ddiXMLToParse, file.getParentFile().getName() + "/" + file.getName(), importType, cleanupLog);
status.add("file", file.getName());
logger.log(Level.INFO, "completed doImport {0}/{1}", new Object[]{file.getParentFile().getName(), file.getName()});
return status;
} catch (ImportException ex) {
String msg = "Import Exception processing file " + file.getParentFile().getName() + "/" + file.getName() + ", msg:" + ex.getMessage();
logger.info(msg);
if (validationLog!=null) {
validationLog.println(msg);
}
            return Json.createObjectBuilder().add("message", msg);
} catch (Exception e) {
            Throwable causedBy = e.getCause();
            while (causedBy != null && causedBy.getCause() != null) {
                causedBy = causedBy.getCause();
            }
            String stackLine = "";
            if (causedBy != null && causedBy.getStackTrace() != null && causedBy.getStackTrace().length > 0) {
                stackLine = causedBy.getStackTrace()[0].toString();
            }
            String msg = "Unexpected Error in handleFile(), file: " + file.getParentFile().getName() + "/" + file.getName();
            if (e.getMessage() != null) {
                msg += ", message: " + e.getMessage();
            }
            msg += ", caused by: " + causedBy;
            if (causedBy != null && causedBy.getMessage() != null) {
                msg += ", caused by message: " + causedBy.getMessage();
            }
            msg += " at line: " + stackLine;
            if (validationLog != null) {
                validationLog.println(msg);
            }
            logger.log(Level.SEVERE, msg, e);
            return Json.createObjectBuilder().add("message", "Unexpected Exception processing file " + file.getParentFile().getName() + "/" + file.getName() + ", msg: " + e.getMessage());
}
}
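    /**
     * Imports a single harvested metadata record (DDI, Dublin Core, or native
     * Dataverse JSON) into the dataverse associated with the harvesting
     * client. If a harvested dataset with the same global id already exists
     * in that dataverse, it is destroyed and re-created from the incoming
     * metadata.
     */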
@TransactionAttribute(TransactionAttributeType.REQUIRES_NEW)
public Dataset doImportHarvestedDataset(DataverseRequest dataverseRequest, HarvestingClient harvestingClient, String harvestIdentifier, String metadataFormat, File metadataFile, PrintWriter cleanupLog) throws ImportException, IOException {
if (harvestingClient == null || harvestingClient.getDataverse() == null) {
throw new ImportException("importHarvestedDataset called wiht a null harvestingClient, or an invalid harvestingClient.");
}
Dataverse owner = harvestingClient.getDataverse();
Dataset importedDataset = null;
DatasetDTO dsDTO = null;
String json = null;
// TODO:
// At the moment (4.5; the first official "export/harvest release"), there
// are 3 supported metadata formats: DDI, DC and native Dataverse metadata
// encoded in JSON. The 2 XML formats are handled by custom implementations;
// each of the 2 implementations uses its own parsing approach. (see the
        // ImportDDIServiceBean and ImportGenericServiceBean for details).
// TODO: Need to create a system of standardized import plugins - similar to Stephen
// Kraffmiller's export modules; replace the logic below with clean
// programmatic lookup of the import plugin needed.
if ("ddi".equalsIgnoreCase(metadataFormat) || "oai_ddi".equals(metadataFormat)
|| metadataFormat.toLowerCase().matches("^oai_ddi.*")) {
try {
String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
// TODO:
// import type should be configurable - it should be possible to
// select whether you want to harvest with or without files,
// ImportType.HARVEST vs. ImportType.HARVEST_WITH_FILES
logger.fine("importing DDI "+metadataFile.getAbsolutePath());
dsDTO = importDDIService.doImport(ImportType.HARVEST_WITH_FILES, xmlToParse);
} catch (Exception e) {
throw new ImportException("Failed to process DDI XML record: "+ e.getClass() + " (" + e.getMessage() + ")");
}
} else if ("dc".equalsIgnoreCase(metadataFormat) || "oai_dc".equals(metadataFormat)) {
logger.fine("importing DC "+metadataFile.getAbsolutePath());
try {
String xmlToParse = new String(Files.readAllBytes(metadataFile.toPath()));
dsDTO = importGenericService.processOAIDCxml(xmlToParse);
} catch (Exception e) {
throw new ImportException("Failed to process Dublin Core XML record: "+ e.getClass() + " (" + e.getMessage() + ")");
}
} else if ("dataverse_json".equals(metadataFormat)) {
// This is Dataverse metadata already formatted in JSON.
// Simply read it into a string, and pass to the final import further down:
logger.fine("Attempting to import custom dataverse metadata from file "+metadataFile.getAbsolutePath());
json = new String(Files.readAllBytes(metadataFile.toPath()));
} else {
throw new ImportException("Unsupported import metadata format: " + metadataFormat);
}
if (json == null) {
            if (dsDTO != null) {
// convert DTO to Json,
Gson gson = new GsonBuilder().setPrettyPrinting().create();
json = gson.toJson(dsDTO);
logger.fine("JSON produced for the metadata harvested: "+json);
} else {
throw new ImportException("Failed to transform XML metadata format "+metadataFormat+" into a DatasetDTO");
}
}
JsonReader jsonReader = Json.createReader(new StringReader(json));
JsonObject obj = jsonReader.readObject();
//and call parse Json to read it into a dataset
try {
JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService);
parser.setLenient(true);
Dataset ds = parser.parseDataset(obj);
// For ImportType.NEW, if the metadata contains a global identifier, and it's not a protocol
// we support, it should be rejected.
// (TODO: ! - add some way of keeping track of supported protocols!)
//if (ds.getGlobalId() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) {
// throw new ImportException("Could not register id " + ds.getGlobalId() + ", protocol not supported");
//}
ds.setOwner(owner);
ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields());
            // Check data against required constraints
List<ConstraintViolation> violations = ds.getVersions().get(0).validateRequired();
if (!violations.isEmpty()) {
// For migration and harvest, add NA for missing required values
for (ConstraintViolation v : violations) {
DatasetField f = ((DatasetField) v.getRootBean());
f.setSingleValue(DatasetField.NA_VALUE);
}
}
// Check data against validation constraints
// If we are migrating and "scrub migration data" is true we attempt to fix invalid data
// if the fix fails stop processing of this file by throwing exception
Set<ConstraintViolation> invalidViolations = ds.getVersions().get(0).validate();
ValidatorFactory factory = Validation.buildDefaultValidatorFactory();
Validator validator = factory.getValidator();
if (!invalidViolations.isEmpty()) {
for (ConstraintViolation v : invalidViolations) {
DatasetFieldValue f = ((DatasetFieldValue) v.getRootBean());
boolean fixed = false;
boolean converted = false;
// TODO: Is this scrubbing something we want to continue doing?
if (settingsService.isTrueForKey(SettingsServiceBean.Key.ScrubMigrationData, false)) {
fixed = processMigrationValidationError(f, cleanupLog, metadataFile.getName());
converted = true;
if (fixed) {
Set<ConstraintViolation<DatasetFieldValue>> scrubbedViolations = validator.validate(f);
if (!scrubbedViolations.isEmpty()) {
fixed = false;
}
}
}
if (!fixed) {
String msg = "Data modified - File: " + metadataFile.getName() + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; "
+ "Invalid value: '" + f.getValue() + "'" + " Converted Value:'" + DatasetField.NA_VALUE + "'";
cleanupLog.println(msg);
f.setValue(DatasetField.NA_VALUE);
}
}
}
// A Global ID is required, in order for us to be able to harvest and import
// this dataset:
if (StringUtils.isEmpty(ds.getGlobalId())) {
throw new ImportException("The harvested metadata record with the OAI server identifier "+harvestIdentifier+" does not contain a global unique identifier that we could recognize, skipping.");
}
ds.setHarvestedFrom(harvestingClient);
ds.setHarvestIdentifier(harvestIdentifier);
Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId());
if (existingDs != null) {
// If this dataset already exists IN ANOTHER DATAVERSE
// we are just going to skip it!
if (existingDs.getOwner() != null && !owner.getId().equals(existingDs.getOwner().getId())) {
throw new ImportException("The dataset with the global id "+ds.getGlobalId()+" already exists, in the dataverse "+existingDs.getOwner().getAlias()+", skipping.");
}
// And if we already have a dataset with this same id, in this same
// dataverse, but it is LOCAL dataset (can happen!), we're going to
// skip it also:
if (!existingDs.isHarvested()) {
throw new ImportException("A LOCAL dataset with the global id "+ds.getGlobalId()+" already exists in this dataverse; skipping.");
}
// For harvested datasets, there should always only be one version.
// We will replace the current version with the imported version.
if (existingDs.getVersions().size() != 1) {
throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDs.getVersions().size() + " versions");
}
// Purge all the SOLR documents associated with this client from the
// index server:
indexService.deleteHarvestedDocuments(existingDs);
// files from harvested datasets are removed unceremoniously,
// directly in the database. no need to bother calling the
// DeleteFileCommand on them.
                for (DataFile harvestedFile : existingDs.getFiles()) {
                    DataFile merged = em.merge(harvestedFile);
                    em.remove(merged);
                }
// TODO:
// Verify what happens with the indexed files in SOLR?
// are they going to be overwritten by the reindexing of the dataset?
existingDs.setFiles(null);
Dataset merged = em.merge(existingDs);
                engineSvc.submit(new DestroyDatasetCommand(merged, dataverseRequest));
            }
            // Whether an existing harvested copy was just destroyed or not,
            // create the dataset from the newly harvested metadata:
            importedDataset = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, ImportType.HARVEST));
} catch (Exception ex) {
logger.fine("Failed to import harvested dataset: " + ex.getClass() + ": " + ex.getMessage());
            try (FileOutputStream savedJsonFileStream = new FileOutputStream(new File(metadataFile.getAbsolutePath() + ".json"))) {
                byte[] jsonBytes = json.getBytes();
                int i = 0;
                while (i < jsonBytes.length) {
                    int chunkSize = Math.min(8192, jsonBytes.length - i);
                    savedJsonFileStream.write(jsonBytes, i, chunkSize);
                    i += chunkSize;
                }
            }
logger.info("JSON produced saved in " + metadataFile.getAbsolutePath() + ".json");
throw new ImportException("Failed to import harvested dataset: " + ex.getClass() + " (" + ex.getMessage() + ")", ex);
}
return importedDataset;
}
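    /**
     * Parses a DDI XML record into a {@link DatasetDTO}, converts it to JSON,
     * and creates (or versions) the corresponding dataset. Behavior varies by
     * {@link ImportType}: NEW enforces required fields and a supported id
     * protocol, while MIGRATION and HARVEST substitute NA for missing values.
     */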
public JsonObjectBuilder doImport(DataverseRequest dataverseRequest, Dataverse owner, String xmlToParse, String fileName, ImportType importType, PrintWriter cleanupLog) throws ImportException, IOException {
String status = "";
Long createdId = null;
DatasetDTO dsDTO = null;
try {
dsDTO = importDDIService.doImport(importType, xmlToParse);
} catch (XMLStreamException e) {
throw new ImportException("XMLStreamException" + e);
}
// convert DTO to Json,
Gson gson = new GsonBuilder().setPrettyPrinting().create();
String json = gson.toJson(dsDTO);
JsonReader jsonReader = Json.createReader(new StringReader(json));
JsonObject obj = jsonReader.readObject();
//and call parse Json to read it into a dataset
try {
JsonParser parser = new JsonParser(datasetfieldService, metadataBlockService, settingsService);
parser.setLenient(!importType.equals(ImportType.NEW));
Dataset ds = parser.parseDataset(obj);
// For ImportType.NEW, if the user supplies a global identifier, and it's not a protocol
// we support, it will be rejected.
if (importType.equals(ImportType.NEW)) {
if (ds.getGlobalId() != null && !ds.getProtocol().equals(settingsService.getValueForKey(SettingsServiceBean.Key.Protocol, ""))) {
throw new ImportException("Could not register id " + ds.getGlobalId() + ", protocol not supported");
}
}
ds.setOwner(owner);
ds.getLatestVersion().setDatasetFields(ds.getLatestVersion().initDatasetFields());
            // Check data against required constraints
List<ConstraintViolation> violations = ds.getVersions().get(0).validateRequired();
if (!violations.isEmpty()) {
if (importType.equals(ImportType.MIGRATION) || importType.equals(ImportType.HARVEST)) {
// For migration and harvest, add NA for missing required values
for (ConstraintViolation v : violations) {
DatasetField f = ((DatasetField) v.getRootBean());
f.setSingleValue(DatasetField.NA_VALUE);
}
} else {
// when importing a new dataset, the import will fail
// if required values are missing.
String errMsg = "Error importing data:";
for (ConstraintViolation v : violations) {
errMsg += " " + v.getMessage();
}
throw new ImportException(errMsg);
}
}
// Check data against validation constraints
// If we are migrating and "scrub migration data" is true we attempt to fix invalid data
// if the fix fails stop processing of this file by throwing exception
Set<ConstraintViolation> invalidViolations = ds.getVersions().get(0).validate();
ValidatorFactory factory = Validation.buildDefaultValidatorFactory();
Validator validator = factory.getValidator();
if (!invalidViolations.isEmpty()) {
for (ConstraintViolation v : invalidViolations) {
DatasetFieldValue f = ((DatasetFieldValue) v.getRootBean());
boolean fixed = false;
boolean converted = false;
if ((importType.equals(ImportType.MIGRATION) || importType.equals(ImportType.HARVEST)) && settingsService.isTrueForKey(SettingsServiceBean.Key.ScrubMigrationData, false)) {
fixed = processMigrationValidationError(f, cleanupLog, fileName);
converted = true;
if (fixed) {
Set<ConstraintViolation<DatasetFieldValue>> scrubbedViolations = validator.validate(f);
if (!scrubbedViolations.isEmpty()) {
fixed = false;
}
}
}
if (!fixed) {
if (importType.equals(ImportType.HARVEST)) {
String msg = "Data modified - File: " + fileName + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; "
+ "Invalid value: '" + f.getValue() + "'" + " Converted Value:'" + DatasetField.NA_VALUE + "'";
cleanupLog.println(msg);
f.setValue(DatasetField.NA_VALUE);
} else {
String msg = " Validation error for ";
if (converted) {
msg += "converted ";
}
msg += "value: " + f.getValue() + ", " + f.getValidationMessage();
throw new ImportException(msg);
}
}
}
}
Dataset existingDs = datasetService.findByGlobalId(ds.getGlobalId());
if (existingDs != null) {
if (importType.equals(ImportType.HARVEST)) {
// For harvested datasets, there should always only be one version.
// We will replace the current version with the imported version.
if (existingDs.getVersions().size() != 1) {
throw new ImportException("Error importing Harvested Dataset, existing dataset has " + existingDs.getVersions().size() + " versions");
}
engineSvc.submit(new DestroyDatasetCommand(existingDs, dataverseRequest));
Dataset managedDs = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, importType));
status = " updated dataset, id=" + managedDs.getId() + ".";
} else {
// If we are adding a new version to an existing dataset,
// check that the version number isn't already in the dataset
for (DatasetVersion dsv : existingDs.getVersions()) {
if (dsv.getVersionNumber().equals(ds.getLatestVersion().getVersionNumber())) {
throw new ImportException("VersionNumber " + ds.getLatestVersion().getVersionNumber() + " already exists in dataset " + existingDs.getGlobalId());
}
}
DatasetVersion dsv = engineSvc.submit(new CreateDatasetVersionCommand(dataverseRequest, existingDs, ds.getVersions().get(0)));
status = " created datasetVersion, for dataset "+ dsv.getDataset().getGlobalId();
createdId = dsv.getId();
}
} else {
Dataset managedDs = engineSvc.submit(new CreateDatasetCommand(ds, dataverseRequest, false, importType));
status = " created dataset, id=" + managedDs.getId() + ".";
createdId = managedDs.getId();
}
} catch (JsonParseException ex) {
logger.log(Level.INFO, "Error parsing datasetVersion: {0}", ex.getMessage());
throw new ImportException("Error parsing datasetVersion: " + ex.getMessage(), ex);
} catch (CommandException ex) {
logger.log(Level.INFO, "Error excuting Create dataset command: {0}", ex.getMessage());
throw new ImportException("Error excuting dataverse command: " + ex.getMessage(), ex);
}
return Json.createObjectBuilder().add("message", status);
}
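    /**
     * Attempts to scrub a known class of invalid values (contact emails,
     * placeholder producer URLs, and malformed dates) encountered during
     * migration. Returns true if the value was modified, false otherwise.
     */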
private boolean processMigrationValidationError(DatasetFieldValue f, PrintWriter cleanupLog, String fileName) {
if (f.getDatasetField().getDatasetFieldType().getName().equals(DatasetFieldConstant.datasetContactEmail)) {
//Try to convert it based on the errors we've seen
String convertedVal = convertInvalidEmail(f.getValue());
            if (convertedVal != null) {
String msg = "Data modified - File: " + fileName + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; "
+ "Invalid value: '" + f.getValue() + "'" + " Converted Value:'" + convertedVal + "'";
cleanupLog.println(msg);
f.setValue(convertedVal);
return true;
}
//if conversion fails set to NA
String msg = "Data modified - File: " + fileName + "; Field: Dataset Contact Email; " + "Invalid value: '" + f.getValue() + "'" + " Converted Value: 'NA'";
cleanupLog.println(msg);
f.setValue(DatasetField.NA_VALUE);
return true;
}
if (f.getDatasetField().getDatasetFieldType().getName().equals(DatasetFieldConstant.producerURL)) {
if (f.getValue().equals("PRODUCER URL")) {
String msg = "Data modified - File: " + fileName + "; Field: Producer URL; " + "Invalid value: '" + f.getValue() + "'" + " Converted Value: 'NA'";
cleanupLog.println(msg);
f.setValue(DatasetField.NA_VALUE);
return true;
}
}
if (f.getDatasetField().getDatasetFieldType().getFieldType().equals(DatasetFieldType.FieldType.DATE)) {
            if (f.getValue().equalsIgnoreCase("YYYY-MM-DD")) {
                String msg = "Data modified - File: " + fileName + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; "
                        + "Invalid value: '" + f.getValue() + "'" + " Converted Value: 'NA'";
cleanupLog.println(msg);
f.setValue(DatasetField.NA_VALUE);
return true;
}
String convertedVal = convertInvalidDateString(f.getValue());
            if (convertedVal != null) {
                String msg = "Data modified - File: " + fileName + "; Field: " + f.getDatasetField().getDatasetFieldType().getDisplayName() + "; "
                        + "Invalid value: '" + f.getValue() + "'" + " Converted Value: '" + convertedVal + "'";
cleanupLog.println(msg);
f.setValue(convertedVal);
return true;
}
}
return false;
}
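    /**
     * Tries to salvage an email address from a known-invalid value: either
     * the first entry of a comma-separated list, or the display text of an
     * HTML anchor tag. Returns null if no conversion applies.
     */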
private String convertInvalidEmail(String inString){
//First we'll see if the invalid email is a comma delimited list of email addresses
//if so we'll return the first one - maybe try to get them all at some point?
if (inString.contains(",")){
String[] addresses = inString.split("\\,");
return addresses[0];
}
//This works on the specific error we've seen where the user has put in a link for the email address
//as in '<a href="IFPRI-Data@cgiar.org" > IFPRI-Data@cgiar.org</a>'
//this returns the string between the first > and the second <
if (inString.indexOf("<a", 0) > -1){
try {
String eMailAddress = inString.substring(inString.indexOf(">", 0) + 1, inString.indexOf("</a>", inString.indexOf(">", 0)));
return eMailAddress.trim();
} catch (Exception e){
return null;
}
}
return null;
}
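    /**
     * Tries to convert a known-invalid date string to a usable form:
     * "YYYY0000" becomes "YYYY", question-mark dates become bracketed
     * uncertain dates, and month names become numeric "YYYY-MM" values.
     * Returns null if no conversion applies.
     */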
private String convertInvalidDateString(String inString){
//converts XXXX0000 to XXXX for date purposes
if (inString.trim().length() == 8){
if (inString.trim().endsWith("0000")){
return inString.replace("0000", "").trim();
}
}
        // Normalize dates containing question marks into a bracketed
        // uncertain-date form, e.g. "197?" becomes "[197-?]"
if (inString.contains("?")) {
String testval = inString.replace("?", " ").replace("[", " ").replace("]", " ");
if (StringUtils.isNumeric(testval.trim())) {
switch (testval.trim().length()) {
case 1:
return "[" + testval.trim() + "---?]";
case 2:
return "[" + testval.trim() + "--?]";
case 3:
return "[" + testval.trim() + "-?]";
case 4:
return "[" + testval.trim() + "?]";
case 8:
if(testval.trim().contains("0000")){
return "[" + testval.trim().replace("0000", "") + "?]";
}
}
}
}
        // Convert month names to numeric form, e.g. "January 1975" becomes "1975-01"
        String upper = inString.toUpperCase();
        String[] months = {"JANUARY", "FEBRUARY", "MARCH", "APRIL", "MAY", "JUNE",
                "JULY", "AUGUST", "SEPTEMBER", "OCTOBER", "NOVEMBER", "DECEMBER"};
        for (int m = 0; m < months.length; m++) {
            if (upper.contains(months[m])) {
                return upper.replace(months[m], "").replace(",", "").trim() + (m < 9 ? "-0" : "-") + (m + 1);
            }
        }
return null;
}
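    // NOTE: this formatter does not appear to be referenced anywhere in this
    // bean; it is kept here as-is.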
    private static class MyCustomFormatter extends Formatter {

        @Override
        public String format(LogRecord record) {
            StringBuilder sb = new StringBuilder();
            sb.append("Prefix\n");
            sb.append(record.getMessage());
            sb.append("Suffix\n");
            sb.append("\n");
            return sb.toString();
        }
    }
}