package org.isatools.isacreator.io.importisa; import org.apache.commons.collections15.OrderedMap; import org.apache.log4j.Logger; import org.isatools.errorreporter.model.ErrorLevel; import org.isatools.errorreporter.model.ErrorMessage; import org.isatools.errorreporter.model.FileType; import org.isatools.errorreporter.model.ISAFileErrorReport; import org.isatools.isacreator.configuration.MappingObject; import org.isatools.isacreator.configuration.Ontology; import org.isatools.isacreator.gui.reference.DataEntryReferenceObject; import org.isatools.isacreator.io.importisa.errorhandling.exceptions.MalformedInvestigationException; import org.isatools.isacreator.io.importisa.investigationproperties.InvestigationFileSection; import org.isatools.isacreator.managers.ConfigurationManager; import org.isatools.isacreator.model.Assay; import org.isatools.isacreator.model.Investigation; import org.isatools.isacreator.model.Study; import org.isatools.isacreator.ontologymanager.OntologyManager; import org.isatools.isacreator.ontologymanager.OntologySourceRefObject; import org.isatools.isacreator.ontologymanager.bioportal.io.AcceptedOntologies; import org.isatools.isacreator.ontologymanager.common.OntologyTerm; import org.isatools.isacreator.settings.ISAcreatorProperties; import org.isatools.isacreator.spreadsheet.model.TableReferenceObject; import uk.ac.ebi.utils.collections.Pair; import java.io.File; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; /** * Created Created by the ISA team * <p/> * <p/> * Abstract class for importing ISATab files * <p/> * Date: 11/07/2012 * Time: 16:08 * * @author <a href="mailto:eamonnmag@gmail.com">Eamonn Maguire</a> * @author <a href="mailto:alejandra.gonzalez.beltran@gmail.com">Alejandra Gonzalez-Beltran</a> */ public abstract class ISAtabImporter { private static final Logger log = Logger.getLogger(ISAtabImporter.class); protected Investigation investigation; protected List<ISAFileErrorReport> errors; private List<ErrorMessage> messages; protected StructureToInvestigationMapper mapper; public ISAtabImporter() { errors = new ArrayList<ISAFileErrorReport>(); messages = new ArrayList<ErrorMessage>(); } /** * Imports ISATab files * * @param parentDir string with the path of the parent directory where ISAtab files are stored * @return indicates if import was successful or not */ public abstract boolean importFile(String parentDir); /** * Retrieves errors of the import process * * @return list of ISAFileErrorReports */ public List<ISAFileErrorReport> getMessages() { return errors; } public String getMessagesAsString() { StringBuilder builder = new StringBuilder(); for (ISAFileErrorReport errorReport : errors) { builder.append("Error filename: ").append(errorReport.getFileName()); builder.append("\n Error messages: "); for (ErrorMessage error : errorReport.getMessages()) { builder.append("\n").append(error.getMessage()); } } return builder.toString(); } /** * Retrieves the investigation object * * @return investigation */ public Investigation getInvestigation() { return investigation; } /** * Given the folder containing the ISAtab dataset, it loads the content in objects according to the ISA model * * @param parentDir directory containing the ISAtab dataset * @return true if successful, false otherwise */ protected boolean commonImportFile(String parentDir) { File investigationFile = new File(parentDir); if (!investigationFile.isDirectory()) { investigationFile = investigationFile.getParentFile(); parentDir = investigationFile.getAbsolutePath(); } log.info("Parent directory is -> " + parentDir); boolean investigationFileFound = false; if (investigationFile.exists()) { File[] isaDirectorFiles = investigationFile.listFiles(); if (isaDirectorFiles != null) { for (File isaFile : isaDirectorFiles) { if (isaFile.getName().toLowerCase().startsWith("i_")) { investigationFileFound = true; investigationFile = isaFile; break; } } } if (!investigationFileFound) { messages.add(new ErrorMessage(ErrorLevel.ERROR, "Investigation file does not exist in folder " + parentDir + ". Please create an investigation file and name it " + "\"i_<investigation identifier>.txt\"")); ISAFileErrorReport investigationErrorReport = new ISAFileErrorReport(investigationFile.getName(), FileType.INVESTIGATION, messages); errors.add(investigationErrorReport); return false; } try { InvestigationImport investigationFileImporter = new InvestigationImport(); Pair<Boolean, OrderedMap<String, OrderedMap<InvestigationFileSection, OrderedMap<String, List<String>>>>> investigationFileImport = investigationFileImporter.importInvestigationFile(investigationFile); messages.addAll(investigationFileImporter.getMessages()); if (investigationFileImport.fst) { log.info("Import of Investigation in " + investigationFile.getPath() + " was successful..."); log.info("Proceeding to map to Investigation..."); mapper = new StructureToInvestigationMapper(); Pair<Boolean, Investigation> mappingResult = mapper.createInvestigationFromDataStructure(investigationFileImport.snd); messages.addAll(mapper.getMessages()); if (!mappingResult.fst) { ISAFileErrorReport investigationErrorReport = new ISAFileErrorReport(investigationFile.getName(), FileType.INVESTIGATION, messages); errors.add(investigationErrorReport); return false; } investigation = mappingResult.snd; investigation.setFileReference(investigationFile.getPath()); if (investigation.getReferenceObject() != null) { TableReferenceObject tro = ConfigurationManager.selectTROForUserSelection(MappingObject.INVESTIGATION); DataEntryReferenceObject referenceObject = investigation.getReferenceObject(); referenceObject.setFieldDefinition(tro.getTableFields().getFields()); for (Study study : investigation.getStudies().values()) { study.getReferenceObject().setFieldDefinition(tro.getTableFields().getFields()); } } if (!processInvestigation(parentDir)) { ISAFileErrorReport investigationErrorReport = new ISAFileErrorReport(investigationFile.getName(), FileType.INVESTIGATION, messages); errors.add(investigationErrorReport); return false; } String lastConfigurationUsed = ISAcreatorProperties.getProperty(ISAcreatorProperties.CURRENT_CONFIGURATION); if (lastConfigurationUsed.contains(File.separator)) { lastConfigurationUsed = lastConfigurationUsed.substring(lastConfigurationUsed.lastIndexOf(File.separator) + 1); } if (!investigation.getLastConfigurationUsed().equals("") && !lastConfigurationUsed.equals("")) { if (!lastConfigurationUsed.equals(investigation.getLastConfigurationUsed())) { messages.add(new ErrorMessage(ErrorLevel.WARNING, "The last configuration used to load this ISAtab file was " + investigation.getLastConfigurationUsed() + ". The currently loaded configuration is " + lastConfigurationUsed + ". You can continue to load, but " + "the settings from " + investigation.getLastConfigurationUsed() + " may be important.")); ISAFileErrorReport investigationErrorReport = new ISAFileErrorReport(investigationFile.getName(), FileType.INVESTIGATION, messages); errors.add(investigationErrorReport); } } } else { messages.addAll(investigationFileImporter.getMessages()); ISAFileErrorReport investigationErrorReport = new ISAFileErrorReport(investigationFile.getName(), FileType.INVESTIGATION, messages); errors.add(investigationErrorReport); return false; } System.out.println("********************\n" + OntologyManager.getURIMappingInfo()); String mappingInfo = OntologyManager.getURIMappingInfoHTML(); if (mappingInfo != null && !mappingInfo.equals("")) messages.add(new ErrorMessage(ErrorLevel.INFO, mappingInfo)); } catch (IOException e) { messages.add(new ErrorMessage(ErrorLevel.ERROR, e.getMessage())); ISAFileErrorReport investigationErrorReport = new ISAFileErrorReport(investigationFile.getName(), FileType.INVESTIGATION, messages); errors.add(investigationErrorReport); return false; } } // investigation file exists return true; } protected boolean processInvestigation(String parentDirectoryPath) { SpreadsheetImport spreadsheetImporter = new SpreadsheetImport(); boolean errorsFound = false; for (String studyIdentifier : investigation.getStudies().keySet()) { Study study = investigation.getStudies().get(studyIdentifier); System.out.println("Processing " + studyIdentifier); // here we process the study sample file TableReferenceObject studySampleReference = ConfigurationManager.selectTROForUserSelection( MappingObject.STUDY_SAMPLE); if (studySampleReference != null) { List<ErrorMessage> messages = new ArrayList<ErrorMessage>(); try { TableReferenceObject builtReference = spreadsheetImporter.loadInTables(parentDirectoryPath + File.separator + study.getStudySampleFileIdentifier(), studySampleReference); if (builtReference != null) { study.setStudySamples(new Assay(study.getStudySampleFileIdentifier(), builtReference)); OntologyManager.addToOntologyTerms(builtReference.getReferencedOntologyTerms()); } } catch (MalformedInvestigationException mie) { mie.printStackTrace(); messages.add(new ErrorMessage(ErrorLevel.ERROR, mie.getMessage())); } catch (Exception e) { e.printStackTrace(); messages.add(new ErrorMessage(ErrorLevel.ERROR, e.getMessage())); } finally { if (messages.size() > 0) { ISAFileErrorReport studySampleReport = new ISAFileErrorReport(study.getStudySampleFileIdentifier(), FileType.STUDY_SAMPLE, messages); errors.add(studySampleReport); errorsFound = true; } } } // here we process the assay files List<Assay> noReferenceobjectFound = new ArrayList<Assay>(); for (String assayReference : study.getAssays().keySet()) { List<ErrorMessage> messages = new ArrayList<ErrorMessage>(); Assay assay = study.getAssays().get(assayReference); TableReferenceObject assayTableReferenceObject = ConfigurationManager.selectTROForUserSelection( assay.getMeasurementEndpoint(), assay.getTechnologyType()); if (!assay.getMeasurementEndpointTermAccession().startsWith("http")) { MappingObject mappingObject = assayTableReferenceObject.getTableFields().getMappingObject(); assay.setMeasurementEndpointTermAccession(mappingObject.getMeasurementAccession()); assay.setTechnologyTypeTermAccession(mappingObject.getTechnologyAccession()); addMissingOntologyRefsForAssayDefinition(mappingObject.getMeasurementEndpointType(), mappingObject.getMeasurementAccession(), mappingObject.getMeasurementSource()); addMissingOntologyRefsForAssayDefinition(mappingObject.getTechnologyType(), mappingObject.getTechnologyAccession(), mappingObject.getTechnologySource()); } if (assayTableReferenceObject != null) { try { TableReferenceObject builtReference = spreadsheetImporter.loadInTables(parentDirectoryPath + File.separator + assay.getAssayReference(), assayTableReferenceObject); if (builtReference != null) { assay.setTableReferenceObject(builtReference); OntologyManager.addToOntologyTerms(builtReference.getReferencedOntologyTerms()); } } catch (IOException e) { messages.add(new ErrorMessage(ErrorLevel.ERROR, e.getMessage())); } catch (MalformedInvestigationException e) { messages.add(new ErrorMessage(ErrorLevel.ERROR, e.getMessage())); } catch (Exception e) { messages.add(new ErrorMessage(ErrorLevel.ERROR, e.getMessage())); } finally { if (messages.size() > 0) { ISAFileErrorReport studySampleReport = new ISAFileErrorReport(assay.getAssayReference(), inferISAFileType(assay), messages); errors.add(studySampleReport); errorsFound = true; } } } else { messages.add(new ErrorMessage(ErrorLevel.WARNING, "Assay with measurement " + assay.getMeasurementEndpoint() + " & technology " + assay.getTechnologyType() + " is not recognised. Please ensure you are using the correct configuration!")); log.info("Assay with measurement " + assay.getMeasurementEndpoint() + " & technology " + assay.getTechnologyType() + " is not recognised. Please ensure you are using the correct configuration!"); noReferenceobjectFound.add(assay); ISAFileErrorReport studySampleReport = new ISAFileErrorReport(assay.getAssayReference(), assay.getTechnologyType(), assay.getMeasurementEndpoint(), inferISAFileType(assay), messages); errors.add(studySampleReport); errorsFound = false; } } for (Assay toRemove : noReferenceobjectFound) { log.info("Assay " + toRemove.getAssayReference() + " will not be loaded into ISAcreator because there is no configuration to define it..."); study.removeAssay(toRemove.getAssayReference()); } } assignOntologiesToSession(mapper.getOntologyTermsDefined()); return !errorsFound; } private void addMissingOntologyRefsForAssayDefinition(String term, String termAccession, String termSource) { OntologySourceRefObject osro; if ((osro = OntologyManager.getOntologySourceReferenceObjectByAbbreviation(termSource)) == null) { String ontologyURI = AcceptedOntologies.getOntologyIdForAbbreviation(termSource); Ontology ontology = AcceptedOntologies.getAcceptedOntologies().get(ontologyURI); if (ontology != null) { osro = AcceptedOntologies.convertOntologyToOntologySourceRefObject(ontology); OntologyManager.addOntologySource(osro); } } if (osro != null) { OntologyManager.addToOntologyTerms(new OntologyTerm(term, termAccession, termAccession, osro)); } } protected FileType inferISAFileType(Assay assay) { String assayDescription = assay.getTechnologyType().toLowerCase() + " " + assay.getMeasurementEndpoint().toLowerCase(); if (assayDescription.contains(FileType.MICROARRAY.getType())) { return FileType.MICROARRAY; } else if (assayDescription.contains(FileType.MASS_SPECTROMETRY.getType())) { return FileType.MASS_SPECTROMETRY; } else if (assayDescription.contains(FileType.NMR.getType())) { return FileType.NMR; } else if (assayDescription.contains(FileType.FLOW_CYTOMETRY.getType())) { return FileType.FLOW_CYTOMETRY; } else if (assayDescription.contains(FileType.GEL_ELECTROPHORESIS.getType())) { return FileType.GEL_ELECTROPHORESIS; } else if (assayDescription.contains(FileType.CLINICAL_CHEMISTRY.getType())) { return FileType.CLINICAL_CHEMISTRY; } else if (assayDescription.contains(FileType.HEMATOLOGY.getType())) { return FileType.HEMATOLOGY; } else if (assayDescription.contains(FileType.HISTOLOGY.getType())) { return FileType.HISTOLOGY; } else { return FileType.STUDY_SAMPLE; } } private void assignOntologiesToSession(List<OntologyTerm> ontologiesUsed) { for (OntologyTerm oo : ontologiesUsed) { if (!oo.getOntologyTermName().trim().equals("")) { OntologyManager.addToOntologyTerms(oo); } } } }