package org.isatools.isacreator.io.importisa;

import au.com.bytecode.opencsv.CSVReader;
import org.apache.commons.collections15.set.ListOrderedSet;
import org.isatools.isacreator.configuration.DataTypes;
import org.isatools.isacreator.configuration.FieldObject;
import org.isatools.isacreator.io.importisa.errorhandling.exceptions.MalformedInvestigationException;
import org.isatools.isacreator.managers.ConfigurationManager;
import org.isatools.isacreator.spreadsheet.model.TableReferenceObject;
import org.isatools.isacreator.utils.GeneralUtils;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
import java.util.Vector;

/**
 * Utility class to deal with import of Spreadsheet files into the system.
 * <p>
 * A tab-delimited file is read row by row: the first row supplies the column headers, from which a
 * {@link TableReferenceObject} is built, and every following row is added to that object as row data.
 *
 * @author Eamonn Maguire (eamonnmag@gmail.com)
 */
public class SpreadsheetImport {

    /**
     * Reads a tab-delimited file and builds the table definition and contents for it.
     * <p>
     * The first row is treated as the header row. If the table definition cannot be built against
     * {@code defaultTableRef}, the generic ("*", "*") definition from the {@link ConfigurationManager}
     * is tried once before the original failure is rethrown.
     *
     * @param fileName        - Name of file to load
     * @param defaultTableRef - TableReferenceObject to be used to load in file
     * @return TableReferenceObject describing the table, or {@code null} when the file contains no rows
     * @throws java.io.IOException when file does not exist or if the CSVReader cannot read the next line
     * @throws org.isatools.isacreator.io.importisa.errorhandling.exceptions.MalformedInvestigationException
     *                             - when a problem is found when reading in file.
     */
    public TableReferenceObject loadInTables(String fileName, TableReferenceObject defaultTableRef)
            throws IOException, MalformedInvestigationException {
        File f = new File(fileName);

        if (!f.exists()) {
            throw new FileNotFoundException("<p>The file " + fileName + " was not found. Please ensure that the file exists within " +
                    "the folder and that the name referred to in the investigation file is correct!</p>");
        }

        CSVReader reader = new CSVReader(new FileReader(f), '\t');
        try {
            String[] colHeaders = null;
            TableReferenceObject tro = null;
            String[] nextLine;

            while ((nextLine = reader.readNext()) != null) {
                if (colHeaders == null) {
                    // first row: the column headers define the table
                    colHeaders = nextLine;
                    tro = buildDefinitionWithFallback(fileName, nextLine, defaultTableRef);

                    Vector<String> preDefinedHeaders = new Vector<String>();
                    preDefinedHeaders.add("Row No.");

                    for (String h : nextLine) {
                        String lowercaseHeader = h.toLowerCase();
                        if (!lowercaseHeader.contains("term source ref")
                                && !lowercaseHeader.contains("term accession number")
                                && !h.equals("")) {
                            preDefinedHeaders.add(h);
                        }
                    }

                    if (preDefinedHeaders.size() > 0) {
                        tro.setPreDefinedHeaders(preDefinedHeaders);
                    }
                } else {
                    tro.addRowData(colHeaders, nextLine);
                }
            }

            return tro;
        } finally {
            // Always release the underlying file handle, whether the read succeeded or failed.
            closeReader(reader, fileName);
        }
    }

    /**
     * Builds the table definition from the header row, retrying with the generic table definition
     * when the supplied one rejects the headers.
     *
     * @param fileName        - Name of the file being loaded
     * @param headers         - The header row of the file
     * @param defaultTableRef - the preferred TableReferenceObject to build against
     * @return the table definition built from the headers
     * @throws MalformedInvestigationException - when neither the supplied nor the generic definition accepts the headers
     */
    private TableReferenceObject buildDefinitionWithFallback(String fileName, String[] headers,
                                                             TableReferenceObject defaultTableRef)
            throws MalformedInvestigationException {
        try {
            return reformTableDefinition(fileName, headers, defaultTableRef);
        } catch (MalformedInvestigationException mie) {
            System.err.println(mie.toString());

            TableReferenceObject generic_tro = ConfigurationManager.selectTROForUserSelection("*", "*");
            // identity comparison is intentional: only retry when the generic definition is a different object
            if (generic_tro != null && defaultTableRef != generic_tro) {
                return reformTableDefinition(fileName, headers, generic_tro);
            }
            throw mie;
        }
    }

    /**
     * Closes the reader, reporting (rather than propagating) a failure so that an exception already
     * travelling out of the read loop is not replaced by a close error.
     *
     * @param reader   - the reader to close
     * @param fileName - Name of the file being read, used in the error report
     */
    private void closeReader(CSVReader reader, String fileName) {
        try {
            reader.close();
        } catch (IOException ioe) {
            System.err.println("Unable to close " + fileName + ": " + ioe.toString());
        }
    }

    /**
     * Process the table file, piecing together which Units belong to which factors, and which parameters belong to
     * which protocol refs, etc.
     *
     * @param tableName      - Name of table to read
     * @param headers        - The column headers for the table
     * @param startReference - the TableReferenceObject to be used to define standard terms
     * @return the Fully built table reference object for the table!
     * @throws MalformedInvestigationException - when one or more headers did not end up in the built table definition.
     */
    private TableReferenceObject reformTableDefinition(String tableName, String[] headers,
                                                       TableReferenceObject startReference)
            throws MalformedInvestigationException {
        TableReferenceObject tro = new TableReferenceObject(tableName);

        // way of storing previously seen protocol to determine where the parameters are which associated with it.
        int previousProtocol = -1;
        // way of storing previously read characteristic, factor, or parameter to determine what type it is
        String previousCharFactParam = null;
        // 1-based header position at which a Unit column for previousCharFactParam would appear; -1 when none is pending
        int expectedNextUnitLocation = -1;

        int count = 0;
        int positionInheaders = 0;

        for (String columnHeader : headers) {
            positionInheaders++;
            String fieldAsLowercase = columnHeader.toLowerCase();

            // The previous header was a characteristic/factor/parameter whose field has been held back
            // until it is known whether this header is its Unit column.
            if (expectedNextUnitLocation == positionInheaders) {
                int parentColPos;

                if (fieldAsLowercase.contains("unit")) {
                    // add two fields...one accepting string values and the unit, also accepting string values :o)
                    FieldObject newFo = startReference.getFieldByName(previousCharFactParam);
                    if (newFo == null) {
                        newFo = new FieldObject(count, previousCharFactParam, "", DataTypes.STRING, "",
                                false, false, false);
                    }
                    tro.addField(newFo);

                    if (tro.getColumnDependencies().get(count) == null) {
                        tro.getColumnDependencies().put(count, new ListOrderedSet<Integer>());
                    }

                    parentColPos = count;
                    count++;

                    // get the unit for this factor.
                    newFo = startReference.getNextUnitField(previousCharFactParam);
                    if (newFo == null) {
                        newFo = new FieldObject(count, columnHeader, "", DataTypes.ONTOLOGY_TERM, "",
                                false, false, false);
                    }
                    tro.addField(newFo);

                    // attach the unit column to its parent column via the dependency mapping
                    tro.getColumnDependencies().get(parentColPos).add(count);
                    count++;
                } else {
                    // add a field accepting ontology terms
                    FieldObject newFo = startReference.getFieldByName(previousCharFactParam);
                    if (newFo == null) {
                        newFo = new FieldObject(count, previousCharFactParam, "", DataTypes.ONTOLOGY_TERM, "",
                                false, false, false);
                    }
                    tro.addField(newFo);

                    parentColPos = count;
                    count++;
                }

                // add just added parameter to a list of dependencies to be maintained for each protocol reference
                // (Protocol REF) field
                if (previousCharFactParam != null
                        && previousCharFactParam.toLowerCase().contains("parameter value")) {
                    if (tro.getColumnDependencies().get(previousProtocol) == null) {
                        tro.getColumnDependencies().put(previousProtocol, new ListOrderedSet<Integer>());
                    }
                    tro.getColumnDependencies().get(previousProtocol).add(parentColPos);
                }

                // reset expectedPosition
                expectedNextUnitLocation = -1;
            }

            FieldObject field = startReference.getFieldByName(columnHeader);

            if (field != null) {
                if (isCharacteristicFactorOrParameter(fieldAsLowercase)) {
                    // hold the field back until the next header shows whether a Unit column follows
                    previousCharFactParam = columnHeader;
                    expectedNextUnitLocation = positionInheaders + 1;
                } else {
                    tro.addField(field);
                }
                count++;
            } else {
                if (isCharacteristicFactorOrParameter(fieldAsLowercase)) {
                    previousCharFactParam = columnHeader;
                    expectedNextUnitLocation = positionInheaders + 1;
                }

                // The checks below are deliberately independent (not else-if): a header matching
                // several of them produces one field per match.
                if (fieldAsLowercase.equals("performer") || fieldAsLowercase.contains("comment")
                        || fieldAsLowercase.equals("provider")) {
                    FieldObject additionalFo = new FieldObject(count, columnHeader, "An additional column",
                            DataTypes.STRING, "", false, false, false);
                    tro.addField(additionalFo);
                    count++;
                }

                if (fieldAsLowercase.contains("material type")) {
                    FieldObject newFo = new FieldObject(count, columnHeader, "", DataTypes.ONTOLOGY_TERM, "",
                            false, false, false);
                    tro.addField(newFo);
                    count++;
                }

                if (fieldAsLowercase.contains("date")) {
                    FieldObject dateFo = new FieldObject(count, columnHeader, "Date field", DataTypes.DATE, "",
                            false, false, false);
                    tro.addField(dateFo);
                    count++;
                }

                if (fieldAsLowercase.contains("protocol ref")) {
                    previousProtocol = count;
                    FieldObject newFo = new FieldObject(count, "Protocol REF", "A reference to a protocol",
                            DataTypes.LIST, "", false, false, false);
                    tro.addField(newFo);
                    count++;
                }
            }
        }

        if (expectedNextUnitLocation != -1) {
            // add last factor/characteristic to the table
            FieldObject newFo = startReference.getFieldByName(previousCharFactParam);
            if (newFo == null) {
                newFo = new FieldObject(count, previousCharFactParam, "", DataTypes.ONTOLOGY_TERM, "",
                        false, false, false);
            }
            tro.addField(newFo);
        }

        tro.setMissingFields(GeneralUtils.findMissingFields(headers, startReference));

        Set<String> invalidHeaders = findInvalidFields(headers, tro);

        if (invalidHeaders.size() > 0) {
            // Join the names with ", " between entries only (no trailing separator).
            StringBuilder invalidHeaderNames = new StringBuilder();
            for (String s : invalidHeaders) {
                if (invalidHeaderNames.length() > 0) {
                    invalidHeaderNames.append(", ");
                }
                invalidHeaderNames.append(s);
            }

            String colText = invalidHeaders.size() > 1 ? "The columns " : "The column ";
            String linkText = invalidHeaders.size() > 1 ? " are " : " is ";

            throw new MalformedInvestigationException(colText + invalidHeaderNames + linkText
                    + "not supported in this assay");
        }

        return tro;
    }

    /**
     * Tells whether a (lower-cased) header names a factor value, characteristic or parameter value column
     * that is not a comment column.
     *
     * @param lowercaseHeader - the column header, already converted to lower case
     * @return true when the header contains "factor value", "characteristics" or "parameter value"
     *         and does not contain "comment"
     */
    private static boolean isCharacteristicFactorOrParameter(String lowercaseHeader) {
        return (lowercaseHeader.contains("factor value")
                || lowercaseHeader.contains("characteristics")
                || lowercaseHeader.contains("parameter value"))
                && !lowercaseHeader.contains("comment");
    }

    /**
     * Finds the headers of the file which did not make it into the final table definition.
     * <p>
     * "Term Source REF" and "Term Accession Number" headers and blank headers are never reported.
     *
     * @param headers              - The column headers read from the file
     * @param finalTableDefinition - the table definition built from those headers
     * @return the set of distinct headers that are absent from the headers of the table definition
     */
    private Set<String> findInvalidFields(String[] headers, TableReferenceObject finalTableDefinition) {
        Set<String> headerSet = new HashSet<String>();
        headerSet.addAll(Arrays.asList(headers));

        Set<String> invalidHeaders = new HashSet<String>();

        Set<String> toIgnoreAsSet = new HashSet<String>();
        toIgnoreAsSet.add("Term Source REF");
        toIgnoreAsSet.add("Term Accession Number");

        for (String field : headerSet) {
            if (!toIgnoreAsSet.contains(field) && !field.trim().equals("")
                    && !finalTableDefinition.getHeaders().contains(field)) {
                invalidHeaders.add(field);
            }
        }

        return invalidHeaders;
    }
}