/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.design.io; import static fr.ens.biologie.genomique.eoulsan.design.DesignMetadata.ADDITIONAL_ANNOTATION_FILE_KEY; import static fr.ens.biologie.genomique.eoulsan.design.DesignMetadata.GENOME_FILE_KEY; import static fr.ens.biologie.genomique.eoulsan.design.DesignMetadata.GFF_FILE_KEY; import static fr.ens.biologie.genomique.eoulsan.design.DesignMetadata.GTF_FILE_KEY; import static java.util.Collections.unmodifiableMap; import static com.google.common.base.Preconditions.checkNotNull; import java.io.BufferedReader; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import fr.ens.biologie.genomique.eoulsan.Globals; import fr.ens.biologie.genomique.eoulsan.core.Naming; import fr.ens.biologie.genomique.eoulsan.data.DataFile; import fr.ens.biologie.genomique.eoulsan.design.Design; import fr.ens.biologie.genomique.eoulsan.design.DesignFactory; import fr.ens.biologie.genomique.eoulsan.design.Sample; import fr.ens.biologie.genomique.eoulsan.design.SampleMetadata; /** * 
* This class defines a design reader for limma-style, tab-separated design
* files. The first non-empty, non-comment line of the file is the header: its
* first column must be "SampleNumber" and its second column must be "Name".
* Every following non-empty, non-comment line describes one sample.
* @since 1.0
* @author Laurent Jourdren
*/
public class Eoulsan1DesignReader implements DesignReader {

  private static final String TAB_SEPARATOR = "\t";

  // For backward compatibility
  static final String SAMPLE_NUMBER_FIELD = "SampleNumber";
  static final String SAMPLE_NAME_FIELD = "Name";
  static final String FILENAME_FIELD = "FileName";
  static final String READS_FIELD = "Reads";
  static final String EXPERIMENT_FIELD = "Experiment";

  // Positions imposed on the header by parseHeader()
  private static final int SAMPLE_NUMBER_INDEX = 0;
  private static final int SAMPLE_NAME_INDEX = 1;

  private final InputStream is;

  /**
   * Define the mapping between the column names of the file and the keys of
   * the design metadata.
   * @return an unmodifiable map (column name -> design metadata key)
   */
  private static Map<String, String> defineDesignMetadataFields() {

    final Map<String, String> map = new HashMap<>();

    // Legacy column names
    map.put("Genome", GENOME_FILE_KEY);
    map.put("Annotation", GFF_FILE_KEY);
    map.put("AdditionalAnnotation", ADDITIONAL_ANNOTATION_FILE_KEY);

    // Columns that are already named with the metadata key
    map.put(GENOME_FILE_KEY, GENOME_FILE_KEY);
    map.put(GFF_FILE_KEY, GFF_FILE_KEY);
    map.put(GTF_FILE_KEY, GTF_FILE_KEY);
    map.put(ADDITIONAL_ANNOTATION_FILE_KEY, ADDITIONAL_ANNOTATION_FILE_KEY);

    return unmodifiableMap(map);
  }

  /**
   * Define the mapping between the column names of the file and the keys of
   * the sample metadata.
   * @return an unmodifiable map (column name -> sample metadata key)
   */
  private static Map<String, String> defineSampleMetadataFields() {

    final Map<String, String> map = new HashMap<>();

    map.put(READS_FIELD, SampleMetadata.READS_KEY);
    map.put("FastqFormat", SampleMetadata.FASTQ_FORMAT_KEY);
    map.put("Condition", SampleMetadata.CONDITION_KEY);
    map.put("RepTechGroup", SampleMetadata.REP_TECH_GROUP_KEY);
    map.put("Reference", SampleMetadata.REFERENCE_KEY);
    map.put("UUID", SampleMetadata.UUID_KEY);
    map.put("Operator", SampleMetadata.OPERATOR_KEY);

    return unmodifiableMap(map);
  }

  /**
   * Read the design. Blank lines and lines starting with '#' are ignored. The
   * input stream given to the constructor is closed when this method returns
   * or throws.
   * @return a new Design object filled with the content of the file
   * @throws IOException if an error occurs while reading the stream, if the
   *           header is invalid (empty or duplicated column name,
   *           "SampleNumber" not first, "Name" not second, no "Reads"
   *           column) or if a sample line has not the same number of fields
   *           as the header
   */
  @Override
  public Design read() throws IOException {

    final List<String> fieldnames = new ArrayList<>();
    final Design design = DesignFactory.createEmptyDesign();

    final Map<String, String> designMetadataFields =
        defineDesignMetadataFields();
    final Map<String, String> sampleMetadataFields =
        defineSampleMetadataFields();

    try (final BufferedReader br = new BufferedReader(
        new InputStreamReader(this.is, Globals.DEFAULT_CHARSET))) {

      boolean headerParsed = false;
      int experimentFieldIndex = -1;
      String line;

      while ((line = br.readLine()) != null) {

        // Skip blank lines and comments
        final String trimmed = line.trim();
        if (trimmed.isEmpty() || trimmed.startsWith("#")) {
          continue;
        }

        // NOTE: String.split(String) discards trailing empty strings, so a
        // line that ends with empty columns yields fewer fields
        final String[] fields = line.split(TAB_SEPARATOR);

        if (!headerParsed) {
          parseHeader(fields, fieldnames);

          // Column names are unique, -1 means that there is no such column
          experimentFieldIndex = fieldnames.indexOf(EXPERIMENT_FIELD);
          headerParsed = true;
        } else {
          parseSampleLine(line, fields, fieldnames, experimentFieldIndex,
              design, designMetadataFields, sampleMetadataFields);
        }
      }
    }

    // Also reached when the stream contains no header at all
    if (!fieldnames.contains(READS_FIELD)) {
      throw new IOException("Invalid file format: No Reads field");
    }

    return design;
  }

  /**
   * Parse and check the header line.
   * @param fields the fields of the header line
   * @param fieldnames the list to fill with the names of the columns
   * @throws IOException if a column name is empty or duplicated, or if the
   *           two first columns are not "SampleNumber" and "Name"
   */
  private static void parseHeader(final String[] fields,
      final List<String> fieldnames) throws IOException {

    for (final String rawField : fields) {

      String field = rawField.trim();

      if (field.isEmpty()) {
        throw new IOException(
            "Found an empty field name in design file header.");
      }

      // Compatibility with old design files: the legacy "FileName" column is
      // handled exactly as a "Reads" column (same metadata mapping and same
      // final check on the existence of the column)
      if (FILENAME_FIELD.equals(field)) {
        field = READS_FIELD;
      }

      if (fieldnames.contains(field)) {
        throw new IOException("There is two or more field \""
            + field + "\" in design file header.");
      }

      fieldnames.add(field);
    }

    if (fieldnames.indexOf(SAMPLE_NUMBER_FIELD) != SAMPLE_NUMBER_INDEX) {
      throw new IOException("Invalid file format: "
          + "The \"" + SAMPLE_NUMBER_FIELD
          + "\" field is not the first field.");
    }

    if (fieldnames.indexOf(SAMPLE_NAME_FIELD) != SAMPLE_NAME_INDEX) {
      throw new IOException("Invalid file format: "
          + "The \"" + SAMPLE_NAME_FIELD
          + "\" field is not the second field.");
    }
  }

  /**
   * Parse a line that describes a sample and add the sample to the design.
   * @param line the raw line, only used for the error message
   * @param fields the fields of the line
   * @param fieldnames the names of the columns, as read from the header
   * @param experimentFieldIndex the index of the "Experiment" column or -1 if
   *          there is no such column
   * @param design the design to fill
   * @param designMetadataFields the mapping column name -> design metadata
   *          key
   * @param sampleMetadataFields the mapping column name -> sample metadata
   *          key
   * @throws IOException if the line has not the same number of fields as the
   *           header
   */
  private static void parseSampleLine(final String line,
      final String[] fields, final List<String> fieldnames,
      final int experimentFieldIndex, final Design design,
      final Map<String, String> designMetadataFields,
      final Map<String, String> sampleMetadataFields) throws IOException {

    if (fields.length != fieldnames.size()) {
      throw new IOException("Invalid file format: "
          + "Found " + fields.length + " fields whereas " + fieldnames.size()
          + " are required in line: " + line);
    }

    // The header check guarantees that the "Name" column exists at this
    // index. The "SampleNumber" column (index 0) is deliberately ignored.
    final String sampleName = fields[SAMPLE_NAME_INDEX].trim();
    final Sample sample = design.addSample(Naming.toValidName(sampleName));
    sample.setName(sampleName);

    for (int i = SAMPLE_NAME_INDEX + 1; i < fields.length; i++) {

      final String value = fields[i].trim();
      final String fieldName = fieldnames.get(i);

      if (i == experimentFieldIndex) {
        attachToExperiment(design, sample, value);
        continue;
      }

      final String designKey = designMetadataFields.get(fieldName);

      if (designKey != null) {

        // Design metadata: the value of the first sample line wins
        if (!design.getMetadata().contains(designKey)) {
          design.getMetadata().set(designKey, value);
        }
      } else {

        // Sample metadata: unknown columns are stored under their own name
        final String sampleKey = sampleMetadataFields.get(fieldName);
        sample.getMetadata().set(sampleKey != null ? sampleKey : fieldName,
            value);
      }
    }
  }

  /**
   * Add a sample to an experiment of the design. The experiment is created if
   * it does not exist yet.
   * @param design the design
   * @param sample the sample to add to the experiment
   * @param experimentName the name of the experiment as found in the file
   */
  private static void attachToExperiment(final Design design,
      final Sample sample, final String experimentName) {

    final String experimentId = Naming.toValidName(experimentName);

    if (!design.containsExperiment(experimentId)) {
      design.addExperiment(experimentId);
      design.getExperiment(experimentId).setName(experimentName);
    }

    // The experiment is registered under its identifier, not under its raw
    // name, so the identifier must be used for the lookup
    design.getExperiment(experimentId).addSample(sample);
  }

  //
  // Constructors
  //

  /**
   * Public constructor.
   * @param file file to read
   * @throws FileNotFoundException if the file cannot be found
   */
  public Eoulsan1DesignReader(final File file) throws FileNotFoundException {

    checkNotNull(file, "the file argument cannot be null");

    this.is = new FileInputStream(file);
  }

  /**
   * Public constructor.
   * @param is Input stream to read, it will be closed by {@link #read()}
   * @throws IOException if an error occurs while opening the file
   */
  public Eoulsan1DesignReader(final InputStream is) throws IOException {

    checkNotNull(is, "the is argument cannot be null");

    this.is = is;
  }

  /**
   * Public constructor.
   * @param file file to read
   * @throws IOException if an error occurs while opening the file
   */
  public Eoulsan1DesignReader(final DataFile file) throws IOException {

    checkNotNull(file, "the file argument cannot be null");

    this.is = file.open();
  }

  /**
   * Public constructor.
   * @param filename File to read
   * @throws FileNotFoundException if the file doesn't exist
   */
  public Eoulsan1DesignReader(final String filename)
      throws FileNotFoundException {

    checkNotNull(filename, "the filename argument cannot be null");

    this.is = new FileInputStream(filename);
  }
}