/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.design; import static com.google.common.base.Preconditions.checkNotNull; import java.io.IOException; import java.io.InputStream; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import fr.ens.biologie.genomique.eoulsan.EoulsanException; import fr.ens.biologie.genomique.eoulsan.data.DataFile; import fr.ens.biologie.genomique.eoulsan.data.DataFormatRegistry; import fr.ens.biologie.genomique.eoulsan.design.io.DefaultDesignReader; import fr.ens.biologie.genomique.eoulsan.design.io.DesignReader; /** * Utils methods for Design. * @since 1.0 * @author Laurent Jourdren * @author Xavier Bauquet */ public final class DesignUtils { /** * Show a design * @param design Design to show */ public static void showDesign(final Design design) { checkNotNull(design, "design argument cannot be null"); final StringBuilder sb = new StringBuilder(); // Print the name and number of the design sb.append("Design: "); sb.append(design.getName()); sb.append(" ("); sb.append(design.getNumber()); sb.append(")\n"); // Print design metadata sb.append("Design metadata:\n"); for (Map.Entry<String, String> e : design.getMetadata().entrySet()) { sb.append('\t'); sb.append(e.getKey()); sb.append('='); sb.append(e.getValue()); sb.append('\n'); } sb.append('\n'); // Print experiment metadata sb.append("Experiments:\n"); for (Experiment e : design.getExperiments()) { final String expId = e.getId(); for (Map.Entry<String, String> m : e.getMetadata().entrySet()) { sb.append('\t'); sb.append("Exp."); sb.append(expId); sb.append("."); sb.append(m.getKey()); sb.append('='); sb.append(m.getValue()); sb.append('\n'); } sb.append('\n'); } sb.append('\n'); // // Print column names // sb.append("SampleId"); sb.append('\t'); sb.append("SampleNumber"); sb.append('\t'); sb.append("SampleName"); final List<String> sampleMDKeys = getAllSamplesMetadataKeys(design); // Print common column names for (String key : sampleMDKeys) { sb.append('\t'); sb.append(key); } // Print experiments column names for (Experiment experiment : design.getExperiments()) { final String prefix = "Exp." + experiment.getId() + "."; final List<String> experimentMDKeys = getExperimentSampleAllMetadataKeys(experiment); for (String key : experimentMDKeys) { sb.append('\t'); sb.append(prefix); sb.append(key); } } sb.append('\n'); // Print samples metadata for (Sample sample : design.getSamples()) { sb.append(sample.getId()); sb.append('\t'); sb.append(sample.getNumber()); sb.append('\t'); sb.append(sample.getName()); final SampleMetadata smd = sample.getMetadata(); for (String key : sampleMDKeys) { sb.append('\t'); if (smd.contains(key)) { sb.append(smd.get(key)); } } for (Experiment experiment : design.getExperiments()) { final ExperimentSampleMetadata expSampleMetadata = experiment.getExperimentSample(sample).getMetadata(); final List<String> experimentMDKeys = getExperimentSampleAllMetadataKeys(experiment); for (String key : experimentMDKeys) { sb.append('\t'); if (expSampleMetadata.contains(key)) { sb.append(expSampleMetadata.get(key)); } } } sb.append('\n'); } System.out.println(sb.toString()); } /** * Get all the sample metadata keys of the samples of a design. * @param design the design * @return a list with the sample metadata keys of the samples of a design */ public static List<String> getAllSamplesMetadataKeys(final Design design) { checkNotNull(design, "design argument cannot be null"); final List<String> result = new ArrayList<>(); final Set<String> keys = new HashSet<>(); for (Sample sample : design.getSamples()) { for (String key : sample.getMetadata().keySet()) { if (keys.contains(key)) { continue; } keys.add(key); result.add(key); } } return Collections.unmodifiableList(result); } /** * Get all the experiment metadata keys of the samples of a design. * @param experiment the experiment * @return a list with the experiment metadata keys of the samples of a design */ public static List<String> getExperimentSampleAllMetadataKeys( final Experiment experiment) { checkNotNull(experiment, "design argument cannot be null"); final List<String> result = new ArrayList<>(); final Set<String> keys = new HashSet<>(); for (ExperimentSample sample : experiment.getExperimentSamples()) { for (String key : sample.getMetadata().keySet()) { if (keys.contains(key)) { continue; } keys.add(key); result.add(key); } } return Collections.unmodifiableList(result); } // // Constructor // /** * Private constructor. */ private DesignUtils() { } /** * Check if there are duplicate samples in the design. * @param design Design to test * @return if there are no duplicate */ public static boolean checkSamples(final Design design) { final Set<String> samplesSources = new HashSet<>(); for (Sample s : design.getSamples()) { for (String fileSource : s.getMetadata().getReads()) { if (samplesSources.contains(fileSource)) { return false; } samplesSources.add(fileSource); } } return true; } /** * Check if there are duplicate samples in the design. * @param design Design to test * @return if there are no duplicate * @throws EoulsanException if a source is a duplicate */ private static boolean checkSamplesWithException(final Design design) throws EoulsanException { final Set<String> samplesSources = new HashSet<>(); for (Sample s : design.getSamples()) { for (String fileSource : s.getMetadata().getReads()) { if (samplesSources.contains(fileSource)) { throw new EoulsanException( "Error: The design contains one or more duplicate sample sources: " + fileSource + " (sample " + s.getId() + ")"); } samplesSources.add(fileSource); } } return true; } /** * Check if there is more than one genome in the design * @param design Design to test * @return true if there is more than one genome in the genome */ public static boolean checkGenomes(final Design design) { return design.getMetadata().containsGenomeFile(); } /** * Check if there is more than one annotation in the design * @param design Design to test * @return true if there is more than one annotation in the genome */ public static boolean checkAnnotations(final Design design) { return design.getMetadata().containsGffFile(); } /** * Read and Check design * @param is InputStream for the design * @return a Design object * @throws EoulsanException if an error occurs while reading the design */ public static Design readAndCheckDesign(final InputStream is) throws EoulsanException { try { final DesignReader dr = new DefaultDesignReader(is); final Design design = dr.read(); DesignUtils.checkSamplesWithException(design); if (!DesignUtils.checkGenomes(design)) { throw new EoulsanException( "Warning: The design contains more than one genome file."); } if (!DesignUtils.checkAnnotations(design)) { throw new EoulsanException( "Warning: The design contains more than one annotation file."); } return design; } catch (IOException e) { throw new EoulsanException(e); } } /** * Remove optional description fields and obfuscate condition field. * @param design design object to obfuscate * @param removeReplicateInformation if replicate information must be removed */ public static void obfuscate(final Design design, final boolean removeReplicateInformation) { if (design == null) { return; } removeSampleMedataIfExists(design, SampleMetadata.COMMENT_KEY); removeSampleMedataIfExists(design, SampleMetadata.DATE_KEY); removeSampleMedataIfExists(design, SampleMetadata.OPERATOR_KEY); if (removeReplicateInformation) { removeExperimentSampleMedataIfExists(design, ExperimentSampleMetadata.CONDITION_KEY); removeExperimentSampleMedataIfExists(design, ExperimentSampleMetadata.REP_TECH_GROUP_KEY); removeExperimentSampleMedataIfExists(design, ExperimentSampleMetadata.REFERENCE_KEY); } final Map<Experiment, Integer> mapExperiment = new HashMap<>(); final Map<String, Integer> mapCondition = new HashMap<>(); final Map<String, Integer> mapRepTechGroup = new HashMap<>(); int countExperiment = 0; int countCondition = 0; int countRepTechGroup = 0; for (Experiment exp : design.getExperiments()) { if (!mapExperiment.containsKey(exp)) { mapExperiment.put(exp, ++countExperiment); } exp.setName("e" + mapExperiment.get(exp)); for (ExperimentSample es : exp.getExperimentSamples()) { ExperimentSampleMetadata esmd = es.getMetadata(); // Obfuscate Condition field if (esmd.containsCondition()) { final String condition = esmd.getCondition(); if (!mapCondition.containsKey(condition)) { mapCondition.put(condition, ++countCondition); } esmd.setCondition("c" + mapCondition.get(condition)); } // Obfuscate RepTechGroup field if (esmd.containsRepTechGroup()) { final String rtg = esmd.getRepTechGroup(); if (!mapRepTechGroup.containsKey(rtg)) { mapRepTechGroup.put(rtg, ++countRepTechGroup); } esmd.setRepTechGroup("g" + mapRepTechGroup.get(rtg)); } } } for (Sample s : design.getSamples()) { final String newSampleName = "s" + s.getId(); if (!newSampleName.equals(s.getName())) { s.setName(newSampleName); } } } private static void removeSampleMedataIfExists(final Design design, final String fieldName) { if (design == null || fieldName == null) { return; } for (Sample sample : design.getSamples()) { SampleMetadata smd = sample.getMetadata(); if (smd.contains(fieldName)) { smd.remove(fieldName); } } } private static void removeExperimentSampleMedataIfExists(final Design design, final String fieldName) { if (design == null || fieldName == null) { return; } for (Experiment experiment : design.getExperiments()) { for (ExperimentSample expSample : experiment.getExperimentSamples()) { ExperimentSampleMetadata esmd = expSample.getMetadata(); if (esmd.contains(fieldName)) { esmd.remove(fieldName); } } } } /** * Replace the local paths in the design by paths to symbolic links in a * directory. * @param design Design object to modify * @param symlinksDir path to the directory where create symbolic links * @throws IOException if an error occurs while creating symbolic links of if * a path the design file does not exists */ public static void replaceLocalPathBySymlinks(final Design design, final DataFile symlinksDir) throws IOException { if (design == null) { return; } final DataFormatRegistry registry = DataFormatRegistry.getInstance(); // // Design metadata // final List<String> designKeysToModify = new ArrayList<>(); for (String field : design.getMetadata().keySet()) { if (registry.getDataFormatForDesignMetadata(field) != null) { designKeysToModify.add(field); } } final DesignMetadata dmd = design.getMetadata(); for (final String field : designKeysToModify) { dmd.set(field, replaceLocalPathBySymlinks(dmd.getAsList(field), symlinksDir)); } // // Sample metadata // final Set<String> sampleKeysToModify = new HashSet<>(); for (final Sample s : design.getSamples()) { for (String field : s.getMetadata().keySet()) { if (registry.getDataFormatForSampleMetadata(field) != null) { sampleKeysToModify.add(field); } } } for (final Sample s : design.getSamples()) { final SampleMetadata smd = s.getMetadata(); for (final String field : sampleKeysToModify) { smd.set(field, replaceLocalPathBySymlinks(smd.getAsList(field), symlinksDir)); } } } private static List<String> replaceLocalPathBySymlinks(List<String> values, final DataFile symlinksDir) throws IOException { final List<String> result = new ArrayList<>(); for (String inputPath : values) { final DataFile inFile = new DataFile(inputPath); if (inFile.isLocalFile()) { final DataFile outFile = new DataFile(symlinksDir, inFile.getName()); if (!inFile.exists()) { throw new IOException("File not exists: " + inFile); } if (outFile.exists()) { throw new IOException( "The symlink to create, already exists: " + outFile); } try { inFile.symlink(outFile); } catch (IOException e) { throw new IOException("Cannot create symlink: " + outFile, e); } result.add(inFile.getName()); } else { result.add(inputPath); } } return result; } /** * Get the Condition metadata value for an experimentSample. First look in * @param experiment the experiment * @param sample the sample * @return the Condition value */ public static String getCondition(final Experiment experiment, final Sample sample) { checkNotNull(experiment, "experiment argument cannot be null"); checkNotNull(sample, "sample argument cannot be null"); final ExperimentSample es = experiment.getExperimentSample(sample); return getCondition(es); } /** * Get the Condition metadata value for an experimentSample. First look in * @param experimentSample the experiment sample * @return the Condition value */ public static String getCondition(final ExperimentSample experimentSample) { checkNotNull(experimentSample, "experimentSample argument cannot be null"); final ExperimentSampleMetadata esm = experimentSample.getMetadata(); if (esm.containsCondition()) { return esm.getCondition(); } final SampleMetadata sm = experimentSample.getSample().getMetadata(); final String result = sm.getCondition(); return result == null ? null : result.trim(); } /** * Get the RepTechGroup metadata value for an experimentSample. First look in * @param experiment the experiment * @param sample the sample * @return the Condition value */ public static String getRepTechGroup(final Experiment experiment, final Sample sample) { checkNotNull(experiment, "experiment argument cannot be null"); checkNotNull(sample, "sample argument cannot be null"); final ExperimentSample es = experiment.getExperimentSample(sample); return getRepTechGroup(es); } /** * Get the Condition metadata value for an experimentSample. First look in * @param experimentSample the experiment sample * @return the Condition value */ public static String getRepTechGroup( final ExperimentSample experimentSample) { checkNotNull(experimentSample, "experimentSample argument cannot be null"); final ExperimentSampleMetadata esm = experimentSample.getMetadata(); if (esm.containsRepTechGroup()) { return esm.getRepTechGroup(); } final SampleMetadata sm = experimentSample.getSample().getMetadata(); final String result = sm.getRepTechGroup(); return result == null ? null : result.trim(); } /** * Test if an experiement is skipped. * @param experiment the experiment * @return true if the experiment must be skipped */ public static boolean isSkipped(final Experiment experiment) { checkNotNull(experiment, "experiment argument cannot be null"); final ExperimentMetadata emd = experiment.getMetadata(); return emd.containsSkip() && emd.isSkip(); } /** * Test if an experiment contains reference fields * @return true if an experiment contains reference fields */ public static boolean containsReferenceField(final Experiment experiment) { checkNotNull(experiment, "experiment argument cannot be null"); for (ExperimentSample es : experiment.getExperimentSamples()) { final ExperimentSampleMetadata esmd = es.getMetadata(); if (esmd.containsReference()) { return true; } final SampleMetadata smd = es.getSample().getMetadata(); if (smd.containsReference()) { return true; } } return false; } /** * Get the reference of a sample. * @param experiment the experiment * @param sample the sample * @return the reference of a sample */ public static String getReference(final Experiment experiment, final Sample sample) { checkNotNull(experiment, "experiment argument cannot be null"); checkNotNull(sample, "sample argument cannot be null"); final ExperimentSample es = experiment.getExperimentSample(sample); return getReference(es); } /** * Get the reference of a sample. * @param experimentSample the experiment sample * @return the reference of a sample */ public static String getReference(final ExperimentSample experimentSample) { checkNotNull(experimentSample, "experimentSample argument cannot be null"); final ExperimentSampleMetadata esmd = experimentSample.getMetadata(); if (esmd.containsReference()) { return esmd.getReference(); } final SampleMetadata smd = experimentSample.getSample().getMetadata(); if (smd.containsReference()) { return smd.getReference(); } return null; } /** * Convert a reference value to an integer. * @param value the reference value * @return an integer */ public static int referenceValueToInt(final String value, final String experiementReference) { if (value == null) { return 0; } final String s = value.trim(); if (s.equals(experiementReference)) { return 1; } switch (s.toLowerCase()) { case "t": case "true": case "y": case "yes": return 1; default: try { return Integer.parseInt(s); } catch (NumberFormatException e) { return 0; } } } }