/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.modules.diffana;
import static com.google.common.base.Preconditions.checkNotNull;
import static fr.ens.biologie.genomique.eoulsan.design.DesignUtils.getAllSamplesMetadataKeys;
import static fr.ens.biologie.genomique.eoulsan.design.DesignUtils.getExperimentSampleAllMetadataKeys;
import static fr.ens.biologie.genomique.eoulsan.design.DesignUtils.referenceValueToInt;
import static fr.ens.biologie.genomique.eoulsan.util.StringUtils.toCompactTime;
import static java.util.Arrays.asList;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import fr.ens.biologie.genomique.eoulsan.EoulsanException;
import fr.ens.biologie.genomique.eoulsan.core.Parameter;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.design.Design;
import fr.ens.biologie.genomique.eoulsan.design.DesignUtils;
import fr.ens.biologie.genomique.eoulsan.design.Experiment;
import fr.ens.biologie.genomique.eoulsan.design.ExperimentMetadata;
import fr.ens.biologie.genomique.eoulsan.design.ExperimentSample;
import fr.ens.biologie.genomique.eoulsan.design.ExperimentSampleMetadata;
import fr.ens.biologie.genomique.eoulsan.design.Sample;
import fr.ens.biologie.genomique.eoulsan.design.SampleMetadata;
import fr.ens.biologie.genomique.eoulsan.util.r.RExecutor;
/**
* This class contains methods to run the differential analysis module DESeq2.
* @author Xavier Bauquet
* @since 2.0
*/
public class DESeq2 {
private static final String CONDITION_COLUMN = "Condition";
private static final String REFERENCE_COLUMN_NAME = "Reference";
// R scripts path in JAR file
private static final String SCRIPTS_PATH_IN_JAR_FILE = "/DESeq2/";
private static final String NORM_DIFFANA_SCRIPT = "normDiffana.R";
private static final String BUILD_CONTRAST_SCRIPT = "buildContrast.R";
// Suffix for output files from DEseq2
private static final String DESEQ_DESIGN_FILE_SUFFIX = "-deseq2Design.txt";
private static final String COMPARISON_FILE_SUFFIX = "-comparisonFile.txt";
private static final String CONTRAST_FILE_SUFFIX = "-contrastFile.txt";
// Constants
private static final String SAMPLE_ID_FIELDNAME = "SampleId";
private static final String SAMPLE_NAME_FIELDNAME = "Name";
private static final String EXPRESSION_FILE_FIELDNAME = "expressionFile";
private static final String TAB_SEPARATOR = "\t";
private static final String NEWLINE = "\n";
// The experiment
private final Experiment experiment;
// List of expression files
final Map<String, File> sampleFiles;
// The design
private final Design design;
// Workflow options for DEseq2
private final boolean normFig;
private final boolean diffanaFig;
private final boolean normDiffana;
private final boolean diffana;
private final DESeq2.SizeFactorsType sizeFactorsType;
private final DESeq2.FitType fitType;
private final DESeq2.StatisticTest statisticTest;
// Design options for DEseq2
private final String model;
private final boolean contrast;
private final boolean buildContrast;
private final DataFile contrastFile;
private final boolean expHeader = true;
// Files and file names
private final RExecutor executor;
private final boolean saveRScripts;
private final String stepId;
// Temporary expression filenames
private Map<String, String> sampleFilenames = new HashMap<>();
//
// Enums
//
/***
* Enum for the sizeFactorsType option in DESeq2 related to the estimation of
* the size factor.
*/
public enum SizeFactorsType {
RATIO, ITERATE;
/**
* Get the size factors type to be used in DESeq2.
* @param parameter Eoulsan parameter
* @return the size factors type value
* @throws EoulsanException if the size factors type value is different from
* ratio or iterate
*/
public static SizeFactorsType get(final Parameter parameter)
throws EoulsanException {
checkNotNull(parameter, "parameter argument cannot be null");
final String lowerName = parameter.getLowerStringValue().trim();
for (SizeFactorsType dem : SizeFactorsType.values()) {
if (dem.name().toLowerCase().equals(lowerName)) {
return dem;
}
}
throw new EoulsanException("The value: "
+ parameter.getValue() + " is not an acceptable value for the "
+ parameter.getName() + " parameter.");
}
/**
* Convert the enum name into DESeq2 value.
* @return DESeq2 value
*/
public String toDESeq2Value() {
return this.name().toLowerCase();
}
}
/**
* Enum for the fitType option in DESeq2 related to the dispersion estimation.
*/
public enum FitType {
PARAMETRIC, LOCAL, MEAN;
/**
* Get the fit type to be used in DESeq2.
* @param name name of the enum
* @return the fit type value
* @throws EoulsanException if the fit type value is different from
* parametric, local or mean
*/
public static FitType get(final String name) throws EoulsanException {
checkNotNull(name, "fitType argument cannot be null");
final String lowerName = name.trim().toLowerCase();
for (FitType dem : FitType.values()) {
if (dem.name().toLowerCase().equals(lowerName)) {
return dem;
}
}
throw new EoulsanException("The value: "
+ name + " is not an acceptable value for the fitType option.");
}
/**
* Convert the enum name into DESeq2 value.
* @return DESeq2 value
*/
public String toDESeq2Value() {
return this.name().toLowerCase();
}
}
/**
* Enum for the statisticTest option in DESeq2 related to the statistic test
* to be used during the differential expression analysis
*/
public enum StatisticTest {
WALD("Wald"), LRT("LRT");
private String name;
public String toDESeq2Value() {
return name;
}
/**
* Get the statistic test to be used in DESeq2.
* @param name name of the enum
* @return the statistic test value
* @throws EoulsanException if the statistic test value is different from
* Wald or LRT
*/
public static StatisticTest get(final String name) throws EoulsanException {
checkNotNull(name, "statisticTest cargument annot be null");
final String lowerName = name.trim().toLowerCase();
for (StatisticTest dem : StatisticTest.values()) {
if (dem.toDESeq2Value().toLowerCase().equals(lowerName)) {
return dem;
}
}
throw new EoulsanException("The value: "
+ name + " is not an acceptable value for the statisticTest option.");
}
/**
* Constructor.
* @param method, dispersion estimation method
*/
StatisticTest(final String method) {
this.name = method;
}
}
/**
* Put sample files.
* @throws IOException if an error occurs while putting sample files
*/
private void putSampleFiles() throws IOException {
for (Sample sample : experiment.getSamples()) {
// Check if the expression file related to the sample exist
if (!this.sampleFiles.containsKey(sample.getId())) {
continue;
}
final String key = sample.getId();
final DataFile inputFile =
new DataFile(this.sampleFiles.get(key).getAbsolutePath());
final String outputFilename = "expression-" + key + ".tsv";
this.executor.putInputFile(inputFile, outputFilename);
this.sampleFilenames.put(key, outputFilename);
}
}
/**
* Generate DESeq2 design.
* @return the DESeq2 in a string
*/
private String generateDeseq2Design() {
final StringBuilder sb = new StringBuilder();
//
// Print column names
//
sb.append(SAMPLE_ID_FIELDNAME);
sb.append(TAB_SEPARATOR);
sb.append(SAMPLE_NAME_FIELDNAME);
sb.append(TAB_SEPARATOR);
sb.append(EXPRESSION_FILE_FIELDNAME);
// Get the common column names
final List<String> sampleMDKeys = getAllSamplesMetadataKeys(design);
// Get the experiment column names
final List<String> experimentMDKeys =
getExperimentSampleAllMetadataKeys(experiment);
// Get Experiment reference
final String experimentReference = experiment.getMetadata().getReference();
final boolean referenceColumn = experimentReference != null
|| sampleMDKeys.contains(SampleMetadata.REFERENCE_KEY)
|| experimentMDKeys.contains(ExperimentSampleMetadata.REFERENCE_KEY);
// Print common column names
for (String key : sampleMDKeys) {
if (!experimentMDKeys.contains(key)) {
// The reference column will be the last column
if (SampleMetadata.REFERENCE_KEY.equals(key)) {
continue;
}
sb.append(TAB_SEPARATOR);
sb.append(key);
}
}
// Print experiment column names
for (String key : experimentMDKeys) {
// The reference column will be the last column
if (ExperimentSampleMetadata.REFERENCE_KEY.equals(key)) {
continue;
}
sb.append(TAB_SEPARATOR);
sb.append(key);
}
// Add the column reference when the reference option is on the experiment
// metadata
if (referenceColumn) {
sb.append(TAB_SEPARATOR);
sb.append(REFERENCE_COLUMN_NAME);
}
sb.append(NEWLINE);
// Print sample metadata
for (Sample sample : experiment.getSamples()) {
// Check if the expression file related to the sample exist
if (!this.sampleFilenames.containsKey(sample.getId())) {
continue;
}
sb.append(sample.getId());
sb.append(TAB_SEPARATOR);
sb.append(sample.getName());
sb.append(TAB_SEPARATOR);
sb.append(this.sampleFilenames.get(sample.getId()));
final SampleMetadata smd = sample.getMetadata();
for (String key : sampleMDKeys) {
if (!experimentMDKeys.contains(key)) {
// The reference column will be the last column
if (SampleMetadata.REFERENCE_KEY.equals(key)) {
continue;
}
sb.append(TAB_SEPARATOR);
if (smd.contains(key)) {
sb.append(smd.get(key));
}
}
}
// print experiment sample metadata
final ExperimentSample es = experiment.getExperimentSample(sample);
final ExperimentSampleMetadata expSampleMetadata = es.getMetadata();
for (String key : experimentMDKeys) {
// The reference column will be the last column
if (ExperimentSampleMetadata.REFERENCE_KEY.equals(key)) {
continue;
}
sb.append(TAB_SEPARATOR);
if (expSampleMetadata.contains(key)) {
sb.append(expSampleMetadata.get(key));
}
}
// Add reference column
if (referenceColumn) {
sb.append(TAB_SEPARATOR);
sb.append(referenceValueToInt(DesignUtils.getReference(es),
experimentReference));
}
sb.append(NEWLINE);
}
return sb.toString();
}
/**
* Generate comparison file content.
* @return a String with the comparison file content
* @throws EoulsanException if the format of one of the comparison entries of
* the design file is invalid
*/
private String generateComparisonFileContent() throws EoulsanException {
final StringBuilder sb = new StringBuilder();
for (String c : experiment.getMetadata().getComparisons().split(";")) {
final String[] splitC = c.split(":");
if (splitC.length != 2) {
throw new EoulsanException("Invalid comparison entry format: " + c);
}
sb.append(splitC[0].trim());
sb.append(TAB_SEPARATOR);
sb.append(splitC[1].trim());
sb.append(NEWLINE);
}
return sb.toString();
}
/**
* Create the command line to run normDiffana.R.
* @return the command line to run normDiffana.R
*/
private String[] createNormDiffanaCommandLine(
final String deseq2DesignFileName, final String contrastFilename) {
final List<String> command = new ArrayList<>();
command.addAll(asList(booleanParameter(normFig), booleanParameter(diffana),
booleanParameter(diffanaFig)));
// Define contrast file
if (contrast) {
// add the default name of the contrast file if not an other is define
command.add(booleanParameter(contrast));
} else {
// add FALSE if the contrast parameter is at false
command.add(booleanParameter(false));
}
command.addAll(asList(deseq2DesignFileName, this.model,
this.experiment.getName(), booleanParameter(this.expHeader),
this.sizeFactorsType.toDESeq2Value(), this.fitType.toDESeq2Value(),
this.statisticTest.toDESeq2Value(), contrastFilename,
this.stepId + "_"));
return command.toArray(new String[command.size()]);
}
/**
* Transform boolean for DEseq2 command line.
* @param value boolean
* @return boolean for DEseq2 command line
*/
private static String booleanParameter(boolean value) {
return Boolean.valueOf(value).toString().toUpperCase();
}
/**
* Check experiment design.
* @throws EoulsanException if the experiment design is not correct
*/
private void checkExperimentDesign() throws EoulsanException {
final ExperimentMetadata emd = experiment.getMetadata();
if (emd.containsComparisons()) {
// Check if the comparison value is correct
for (String c : emd.getComparisons().split(";")) {
String[] splitC = c.split(":");
if (splitC.length != 2) {
throw new EoulsanException("Error in "
+ experiment.getName()
+ " experiment, comparison cannot have more than 1 value: " + c);
}
}
}
// Check if the column Condition is missing for the experiment
if (!getExperimentSampleAllMetadataKeys(experiment)
.contains(CONDITION_COLUMN)
&& !getAllSamplesMetadataKeys(design).contains(CONDITION_COLUMN)) {
throw new EoulsanException("Condition column missing for experiment: "
+ this.experiment.getName());
}
}
/**
* Method to run DESeq2.
* @param workflowOutputDir workflow output directory
* @throws IOException if writeDeseq2Design fails
* @throws EoulsanException if the comparisons value is not correct
*/
public void runDEseq2(final DataFile workflowOutputDir) throws IOException, EoulsanException {
final String prefix = this.stepId + "_" + this.experiment.getName();
// Define design filename
final String deseq2DesignFileName = prefix + DESEQ_DESIGN_FILE_SUFFIX;
// Define comparison filename
final String comparisonFileName = prefix + COMPARISON_FILE_SUFFIX;
// Define contrast filename
final String contrastFilename = prefix + CONTRAST_FILE_SUFFIX;
// Check experiment design
checkExperimentDesign();
// Open executor connection
this.executor.openConnection();
// Put Sample files
putSampleFiles();
// Write the deseq2 design
this.executor.writerFile(generateDeseq2Design(), deseq2DesignFileName);
// Copy contrast file
if (this.contrastFile != null) {
this.executor.putInputFile(this.contrastFile, contrastFilename);
}
// Build the contrast file
if (this.buildContrast) {
if (!this.experiment.getMetadata().containsComparisons()) {
throw new EoulsanException(
"No comparison defined to build the constrasts in experiment: "
+ this.experiment.getName());
}
// Write the comparison file from the Eoulsan design (experiment metadata)
this.executor.writerFile(generateComparisonFileContent(),
comparisonFileName);
// Read build contrast R script
final String buildContrastScript =
readFromJar(SCRIPTS_PATH_IN_JAR_FILE + BUILD_CONTRAST_SCRIPT);
// Set the description of the analysis
final String description = this.stepId
+ "_" + this.experiment.getName() + "-buildcontrasts-"
+ toCompactTime(System.currentTimeMillis());
// Run buildContrast.R
this.executor.executeRScript(buildContrastScript, false, null,
this.saveRScripts, description, workflowOutputDir, deseq2DesignFileName, this.model,
comparisonFileName, this.experiment.getName() + CONTRAST_FILE_SUFFIX,
this.stepId + "_");
}
// Run normalization and differential analysis
if (this.normDiffana) {
// Read build contrast R script
final String normDiffanaScript =
readFromJar(SCRIPTS_PATH_IN_JAR_FILE + NORM_DIFFANA_SCRIPT);
// Set the description of the analysis
final String description = this.stepId
+ "_" + this.experiment.getName() + "-normdiffana-"
+ toCompactTime(System.currentTimeMillis());
// TODO Do not handle custom contrast files with
// ExperimentMetadata.containsContrastFile()
// Run normDiffana.R
this.executor.executeRScript(normDiffanaScript, false, null,
this.saveRScripts, description, workflowOutputDir,
createNormDiffanaCommandLine(deseq2DesignFileName, contrastFilename));
}
// Remove input files
this.executor.removeInputFiles();
// Retrieve output files
this.executor.getOutputFiles();
// Close executor connection
this.executor.closeConnection();
}
//
// Static methods
//
/**
* Read a file from the Jar.
* @param filePathInJar, path to the file to copy from the Jar
* @return a String with the content of the file
* @throws IOException if reading fails
*/
private static String readFromJar(final String filePathInJar)
throws IOException {
final InputStream is = DESeq2.class.getResourceAsStream(filePathInJar);
final StringBuilder sb = new StringBuilder();
String line = null;
try (
BufferedReader reader = new BufferedReader(new InputStreamReader(is))) {
while ((line = reader.readLine()) != null) {
sb.append(line);
sb.append('\n');
}
}
return sb.toString();
}
//
// Construtors
//
/**
* Public constructor.
* @param stepId the step id
* @param design the Eoulsan design
* @param experiment the experiment
* @param sampleFiles the list of expression files
* @param normFig normFig DESeq2 option
* @param diffanaFig diffanaFig DESeq2 option
* @param normDiffana normDiffana DESeq2 option
* @param diffana diffana DESeq2 option
* @param sizeFactorsType sizeFactorsType DESeq2 option
* @param fitType fitType DESeq2 option
* @param statisticTest statisticTest DESeq2 option
*/
public DESeq2(final RExecutor executor, final String stepId,
final Design design, final Experiment experiment,
final Map<String, File> sampleFiles, final boolean normFig,
final boolean diffanaFig, final boolean normDiffana,
final boolean diffana, final SizeFactorsType sizeFactorsType,
final FitType fitType, final StatisticTest statisticTest,
boolean saveRScripts) {
checkNotNull(stepId, "stepId argument cannot be null");
checkNotNull(executor, "executor argument cannot be null");
checkNotNull(design, "design argument cannot be null");
checkNotNull(experiment, "experiment argument cannot be null");
checkNotNull(sampleFiles, "sampleFiles argument cannot be null");
checkNotNull(sizeFactorsType, "sizeFactorsType argument cannot be null");
checkNotNull(fitType, "fitType argument cannot be null");
checkNotNull(statisticTest, "statisticTest argument cannot be null");
this.stepId = stepId;
this.executor = executor;
this.saveRScripts = saveRScripts;
this.design = design;
this.experiment = experiment;
this.sampleFiles = sampleFiles;
ExperimentMetadata expMD = experiment.getMetadata();
// Get model option
if (expMD.containsModel()) {
this.model = expMD.getModel();
} else {
this.model = "~Condition";
}
// Get contrast option
if (expMD.containsContrast()) {
this.contrast = expMD.isContrast();
} else if (expMD.containsContrastFile()) {
this.contrast = true;
} else {
this.contrast = false;
}
// Get designFile option
if (expMD.containsContrastFile()) {
this.contrastFile = new DataFile(expMD.getContrastFile());
this.buildContrast = false;
} else {
this.contrastFile = null;
// Get buildContrast option
if (expMD.containsBuildContrast()) {
this.buildContrast = expMD.isBuildContrast();
} else {
this.buildContrast = false;
}
}
// Workflow options for DEseq2
this.normFig = normFig;
this.diffanaFig = diffanaFig;
this.normDiffana = normDiffana;
this.diffana = diffana;
this.sizeFactorsType = sizeFactorsType;
this.fitType = fitType;
this.statisticTest = statisticTest;
}
}