/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.modules.diffana;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import fr.ens.biologie.genomique.eoulsan.EoulsanException;
import fr.ens.biologie.genomique.eoulsan.core.TaskContext;
import fr.ens.biologie.genomique.eoulsan.design.Design;
import fr.ens.biologie.genomique.eoulsan.design.DesignUtils;
import fr.ens.biologie.genomique.eoulsan.design.Experiment;
import fr.ens.biologie.genomique.eoulsan.design.ExperimentSample;
import fr.ens.biologie.genomique.eoulsan.design.Sample;
import fr.ens.biologie.genomique.eoulsan.util.r.RExecutor;
/**
* This class create and launch a R script to compute differential analysis.
* @since 1.0
* @author Laurent Jourdren
* @author Vivien Deshaies
*/
public class DiffAna extends Normalization {
private static final String DISPERSION_ESTIMATION =
"/DESeq1/dispersionEstimation.Rnw";
private static final String ANADIFF_WITH_REFERENCE =
"/DESeq1/anadiffWithReference.Rnw";
private static final String ANADIFF_WITHOUT_REFERENCE =
"/DESeq1/anadiffWithoutReference.Rnw";
// dispersion estimation parameters
private DispersionMethod dispEstMethod;
private DispersionFitType dispEstFitType;
private DispersionSharingMode dispEstSharingMode;
//
// enums
//
/**
* Dispersion estimation method enum for DESeq differential analysis
*/
public enum DispersionMethod {
POOLED("pooled"), PER_CONDITION("per-condition"), BLIND("blind");
private String name;
/**
* Get the dispersion estimation method
* @return a string with the dispersion estimation method
*/
public String getName() {
return this.name;
}
/**
* Get the Dispersion estimation method form its name
* @param name dispersion estimation method name
* @return a DispersionMethod or null if no DispersionMethod found for the
* name
*/
public static DispersionMethod getDispEstMethodFromName(final String name) {
if (name == null) {
return null;
}
final String lowerName = name.trim().toLowerCase();
for (DispersionMethod dem : DispersionMethod.values()) {
if (dem.getName().toLowerCase().equals(lowerName)) {
return dem;
}
}
return null;
}
/**
* Constructor
* @param method dispersion estimation method
*/
DispersionMethod(final String method) {
this.name = method;
}
}
/**
* Dispersion estimation sharingMode enum for DESeq differential analysis
*/
public enum DispersionSharingMode {
FIT_ONLY("fit-only"), MAXIMUM("maximum"), GENE_EST_ONLY("gene-est-only");
private String name;
/**
* Get the dispersion estimation sharingMode name
* @return a string with the dispersion estimation sharingMode name
*/
public String getName() {
return this.name;
}
/**
* Get the Dispersion estimation sharing mode form its name
* @param name dispersion estimation sharing mode name
* @return a DispersionSharingMode or null if no DispersionSharingMode found
* for the name
*/
public static DispersionSharingMode getDispEstSharingModeFromName(
final String name) {
if (name == null) {
return null;
}
final String lowerName = name.trim().toLowerCase();
for (DispersionSharingMode desm : DispersionSharingMode.values()) {
if (desm.getName().toLowerCase().equals(lowerName)) {
return desm;
}
}
return null;
}
/**
* Constructor
* @param name dispersion estimation sharingMode name
*/
DispersionSharingMode(final String name) {
this.name = name;
}
}
/**
* Dispersion estimation fitType enum for DESeq differential analysis
*/
public enum DispersionFitType {
PARAMETRIC("parametric"), LOCAL("local");
private String name;
/**
* Get the dispersion estimation fitType name
* @return a string with the dispersion estimation fitType name
*/
public String getName() {
return this.name;
}
/**
* Get the Dispersion estimation fit type form its name
* @param name dispersion estimation fit type name
* @return a DispersionFitType or null if no DispersionFitType found for the
* name
*/
public static DispersionFitType getDispEstFitTypeFromName(
final String name) {
if (name == null) {
return null;
}
final String lowerName = name.trim().toLowerCase();
for (DispersionFitType deft : DispersionFitType.values()) {
if (deft.getName().toLowerCase().equals(lowerName)) {
return deft;
}
}
return null;
}
/**
* Constructor
* @param name dispersion estimation fitType name
*/
DispersionFitType(final String name) {
this.name = name;
}
}
//
// Protected methods
//
@Override
protected String generateScript(final Experiment experiment,
final TaskContext context) throws EoulsanException {
final String comparison = experiment.getMetadata().getComparisons();
// Check if user use the comparison property of the experiment
if (comparison != null && !comparison.trim().isEmpty()) {
throw new EoulsanException(
"The diffana step do not actually handle the comparison property of an experiment");
}
final Map<String, List<Integer>> conditionsMap = new HashMap<>();
final List<Integer> rSampleIds = new ArrayList<>();
final List<String> rSampleNames = new ArrayList<>();
final List<String> rCondNames = new ArrayList<>();
final List<String> rRepTechGroup = new ArrayList<>();
int i = 0;
// Get samples ids, conditions names/indexes and repTechGoups
for (Sample s : experiment.getSamples()) {
final String condition = DesignUtils.getCondition(experiment, s);
if (condition == null) {
throw new EoulsanException("No condition field found in design file.");
}
if ("".equals(condition)) {
throw new EoulsanException("No value for condition in sample: "
+ s.getName() + " (" + s.getId() + ")");
}
final String repTechGroup = DesignUtils.getRepTechGroup(experiment, s);
if (repTechGroup != null && !"".equals(repTechGroup)) {
rRepTechGroup.add(repTechGroup);
}
final List<Integer> index;
if (!conditionsMap.containsKey(condition)) {
index = new ArrayList<>();
conditionsMap.put(condition, index);
} else {
index = conditionsMap.get(condition);
}
index.add(i);
rSampleIds.add(s.getNumber());
rSampleNames.add(s.getName());
rCondNames.add(condition);
i++;
}
checkRepTechGroupCoherence(rRepTechGroup, rCondNames);
// Create Rnw script stringbuilder with preamble
String pdfTitle =
escapeUnderScore(experiment.getName()) + " differential analysis";
String filePrefix = "diffana_" + escapeUnderScore(experiment.getName());
final StringBuilder sb =
generateRnwpreamble(experiment.getSamples(), pdfTitle, filePrefix);
/*
* Replace "na" values of repTechGroup by unique sample ids to avoid pooling
* problem while executing R script
*/
replaceRtgNA(rRepTechGroup, rSampleNames);
// Add reference if there is one
writeReferenceField(experiment, sb);
// Add sampleNames vector
generateSampleNamePart(rSampleNames, sb);
// Add SampleIds vector
generateSampleIdsPart(rSampleIds, sb);
// Add file names vector
generateExpressionFileNamesPart(sb);
// Add repTechGroupVector
generateRepTechGroupPart(rRepTechGroup, sb);
// Add condition to R script
generateConditionPart(rCondNames, sb);
// Add projectPath, outPath and projectName
sb.append("# projectPath : path of count files directory\n");
sb.append("projectPath <- \"\"\n");
sb.append("# outPath path of the outputs\n");
sb.append("outPath <- \"./\"\n");
sb.append("projectName <- \"");
sb.append(experiment.getName());
sb.append("\"\n@\n\n");
sb.append(readStaticScript(TARGET_CREATION));
sb.append("\\section{Analysis}\n\n");
sb.append("<<beginAnalysis>>=\n");
// Add delete unexpressed gene call
sb.append("target$counts <- deleteUnexpressedGene(target$counts)\n");
if (isTechnicalReplicates(rRepTechGroup)) {
sb.append("target <- poolTechRep(target)\n\n");
}
sb.append("target <- sortTarget(target)\n");
sb.append("countDataSet <- normDESeq(target$counts, target$condition)\n");
sb.append("@\n");
// generate dispersion estimation part
String dispersionEstimation = readStaticScript(DISPERSION_ESTIMATION);
if (!isBiologicalReplicates(conditionsMap, rCondNames, rRepTechGroup)) {
if (!(this.dispEstMethod == DispersionMethod.BLIND)
|| !(this.dispEstSharingMode == DispersionSharingMode.FIT_ONLY)) {
throw new EoulsanException(
"There is no replicates in this experiment, you have to use "
+ "disp_est_method=blind and disp_est_sharingMode=fit-only in "
+ "diffana parameters");
}
}
dispersionEstimation =
dispersionEstimation.replace("${METHOD}", this.dispEstMethod.getName());
dispersionEstimation = dispersionEstimation.replace("${SHARINGMODE}",
this.dispEstSharingMode.getName());
dispersionEstimation = dispersionEstimation.replace("${FITTYPE}",
this.dispEstFitType.getName());
// Add dispersion estimation part to stringbuilder
sb.append(dispersionEstimation);
// Add plot dispersion
if (this.dispEstMethod.equals(DispersionMethod.PER_CONDITION)) {
List<String> passedConditionName = new ArrayList<>();
for (String cond : rCondNames) {
if (passedConditionName.indexOf(cond) == -1) {
sb.append("<<dispersionPlot_");
sb.append(cond);
sb.append(", fig=TRUE>>=\n");
sb.append("fitInfo <- fitInfo(countDataSet, name = \"");
sb.append(cond);
sb.append("\")\n");
sb.append("plotDispEsts(countDataSet, fitInfo, \"");
sb.append(cond);
sb.append("\")\n");
sb.append("@\n");
passedConditionName.add(cond);
}
}
} else {
sb.append("<<dispersionPlot, fig=TRUE>>=\n");
sb.append("fitInfo <- fitInfo(countDataSet)\n");
sb.append("plotDispEsts(countDataSet, fitInfo)\n");
sb.append("@\n");
}
String anadiffPart = "";
// check if there is a reference
if (isReference(experiment)) {
anadiffPart = readStaticScript(ANADIFF_WITH_REFERENCE);
} else {
anadiffPart = readStaticScript(ANADIFF_WITHOUT_REFERENCE);
}
anadiffPart =
anadiffPart.replace("${METHOD}", this.dispEstMethod.getName());
sb.append(anadiffPart);
// end document
sb.append("\\end{document}");
return sb.toString();
}
//
// Private methods
//
/**
* Determine if there is biological replicates in an experiment
* @param conditionsMap map of the conditions
* @param rCondNames r condition names
* @param rRepTechGroup replicate tech group
* @return a boolean
*/
private boolean isBiologicalReplicates(
final Map<String, List<Integer>> conditionsMap,
final List<String> rCondNames, final List<String> rRepTechGroup) {
for (String condition : rCondNames) {
List<Integer> condPos = conditionsMap.get(condition);
for (int i = 0; i < condPos.size() - 1; i++) {
int pos1 = condPos.get(i);
int pos2 = condPos.get(i + 1);
if (!rRepTechGroup.get(pos1).equals(rRepTechGroup.get(pos2))) {
return true;
}
}
}
return false;
}
/**
* Test if there is reference in an experiment
* @param experiment the experiment
* @return boolean isRef
*/
private boolean isReference(final Experiment experiment) {
if (experiment == null || experiment.getSamples().isEmpty()) {
return false;
}
if (DesignUtils.containsReferenceField(experiment)) {
// Check if one ore more value of the reference field enable a reference
for (ExperimentSample es : experiment.getExperimentSamples()) {
// Search the reference value is all the possible fields
final String ref = DesignUtils.getReference(es);
if (DesignUtils.referenceValueToInt(ref, null) == 1) {
return true;
}
}
return false;
}
// Get the experiment reference
final String refExp = experiment.getMetadata().getReference();
// Check if the sample condition equals to the experiement condition
if (refExp != null) {
for (ExperimentSample es : experiment.getExperimentSamples()) {
final String condition = DesignUtils.getCondition(es);
if (refExp.equals(condition)) {
return true;
}
}
}
return false;
}
/**
* Add the reference to R script if there is one
* @param experiment the experiment
* @param sb the StringBuilder to append
*/
private void writeReferenceField(final Experiment experiment,
final StringBuilder sb) throws EoulsanException {
// Get experiment reference if exists
final String refExp = experiment.getMetadata().getReference();
String refValue = null;
for (ExperimentSample es : experiment.getExperimentSamples()) {
final int ref =
DesignUtils.referenceValueToInt(DesignUtils.getReference(es), refExp);
switch (ref) {
case -1:
throw new EoulsanException(
"Reference value lower than 0 and not handled by the Diffana step (sample: "
+ es.getSample().getId() + ")");
case 0:
break;
case 1:
String newRefValue = DesignUtils.getCondition(es);
if (newRefValue != null) {
newRefValue = newRefValue.trim();
if (refValue != null && !refValue.equals(newRefValue)) {
throw new EoulsanException("Found a reference value ("
+ newRefValue + ") that is not the current reference value ("
+ refValue + ") in the Diffana step (sample: "
+ es.getSample().getId() + ")");
}
refValue = newRefValue;
}
break;
default:
throw new EoulsanException(
"Reference value greater than 1 and not handled by the Diffana step (sample: "
+ es.getSample().getId() + ")");
}
}
// Add reference to R script if found
if (refValue != null) {
sb.append("ref <- \"");
sb.append(refValue);
sb.append("\"\n\n");
}
}
//
// Constructor
//
/**
* Public constructor.
* @param executor executor to use to execute the differential analysis
* @param design The design object
* @param dispEstMethod dispersion estimation method
* @param dispEstSharingMode dispersion estimation sharing mode
* @param dispEstFitType dispersion estimation fit type
* @throws EoulsanException if an error occurs if connection to RServe server
* cannot be established
*/
public DiffAna(final RExecutor executor, final Design design,
final DispersionMethod dispEstMethod,
final DispersionSharingMode dispEstSharingMode,
final DispersionFitType dispEstFitType) throws EoulsanException {
super(executor, design);
if (dispEstMethod == null
|| dispEstFitType == null || dispEstSharingMode == null) {
throw new NullPointerException(
"dispersion estimation fit type or method or sharing mode is null");
} else {
this.dispEstMethod = dispEstMethod;
this.dispEstFitType = dispEstFitType;
this.dispEstSharingMode = dispEstSharingMode;
}
}
}