/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.modules.diffana;
import static fr.ens.biologie.genomique.eoulsan.core.ParallelizationMode.OWN_PARALLELIZATION;
import static fr.ens.biologie.genomique.eoulsan.data.DataFormats.ADDITIONAL_ANNOTATION_TSV;
import static fr.ens.biologie.genomique.eoulsan.data.DataFormats.ANNOTATED_EXPRESSION_RESULTS_ODS;
import static fr.ens.biologie.genomique.eoulsan.data.DataFormats.ANNOTATED_EXPRESSION_RESULTS_TSV;
import static fr.ens.biologie.genomique.eoulsan.data.DataFormats.ANNOTATED_EXPRESSION_RESULTS_XLSX;
import static fr.ens.biologie.genomique.eoulsan.translators.TranslatorUtils.loadTranslator;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.FileSystems;
import java.nio.file.PathMatcher;
import java.util.*;

import com.google.common.base.Splitter;

import fr.ens.biologie.genomique.eoulsan.EoulsanException;
import fr.ens.biologie.genomique.eoulsan.Globals;
import fr.ens.biologie.genomique.eoulsan.annotations.LocalOnly;
import fr.ens.biologie.genomique.eoulsan.annotations.RequiresAllPreviousSteps;
import fr.ens.biologie.genomique.eoulsan.core.InputPorts;
import fr.ens.biologie.genomique.eoulsan.core.InputPortsBuilder;
import fr.ens.biologie.genomique.eoulsan.core.Modules;
import fr.ens.biologie.genomique.eoulsan.core.OutputPorts;
import fr.ens.biologie.genomique.eoulsan.core.OutputPortsBuilder;
import fr.ens.biologie.genomique.eoulsan.core.ParallelizationMode;
import fr.ens.biologie.genomique.eoulsan.core.Parameter;
import fr.ens.biologie.genomique.eoulsan.core.StepConfigurationContext;
import fr.ens.biologie.genomique.eoulsan.core.TaskContext;
import fr.ens.biologie.genomique.eoulsan.core.TaskResult;
import fr.ens.biologie.genomique.eoulsan.core.TaskStatus;
import fr.ens.biologie.genomique.eoulsan.core.Version;
import fr.ens.biologie.genomique.eoulsan.data.Data;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFormat;
import fr.ens.biologie.genomique.eoulsan.modules.AbstractModule;
import fr.ens.biologie.genomique.eoulsan.translators.Translator;
import fr.ens.biologie.genomique.eoulsan.translators.TranslatorUtils;
import fr.ens.biologie.genomique.eoulsan.translators.io.ODSTranslatorOutputFormat;
import fr.ens.biologie.genomique.eoulsan.translators.io.TSVTranslatorOutputFormat;
import fr.ens.biologie.genomique.eoulsan.translators.io.TranslatorOutputFormat;
import fr.ens.biologie.genomique.eoulsan.translators.io.XLSXTranslatorOutputFormat;
import fr.ens.biologie.genomique.eoulsan.util.StringUtils;

/**
 * This class defines a module that creates annotated expression files in TSV,
 * ODS or XLSX format.
 * <p>
 * The module looks for files whose name matches a glob pattern in the output
 * directory of the workflow (and in the step output sub-directories found
 * there) and writes, for each selected file and each requested output format,
 * an annotated copy in the output directory of the step.
 * @since 2.0
 * @author Laurent Jourdren
 */
@LocalOnly
@RequiresAllPreviousSteps
public class DiffanaResultsAnnotationModule extends AbstractModule {

  /** Name of the module. */
  public static final String MODULE_NAME = "diffanaresultsannotation";

  /** Output format used when no format is requested in the parameters. */
  private static final DataFormat DEFAULT_FORMAT =
      ANNOTATED_EXPRESSION_RESULTS_TSV;

  /** Glob pattern used to select input files when "files" is not set. */
  private static final String DEFAULT_FILE_INPUT_GLOB_PATTERN =
      "{diffana_*.tsv,deseq2_*.tsv}";

  /** Requested output formats, indexed by their name ("tsv", "ods", "xlsx"). */
  private final Map<String, DataFormat> outputFormats = new HashMap<>();

  /** Matcher built in configure() from the glob pattern of input files. */
  private PathMatcher pathMatcher;

  /** Prefix prepended to the name of every output file. */
  private String outputPrefix;

  /** Whether the additional annotation file must be read from an input port. */
  private boolean useAdditionalAnnotationFile = true;

  //
  // Module methods
  //

  @Override
  public String getName() {

    return MODULE_NAME;
  }

  @Override
  public String getDescription() {

    return "This module add annotation to diffana files.";
  }

  @Override
  public Version getVersion() {

    return Globals.APP_VERSION;
  }

  @Override
  public InputPorts getInputPorts() {

    // The only input port is the one of the additional annotation; it is
    // declared only when this annotation is used
    if (this.useAdditionalAnnotationFile) {
      return InputPortsBuilder.singleInputPort(ADDITIONAL_ANNOTATION_TSV);
    }

    return InputPortsBuilder.noInputPort();
  }

  @Override
  public OutputPorts getOutputPorts() {

    return OutputPortsBuilder.noOutputPort();
  }

  @Override
  public ParallelizationMode getParallelizationMode() {

    final Collection<DataFormat> formats = this.outputFormats.values();

    // XLSX and ODS file creation requires a lot of memory, so multithreading
    // is disabled to avoid running out of memory
    if (formats.contains(ANNOTATED_EXPRESSION_RESULTS_ODS)
        || formats.contains(ANNOTATED_EXPRESSION_RESULTS_XLSX)) {
      return OWN_PARALLELIZATION;
    }

    // TSV creation can be multithreaded
    return ParallelizationMode.STANDARD;
  }

  /**
   * Configure the module from the step parameters.
   * <p>
   * After this method returns, the output prefix, the output formats (TSV if
   * none was requested) and the matcher of input file names are set.
   * @param context the configuration context
   * @param stepParameters the parameters of the step
   * @throws EoulsanException if an output format is unknown, or if one of the
   *           parameter checks of {@link Modules} fails
   */
  @Override
  public void configure(final StepConfigurationContext context,
      final Set<Parameter> stepParameters) throws EoulsanException {

    String pattern = DEFAULT_FILE_INPUT_GLOB_PATTERN;

    // By default the output files are prefixed by the identifier of the step
    this.outputPrefix = context.getCurrentStep().getId() + '_';

    for (final Parameter p : stepParameters) {

      switch (p.getName()) {

      case "annotationfile":
        Modules.removedParameter(context, p);
        break;

      case "use.additional.annotation.file":
        this.useAdditionalAnnotationFile = p.getBooleanValue();
        break;

      case "outputformat":
        Modules.renamedParameter(context, p, "output.format");
        // Intentional fall-through: when the call above returns normally,
        // the value of the old parameter name is handled like
        // "output.format"

      case "output.format":
        addOutputFormats(p.getValue());
        break;

      case "files":
        pattern = p.getStringValue();
        break;

      case "output.prefix":
        this.outputPrefix = p.getStringValue();
        break;

      default:
        // Unknown option
        Modules.unknownParameter(context, p);
        break;
      }
    }

    // Set the default format. The key is the default extension of the format
    // without its first character
    if (this.outputFormats.isEmpty()) {
      this.outputFormats.put(DEFAULT_FORMAT.getDefaultExtension().substring(1),
          DEFAULT_FORMAT);
    }

    // Set the PathMatcher
    this.pathMatcher =
        FileSystems.getDefault().getPathMatcher("glob:" + pattern);
  }

  /**
   * Annotate all the selected files in every requested output format.
   * <p>
   * A file name is processed only once, even if it has been found in several
   * directories. Any {@link IOException} (including the one raised when an
   * output file already exists) is converted to a task result.
   * @param context the task context
   * @param status the task status
   * @return the result of the task
   */
  @Override
  public TaskResult execute(final TaskContext context,
      final TaskStatus status) {

    // Get hypertext links file
    final DataFile linksFile =
        TranslatorUtils.getLinksFileFromSettings(context.getSettings());

    // Load translator
    final Translator translator;
    try {
      translator = createTranslator(context, linksFile);
    } catch (IOException e) {
      return status.createTaskResult(e);
    }

    // Description string
    final StringBuilder descriptionString = new StringBuilder();

    try {

      final DataFile outputDir = context.getStepOutputDirectory();

      context.getLogger()
          .info("Search files in directory: " + context.getOutputDirectory());
      context.getLogger().info("Output directory: " + outputDir);

      final List<DataFile> files =
          listCandidateFiles(context.getOutputDirectory());

      final Set<String> processedFilenames = new HashSet<>();

      for (DataFile inFile : files) {

        final String inName = inFile.getName();

        // Keep only the files that match the glob pattern
        if (!this.pathMatcher.matches(new File(inName).toPath())) {
          continue;
        }

        // Do not process the same file name twice: Set.add() returns false
        // when the name has already been seen
        if (!processedFilenames.add(inName)) {
          continue;
        }

        final String prefix =
            this.outputPrefix + StringUtils.filenameWithoutExtension(inName);

        // For each format
        for (DataFormat format : this.outputFormats.values()) {

          // The map only contains the TSV, ODS and XLSX formats, so the
          // extension can be taken from the format itself
          final DataFile outFile =
              new DataFile(outputDir, prefix + format.getDefaultExtension());

          checkIfFileExists(outFile, context);
          annotate(inFile, outFile, format, translator);

          descriptionString.append("Convert ");
          descriptionString.append(inFile);
          descriptionString.append(" to ");
          descriptionString.append(outFile);
          descriptionString.append("\n");
        }
      }

    } catch (IOException e) {
      return status.createTaskResult(e);
    }

    // Set the description of the context
    status.setDescription(descriptionString.toString());

    // Return the result
    return status.createTaskResult();
  }

  //
  // Other methods
  //

  /**
   * Register the output formats listed in a comma separated string. Blank
   * entries are ignored.
   * @param formatList the comma separated list of format names
   * @throws EoulsanException if a format name is not "tsv", "ods" or "xlsx"
   */
  private void addOutputFormats(final String formatList)
      throws EoulsanException {

    for (String format : Splitter.on(',').trimResults().omitEmptyStrings()
        .split(formatList)) {

      switch (format) {

      case "tsv":
        this.outputFormats.put(format, ANNOTATED_EXPRESSION_RESULTS_TSV);
        break;

      case "ods":
        this.outputFormats.put(format, ANNOTATED_EXPRESSION_RESULTS_ODS);
        break;

      case "xlsx":
        this.outputFormats.put(format, ANNOTATED_EXPRESSION_RESULTS_XLSX);
        break;

      default:
        throw new EoulsanException("Unknown output format: " + format);
      }
    }
  }

  /**
   * Load the translator, with or without the additional annotation file.
   * @param context the task context
   * @param linksFile the hypertext links file
   * @return the translator to use for the annotation
   * @throws IOException if an error occurs while loading the translator
   */
  private Translator createTranslator(final TaskContext context,
      final DataFile linksFile) throws IOException {

    if (!this.useAdditionalAnnotationFile) {
      // Create translator without additional annotation file
      return TranslatorUtils.loadTranslator(linksFile);
    }

    // The additional annotation is provided by the input port of the step
    final Data additionalAnnotationData =
        context.getInputData(ADDITIONAL_ANNOTATION_TSV);

    // Create translator with additional annotation file
    return loadTranslator(additionalAnnotationData.getDataFile(), linksFile);
  }

  /**
   * List the files that are candidates for the annotation: the regular files
   * of a directory and the content of its sub-directories whose name ends
   * with the step output directory suffix.
   * @param directory the directory to scan
   * @return a list of files, not yet filtered by the glob pattern
   * @throws IOException if an error occurs while listing a directory
   */
  private static List<DataFile> listCandidateFiles(final DataFile directory)
      throws IOException {

    final List<DataFile> result = new ArrayList<>();

    for (DataFile f : directory.list()) {

      if (!f.getMetaData().isDir()) {
        result.add(f);
      } else if (f.getName().endsWith(Globals.STEP_OUTPUT_DIRECTORY_SUFFIX)) {
        // Handle step output directory
        result.addAll(f.list());
      }
    }

    return result;
  }

  /**
   * Write the annotated version of a file.
   * <p>
   * The input is opened before the output is created, so no output file is
   * created when the input cannot be read. Both streams are released even if
   * the translation fails. If the translation already closed them, closing
   * them again has no effect (contract of {@link java.io.Closeable#close()}).
   * @param inFile the file to annotate
   * @param outFile the file to create
   * @param format the format of the file to create
   * @param translator the translator that provides the annotation
   * @throws IOException if an error occurs while reading or writing the data
   */
  private static void annotate(final DataFile inFile, final DataFile outFile,
      final DataFormat format, final Translator translator)
      throws IOException {

    try (InputStream in = inFile.open();
        OutputStream out = outFile.create()) {

      TranslatorUtils.addTranslatorFields(in, 0, translator,
          createOutputFormat(format, out));
    }
  }

  /**
   * Create the writer that matches an output format.
   * @param format the requested format
   * @param out the stream where the data must be written
   * @return an XLSX or ODS writer for these formats, a TSV writer otherwise
   * @throws IOException if an error occurs while creating the writer
   */
  private static TranslatorOutputFormat createOutputFormat(
      final DataFormat format, final OutputStream out) throws IOException {

    if (format == ANNOTATED_EXPRESSION_RESULTS_XLSX) {
      // XLSX output
      return new XLSXTranslatorOutputFormat(out);
    }

    if (format == ANNOTATED_EXPRESSION_RESULTS_ODS) {
      // ODS output
      return new ODSTranslatorOutputFormat(out);
    }

    // TSV output
    return new TSVTranslatorOutputFormat(out);
  }

  /**
   * Check that the output file does not already exist.
   * @param file the output file
   * @param context the step context
   * @throws IOException if the output file already exists
   */
  private static void checkIfFileExists(final DataFile file,
      final TaskContext context) throws IOException {

    if (file.exists()) {
      throw new IOException("Output file of the \""
          + context.getCurrentStep().getId() + "\" already exists: " + file);
    }
  }
}