/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.modules;
import static fr.ens.biologie.genomique.eoulsan.core.InputPortsBuilder.DEFAULT_SINGLE_INPUT_PORT_NAME;
import static fr.ens.biologie.genomique.eoulsan.core.InputPortsBuilder.singleInputPort;
import static fr.ens.biologie.genomique.eoulsan.core.OutputPortsBuilder.DEFAULT_SINGLE_OUTPUT_PORT_NAME;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import fr.ens.biologie.genomique.eoulsan.EoulsanException;
import fr.ens.biologie.genomique.eoulsan.Globals;
import fr.ens.biologie.genomique.eoulsan.annotations.LocalOnly;
import fr.ens.biologie.genomique.eoulsan.annotations.NoLog;
import fr.ens.biologie.genomique.eoulsan.annotations.NoOutputDirectory;
import fr.ens.biologie.genomique.eoulsan.annotations.ReuseModuleInstance;
import fr.ens.biologie.genomique.eoulsan.core.DataUtils;
import fr.ens.biologie.genomique.eoulsan.core.FileNaming;
import fr.ens.biologie.genomique.eoulsan.core.InputPorts;
import fr.ens.biologie.genomique.eoulsan.core.Modules;
import fr.ens.biologie.genomique.eoulsan.core.OutputPorts;
import fr.ens.biologie.genomique.eoulsan.core.OutputPortsBuilder;
import fr.ens.biologie.genomique.eoulsan.core.Parameter;
import fr.ens.biologie.genomique.eoulsan.core.StepConfigurationContext;
import fr.ens.biologie.genomique.eoulsan.core.TaskContext;
import fr.ens.biologie.genomique.eoulsan.core.TaskResult;
import fr.ens.biologie.genomique.eoulsan.core.TaskStatus;
import fr.ens.biologie.genomique.eoulsan.core.Version;
import fr.ens.biologie.genomique.eoulsan.data.Data;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFiles;
import fr.ens.biologie.genomique.eoulsan.data.DataFormat;
import fr.ens.biologie.genomique.eoulsan.data.DataFormatRegistry;
import fr.ens.biologie.genomique.eoulsan.data.protocols.DataProtocol;
import fr.ens.biologie.genomique.eoulsan.data.protocols.StorageDataProtocol;
import fr.ens.biologie.genomique.eoulsan.io.CompressionType;
/**
* Copy input files of a format in another location or in different compression
* format.
* @author Laurent Jourdren
* @since 2.0
*/
@LocalOnly
@ReuseModuleInstance
@NoLog
@NoOutputDirectory
public class CopyInputDataModule extends AbstractModule {
public static final String MODULE_NAME = "_copyinputformat";
public static final String FORMAT_PARAMETER = "format";
public static final String OUTPUT_COMPRESSION_PARAMETER =
"output.compression";
public static final String OUTPUT_COMPRESSIONS_ALLOWED_PARAMETER =
"output.compressions.allowed";
private DataFormat format;
private CompressionType outputCompression;
private EnumSet<CompressionType> outputCompressionsAllowed =
EnumSet.allOf(CompressionType.class);
@Override
public String getName() {
return MODULE_NAME;
}
@Override
public Version getVersion() {
return Globals.APP_VERSION;
}
@Override
public InputPorts getInputPorts() {
return singleInputPort(this.format);
}
@Override
public OutputPorts getOutputPorts() {
return new OutputPortsBuilder().addPort(DEFAULT_SINGLE_OUTPUT_PORT_NAME,
this.format, this.outputCompression).create();
}
@Override
public void configure(final StepConfigurationContext context,
final Set<Parameter> stepParameters) throws EoulsanException {
for (Parameter p : stepParameters) {
switch (p.getName()) {
case FORMAT_PARAMETER:
this.format = DataFormatRegistry.getInstance()
.getDataFormatFromName(p.getValue());
break;
case OUTPUT_COMPRESSION_PARAMETER:
this.outputCompression = CompressionType.valueOf(p.getValue());
break;
case OUTPUT_COMPRESSIONS_ALLOWED_PARAMETER:
this.outputCompressionsAllowed =
decodeAllowedCompressionsParameterValue(p.getValue());
break;
default:
Modules.unknownParameter(context, p);
}
}
if (this.format == null) {
Modules.invalidConfiguration(context, "No format set");
}
if (this.outputCompression == null) {
Modules.invalidConfiguration(context, "No output compression set");
}
if (this.outputCompressionsAllowed.isEmpty()) {
throw new EoulsanException(OUTPUT_COMPRESSIONS_ALLOWED_PARAMETER
+ " parameter value cannot be empty");
}
}
@Override
public TaskResult execute(final TaskContext context,
final TaskStatus status) {
try {
final Data inData = context.getInputData(DEFAULT_SINGLE_INPUT_PORT_NAME);
final Data outData =
context.getOutputData(DEFAULT_SINGLE_OUTPUT_PORT_NAME, inData);
copyData(inData, outData, context);
status.setProgress(1.0);
} catch (IOException e) {
return status.createTaskResult(e);
}
return status.createTaskResult();
}
//
// Other methods
//
/**
* Check input and output files.
* @param inFile input file
* @param outFile output file
* @throws IOException if copy cannot be started
*/
private static void checkFiles(final DataFile inFile, final DataFile outFile)
throws IOException {
if (inFile.equals(outFile)) {
throw new IOException("Cannot copy file on itself: " + inFile);
}
if (!inFile.exists()) {
throw new FileNotFoundException("Input file not found: " + inFile);
}
if (outFile.exists()) {
throw new IOException("Output file already exists: " + outFile);
}
}
/**
* Get the real underlying file if the file protocol is a StorageDataProtocol
* instance.
* @param file the file
* @return the underlying file if exists or the file itself
*/
private DataFile getRealDataFile(final DataFile file) {
try {
final DataProtocol protocol = file.getProtocol();
// Get the underlying file if the file protocol is a storage protocol
if (protocol instanceof StorageDataProtocol) {
return ((StorageDataProtocol) protocol).getUnderLyingData(file);
}
return file;
} catch (IOException e) {
return file;
}
}
/**
* Copy files for a format and a samples.
* @param inData input data
* @param outData output data
* @param context task context
* @throws IOException if an error occurs while copying
*/
private void copyData(final Data inData, final Data outData,
final TaskContext context) throws IOException {
if (inData.getFormat().getMaxFilesCount() == 1) {
//
// Handle standard case
//
// Copy the file
final DataFile outputFile = copyFile(inData.getDataFile(), -1,
outData.getName(), outData.getPart(), context);
// Set the file in the data object
DataUtils.setDataFile(outData, outputFile);
} else {
//
// Handle multi file format like FASTQ files
//
// Get the count of input files
final int count = inData.getDataFileCount();
// The list of output files
final List<DataFile> dataFiles = new ArrayList<>();
for (int i = 0; i < count; i++) {
// Copy the file
final DataFile outputFile = copyFile(inData.getDataFile(i), i,
outData.getName(), outData.getPart(), context);
dataFiles.add(outputFile);
}
// Set the files in the data object
DataUtils.setDataFiles(outData, dataFiles);
}
}
/**
* Copy an input file to its destination.
* @param inputFile the input file
* @param fileIndex the output file index
* @param outDataName the output data name
* @param outDataPart the output part
* @param context the step context
* @return the output file
* @throws IOException if an error occurs while copying the data
*/
private DataFile copyFile(final DataFile inputFile, final int fileIndex,
final String outDataName, final int outDataPart,
final TaskContext context) throws IOException {
final String stepId = context.getCurrentStep().getId();
final DataFile outputDir = context.getStepOutputDirectory();
// Get the real input file
final DataFile in = getRealDataFile(inputFile);
// Define the compression of the output
final CompressionType compression = getOutputCompressionType(in);
// Define the output filename
final String outFilename =
FileNaming.filename(stepId, DEFAULT_SINGLE_OUTPUT_PORT_NAME,
this.format, outDataName, fileIndex, outDataPart, compression);
// Define the output file
final DataFile out = new DataFile(outputDir, outFilename);
// Check input and output files
checkFiles(in, out);
// Copy file
DataFiles.symlinkOrCopy(in, out, true);
return out;
}
/**
* Get the compression type to use for the output file.
* @param inputFile the input file
* @return the compression type to use for the output file
*/
private CompressionType getOutputCompressionType(final DataFile inputFile) {
final CompressionType inCompression = inputFile.getCompressionType();
if (this.outputCompressionsAllowed.contains(inCompression)) {
return inCompression;
}
if (this.outputCompressionsAllowed.contains(CompressionType.NONE)) {
return CompressionType.NONE;
}
// Get the first allowed compression
return this.outputCompressionsAllowed.iterator().next();
}
/**
* Method to encode an EnumSet of the allowed compressions parameter in a
* string.
* @param outputCompressionAllowed the EnumSet to encode
* @return a string with the EnumSet encoded
*/
public static final String encodeAllowedCompressionsParameterValue(
final EnumSet<CompressionType> outputCompressionAllowed) {
if (outputCompressionAllowed == null) {
return null;
}
return Joiner.on('\t').join(outputCompressionAllowed);
}
/**
* Method to decode the allowed compressions parameter.
* @param value the parameter value as a string
* @return the parameter value as an EnumSet
* @throws EoulsanException if the value parameter is null
*/
private static EnumSet<CompressionType> decodeAllowedCompressionsParameterValue(
final String value) throws EoulsanException {
if (value == null) {
throw new EoulsanException(
OUTPUT_COMPRESSIONS_ALLOWED_PARAMETER + " parameter cannot be null");
}
final Set<CompressionType> result = new HashSet<>();
for (String s : Splitter.on('\t').omitEmptyStrings().trimResults()
.split(value)) {
final CompressionType compression = CompressionType.valueOf(s);
if (compression != null) {
result.add(compression);
}
}
return EnumSet.copyOf(result);
}
}