/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.modules.mgmt.upload;
import static fr.ens.biologie.genomique.eoulsan.EoulsanLogger.getLogger;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import fr.ens.biologie.genomique.eoulsan.Globals;
import fr.ens.biologie.genomique.eoulsan.Settings;
import fr.ens.biologie.genomique.eoulsan.annotations.Terminal;
import fr.ens.biologie.genomique.eoulsan.core.TaskContext;
import fr.ens.biologie.genomique.eoulsan.core.Step;
import fr.ens.biologie.genomique.eoulsan.core.TaskResult;
import fr.ens.biologie.genomique.eoulsan.core.TaskStatus;
import fr.ens.biologie.genomique.eoulsan.core.Version;
import fr.ens.biologie.genomique.eoulsan.core.workflow.AbstractWorkflow;
import fr.ens.biologie.genomique.eoulsan.core.workflow.WorkflowContext;
import fr.ens.biologie.genomique.eoulsan.core.workflow.StepOutputDataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFormat;
import fr.ens.biologie.genomique.eoulsan.data.DataFormatRegistry;
import fr.ens.biologie.genomique.eoulsan.design.Design;
import fr.ens.biologie.genomique.eoulsan.design.DesignUtils;
import fr.ens.biologie.genomique.eoulsan.design.Sample;
import fr.ens.biologie.genomique.eoulsan.design.io.DesignWriter;
import fr.ens.biologie.genomique.eoulsan.design.io.Eoulsan1DesignWriter;
import fr.ens.biologie.genomique.eoulsan.modules.AbstractModule;
import fr.ens.biologie.genomique.eoulsan.util.FileUtils;
import fr.ens.biologie.genomique.eoulsan.util.hadoop.HadoopJarRepackager;
/**
 * This class defines an abstract module for uploading files. Subclasses
 * provide the destination naming scheme and the actual copy operation.
 * @since 1.0
 * @author Laurent Jourdren
 */
@Terminal
public abstract class UploadModule extends AbstractModule {

  /** Destination of the uploaded files, never null. */
  private final DataFile dest;

  //
  // Getter
  //

  /**
   * Get the destination of the uploaded files.
   * @return the destination given to the constructor
   */
  protected DataFile getDest() {
    return this.dest;
  }

  //
  // Module methods
  //

  /**
   * Upload the files of the workflow to the destination. The method gathers
   * the files to copy (workflow data files, the repackaged jar when not in
   * Hadoop mode, a rewritten design file and the workflow file), copies them
   * with {@link #copy(Map)} and then updates the design, workflow and jar
   * paths of the workflow context.
   * @param context task context
   * @param status task status
   * @return a successful task result, or a task result created from the
   *         IOException if one occurs while preparing or copying the files
   */
  @Override
  public TaskResult execute(final TaskContext context,
      final TaskStatus status) {

    final StringBuilder log = new StringBuilder();

    // Workflow context whose design, workflow and jar paths are updated once
    // the upload is done
    final WorkflowContext fullContext =
        ((AbstractWorkflow) context.getWorkflow()).getWorkflowContext();

    final Map<DataFile, DataFile> filesToCopy = new HashMap<>();
    File repackagedJarFile = null;
    File newDesignFile = null;

    try {
      final Design design = context.getWorkflow().getDesign();

      for (Sample sample : design.getSamples()) {
        filesToCopy.putAll(findDataFilesInWorkflow(sample, context));
      }

      removeNotExistingDataFile(filesToCopy);

      // Check if destination path already exists
      if (getDest().exists()) {
        throw new IOException(
            "The uploading destination already exists: " + getDest());
      }

      // Repackage the jar file if necessary
      if (!context.getRuntime().getMode().isHadoopMode()) {
        repackagedJarFile = HadoopJarRepackager.repack();
        final DataFile jarDataFile =
            new DataFile(repackagedJarFile.getAbsolutePath());
        filesToCopy.put(jarDataFile, getUploadedDataFile(jarDataFile));
      }

      final Settings settings = context.getRuntime().getSettings();

      // Replace the file paths of the design by the uploaded paths and
      // register the referenced files in the map of files to upload
      reWriteDesign(context, filesToCopy);

      // Obfuscate the design if needed
      if (settings.isObfuscateDesign()) {
        DesignUtils.obfuscate(design,
            settings.isObfuscateDesignRemoveReplicateInfo());
      }

      // Create a new design file
      newDesignFile = writeTempDesignFile(context, design);
      final DataFile uploadedDesignDataFile =
          getUploadedDataFile(context.getDesignFile());
      filesToCopy.put(new DataFile(newDesignFile.getAbsolutePath()),
          uploadedDesignDataFile);

      // Add the workflow file to the list of files to upload
      final DataFile currentParamDataFile = context.getWorkflowFile();
      final DataFile uploadedParamDataFile =
          getUploadedDataFile(currentParamDataFile);
      filesToCopy.put(currentParamDataFile, uploadedParamDataFile);

      // Create log entry
      for (Map.Entry<DataFile, DataFile> e : filesToCopy.entrySet()) {
        log.append("Copy ");
        log.append(e.getKey());
        log.append(" to ");
        log.append(e.getValue());
        log.append('\n');
      }

      // Copy the files
      copy(filesToCopy);

      // Change the path of design and workflow file in the context
      fullContext
          .setDesignFile(new DataFile(uploadedDesignDataFile.getSource()));
      fullContext
          .setWorkflowFile(new DataFile(uploadedParamDataFile.getSource()));

    } catch (IOException e) {
      return status.createTaskResult(e);
    } finally {

      // Remove the temporary design file, also when the upload has failed,
      // so that a failed step does not leave the file behind
      if (newDesignFile != null && !newDesignFile.delete()) {
        getLogger()
            .warning("Cannot remove temporary design file: " + newDesignFile);
      }
    }

    // The base path is now the place where files were uploaded.
    // TODO Warning, context.setBasePathname() no longer exists.
    // The upload step must be rewritten or replaced by something better
    // fullContext.setBasePathname(getDest().toString());

    // The path to the jar file. The jar is only repackaged when not in Hadoop
    // mode, so a non null file means that the jar has been uploaded
    if (repackagedJarFile != null) {
      fullContext.setJarFile(new DataFile(
          getDest().toString() + "/" + repackagedJarFile.getName()));
    }

    status.setProgressMessage(log.toString());
    return status.createTaskResult();
  }

  @Override
  public String getName() {
    return "upload";
  }

  @Override
  public Version getVersion() {
    return Globals.APP_VERSION;
  }

  //
  // Abstract methods
  //

  /**
   * Generate the DataFile object for the uploaded DataFile.
   * @param file DataFile to upload
   * @return a new DataFile object with the path to the uploaded DataFile
   * @throws IOException if an error occurs while creating the result DataFile
   */
  protected abstract DataFile getUploadedDataFile(final DataFile file)
      throws IOException;

  /**
   * Generate the DataFile object for the uploaded DataFile. This overload
   * delegates to the variant with a file index, using -1 as file index.
   * @param file DataFile to upload
   * @param step step that creates the data
   * @param sample the sample for the source
   * @param portName the port name
   * @param format the format of the file to upload
   * @return a new DataFile object with the path to the uploaded DataFile
   * @throws IOException if an error occurs while creating the result DataFile
   */
  private DataFile getUploadedDataFile(final DataFile file, final Step step,
      final Sample sample, final String portName, final DataFormat format)
      throws IOException {

    return getUploadedDataFile(file, step, sample, portName, format, -1);
  }

  /**
   * Generate the DataFile object for the uploaded DataFile.
   * @param file DataFile to upload
   * @param step step that creates the data
   * @param sample the sample for the source
   * @param portName the port name
   * @param format the format of the file to upload
   * @param fileIndex file index for multifile data
   * @return a new DataFile object with the path to the uploaded DataFile
   * @throws IOException if an error occurs while creating the result DataFile
   */
  protected abstract DataFile getUploadedDataFile(final DataFile file,
      final Step step, final Sample sample, final String portName,
      final DataFormat format, final int fileIndex) throws IOException;

  /**
   * Copy files to destinations.
   * @param files map with source and destination for each file
   * @throws IOException if an error occurs while copying files
   */
  protected abstract void copy(Map<DataFile, DataFile> files)
      throws IOException;

  //
  // Other methods
  //

  /**
   * Find the DataFiles used by the steps of a workflow for a sample.
   * @param sample sample
   * @param context execution context
   * @return a map that associates each DataFile used by the workflow with the
   *         DataFile where it will be uploaded
   * @throws IOException if an error occurs while creating the uploaded
   *           DataFile objects
   */
  private Map<DataFile, DataFile> findDataFilesInWorkflow(final Sample sample,
      final TaskContext context) throws IOException {

    final Map<DataFile, DataFile> result = new HashMap<>();

    // The lookup of the input files is currently disabled (commented out):
    // context.getWorkflow().getWorkflowFilesAtFirstStep().getInputFiles();
    // The set below is therefore always empty and this method returns an
    // empty map.
    final Set<StepOutputDataFile> inFiles = new HashSet<>();

    for (StepOutputDataFile file : inFiles) {
      final DataFile in = file.getDataFile();
      final DataFile out =
          getUploadedDataFile(in, file.getStep(), file.getSample(),
              file.getPortName(), file.getFormat(), file.getFileIndex());
      result.put(in, out);
    }

    return result;
  }

  /**
   * Remove from a map the entries whose source DataFile does not exist.
   * @param files map with source and destination for each file, modified in
   *          place
   */
  private void removeNotExistingDataFile(final Map<DataFile, DataFile> files) {

    // Collect first, remove afterwards: the key set cannot be modified while
    // it is being iterated with a for-each loop
    final Set<DataFile> missingFiles = new HashSet<>();

    for (DataFile file : files.keySet()) {
      if (!file.exists()) {
        missingFiles.add(file);
      }
    }

    // The key set is a view of the map: removing keys removes the entries
    files.keySet().removeAll(missingFiles);
  }

  /**
   * Replace, in the design of the workflow, the paths of the files by the
   * paths of the uploaded files and register each file in the map of the
   * files to copy. The metadata fields that contain files are the fields of
   * the first sample for which the data format registry knows a data format.
   * @param context task context
   * @param filesToCopy map with source and destination for each file,
   *          completed by this method
   * @throws IOException if an error occurs while creating the uploaded
   *           DataFile objects or if the format of a file cannot be
   *           determined
   */
  private void reWriteDesign(final TaskContext context,
      final Map<DataFile, DataFile> filesToCopy) throws IOException {

    final DataFormatRegistry registry = DataFormatRegistry.getInstance();
    final Step designStep = context.getWorkflow().getDesignStep();
    final Design design = context.getWorkflow().getDesign();
    final Set<String> fieldWithFiles = new HashSet<>();
    boolean first = true;

    for (final Sample s : design.getSamples()) {

      // The fields to rewrite are identified once, from the first sample
      if (first) {
        for (String fieldName : s.getMetadata().keySet()) {
          if (registry.getDataFormatForDesignMetadata(fieldName) != null) {
            fieldWithFiles.add(fieldName);
          }
        }
        first = false;
      }

      for (final String field : fieldWithFiles) {

        final List<String> oldValues = s.getMetadata().getAsList(field);
        final List<String> newValues = new ArrayList<>();
        final int nValues = oldValues.size();

        if (nValues == 1) {

          final DataFile inFile = new DataFile(oldValues.get(0));

          // Use the format of the extension when it is not ambiguous,
          // otherwise rely on the metadata of the file
          final Set<DataFormat> formats =
              registry.getDataFormatsFromExtension(inFile.getExtension());
          final DataFormat format;

          if (formats.size() == 1) {
            format = formats.iterator().next();
          } else {
            format =
                requireFormat(inFile.getMetaData().getDataFormat(), inFile);
          }

          final DataFile outFile;
          if (format.getMaxFilesCount() == 1) {
            outFile = getUploadedDataFile(inFile, designStep, s,
                format.getName(), format);
          } else {
            outFile = getUploadedDataFile(inFile, designStep, s,
                format.getName(), format, 0);
          }

          filesToCopy.put(inFile, outFile);
          newValues.add(outFile.toString());

        } else if (nValues > 1) {

          // Several files for the field: the position in the list is used as
          // file index
          for (int i = 0; i < nValues; i++) {
            final DataFile inFile = new DataFile(oldValues.get(i));
            final DataFormat format =
                requireFormat(inFile.getDataFormat(), inFile);
            final DataFile outFile = getUploadedDataFile(inFile, designStep, s,
                format.getName(), format, i);

            filesToCopy.put(inFile, outFile);
            newValues.add(outFile.toString());
          }
        }

        // Replace old paths with new paths in the design
        s.getMetadata().set(field, newValues);
      }
    }
  }

  /**
   * Check that the format of a file has been found. A missing format is
   * reported as an IOException, so that the step fails with a message
   * instead of a NullPointerException.
   * @param format the format found for the file, may be null
   * @param file the file of the format, only used in the error message
   * @return the format, never null
   * @throws IOException if the format is null
   */
  private static DataFormat requireFormat(final DataFormat format,
      final DataFile file) throws IOException {

    if (format == null) {
      throw new IOException(
          "Unable to determine the data format of the file: " + file);
    }

    return format;
  }

  /**
   * Write the design in a temporary file.
   * @param context context object
   * @param design Design object
   * @return the temporary design file
   * @throws IOException if an error occurs while writing the design file
   */
  private File writeTempDesignFile(final TaskContext context,
      final Design design) throws IOException {

    final File result = context.getRuntime().createTempFile("design-", ".txt");

    // NOTE(review): the output stream is not closed in this method;
    // presumably DesignWriter.write() closes it -- to be confirmed
    final DesignWriter writer =
        new Eoulsan1DesignWriter(FileUtils.createOutputStream(result));
    writer.write(design);

    return result;
  }

  //
  // Constructor
  //

  /**
   * Public constructor.
   * @param destination destination of the uploaded files
   * @throws NullPointerException if the destination is null
   */
  public UploadModule(final DataFile destination) {

    if (destination == null) {
      throw new NullPointerException("The destination file is null.");
    }

    this.dest = destination;
  }
}