/*
 *                  Eoulsan development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public License version 2.1 or
 * later and CeCILL-C. This should be distributed with the code.
 * If you do not have a copy, see:
 *
 *      http://www.gnu.org/licenses/lgpl-2.1.txt
 *      http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
 *
 * Copyright for this code is held jointly by the Genomic platform
 * of the Institut de Biologie de l'École normale supérieure and
 * the individual authors. These should be listed in @author doc
 * comments.
 *
 * For more information on the Eoulsan project and its aims,
 * or to join the Eoulsan Google group, visit the home page
 * at:
 *
 *      http://outils.genomique.biologie.ens.fr/eoulsan
 *
 */

package fr.ens.biologie.genomique.eoulsan.modules.mgmt.upload;

import java.io.IOException;
import java.util.HashMap;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import fr.ens.biologie.genomique.eoulsan.annotations.HadoopOnly;
import fr.ens.biologie.genomique.eoulsan.core.Step;
import fr.ens.biologie.genomique.eoulsan.core.workflow.StepOutputDataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFile;
import fr.ens.biologie.genomique.eoulsan.data.DataFormat;
import fr.ens.biologie.genomique.eoulsan.data.DataFormatConverter;
import fr.ens.biologie.genomique.eoulsan.data.protocols.StorageDataProtocol;
import fr.ens.biologie.genomique.eoulsan.design.Sample;
import fr.ens.biologie.genomique.eoulsan.io.CompressionType;
import fr.ens.biologie.genomique.eoulsan.util.hadoop.PathUtils;

/**
 * This class defines a module for Hadoop file uploading.
 * @since 1.0
 * @author Laurent Jourdren
 */
@HadoopOnly
public class HadoopUploadModule extends UploadModule {

  private final Configuration conf;

  /**
   * Get the uploaded counterpart of a file: a {@link DataFile} created from
   * the value of {@code getDest()} and the name of the given file.
   * @param file the file to upload
   * @return the DataFile built from the destination and the file name
   * @throws IOException declared by the overridden method
   */
  @Override
  protected DataFile getUploadedDataFile(final DataFile file)
      throws IOException {

    return new DataFile(getDest(), file.getName());
  }

  /**
   * Get the uploaded counterpart of a file that may be attached to a sample
   * and a port.
   * <p>
   * When either the sample or the port name is {@code null}, the name of the
   * input file is reused. Otherwise the filename is the one produced by
   * {@link StepOutputDataFile#newStandardFilename} for the given step, port,
   * format, sample and file index, with {@link CompressionType#NONE}.
   * @param file the file to upload; only read when the sample or the port
   *          name is {@code null}
   * @param step the step used to build the standard filename
   * @param sample the sample, may be {@code null}
   * @param portName the port name, may be {@code null}
   * @param format the data format used to build the standard filename
   * @param fileIndex the file index used to build the standard filename
   * @return the DataFile built from the destination and the chosen filename
   * @throws IOException if the name of the input file is needed but the input
   *           file is {@code null}
   */
  @Override
  protected DataFile getUploadedDataFile(final DataFile file, final Step step,
      final Sample sample, final String portName, final DataFormat format,
      final int fileIndex) throws IOException {

    final String filename;

    if (sample == null || portName == null) {

      if (file == null) {
        throw new IOException("Input file is null.");
      }

      filename = file.getName();
    } else {
      filename = StepOutputDataFile.newStandardFilename(step, portName, format,
          sample, fileIndex, CompressionType.NONE);
    }

    return new DataFile(getDest(), filename);
  }

  /**
   * Copy the files to their destinations.
   * <p>
   * The method works in two passes. The first pass handles every source for
   * which {@code toFile()} is not {@code null} immediately through a
   * {@link DataFormatConverter} and removes it from the map; sources whose
   * protocol is a {@link StorageDataProtocol} are replaced in the map by their
   * underlying data. The second pass hands whatever remains in the map to
   * {@link DataFileDistCp}.
   * <p>
   * The map given as argument is modified by this method.
   * @param files map of source files (keys) to destination files (values)
   * @throws NullPointerException if the map is {@code null}
   * @throws IOException if a source file does not exist or if a copy fails
   */
  @Override
  protected void copy(final Map<DataFile, DataFile> files) throws IOException {

    if (files == null) {
      throw new NullPointerException("The files argument is null.");
    }

    // Process to local copies. Iterate over a snapshot because the map is
    // modified inside the loop.
    for (Map.Entry<DataFile, DataFile> e : new HashMap<>(files).entrySet()) {

      final DataFile src = e.getKey();
      final DataFile dest = e.getValue();

      // NOTE(review): skipped entries are left in the map and are therefore
      // still handed to the distributed copy below -- confirm this is intended.
      if (src == null || dest == null) {
        continue;
      }

      // Test if the file exists
      if (!src.exists()) {
        throw new IOException("The file does not exists: " + src);
      }

      if (src.toFile() != null) {

        // The source is a local file: copy/convert it right now
        new DataFormatConverter(new DataFile(src.toFile()), dest).convert();

        // Remove the file from the list of files to copy
        files.remove(src);

      } else if (src.getProtocol() instanceof StorageDataProtocol) {

        // The source comes from a storage: resolve the underlying data
        final DataFile newSrc =
            ((StorageDataProtocol) src.getProtocol()).getUnderLyingData(src);

        // Update the map of files to copy only when the underlying data has
        // been resolved. Otherwise the original entry is kept, so that a null
        // key is never inserted in the map.
        if (newSrc != null) {
          files.remove(src);
          files.put(newSrc, dest);
        }
      }
    }

    // Process to distributed copies for the remaining entries
    if (!files.isEmpty()) {

      final Path jobPath = PathUtils.createTempPath(
          new Path(getDest().getSource()), "distcp-", "", this.conf);

      new DataFileDistCp(this.conf, jobPath).copy(files);
    }
  }

  //
  // Constructor
  //

  /**
   * Public constructor.
   * @param dest destination of the files to upload
   * @param conf Hadoop configuration
   * @throws NullPointerException if the configuration is {@code null}
   */
  public HadoopUploadModule(final DataFile dest, final Configuration conf) {

    super(dest);

    if (conf == null) {
      throw new NullPointerException("The configuration object is null");
    }

    this.conf = conf;
  }

}