/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.modules; import static fr.ens.biologie.genomique.eoulsan.io.CompressionType.NONE; import static fr.ens.biologie.genomique.eoulsan.io.CompressionType.getCompressionTypeByContentEncoding; import java.io.IOException; import java.util.Collections; import java.util.Comparator; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Set; import com.google.common.collect.ArrayListMultimap; import com.google.common.collect.ListMultimap; import com.google.common.collect.Lists; import fr.ens.biologie.genomique.eoulsan.EoulsanException; import fr.ens.biologie.genomique.eoulsan.Globals; import fr.ens.biologie.genomique.eoulsan.annotations.LocalOnly; import fr.ens.biologie.genomique.eoulsan.annotations.ReuseModuleInstance; import fr.ens.biologie.genomique.eoulsan.core.DataUtils; import fr.ens.biologie.genomique.eoulsan.core.InputPorts; import fr.ens.biologie.genomique.eoulsan.core.InputPortsBuilder; import fr.ens.biologie.genomique.eoulsan.core.Modules; import fr.ens.biologie.genomique.eoulsan.core.Naming; import fr.ens.biologie.genomique.eoulsan.core.OutputPorts; import fr.ens.biologie.genomique.eoulsan.core.OutputPortsBuilder; import fr.ens.biologie.genomique.eoulsan.core.Parameter; import fr.ens.biologie.genomique.eoulsan.core.StepConfigurationContext; import fr.ens.biologie.genomique.eoulsan.core.TaskContext; import fr.ens.biologie.genomique.eoulsan.core.TaskResult; import fr.ens.biologie.genomique.eoulsan.core.TaskStatus; import fr.ens.biologie.genomique.eoulsan.core.Version; import fr.ens.biologie.genomique.eoulsan.data.Data; import fr.ens.biologie.genomique.eoulsan.data.DataFile; import fr.ens.biologie.genomique.eoulsan.data.DataFormat; import fr.ens.biologie.genomique.eoulsan.data.DataFormatRegistry; import fr.ens.biologie.genomique.eoulsan.io.CompressionType; import fr.ens.biologie.genomique.eoulsan.splitermergers.Merger; /** * This class define a generic merger module. * @author Laurent Jourdren * @since 2.0 */ @LocalOnly @ReuseModuleInstance public class MergerModule extends AbstractModule { public static final String MODULE_NAME = "merger"; private Merger merger; private CompressionType compression = NONE; // // Inner class // /** * This inner class allow to create iterator needed by SplitterMerger.merge() * method. */ private final class MergerIterator { private final ListMultimap<String, Data> map = ArrayListMultimap.create(); private int maxFileIndex = 1; public Set<String> getDataNames() { return this.map.keySet(); } public List<Data> getListData(final String dataName) { return this.map.get(dataName); } public int getMaxFileIndex() { return this.maxFileIndex; } public Iterator<DataFile> getIterator(final String dataName) throws EoulsanException { return getIterator(dataName, -1); } public Iterator<DataFile> getIterator(final String dataName, final int fileIndex) throws EoulsanException { final List<Data> list = Lists.newArrayList(this.map.get(dataName)); // Sort Data by their part number Collections.sort(list, new Comparator<Data>() { @Override public int compare(final Data a, final Data b) { return Integer.compare(a.getPart(), b.getPart()); } }); // Check if two data has the same part number if (checkForPartDuplicates()) { final Set<Integer> partNumbers = new HashSet<>(); for (Data data : list) { if (partNumbers.contains(data.getPart())) { throw new EoulsanException( "Found two or more data with the same part: " + data.getName()); } partNumbers.add(data.getPart()); } } final Iterator<Data> it = list.iterator(); // Create the iterator itself return new Iterator<DataFile>() { @Override public boolean hasNext() { return it.hasNext(); } @Override public DataFile next() { if (fileIndex == -1) { return it.next().getDataFile(); } else { return it.next().getDataFile(fileIndex); } } @Override public void remove() { throw new UnsupportedOperationException(); } }; } /** * Check that two keys cannot produce the same data name. * @throws EoulsanException if two keys share the same data name */ private void checkKeys() throws EoulsanException { final Set<String> validNames = new HashSet<>(); for (String name : getDataNames()) { final String validName = Naming.toValidName(name); if (validNames.contains(validName)) { throw new EoulsanException( "Two merger keys share the same data name (" + name + " -> " + validName + ")"); } validNames.add(validName); } } /** * Constructor. * @param data the data * @throws EoulsanException if two keys share the same data name */ public MergerIterator(final Data data) throws EoulsanException { for (Data d : data.getListElements()) { final String key = getMapKey(d); if (key != null) { this.map.put(key, d); if (d.getDataFileCount() > this.maxFileIndex) { this.maxFileIndex = d.getDataFileCount(); } } } // Check keys checkKeys(); } } // // Protected methods // /** * Define the key to use for replicate merging. * @param data data to merge * @return the merging key */ protected String getMapKey(final Data data) { return data.getName(); } protected boolean checkForPartDuplicates() { return true; } // // Module methods // @Override public String getName() { return MODULE_NAME; } @Override public Version getVersion() { return Globals.APP_VERSION; } @Override public InputPorts getInputPorts() { return new InputPortsBuilder() .addPort("input", true, this.merger.getFormat()).create(); } @Override public OutputPorts getOutputPorts() { return new OutputPortsBuilder() .addPort("output", true, this.merger.getFormat(), this.compression) .create(); } @Override public void configure(final StepConfigurationContext context, final Set<Parameter> stepParameters) throws EoulsanException { final Set<Parameter> mergerParameters = new HashSet<>(); for (Parameter p : stepParameters) { switch (p.getName()) { case "format": // Get format final DataFormat format = DataFormatRegistry.getInstance() .getDataFormatFromNameOrAlias(p.getValue()); // Check if the format exists if (format == null) { Modules.badParameterValue(context, p, "Unknown format: " + p.getValue()); } // Check if a merger exists for the format if (!format.isMerger()) { Modules.badParameterValue(context, p, "No splitter exists for format: " + format.getName()); } // Set the merger this.merger = format.getMerger(); break; case "compression": this.compression = getCompressionTypeByContentEncoding(p.getValue()); break; default: mergerParameters.add(p); break; } } // Check if a format has been set if (this.merger == null) { Modules.invalidConfiguration(context, "No format set for merge"); } // Configure the merger this.merger.configure(mergerParameters); } @Override public TaskResult execute(final TaskContext context, final TaskStatus status) { final DataFormat format = this.merger.getFormat(); // Get input and output data final Data inListData = context.getInputData(format); final Data outListData = context.getOutputData(format, inListData); try { final MergerIterator it = new MergerIterator(inListData); for (String dataName : it.getDataNames()) { final Data outData = outListData.addDataToList(Naming.toValidName(dataName)); // Set metadata for output data DataUtils.setDataMetadata(outData, it.getListData(dataName)); // If Mono-file format if (format.getMaxFilesCount() == 1) { // Get output file final DataFile outFile = outData.getDataFile(); // Launch merger this.merger.merge(it.getIterator(dataName), outFile); } else { // For each file of the multi-file format for (int fileIndex = 0; fileIndex < it .getMaxFileIndex(); fileIndex++) { // Get output file final DataFile outFile = outData.getDataFile(fileIndex); // Launch splitting this.merger.merge(it.getIterator(dataName, fileIndex), outFile); } } } // Successful result return status.createTaskResult(); } catch (IOException e) { // Fail of the task return status.createTaskResult(e); } catch (EoulsanException e) { // Fail of the task return status.createTaskResult(e); } } }