/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.modules.mapping; import static fr.ens.biologie.genomique.eoulsan.CommonHadoop.HADOOP_REDUCER_TASK_COUNT_PARAMETER_NAME; import static fr.ens.biologie.genomique.eoulsan.EoulsanLogger.getLogger; import static fr.ens.biologie.genomique.eoulsan.core.OutputPortsBuilder.singleOutputPort; import static fr.ens.biologie.genomique.eoulsan.data.DataFormats.MAPPER_RESULTS_SAM; import static fr.ens.biologie.genomique.eoulsan.data.DataFormats.READS_FASTQ; import static fr.ens.biologie.genomique.eoulsan.modules.mapping.AbstractReadsMapperModule.HADOOP_THREADS_PARAMETER_NAME; import static fr.ens.biologie.genomique.eoulsan.modules.mapping.AbstractReadsMapperModule.MAPPER_ARGUMENTS_PARAMETER_NAME; import static fr.ens.biologie.genomique.eoulsan.modules.mapping.AbstractReadsMapperModule.MAPPER_FLAVOR_PARAMETER_NAME; import static fr.ens.biologie.genomique.eoulsan.modules.mapping.AbstractReadsMapperModule.MAPPER_NAME_PARAMETER_NAME; import static fr.ens.biologie.genomique.eoulsan.modules.mapping.AbstractReadsMapperModule.MAPPER_VERSION_PARAMETER_NAME; import java.io.IOException; import java.util.Map; import java.util.Set; import fr.ens.biologie.genomique.eoulsan.EoulsanException; import fr.ens.biologie.genomique.eoulsan.Globals; import fr.ens.biologie.genomique.eoulsan.bio.alignmentsfilters.MultiReadAlignmentsFilterBuilder; import fr.ens.biologie.genomique.eoulsan.bio.readsfilters.MultiReadFilterBuilder; import fr.ens.biologie.genomique.eoulsan.bio.readsmappers.SequenceReadsMapper; import fr.ens.biologie.genomique.eoulsan.bio.readsmappers.SequenceReadsMapperService; import fr.ens.biologie.genomique.eoulsan.core.InputPorts; import fr.ens.biologie.genomique.eoulsan.core.InputPortsBuilder; import fr.ens.biologie.genomique.eoulsan.core.Modules; import fr.ens.biologie.genomique.eoulsan.core.OutputPorts; import fr.ens.biologie.genomique.eoulsan.core.Parameter; import fr.ens.biologie.genomique.eoulsan.core.StepConfigurationContext; import fr.ens.biologie.genomique.eoulsan.core.Version; import fr.ens.biologie.genomique.eoulsan.modules.AbstractModule; /** * This class define an abstract module for read filtering, mapping and * alignments filtering. * @since 1.0 * @author Laurent Jourdren */ public abstract class AbstractFilterAndMapReadsModule extends AbstractModule { public static final String MODULE_NAME = "filterandmap"; private static final String COUNTER_GROUP = "filter_map_reads"; protected static final String READS_PORT_NAME = "reads"; protected static final String MAPPER_INDEX_PORT_NAME = "mapperindex"; protected static final String GENOME_DESCRIPTION_PORT_NAME = "genomedescription"; protected static final int HADOOP_TIMEOUT = AbstractReadsMapperModule.HADOOP_TIMEOUT; private boolean pairedEnd; private Map<String, String> readsFiltersParameters; private Map<String, String> alignmentsFiltersParameters; private SequenceReadsMapper mapper; private String mapperVersion = ""; private String mapperFlavor = ""; private String mapperArguments; private int reducerTaskCount = -1; private int hadoopThreads = -1; private final int mappingQualityThreshold = -1; private int hadoopMapperRequiredMemory = AbstractReadsMapperModule.DEFAULT_MAPPER_REQUIRED_MEMORY; // // Getters // /** * Get the counter group to use for this module. * @return the counter group of this module */ protected String getCounterGroup() { return COUNTER_GROUP; } /** * Test if the step works in pair end mode. * @return true if the pair end mode is enable */ protected boolean isPairedEnd() { return this.pairedEnd; } /** * Get the name of the mapper to use. * @return Returns the mapperName */ protected String getMapperName() { return this.mapper.getMapperName(); } /** * Get the version of the mapper to use. * @return the version of the mapper to use */ protected String getMapperVersion() { return this.mapperVersion; } /** * Get the flavor of the mapper to use. * @return the flavor of the mapper to use */ protected String getMapperFlavor() { return this.mapperVersion; } /** * Get the name of the mapper to use. * @return Returns the mapperName */ protected String getMapperArguments() { return this.mapperArguments; } /** * Get the name of the mapper to use. * @return Returns the mapperName */ protected int getMapperHadoopThreads() { return this.hadoopThreads; } /** * Get the amount in MB of memory required to execute the mapper. This value * is required by Hadoop scheduler and if the mapper require more memory than * declared the mapper process will be killed. * @return the amount of memory required by the mapper in MB */ protected int getMapperHadoopMemoryRequired() { return this.hadoopMapperRequiredMemory; } /** * Get the mapper. * @return the mapper object */ protected SequenceReadsMapper getMapper() { return this.mapper; } /** * Get the mapping quality threshold. * @return the quality mapping threshold */ protected int getMappingQualityThreshold() { return this.mappingQualityThreshold; } /** * Get the reducer task count. * @return the reducer task count */ protected int getReducerTaskCount() { return this.reducerTaskCount; } /** * Get the parameters of the read filters. * @return a map with all the parameters of the filters */ protected Map<String, String> getReadFilterParameters() { return this.readsFiltersParameters; } /** * Get the parameters of the read alignments filters. * @return a map with all the parameters of the filters */ protected Map<String, String> getAlignmentsFilterParameters() { return this.alignmentsFiltersParameters; } // // Module methods // @Override public String getName() { return MODULE_NAME; } @Override public Version getVersion() { return Globals.APP_VERSION; } @Override public InputPorts getInputPorts() { final InputPortsBuilder builder = new InputPortsBuilder(); builder.addPort(READS_PORT_NAME, READS_FASTQ); builder.addPort(MAPPER_INDEX_PORT_NAME, this.mapper.getArchiveFormat()); return builder.create(); } @Override public OutputPorts getOutputPorts() { return singleOutputPort(MAPPER_RESULTS_SAM); } @Override public String getDescription() { return "This step filters, map reads and filter alignment results."; } @Override public void configure(final StepConfigurationContext context, final Set<Parameter> stepParameters) throws EoulsanException { String mapperName = null; final MultiReadFilterBuilder readFilterBuilder = new MultiReadFilterBuilder(); final MultiReadAlignmentsFilterBuilder alignmentsFilterBuilder = new MultiReadAlignmentsFilterBuilder(); for (Parameter p : stepParameters) { // Check if the parameter is deprecated AbstractReadsFilterModule.checkDeprecatedParameter(context, p); AbstractReadsMapperModule.checkDeprecatedParameter(context, p); AbstractSAMFilterModule.checkDeprecatedParameter(context, p); switch (p.getName()) { case MAPPER_NAME_PARAMETER_NAME: mapperName = p.getStringValue(); break; case MAPPER_VERSION_PARAMETER_NAME: mapperVersion = p.getStringValue(); break; case MAPPER_FLAVOR_PARAMETER_NAME: mapperFlavor = p.getStringValue(); break; case MAPPER_ARGUMENTS_PARAMETER_NAME: this.mapperArguments = p.getStringValue(); break; case HADOOP_THREADS_PARAMETER_NAME: this.hadoopThreads = p.getIntValueGreaterOrEqualsTo(1); break; case HADOOP_REDUCER_TASK_COUNT_PARAMETER_NAME: this.reducerTaskCount = p.getIntValueGreaterOrEqualsTo(1); break; default: // Add read filters parameters if (!(readFilterBuilder.addParameter(p.getName(), p.getStringValue(), true) || // Add read alignments filters parameters alignmentsFilterBuilder.addParameter(p.getName(), p.getStringValue(), true))) { Modules.unknownParameter(context, p); } } } // Force parameter checking readFilterBuilder.getReadFilter(); alignmentsFilterBuilder.getAlignmentsFilter(); this.readsFiltersParameters = readFilterBuilder.getParameters(); this.alignmentsFiltersParameters = alignmentsFilterBuilder.getParameters(); if (mapperName == null) { Modules.invalidConfiguration(context, "No mapper set"); } this.mapper = SequenceReadsMapperService.getInstance().newService(mapperName); if (this.mapper == null) { Modules.invalidConfiguration(context, "Unknown mapper: " + mapperName); } if (this.mapper.isIndexGeneratorOnly()) { Modules.invalidConfiguration(context, "The selected mapper can only be used for index generation: " + mapperName); } // Check if the binary for the mapper is available try { this.mapper.setMapperVersionToUse(this.mapperVersion); this.mapper.setMapperFlavorToUse(this.mapperFlavor); this.mapper.prepareBinaries(); } catch (IOException e) { throw new EoulsanException(e); } final int requiredMemory = context.getCurrentStep().getRequiredMemory(); if (requiredMemory > 0) { this.hadoopMapperRequiredMemory = requiredMemory; } // Log Step parameters getLogger().info("In " + getName() + ", mapper=" + this.mapper.getMapperName() + " (version: " + this.mapper.getMapperVersion() + ")"); getLogger() .info("In " + getName() + ", mapperarguments=" + this.mapperArguments); } }