/* * Eoulsan development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public License version 2.1 or * later and CeCILL-C. This should be distributed with the code. * If you do not have a copy, see: * * http://www.gnu.org/licenses/lgpl-2.1.txt * http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt * * Copyright for this code is held jointly by the Genomic platform * of the Institut de Biologie de l'École normale supérieure and * the individual authors. These should be listed in @author doc * comments. * * For more information on the Eoulsan project and its aims, * or to join the Eoulsan Google group, visit the home page * at: * * http://outils.genomique.biologie.ens.fr/eoulsan * */ package fr.ens.biologie.genomique.eoulsan.modules.mapping.hadoop; import static fr.ens.biologie.genomique.eoulsan.EoulsanLogger.getLogger; import static fr.ens.biologie.genomique.eoulsan.modules.mapping.MappingCounters.ALIGNMENTS_REJECTED_BY_FILTERS_COUNTER; import static fr.ens.biologie.genomique.eoulsan.modules.mapping.MappingCounters.OUTPUT_FILTERED_ALIGNMENTS_COUNTER; import static fr.ens.biologie.genomique.eoulsan.modules.mapping.hadoop.HadoopMappingUtils.jobConfToParameters; import static fr.ens.biologie.genomique.eoulsan.modules.mapping.hadoop.SAMHeaderHadoopUtils.createSAMSequenceDictionaryFromSAMHeader; import static fr.ens.biologie.genomique.eoulsan.modules.mapping.hadoop.SAMHeaderHadoopUtils.loadSAMHeaders; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.List; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Reducer; import com.google.common.base.Joiner; import fr.ens.biologie.genomique.eoulsan.EoulsanException; import fr.ens.biologie.genomique.eoulsan.EoulsanLogger; import fr.ens.biologie.genomique.eoulsan.EoulsanRuntime; import fr.ens.biologie.genomique.eoulsan.Globals; import fr.ens.biologie.genomique.eoulsan.HadoopEoulsanRuntime; import fr.ens.biologie.genomique.eoulsan.bio.SAMComparator; import fr.ens.biologie.genomique.eoulsan.bio.alignmentsfilters.MultiReadAlignmentsFilter; import fr.ens.biologie.genomique.eoulsan.bio.alignmentsfilters.MultiReadAlignmentsFilterBuilder; import fr.ens.biologie.genomique.eoulsan.bio.alignmentsfilters.ReadAlignmentsFilterBuffer; import fr.ens.biologie.genomique.eoulsan.util.hadoop.HadoopReporterIncrementer; import htsjdk.samtools.SAMFileHeader; import htsjdk.samtools.SAMLineParser; import htsjdk.samtools.SAMRecord; /** * This class define a reducer for alignments filtering. * @since 1.0 * @author Laurent Jourdren */ public class SAMFilterReducer extends Reducer<Text, Text, Text, Text> { static final String GENOME_DESC_PATH_KEY = Globals.PARAMETER_PREFIX + ".samfilter.genome.desc.file"; static final String MAP_FILTER_PARAMETER_KEY_PREFIX = Globals.PARAMETER_PREFIX + ".filter.alignments.parameter."; private final SAMLineParser parser = new SAMLineParser(new SAMFileHeader()); private String counterGroup; private MultiReadAlignmentsFilter filter; private final Text outKey = new Text(); private final Text outValue = new Text(); private final List<SAMRecord> records = new ArrayList<>(); @Override protected void setup(final Context context) throws IOException, InterruptedException { EoulsanLogger.initConsoleHandler(); getLogger().info("Start of setup()"); // Get configuration object final Configuration conf = context.getConfiguration(); // Initialize Eoulsan DataProtocols if (!EoulsanRuntime.isRuntime()) { HadoopEoulsanRuntime.newEoulsanRuntime(conf); } // Counter group this.counterGroup = conf.get(Globals.PARAMETER_PREFIX + ".counter.group"); if (this.counterGroup == null) { throw new IOException("No counter group defined"); } // Set the filters try { final MultiReadAlignmentsFilterBuilder mrafb = new MultiReadAlignmentsFilterBuilder(); // Add the parameters from the job configuration to the builder mrafb.addParameters( jobConfToParameters(conf, MAP_FILTER_PARAMETER_KEY_PREFIX)); this.filter = mrafb.getAlignmentsFilter( new HadoopReporterIncrementer(context), this.counterGroup); getLogger().info("Read alignments filters to apply: " + Joiner.on(", ").join(this.filter.getFilterNames())); } catch (EoulsanException e) { throw new IOException(e); } // Write SAM header final List<String> samHeader = loadSAMHeaders(context); this.outKey.set(""); for (String line : samHeader) { outValue.set(line); context.write(this.outKey, this.outValue); } // Set the sequences sizes in the parser this.parser.getFileHeader().setSequenceDictionary( createSAMSequenceDictionaryFromSAMHeader(samHeader)); getLogger().info("End of setup()"); } /** * 'key': identifier of the aligned read, without the integer indicating the * pair member if data are in paired-end mode. 'value': alignments without the * identifier part of the SAM line. */ @Override protected void reduce(final Text key, final Iterable<Text> values, final Context context) throws IOException, InterruptedException { // Creation of a buffer object to store alignments with the same read name final ReadAlignmentsFilterBuffer rafb = new ReadAlignmentsFilterBuffer(this.filter); int cptRecords = 0; String strRecord = null; this.records.clear(); for (Text val : values) { cptRecords++; strRecord = key.toString() + val.toString(); rafb.addAlignment(this.parser.parseLine(strRecord)); } this.records.addAll(rafb.getFilteredAlignments()); context .getCounter(this.counterGroup, ALIGNMENTS_REJECTED_BY_FILTERS_COUNTER.counterName()) .increment(cptRecords - this.records.size()); // sort alignments of the current read Collections.sort(this.records, new SAMComparator()); // Writing records for (SAMRecord r : this.records) { strRecord = r.getSAMString().replaceAll("\n", ""); // Set output key final int indexOfFirstTab = strRecord.indexOf("\t"); this.outKey.set(strRecord.substring(0, indexOfFirstTab)); // Set output value this.outValue.set(strRecord); // Write the entry context.write(this.outKey, this.outValue); // Increment the counter context.getCounter(this.counterGroup, OUTPUT_FILTERED_ALIGNMENTS_COUNTER.counterName()).increment(1); } } }