/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.modules.mapping.hadoop;
import static fr.ens.biologie.genomique.eoulsan.EoulsanLogger.getLogger;
import static fr.ens.biologie.genomique.eoulsan.modules.mapping.MappingCounters.INPUT_ALIGNMENTS_COUNTER;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import com.google.common.base.Splitter;
import fr.ens.biologie.genomique.eoulsan.CommonHadoop;
import fr.ens.biologie.genomique.eoulsan.EoulsanLogger;
import fr.ens.biologie.genomique.eoulsan.EoulsanRuntime;
import fr.ens.biologie.genomique.eoulsan.Globals;
import fr.ens.biologie.genomique.eoulsan.HadoopEoulsanRuntime;
/**
 * This class defines a mapper for alignment filtering.
 * <p>
 * Each non-header input line is cut in two parts: the beginning of the read
 * identifier becomes the output key and the rest of the line becomes the
 * output value. Header lines are delegated to a
 * {@link SAMHeaderHadoopUtils.SAMHeaderWriter}.
 * @since 1.0
 * @author Laurent Jourdren
 */
public class SAMFilterMapper extends Mapper<Text, Text, Text, Text> {

  // Parameters keys
  // NOTE: the "THRESOLD" spelling is kept on purpose, the constant is
  // package-visible and may be referenced by other classes. It is not read by
  // this class.
  static final String MAPPING_QUALITY_THRESOLD_KEY =
      Globals.PARAMETER_PREFIX + ".samfilter.mapping.quality.threshold";

  private static final Splitter ID_SPLITTER = Splitter.on(':').trimResults();

  // Minimal number of ':' separated fields for a read identifier to be
  // handled as an Illumina Casava 1.8 identifier
  private static final int CASAVA_18_MIN_ID_FIELDS = 7;

  // Reused between calls to map() to avoid allocating a new list per line
  private final List<String> idFields = new ArrayList<>();

  private String counterGroup;
  private SAMHeaderHadoopUtils.SAMHeaderWriter samHeaderWriter;

  // Reused output objects
  private final Text outKey = new Text();
  private final Text outValue = new Text();

  /**
   * Initialize the mapper: logger, Eoulsan runtime (if not already set),
   * counter group name and SAM header writer.
   * @param context the Hadoop task context
   * @throws IOException if no counter group is defined in the configuration
   * @throws InterruptedException declared by the overridden method
   */
  @Override
  protected void setup(final Context context)
      throws IOException, InterruptedException {

    EoulsanLogger.initConsoleHandler();
    getLogger().info("Start of setup()");

    // Get configuration object
    final Configuration conf = context.getConfiguration();

    // Initialize Eoulsan DataProtocols
    if (!EoulsanRuntime.isRuntime()) {
      HadoopEoulsanRuntime.newEoulsanRuntime(conf);
    }

    // Counter group
    this.counterGroup = conf.get(CommonHadoop.COUNTER_GROUP_KEY);
    if (this.counterGroup == null) {
      throw new IOException("No counter group defined");
    }

    // SAM header writer
    this.samHeaderWriter = new SAMHeaderHadoopUtils.SAMHeaderWriter(
        context.getTaskAttemptID().toString());

    getLogger().info("End of setup()");
  }

  /**
   * Process one input line.
   * <p>
   * Header lines (as recognized by the SAM header writer) and empty lines
   * produce no output. For any other line, the input alignments counter is
   * incremented and the line is written as a (key, value) pair where the
   * concatenation of key and value is the original line.
   * @param key input key, not used by this method (described by the original
   *          author as the offset of the beginning of the line in the SAM
   *          file in single-end mode or in the TSAM file in paired-end mode)
   * @param value the SAM or TSAM line
   * @param context the Hadoop task context
   * @throws IOException if a non empty, non header line contains no
   *           tabulation, or if writing the output fails
   * @throws InterruptedException if writing the output is interrupted
   */
  @Override
  protected void map(final Text key, final Text value, final Context context)
      throws IOException, InterruptedException {

    final String line = value.toString();

    // Avoid header lines
    if (this.samHeaderWriter.writeIfHeaderLine(context, line)) {
      return;
    }

    // Avoid empty lines: without this guard the substring() call below
    // fails with a StringIndexOutOfBoundsException
    if (line.isEmpty()) {
      return;
    }

    context
        .getCounter(this.counterGroup, INPUT_ALIGNMENTS_COUNTER.counterName())
        .increment(1);

    final int indexOfFirstTab = line.indexOf('\t');

    // A line without any tabulation cannot be split in identifier and
    // alignment fields: fail with an explicit message
    if (indexOfFirstTab == -1) {
      throw new IOException(
          "Invalid SAM entry, no tabulation found in line: " + line);
    }

    final String completeId = line.substring(0, indexOfFirstTab);

    // The identifier is a prefix of the line, so an index computed on the
    // identifier can be used directly on the line
    final int keyEnd = findKeyEnd(completeId);

    this.outKey.set(line.substring(0, keyEnd));
    this.outValue.set(line.substring(keyEnd));

    context.write(this.outKey, this.outValue);
  }

  /**
   * Write the SAM header through the header writer at the end of the task.
   * @param context the Hadoop task context
   * @throws IOException if the header writer fails
   * @throws InterruptedException if the header writer is interrupted
   */
  @Override
  protected void cleanup(final Context context)
      throws IOException, InterruptedException {

    // Write SAM header if there is no SAM entries
    this.samHeaderWriter.close(context);
  }

  /**
   * Compute the index where the output key ends and the output value starts.
   * @param completeId the text of the line before its first tabulation
   * @return an index between 0 and the length of completeId (inclusive)
   */
  private int findKeyEnd(final String completeId) {

    this.idFields.clear();
    for (String field : ID_SPLITTER.split(completeId)) {
      this.idFields.add(field);
    }

    // Read identifier format : before Casava 1.8 or other technologies than
    // Illumina
    if (this.idFields.size() < CASAVA_18_MIN_ID_FIELDS) {

      final int slashIndex = completeId.indexOf('/');

      // No '/' (handled as single-end mode): the key is the whole identifier.
      // With a '/' (handled as paired-end mode): the key ends just after
      // the '/'
      return slashIndex == -1 ? completeId.length() : slashIndex + 1;
    }

    // Read identifier format : Illumina - Casava 1.8
    final int spaceIndex = completeId.indexOf(' ');

    // No space: the key is the whole identifier. With a space: the key ends
    // just before the space
    return spaceIndex == -1 ? completeId.length() : spaceIndex;
  }
}