/*
* Eoulsan development code
*
* This code may be freely distributed and modified under the
* terms of the GNU Lesser General Public License version 2.1 or
* later and CeCILL-C. This should be distributed with the code.
* If you do not have a copy, see:
*
* http://www.gnu.org/licenses/lgpl-2.1.txt
* http://www.cecill.info/licences/Licence_CeCILL-C_V1-en.txt
*
* Copyright for this code is held jointly by the Genomic platform
* of the Institut de Biologie de l'École normale supérieure and
* the individual authors. These should be listed in @author doc
* comments.
*
* For more information on the Eoulsan project and its aims,
* or to join the Eoulsan Google group, visit the home page
* at:
*
* http://outils.genomique.biologie.ens.fr/eoulsan
*
*/
package fr.ens.biologie.genomique.eoulsan.modules.mapping.hadoop;
import static fr.ens.biologie.genomique.eoulsan.modules.mapping.MappingCounters.INVALID_INPUT_PRETREATMENT_READS_COUNTER;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import fr.ens.biologie.genomique.eoulsan.Globals;
import fr.ens.biologie.genomique.eoulsan.bio.ReadSequence;
/**
* This class define a reducer for the pretreatment of paired-end data before
* the reads filtering step.
* @since 1.2
* @author Claire Wallon
*/
public class PreTreatmentReducer extends Reducer<Text, Text, Text, Text> {
private String counterGroup;
private ReadSequence read1 = null, read2 = null;
private String completeId1, completeId2;
private Text outValue, outKey;
@Override
protected void setup(final Context context)
throws IOException, InterruptedException {
final Configuration conf = context.getConfiguration();
// Counter group
this.counterGroup = conf.get(Globals.PARAMETER_PREFIX + ".counter.group");
if (this.counterGroup == null) {
throw new IOException("No counter group defined");
}
}
/**
* 'key': the identifier of the read without the integer indicating the member
* of the pair. 'values': the rest of the paired TFQ lines (the member '1' and
* then the member '2').
*/
@Override
protected void reduce(final Text key, final Iterable<Text> values,
final Context context) throws IOException, InterruptedException {
String[] fields;
String stringVal;
for (Text val : values) {
stringVal = val.toString();
if (stringVal.charAt(0) != '1' && stringVal.charAt(0) != '2') {
context
.getCounter(this.counterGroup,
INVALID_INPUT_PRETREATMENT_READS_COUNTER.counterName())
.increment(1);
return;
}
fields = stringVal.split("\t");
// Illumina technology and Casava 1.8 format for the '@' line
if (stringVal.charAt(1) == ':') {
if (stringVal.charAt(0) == '1') {
this.read1 = new ReadSequence();
this.read1.setSequence(fields[1]);
this.read1.setQuality(fields[2]);
this.completeId1 = key.toString() + " " + fields[0];
} else {
this.read2 = new ReadSequence();
this.read2.setSequence(fields[1]);
this.read2.setQuality(fields[2]);
this.completeId2 = key.toString() + " " + fields[0];
}
}
// Before Casava 1.8 or technology other than Illumina
else {
if (stringVal.charAt(0) == '1') {
this.read1 = new ReadSequence();
this.read1.setSequence(fields[1]);
this.read1.setQuality(fields[2]);
this.completeId1 = key.toString() + fields[0];
} else {
this.read2 = new ReadSequence();
this.read2.setSequence(fields[1]);
this.read2.setQuality(fields[2]);
this.completeId2 = key.toString() + fields[0];
}
}
}
if (this.read1 == null || this.read2 == null) {
context
.getCounter(this.counterGroup,
INVALID_INPUT_PRETREATMENT_READS_COUNTER.counterName())
.increment(1);
return;
}
this.outKey = new Text(this.completeId1);
// Write results
this.outValue = new Text(this.read1.getSequence()
+ "\t" + this.read1.getQuality() + "\t" + this.completeId2 + "\t"
+ this.read2.getSequence() + "\t" + this.read2.getQuality());
context.write(this.outKey, this.outValue);
}
}