// Copyright (C) 2011-2012 CRS4. // // This file is part of Seal. // // Seal is free software: you can redistribute it and/or modify it // under the terms of the GNU General Public License as published by the Free // Software Foundation, either version 3 of the License, or (at your option) // any later version. // // Seal is distributed in the hope that it will be useful, but // WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY // or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License // for more details. // // You should have received a copy of the GNU General Public License along // with Seal. If not, see <http://www.gnu.org/licenses/>. package it.crs4.seal.recab; import it.crs4.seal.common.IMRContext; import java.io.IOException; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.io.Text; public class RecabTableReducer { public static final String CONF_SMOOTHING = "seal.recab.smoothing"; public static final float CONF_SMOOTHING_DEFAULT = 0.0f; public static final String CONF_MAX_QSCORE = "seal.recab.max-qscore"; public static final int CONF_MAX_QSCORE_DEFAULT = 40; public static final double PERROR_EPS = 0.0001; protected double smoothing = CONF_SMOOTHING_DEFAULT; protected int maxQscore = CONF_MAX_QSCORE_DEFAULT; protected ObservationCount sum = new ObservationCount(); protected StringBuilder sbuilder = new StringBuilder(100); protected Text outputValue = new Text(); public void setup(Configuration conf) { smoothing = conf.getFloat(CONF_SMOOTHING, CONF_SMOOTHING_DEFAULT); if (smoothing < 0.0) throw new IllegalArgumentException(CONF_SMOOTHING + " can't be less than 0"); maxQscore = conf.getInt(CONF_MAX_QSCORE, CONF_MAX_QSCORE_DEFAULT); if (maxQscore <= 0) throw new IllegalArgumentException(CONF_MAX_QSCORE + " must be greater than 0"); } public void reduce(Text key, Iterable<ObservationCount> values, IMRContext<Text, Text> context) throws IOException, InterruptedException { sum.set(0,0); sbuilder.delete(0, sbuilder.length()); for (ObservationCount counts: values) sum.addToThis(counts); if (sum.getObservations() > 0) { outputValue.set(key); sbuilder. append(sum.getObservations()).append(RecabTable.TableDelim). append(sum.getMismatches()).append(RecabTable.TableDelim). append(empiricalQuality(sum)); String tmp = sbuilder.toString(); outputValue.append(tmp.getBytes(RecabTable.ASCII), 0, tmp.length()); context.write(null, outputValue); } } protected int empiricalQuality(ObservationCount observations) { double perror = (observations.getMismatches() + smoothing) / (observations.getObservations() + smoothing) + PERROR_EPS; int score = (int)Math.rint(-10.0*Math.log10(perror)); // bound score. GATK bounds quality between 1 and MAX_REASONABLE_Q_SCORE if (score > maxQscore) score = maxQscore; else if (score <= 0) score = 1; return score; } }