package org.seqcode.math.diff; import java.util.ArrayList; import java.util.Collections; /** * MedianRatiosNormalization: a Normalization class that implements the median of ratios normalization * as proposed by Anders & Huber, Genome Biology, 2010 * @author Shaun Mahony * @version %I%, %G% */ public class MedianRatiosNormalization extends Normalization{ int ref=0; /** * Constructor: provide the number of samples, the fraction of extreme M values to trim, and the fraction of extreme A values to trim * @param numSamples * @param Mtrim * @param Atrim */ public MedianRatiosNormalization(int numSamples) { super(numSamples); } @Override public double[] normalize(CountsDataset data) { //Set one sample as the reference (the deepest sequenced sample in the focal condition) double maxTotal=0; for(int s=0; s<data.numSamples; s++) if(data.design[s] == data.focalCondition) if(data.totals[s]>maxTotal){ ref=s; maxTotal=data.totals[s]; } //Make a pseudo-reference double [] pseudo = new double[data.numUnits]; double frac = 1/(double)data.numSamples; for(int d=0; d<data.numUnits; d++){ double prod = 1; for(int s=0; s<data.numSamples; s++) prod*=data.getCount(d,s); if(prod==0) pseudo[d]=-1; else pseudo[d] = Math.pow(prod, frac); } //Arrays of ratios against the pseudo reference for(int s=0; s<data.numSamples; s++){ ArrayList<Double> ratios = new ArrayList<Double>(); for(int d=0; d<data.numUnits; d++) if(pseudo[d]>0) ratios.add(data.getCount(d,s)/pseudo[d]); //Scaling factor is the median ratio Collections.sort(ratios); depthScaling[s]= (data.totals[s] / data.totals[ref]); scalingFactors[s] = ratios.get(ratios.size()/2); propScaling[s] = scalingFactors[s]/depthScaling[s]; } data.setScalingFactors(scalingFactors); return scalingFactors; } @Override public String selfDescriptor() { return "Median ratios normalization as proposed by Anders & Huber, Genome Biology, 2010 (DESeq)."; } }