package org.seqcode.deepseq.events;

import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.seqcode.deepseq.experiments.ControlledExperiment;
import org.seqcode.deepseq.experiments.ExperimentCondition;
import org.seqcode.deepseq.experiments.ExperimentManager;

import cern.jet.random.Binomial;
import cern.jet.random.ChiSquare;
import cern.jet.random.Poisson;
import cern.jet.random.engine.DRand;

/**
 * Test the significance of count enrichment in signal vs control.
 * 
 * @author Shaun Mahony
 * @version %I%, %G%
 */
public class EnrichmentSignificance {

	protected EventsConfig config;
	protected ExperimentManager manager;
	protected BindingManager bindingManager;
	protected double minFoldChange;
	protected double genomeLength;
	protected Binomial binomial;
	protected Poisson poisson;
	protected ChiSquare chisquare;

	public EnrichmentSignificance(EventsConfig con, ExperimentManager exptman, BindingManager bman, double minFoldChange, double genomeLength){
		this.config = con;
		this.manager = exptman;
		this.bindingManager = bman;
		this.minFoldChange = minFoldChange;
		this.genomeLength = genomeLength;
		binomial = new Binomial(100, .5, new DRand());
		poisson = new Poisson(1, new DRand());
		chisquare = new ChiSquare(1, new DRand());
	}

	/**
	 * Evaluate the significance of the set of BindingEvents using Binomial and Poisson tests,
	 * with the default model range. Assumes that the counts in the events are not already scaled.
	 */
	public void execute() { this.execute(-1); }

	/**
	 * Evaluate the significance of the set of BindingEvents using Binomial and Poisson tests.
	 * Assumes that the counts in the events are not already scaled.
	 * @param modelRange model range to test over; -1 to use each condition's maximum influence range
	 */
	public void execute(int modelRange){

		//Calculate relative replicate weights using signal vs noise fractions in each signal channel
		double[] repWeights = new double[manager.getReplicates().size()];
		for(ExperimentCondition c : manager.getConditions()){
			double totalSig = 0;
			for(ControlledExperiment r : c.getReplicates())
				totalSig += r.getSignalVsNoiseFraction() * r.getSignal().getHitCount();
			for(ControlledExperiment r : c.getReplicates())
				repWeights[r.getIndex()] = (r.getSignalVsNoiseFraction() * r.getSignal().getHitCount()) / totalSig;
		}

		//Compute fold difference, log-likelihood, and p-values.
		//To be clear about what each variable is measuring:
		// sigCtrlFold: weighted average of the per-replicate fold differences between signal reads assigned to the event and scaled control reads assigned to the event.
		// sigCtrlP: outcome of a binomial test between the sum of per-replicate signal read counts at the event and the sum of scaled per-replicate control counts at the event. If multiple replicates use the same control, the counts are used redundantly, since this is equivalent to summing the scaling factors across replicates.
		// LL: log-likelihood loss if the component were eliminated from the model.
		// LLp: chi-square distributed p-value corresponding to LL.
		for (BindingEvent cf : bindingManager.getBindingEvents()){
			for(ExperimentCondition c1 : manager.getConditions()){
				double c1Sig = cf.getCondSigHitsFromReps(c1);
				double ctrlCountScaled = cf.getCondCtrlHitsScaledFromReps(c1);

				//Weighted fold difference, signal vs control
				double sigCtrlFold = 0;
				for(ControlledExperiment r : c1.getReplicates()){
					double repFold = cf.getRepCtrlHits(r) > 1 ?
							cf.getRepSigHits(r) / (cf.getRepCtrlHits(r) * r.getControlScaling()) :
							cf.getRepSigHits(r);
					sigCtrlFold += repFold * repWeights[r.getIndex()];
				}

				//P-value, signal vs control
				double sigCtrlP = evaluateSignificance(c1Sig, ctrlCountScaled, cf.getCondTotalSigHitsFromReps(c1),
						modelRange == -1 ? bindingManager.getMaxInfluenceRange(c1) : modelRange);
				cf.setCondSigVCtrlFold(c1, sigCtrlFold);
				cf.setCondSigVCtrlP(c1, sigCtrlP);

				//Log-likelihood p-value
				if(config.CALC_EVENTS_LL){
					cf.setLLp(c1, chisquare.cdf(cf.getLLd(c1)));
					System.out.println(String.format("%s\t%s\t%.0f\t%e", cf.getPoint().getLocationString(), c1.getName(), cf.getLLd(c1), cf.getLLp(c1)));
				}
			}
		}

		// Calculate q-values: correct for multiple testing
		benjaminiHochbergCorrection(bindingManager.getBindingEvents());
	}//end of execute method
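
	/*
	 * Illustrative sketch of the two-part test in evaluateSignificance below (the numbers are
	 * hypothetical, not drawn from any dataset): with minFoldChange = 1.5, an event with
	 * countA = 50 signal reads and countB = 10 scaled control reads is tested two ways:
	 *  - Binomial balance test: n = ceil(50 + 10) = 60 trials with success probability
	 *    1/(1.5 + 1) = 0.4; the p-value is the probability of observing <= 10 control reads
	 *    if signal were only minFoldChange-fold above control.
	 *  - Poisson test: the mean is 1.5 * max(10, total * modelWidth / genomeLength), i.e.
	 *    minFoldChange times the larger of the local control count and the genome-wide
	 *    expectation; the p-value is the probability of observing >= 50 signal reads under
	 *    that mean.
	 * The reported p-value is the more conservative (larger) of the two.
	 */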
	/**
	 * Evaluate the significance of countA signal reads vs countB scaled control reads
	 * using Binomial and Poisson tests; returns the more conservative of the two p-values.
	 */
	private double evaluateSignificance(double countA, double countB, double total, int modelWidth) {
		double pValuePoisson, pValueBalance;

		if(countA + countB <= 0){
			return(1);
		}else{
			try{
				binomial.setNandP((int)Math.ceil(countA + countB), 1.0 / (minFoldChange + 1));
				pValueBalance = binomial.cdf((int)Math.ceil(countB));

				poisson.setMean(minFoldChange * Math.max(countB, total * (double)modelWidth / genomeLength));
				int cA = (int)Math.ceil(countA);
				pValuePoisson = 1 - poisson.cdf(cA) + poisson.pdf(cA);
			} catch(Exception err){
				err.printStackTrace();
				throw new RuntimeException(err.toString(), err);
			}
			return(Math.max(pValueBalance, pValuePoisson));
		}
	}

	/**
	 * Multiple hypothesis testing correction (Benjamini-Hochberg)
	 */
	private void benjaminiHochbergCorrection(List<BindingEvent> features){
		double total = features.size();

		//Signal-vs-control corrections by condition
		for(ExperimentCondition c : manager.getConditions()){
			BindingEvent.setSortingCond(c);
			Collections.sort(features, new Comparator<BindingEvent>(){
				public int compare(BindingEvent o1, BindingEvent o2) { return o1.compareBySigCtrlPvalue(o2); }
			});
			double rank = 1.0;
			for(BindingEvent cf : features){
				cf.setCondSigVCtrlP(c, Math.min(1.0, cf.getCondSigVCtrlP(c) * (total / rank)));
				rank++;
			}
		}

		//LL p-value corrections by condition
		if(config.CALC_EVENTS_LL){
			for(ExperimentCondition c : manager.getConditions()){
				BindingEvent.setSortingCond(c);
				Collections.sort(features, new Comparator<BindingEvent>(){
					public int compare(BindingEvent o1, BindingEvent o2) { return o1.compareByLLPvalue(o2); }
				});
				double rank = 1.0;
				for(BindingEvent cf : features){
					cf.setLLp(c, Math.min(1.0, cf.getLLp(c) * (total / rank)));
					rank++;
				}
			}
		}

		//Finally, sort on the first condition
		BindingEvent.setSortingCond(manager.getConditions().get(0));
		Collections.sort(features, new Comparator<BindingEvent>(){
			public int compare(BindingEvent o1, BindingEvent o2) { return o1.compareBySigCtrlPvalue(o2); }
		});
	}//end of benjaminiHochbergCorrection method
}
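
/*
 * Usage sketch (hypothetical wiring; how the EventsConfig, ExperimentManager and BindingManager
 * instances are constructed depends on the surrounding pipeline):
 *
 *   double minFold = 1.5;      // example minimum fold-change threshold
 *   double genomeLen = 2.5e9;  // example mappable genome length
 *   EnrichmentSignificance enrich = new EnrichmentSignificance(config, manager, bindingManager, minFold, genomeLen);
 *   enrich.execute();          // or enrich.execute(modelRange) to override the per-condition influence range
 *
 * After execute() returns, each BindingEvent carries Benjamini-Hochberg corrected signal-vs-control
 * p-values (and, if CALC_EVENTS_LL is set, corrected LL p-values) per condition, and the event list
 * is sorted by corrected signal-vs-control p-value in the first condition.
 */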