package org.seqcode.projects.seed; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import org.seqcode.deepseq.StrandedBaseCount; import org.seqcode.deepseq.experiments.ExperimentCondition; import org.seqcode.deepseq.experiments.ExperimentManager; import org.seqcode.deepseq.experiments.ExptConfig; import org.seqcode.deepseq.experiments.Sample; import org.seqcode.genome.GenomeConfig; import org.seqcode.genome.location.Point; import org.seqcode.genome.location.Region; import org.seqcode.projects.seed.SEEDConfig.PeakFindingMethod; import org.seqcode.projects.seed.features.EnrichedFeature; import org.seqcode.projects.seed.features.EnrichedPeakFeature; import org.seqcode.projects.seed.features.Feature; public class PeakFinder extends DomainFinder { public static String version = "0.1"; public PeakFinder(GenomeConfig gcon, ExptConfig econ, SEEDConfig scon, ExperimentManager man) { super(gcon, econ, scon, man); } /** * Return the class name */ public String getProgramName(){ return "org.seqcode.projects.seed.PeakFinder"; } /** * Return a thread for this implementation */ public FeatureDetectionThread getMyThread(List<Region> regs){ return new PeakFinderThread(regs); } /** * @param args */ public static void main(String[] args) { System.setProperty("java.awt.headless", "true"); System.err.println("PeakFinder version "+PeakFinder.version+"\n\n"); GenomeConfig gcon = new GenomeConfig(args); ExptConfig econ = new ExptConfig(gcon.getGenome(), args); SEEDConfig scon = new SEEDConfig(gcon, args); if(scon.helpWanted()){ //System.out.println(PeakFinder.getDomainFinderArgs()); System.err.println(gcon.getArgsList()+ econ.getArgsList()+ scon.getArgsList()); }else{ ExperimentManager man = new ExperimentManager(econ); PeakFinder finder = new PeakFinder(gcon, econ, scon, man); System.err.println("\nBeginning peak finding..."); finder.execute(); man.close(); } } /** * Return a help string * @return */ public static String getPeakFinderArgs() { //TODO return("TODO"); } /** * Multiple-hypothesis correction. Print the output files. * Assumes that all peaks have been found. * @return : final features */ public Map<ExperimentCondition, List<Feature>> postProcess() { System.err.println("\nPeak finding complete."); for(ExperimentCondition cond : manager.getConditions()){ stats.benjaminiHochbergCorrection(features.get(cond)); } Map<ExperimentCondition, List<Feature>> signifFeatures = this.filter(features, sconfig.perBinBinomialPThres, true); //All domains this.printEventsFile(features, ".all.peaks"); //Filtered by q-value this.printEventsFile(signifFeatures, ".p"+sconfig.perBinBinomialPThres+".peaks"); //Summarize for(ExperimentCondition cond : manager.getConditions()) System.err.println(cond.getName()+"\t"+features.get(cond).size()+" peaks\t"+signifFeatures.get(cond).size()+" peaks below threshold."); return features; } /** * PeakFinderThread: thread that searches for domains * @author mahony * */ public class PeakFinderThread extends DomainFinderThread { public PeakFinderThread(List<Region> regs) { super(regs); } /** * findFeatures: find potentially enriched domains on the genome by comparing tag counts to a background model, * and find the peaks of enrichment within these domains using a choice of methods. * * @param subRegion : region to run analysis on * @return Map of Lists of Features in the subRegion, Indexed by ExperimentCondition */ public Map<ExperimentCondition, List<Feature>> findFeatures(Region subRegion) { //Get the enriched domains using the functionality from the superclass (DomainFinderThread) //and the over-ridden processDomain methods below Map<ExperimentCondition, List<Feature>> peaks = super.findFeatures(subRegion); return peaks; } /** * ProcessDomains: * - Trims feature coordinates back to agree with overlapping hits. * - Counts hits in each feature, per sample * - Finds the peak position in the domain according to a choice of methods. * * Specifically, the procedure characterizes enriched domains using the functionality in the superclass (DomainFinder), * and then finds peaks using either: * - Point of maximum density * - Point of balance between positive and negative strand densities * - Point of maximum likelihood, given an empirical read distribution (stranded) * * @param currFeatures * @param current region * @return : Lists of EnrichedFeatures, indexed by condition */ protected Map<ExperimentCondition, List<EnrichedFeature>> processDomains(Map<ExperimentCondition,List<EnrichedFeature>> currFeatures, Region currSubRegion){ Map<ExperimentCondition, List<EnrichedFeature>> peakFeatures = new HashMap<ExperimentCondition, List<EnrichedFeature>>(); for(ExperimentCondition cond : manager.getConditions()) peakFeatures.put(cond, new ArrayList<EnrichedFeature>()); for(ExperimentCondition currCondition : manager.getConditions()){ for(EnrichedFeature currDomain : currFeatures.get(currCondition)){ Map<Sample, List<StrandedBaseCount>> fHitsPos = overlappingHits(hitsPos, currDomain); Map<Sample, List<StrandedBaseCount>> fHitsNeg = overlappingHits(hitsNeg, currDomain); //Trim the coordinates trimFeature(currDomain, fHitsPos, fHitsNeg, currCondition); //Quantify the feature in each Sample and in the condition in which it was found quantifyFeature(currDomain, fHitsPos, fHitsNeg, currCondition); //Find the peaks EnrichedPeakFeature peak; if(sconfig.getPeakFindingApproach() == PeakFindingMethod.LRBALANCE) peak = findPeakLRBalance(fHitsPos, fHitsNeg, (EnrichedFeature)currDomain, currCondition); //else if(sconfig.getPeakFindingApproach() == PeakFindingMethod.TAGDISTRIBUTION && sconfig.getEventModel()!=null) // peak = findPeakWithBindingModel(fHitsPos, fHitsNeg, currDomain, currCondition); else if(sconfig.getPeakFindingApproach() == PeakFindingMethod.MAXDENSITY) peak = findPeakMaxHit(fHitsPos, fHitsNeg, (EnrichedFeature)currDomain, currCondition); else{ System.err.println("PeakFindingMethod "+sconfig.getPeakFindingApproach()+" not defined. Using max density."); peak = findPeakMaxHit(fHitsPos, fHitsNeg, (EnrichedFeature)currDomain, currCondition); } if(peak!=null) peakFeatures.get(currCondition).add(peak); } } return(peakFeatures); } /** * Find the peak locations based on maximum overlapping read counts. * * @return : Feature (EnrichedPeakFeature) */ protected EnrichedPeakFeature findPeakMaxHit(Map<Sample, List<StrandedBaseCount>> fHitsPos, Map<Sample, List<StrandedBaseCount>> fHitsNeg, EnrichedFeature domain, ExperimentCondition currCondition){ float [] sum = new float[domain.getCoords().getWidth()+1]; for(int s=0; s<=domain.getCoords().getWidth(); s++){sum[s]=0; } for(Sample s : currCondition.getSignalSamples()){ if(!strandedEventDetection || domain.getCoords().getStrand()=='+') for(StrandedBaseCount h : fHitsPos.get(s)){ int start = getLeft(h)-domain.getCoords().getStart(); int stop= getRight(h)-domain.getCoords().getStart(); for(int i=start; i<=stop; i++) if(i>=0 && i<sum.length) sum[i]+=h.getCount(); } if(!strandedEventDetection || domain.getCoords().getStrand()=='-') for(StrandedBaseCount h : fHitsNeg.get(s)){ int start = getLeft(h)-domain.getCoords().getStart(); int stop= getRight(h)-domain.getCoords().getStart(); for(int i=start; i<=stop; i++) if(i>=0 && i<sum.length) sum[i]+=h.getCount(); } } float max = 0; int maxPos = -1; for(int s=0; s<sum.length; s++){ if(sum[s]>max){ max= sum[s]; maxPos=s; } } Point p = new Point(gen, domain.getCoords().getChrom(), maxPos+domain.getCoords().getStart()); EnrichedPeakFeature epf=null; try { epf = new EnrichedPeakFeature(domain.getCoords(), p, domain.getSampleCountsPos(), domain.getSampleCountsNeg(), domain.getSignalCount(), domain.getControlCount(), domain.getScore(), max); } catch (Exception e) {} return(epf); } /** * Find the peak locations based on left-right balance of forward/reverse reads. * Motivated by SISSRS. * * @param hits * @param coords * @return */ protected EnrichedPeakFeature findPeakLRBalance(Map<Sample, List<StrandedBaseCount>> fHitsPos, Map<Sample, List<StrandedBaseCount>> fHitsNeg, EnrichedFeature domain, ExperimentCondition currCondition){ float [] forward = new float [domain.getCoords().getWidth()+1]; float [] reverse = new float [domain.getCoords().getWidth()+1]; float [] density = new float[domain.getCoords().getWidth()+1]; for(int s=0; s<=domain.getCoords().getWidth(); s++){forward[s]=0; reverse[s]=0;} if(strandedEventDetection){ System.err.println("Don't use this method for single-stranded event detection!"); System.exit(1); } for(Sample s : currCondition.getSignalSamples()){ for(StrandedBaseCount h : fHitsPos.get(s)){ int fivePrime = getShifted5Prime(h)-domain.getCoords().getStart(); if(fivePrime>=0 && fivePrime<forward.length) forward[fivePrime]+=h.getCount(); int start = getLeft(h)-domain.getCoords().getStart(); int stop= getRight(h)-domain.getCoords().getStart(); for(int i=start; i<stop; i++) if(i>=0 && i<density.length) density[i]+=h.getCount(); } for(StrandedBaseCount h : fHitsNeg.get(s)){ int fivePrime = getShifted5Prime(h)-domain.getCoords().getStart(); if(fivePrime>=0 && fivePrime<reverse.length) reverse[fivePrime]+=h.getCount(); int start = getLeft(h)-domain.getCoords().getStart(); int stop= getRight(h)-domain.getCoords().getStart(); for(int i=start; i<stop; i++) if(i>=0 && i<density.length) density[i]+=h.getCount(); } } int minBal = Integer.MAX_VALUE, minPos = -1; for(int s=0; s<=domain.getCoords().getWidth(); s++){ int left=0, right=0; for(int l=0; l<=s; l++){left+=forward[l];} for(int r=domain.getCoords().getWidth(); r>s; r--){right+=reverse[r];} if(Math.abs(left-right)<minBal){ minBal =Math.abs(left-right); minPos = s; } } Point p = new Point(gen, domain.getCoords().getChrom(), minPos+domain.getCoords().getStart()); EnrichedPeakFeature epf=null; try { epf = new EnrichedPeakFeature(domain.getCoords(), p, domain.getSampleCountsPos(), domain.getSampleCountsNeg(), domain.getSignalCount(), domain.getControlCount(), domain.getScore(), density[minPos]); } catch (Exception e) {} return(epf); } /* Find exact peak using a BindingModel */ /* protected EnrichedFeature findPeakWithBindingModel(Map<Sample, List<StrandedBaseCount>> fHitsPos, Map<Sample, List<StrandedBaseCount>> fHitsNeg, EnrichedFeature domain, ExperimentCondition currCondition, BindingModel model){ float [] p = new float[domain.getCoords().getWidth()+1]; for(int s=0; s<=domain.getCoords().getWidth(); s++){p[s]=0; } int maxPos=0; float maxScore=0; //Populate the probability landscape - need to correct for strandedness of events here. for(Sample s : currCondition.getSignalSamples()){ for(StrandedBaseCount h : fHitsPos.get(s)){ int fivePrime = getShifted5Prime(h)-domain.getCoords().getStart(); if(fivePrime>=0 && fivePrime<p.length){ for(int i=Math.max(model.getMin()+fivePrime, 0); i<=Math.min(domain.getCoords().getWidth(), fivePrime+model.getMax()); i++) p[i]+=model.probability(i-fivePrime); } } for(StrandedBaseCount h : fHitsNeg.get(s)){ int fivePrime = getShifted5Prime(h)-domain.getCoords().getStart(); if(fivePrime>=0 && fivePrime<p.length){ for(int i=Math.max(fivePrime-model.getMax(), 0); i<=Math.min(domain.getCoords().getWidth(), fivePrime-model.getMin()); i++) p[i]+=model.probability(fivePrime-i); } } } for(int k=0; k<=domain.getCoords().getWidth(); k++) if(p[k]>maxScore){maxScore=p[k]; maxPos=k;} Point pt = new Point(gen, domain.getCoords().getChrom(), domain.getCoords().getStart()+maxPos); EnrichedPeakFeature epf=null; try { epf = new EnrichedPeakFeature(domain.getCoords(), pt, domain.getSampleCountsPos(), domain.getSampleCountsNeg(), domain.getSignalCount(), domain.getControlCount(), domain.getScore(), maxScore); } catch (Exception e) {} return(epf); } */ } }