package org.seqcode.projects.seed; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.util.ArrayList; import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Iterator; import org.seqcode.deepseq.StrandedBaseCount; import org.seqcode.deepseq.experiments.ExperimentCondition; import org.seqcode.deepseq.experiments.ExperimentManager; import org.seqcode.deepseq.experiments.ExptConfig; import org.seqcode.deepseq.experiments.Sample; import org.seqcode.genome.GenomeConfig; import org.seqcode.genome.location.Region; import org.seqcode.genome.location.StrandedPoint; import org.seqcode.projects.seed.features.EnrichedFeature; import org.seqcode.projects.seed.features.Feature; import org.seqcode.projects.seed.features.SuperEnrichedFeature; /** * * @author akshaykakumanu * */ public class SuperEnhancerFinder extends DomainFinder{ // pseudocount to calculate fold-change while finding inflectioin point for super enhancers public final int PSEUDOCOUINT = 1; // At the moment only one TSS is alowed to overlap identified super enahncers public final int NO_OVERLAPPING_TSS_ALLOWED = 1; // P-value threshold for selcting enriched features to be stitched public final double PER_ENRICHED_FEATURE_THRES = 0.001; // Minimum length of an enriched feature to be considered for stitching to get a super enhancer public final int MIN_ENRICHED_FEATURE_LENGHT = 100; //Srep size to calcluate slope public final int SLOPE_CALCULATING_STEP_SIZE = 40; //Min no of data points to change at inflection point public final double GRADIENT_STRICT = 1.0; public final double GRADIENT_RELAX = 0.9; // Min distance from known TSSs for a feature to be called distal protected int minDistalDistance; // Super enhancer stitch distance protected int superStitchWin; // Reference TSSs loaded from strandedpoints peaks file protected List<StrandedPoint> refTSSs; public static String version = "0.1"; /** * Return the class name */ public String getProgramName(){ return "org.seqcode.projects.seed.SuperEnhancerFinder"; } /** * Return a thread for this implementation */ public FeatureDetectionThread getMyThread(List<Region> regs){ return new SuperEnhancerFinderThread(regs); } public SuperEnhancerFinder(GenomeConfig gcon, ExptConfig econ, SEEDConfig scon, ExperimentManager man) { super(gcon, econ, scon, man); // TODO Auto-generated constructor stub minDistalDistance = sconfig.getMinDistalDistance(); superStitchWin = sconfig.getSuperStitchWin(); refTSSs = sconfig.getRefTSSs(); } public Map<ExperimentCondition, List<Feature>> postProcess() { System.err.println("\nSuper Enhancer finding complete."); //Sort super enhancers by signal count Map<ExperimentCondition, List<SuperEnrichedFeature>> signal_sorted_features = new HashMap<ExperimentCondition,List<SuperEnrichedFeature>>(); for( ExperimentCondition ec : manager.getConditions()){ signal_sorted_features.put(ec, new ArrayList<SuperEnrichedFeature>()); for(Feature f : features.get(ec)){ SuperEnrichedFeature sfe = (SuperEnrichedFeature)f; sfe.setScore(sfe.getSignalCount()); signal_sorted_features.get(ec).add(sfe); } Collections.sort(signal_sorted_features.get(ec)); } //Calculating slope to find inflection point int inflection_point_index_strict = 0; int inflection_point_index_relax = 0; for( ExperimentCondition ec : manager.getConditions()){ boolean reachedInflectionPoint_strict = false; boolean reachedInflectionPoint_relax = false; for(int i=0; i<signal_sorted_features.get(ec).size(); i++){ int x=i-(int)(SLOPE_CALCULATING_STEP_SIZE/2); int y = i+(int)(SLOPE_CALCULATING_STEP_SIZE/2); if(x<0){x=0;} if(y >signal_sorted_features.get(ec).size()){y=signal_sorted_features.get(ec).size();} float[] vals = new float[y-x]; for(int j=x;j<y;j++){ vals[j-x] = signal_sorted_features.get(ec).get(j).getSignalCount(); } double slope = getSlope(vals); if(slope >=GRADIENT_STRICT && !reachedInflectionPoint_strict){ inflection_point_index_strict = i; reachedInflectionPoint_strict = true; } if(slope >=GRADIENT_RELAX && !reachedInflectionPoint_relax){ inflection_point_index_relax = i; reachedInflectionPoint_relax = true; } signal_sorted_features.get(ec).get(i).setSlope(slope); } } Map<ExperimentCondition, List<Feature>> finalfeature = new HashMap<ExperimentCondition,List<Feature>>(); for(ExperimentCondition ec : manager.getConditions()){ finalfeature.put(ec, new ArrayList<Feature>()); finalfeature.get(ec).addAll(signal_sorted_features.get(ec)); } Map<ExperimentCondition, List<Feature>> superEnhancers_strict = new HashMap<ExperimentCondition,List<Feature>>(); Map<ExperimentCondition, List<Feature>> superEnhancers_relax = new HashMap<ExperimentCondition,List<Feature>>(); for(ExperimentCondition ec : manager.getConditions()){ superEnhancers_strict.put(ec, new ArrayList<Feature>()); superEnhancers_relax.put(ec, new ArrayList<Feature>()); int index = 0; for(Feature ff :finalfeature.get(ec)){ if(index >= inflection_point_index_strict){ superEnhancers_strict.get(ec).add(ff); } if(index >= inflection_point_index_relax){ superEnhancers_relax.get(ec).add(ff); } index++; } Collections.reverse(superEnhancers_strict.get(ec)); Collections.reverse(superEnhancers_relax.get(ec)); } //All features this.printEventsFile(finalfeature, ".all.domains"); // Super enhancer features this.printEventsFile(superEnhancers_strict, ".superenhancer_strict.domains"); this.printEventsFile(superEnhancers_relax, ".superenhancer_relax.domains"); //TFs of SFs this.printTFsOfSFs(superEnhancers_relax, ".TFs_of_SFs_relax.domains"); this.printTFsOfSFs(superEnhancers_strict, ".TFs_of_SFs_strict.domains"); return features; } /** * Help String for Super Enhancer Finder * @return */ public static String getDomainFinderArgs() { return(new String("" + "SuperEnhancerFinder arguments:\n"+ "\t--supStitchWin <Window to stitch typical enhancers into super enhancers (default: supStitchWin=12500bp)>\n" + "\t--distalDistance <Minmum distnace from TSSs for calling domains (default: distalDistance= 2000bp)>\n" + "\t--refTSSs <TSS annotations, eg :- chr2:45667-45767:+ >\n" + "")); } protected double getSlope(float values[]){ double slope=0; for(int i=0; i<(values.length-1); i++){ if(values[i+1] >values[i]){ slope++; } } return slope/(values.length-1); } protected void printTFsOfSFs(Map<ExperimentCondition,List<Feature>> SuperFeatures,String suffix){ try { for(ExperimentCondition cond : manager.getConditions()){ String condName = cond.getName().equals("experiment") ? "" : "_"+cond.getName(); String filename = sconfig.getOutputParentDir()+File.separator+sconfig.getOutBase()+condName+suffix; FileWriter fout = new FileWriter(filename); fout.write("#"+getProgramName()+"\n"); fout.write("#Arguments:\t"+sconfig.getArgs()+"\n"); fout.write("##date "+(new Date()).toString()+"\n"); for(Feature f : SuperFeatures.get(cond)){ if(f instanceof SuperEnrichedFeature){ SuperEnrichedFeature sf = (SuperEnrichedFeature) f; for(EnrichedFeature ef : sf.getTEFs()){ fout.write(ef.getCoords().getLocationString()+"\t"+sf.getCoords().getLocationString()+"\n"); } } } fout.close(); } }catch(IOException e){ e.printStackTrace(); } } public static void main(String[] args){ System.setProperty("java.awt.headless", "true"); System.err.println("SuperEnhancerFinder version "+SuperEnhancerFinder.version+"\n\n"); String SuperEnhancerFinderHelp = new String("" + "SuperEnhancerFinder arguments:\n"+ "\t--supStitchWin <Window to stitch typical enhancers into super enhancers (default: supStitchWin=12500bp)>\n" + "\t--distalDistance <Minmum distnace from TSSs for calling domains (default: distalDistance= 2000bp)>\n" + "\t--refTSSs <TSS annotations, eg :- chr2:45667-45767:+ >\n" + ""); GenomeConfig gcon = new GenomeConfig(args); ExptConfig econ = new ExptConfig(gcon.getGenome(), args); SEEDConfig scon = new SEEDConfig(gcon, args); if(scon.helpWanted()){ //System.out.println(DomainFinder.getDomainFinderArgs()); System.err.println(gcon.getArgsList()+ econ.getArgsList()+ scon.getArgsList()+ SuperEnhancerFinderHelp); }else{ ExperimentManager man = new ExperimentManager(econ); SuperEnhancerFinder finder = new SuperEnhancerFinder(gcon, econ, scon, man); System.err.println("\nBeginning Super Enhancer finding..."); finder.execute(); man.close(); } } /** * SuperEnhancetFindetThread that searches for super enhancers * @author akshaykakumanu * */ public class SuperEnhancerFinderThread extends DomainFinderThread{ public SuperEnhancerFinderThread(List<Region> regs) { super(regs); } /** * findFeatures: find potentially enriched domains on the genome by comparing tag counts to a background model, * and then finds super enhancers as described in ...et.al */ public Map<ExperimentCondition, List<Feature>> findFeatures(Region subRegion){ // Get the enriched domains using the functionality from the superclass (DomainFinderThread) // and the over-ridden processDomain methods below return super.findFeatures(subRegion); } /** * */ protected Map<ExperimentCondition, List<EnrichedFeature>> processDomains(Map<ExperimentCondition,List<EnrichedFeature>> currFeatures, Region currSubRegion){ Map<ExperimentCondition, List<EnrichedFeature>> superEnhancers = new HashMap<ExperimentCondition, List<EnrichedFeature>>(); for (ExperimentCondition ec : manager.getConditions()){ superEnhancers.put(ec, new ArrayList<EnrichedFeature>()); } // Removing all enriched domains close to promoters for (ExperimentCondition ec : manager.getConditions()){ keepOnlyDistalFeature(currFeatures.get(ec)); } // Remove enriched domains less than 100bp. Playing around with the parameter at the moment (Needed especially when extending reads) // Also removes enriched feature that are have a p-value < 0.001(PER_ENRICHED_FEATURE_THRES). Playing around with the parameter at the moment for (ExperimentCondition ec : manager.getConditions()){ Iterator<EnrichedFeature> it = currFeatures.get(ec).iterator(); while(it.hasNext()){ EnrichedFeature ef = it.next(); Map<Sample, List<StrandedBaseCount>> efHitsPos = overlappingHits(hitsPos, ef); Map<Sample, List<StrandedBaseCount>> efHitsNeg = overlappingHits(hitsNeg, ef); quantifyFeature(ef,efHitsPos,efHitsNeg,ec); if(ef.getCoords().getWidth() < MIN_ENRICHED_FEATURE_LENGHT || ef.getScore() >= PER_ENRICHED_FEATURE_THRES){ it.remove(); } } } // Do a bunch of more operations of the enriched features for(ExperimentCondition ec : manager.getConditions()){ // Stitching typical enhancers into super enhancers List<SuperEnrichedFeature> stitchedTPEs = this.stitchTypicalEnhancers(currFeatures.get(ec)); // Removing SEs spanning more than 1 TSSs removeMultiGeneSpanningSEs(stitchedTPEs); // Quantify the features for(SuperEnrichedFeature sfe : stitchedTPEs){ Map<Sample, List<StrandedBaseCount>> sfeHitsPos = overlappingHits(hitsPos, sfe); Map<Sample, List<StrandedBaseCount>> sfeHitsNeg = overlappingHits(hitsNeg, sfe); //Trim the coordinates trimFeature(sfe, sfeHitsPos, sfeHitsNeg, ec); //Quantify the feature in each Sample and in the condition in which it was found quantifyFeature(sfe,sfeHitsPos,sfeHitsNeg,ec); } superEnhancers.get(ec).addAll(stitchedTPEs); } return superEnhancers; } /* * Keep only those enriched domains that are distal to known TSSs */ protected void keepOnlyDistalFeature(List<EnrichedFeature> enrichedFeatures){ Map<String, List<StrandedPoint>> refTSSsbyChrom = new HashMap<String, List<StrandedPoint>>(); for(StrandedPoint sp: refTSSs){ if(refTSSsbyChrom.containsKey(sp.getChrom())){refTSSsbyChrom.get(sp.getChrom()).add(sp);} else{refTSSsbyChrom.put(sp.getChrom(),new ArrayList<StrandedPoint>()); refTSSsbyChrom.get(sp.getChrom()).add(sp);} } Iterator<EnrichedFeature> it = enrichedFeatures.iterator(); while(it.hasNext()){ EnrichedFeature ef = it.next(); int minDistance = Integer.MAX_VALUE; if(refTSSsbyChrom.containsKey(ef.getCoords().getChrom())){ for(StrandedPoint sp : refTSSsbyChrom.get(ef.getCoords().getChrom())){ int dis = Math.min(sp.distance(ef.getCoords().startPoint()),sp.distance(ef.getCoords().endPoint())); if(dis < minDistance){ minDistance = dis; } } } if(minDistance <= minDistalDistance ){ it.remove(); } } } protected List<SuperEnrichedFeature> stitchTypicalEnhancers(List<EnrichedFeature> enhancers){ List<SuperEnrichedFeature> superEnhancers = new ArrayList<SuperEnrichedFeature>(); SuperEnrichedFeature lastadded = null; try { for(EnrichedFeature ef : enhancers){ if(lastadded == null){ superEnhancers.add(new SuperEnrichedFeature(ef)); lastadded = superEnhancers.get(superEnhancers.size()-1); }else{ if(lastadded.getLastTEF().getCoords().getMidpoint().distance(ef.getCoords().getMidpoint()) < superStitchWin){ superEnhancers.get(superEnhancers.size()-1).addTypicalFeature(ef); lastadded = superEnhancers.get(superEnhancers.size()-1); }else{ superEnhancers.add(new SuperEnrichedFeature(ef)); lastadded = superEnhancers.get(superEnhancers.size()-1); } } } } catch (Exception e) { e.printStackTrace(); } return superEnhancers; } protected void removeMultiGeneSpanningSEs(List<SuperEnrichedFeature> SEs){ Map<String, List<StrandedPoint>> refTSSsbyChrom = new HashMap<String, List<StrandedPoint>>(); for(StrandedPoint sp: refTSSs){ if(refTSSsbyChrom.containsKey(sp.getChrom())){refTSSsbyChrom.get(sp.getChrom()).add(sp);} else{refTSSsbyChrom.put(sp.getChrom(),new ArrayList<StrandedPoint>()); refTSSsbyChrom.get(sp.getChrom()).add(sp);} } Iterator<SuperEnrichedFeature> it = SEs.iterator(); while(it.hasNext()){ int numTSSs = 0; SuperEnrichedFeature sef = it.next(); if(refTSSsbyChrom.containsKey(sef.getCoords().getChrom())){ for(StrandedPoint sp : refTSSs){ if(sef.getCoords().contains(sp)){ numTSSs++; } } } if(numTSSs > NO_OVERLAPPING_TSS_ALLOWED){ it.remove(); } } } } }