package org.seqcode.deepseq.utils; import java.io.FileWriter; import java.io.IOException; import java.sql.SQLException; import java.util.List; import org.seqcode.deepseq.StrandedBaseCount; import org.seqcode.deepseq.experiments.ControlledExperiment; import org.seqcode.deepseq.experiments.ExperimentCondition; import org.seqcode.deepseq.experiments.ExperimentManager; import org.seqcode.deepseq.experiments.ExptConfig; import org.seqcode.genome.GenomeConfig; import org.seqcode.genome.location.NamedRegion; import org.seqcode.genome.location.Region; import org.seqcode.gsebricks.verbs.location.ChromRegionIterator; import org.seqcode.gseutils.Args; import org.seqcode.gseutils.NotFoundException; public class BEDExporter { protected GenomeConfig gconfig; protected ExptConfig econfig; protected ExperimentManager manager; protected int [] stackedHitCountsPos; protected int [] stackedHitCountsNeg; final int MAXSECTION=50000000; private int readLength=1; private String outName="out"; public static void main(String[] args) throws SQLException, NotFoundException { GenomeConfig gconfig = new GenomeConfig(args); ExptConfig econfig = new ExptConfig(gconfig.getGenome(), args); if(args.length==0 || gconfig.helpWanted()){ System.err.println("BEDExporter usage:\n" + GenomeConfig.getArgsList()+"\n"+ ExptConfig.getArgsList()+"n"+ "BEDExporter:\n"+ "\t--readlen <read length>\n" + "\t--out <output file name>"); System.exit(1); }else{ String outName = Args.parseString(args,"out","out"); int readLen = Args.parseInteger(args,"readlen",40); BEDExporter exporter = new BEDExporter(gconfig, econfig, outName, readLen); exporter.execute(); exporter.close(); } } public BEDExporter(GenomeConfig gcon, ExptConfig econ, String out, int rL) { gconfig = gcon; econfig = econ; manager = new ExperimentManager(econfig); readLength = rL; } public void execute(){ for(ExperimentCondition c : manager.getConditions()){ for(ControlledExperiment rep : c.getReplicates()){ System.err.println("Condition "+c.getName()+":\tRep "+rep.getName()); try { FileWriter fw = new FileWriter(outName+"."+c.getName()+"."+rep.getName()+".bed"); //fw.write("chrom\tindex\tforward\treverse\tvalue\n"); double basesDone=0, printStep=10000000, numPrint=0; ChromRegionIterator chroms = new ChromRegionIterator(gconfig.getGenome()); while(chroms.hasNext()){ NamedRegion currentRegion = chroms.next(); //Split the job up into chunks of 100Mbp for(int x=currentRegion.getStart(); x<=currentRegion.getEnd(); x+=100000000){ int y = x+100000000; if(y>currentRegion.getEnd()){y=currentRegion.getEnd();} Region currSubRegion = new Region(gconfig.getGenome(), currentRegion.getChrom(), x, y); List<StrandedBaseCount> hits = rep.getSignal().getBases(currSubRegion); double stackedHitCountsPos[] = make5PrimeLandscape(hits, currSubRegion, '+'); double stackedHitCountsNeg[] = make5PrimeLandscape(hits, currSubRegion, '-'); //Scan regions for(int i=currSubRegion.getStart(); i<currSubRegion.getEnd(); i++){ int offset = i-currSubRegion.getStart(); double posHits=stackedHitCountsPos[offset]; double negHits=stackedHitCountsNeg[offset]; if(posHits>0 || negHits>0){ for(int hitc=0; hitc<(int)posHits; hitc++){ fw.write("chr"+currSubRegion.getChrom()+"\t"+i+"\t"+(i+readLength)+"\t"+"+"+"\n"); } for(int hitc=0; hitc<(int)negHits; hitc++){ fw.write("chr"+currSubRegion.getChrom()+"\t"+(i+1-readLength)+"\t"+(i+1)+"\t"+"-"+"\n"); } } //Print out progress basesDone++; if(basesDone > numPrint*printStep){ if(numPrint%10==0){System.out.print(String.format("(%.0f)", (numPrint*printStep)));} else{System.out.print(".");} if(numPrint%50==0 && numPrint!=0){System.out.print("\n");} numPrint++; } } } } System.out.print("\n"); fw.close(); } catch (IOException e) { e.printStackTrace(); } } } } public void close(){ manager.close(); } protected double[] make5PrimeLandscape(List<StrandedBaseCount> hits, Region currReg, char strand){ double[] startcounts = new double[(int)currReg.getWidth()+1]; for(int i=0; i<=currReg.getWidth(); i++){startcounts[i]=0;} for(StrandedBaseCount r : hits){ if(strand=='.' || r.getStrand()==strand){ if(r.getCoordinate()>=currReg.getStart() && r.getCoordinate()<=currReg.getEnd()){ int offset5=inBounds(r.getCoordinate()-currReg.getStart(),0,currReg.getWidth()); startcounts[offset5]+=r.getCount(); } } } return(startcounts); } //keep the number in bounds protected final double inBounds(double x, double min, double max){ if(x<min){return min;} if(x>max){return max;} return x; } protected final int inBounds(int x, int min, int max){ if(x<min){return min;} if(x>max){return max;} return x; } }