package org.seqcode.deepseq.utils;
import java.io.FileWriter;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import org.seqcode.deepseq.StrandedBaseCount;
import org.seqcode.deepseq.StrandedBaseCountFilterByBase;
import org.seqcode.deepseq.experiments.ControlledExperiment;
import org.seqcode.deepseq.experiments.ExperimentCondition;
import org.seqcode.deepseq.experiments.ExperimentManager;
import org.seqcode.deepseq.experiments.ExptConfig;
import org.seqcode.genome.GenomeConfig;
import org.seqcode.genome.location.NamedRegion;
import org.seqcode.genome.location.Region;
import org.seqcode.gsebricks.verbs.location.ChromRegionIterator;
import org.seqcode.gseutils.Args;
import org.seqcode.gseutils.NotFoundException;
/**
* Outputs a GeneTrack index format file for a deep-seq experiment.
*
* @author Shaun Mahony
* @version %I%, %G%
*/
public class IDXExporter {
protected ExperimentManager manager;
protected GenomeConfig gcon=null;
protected ExptConfig econ=null;
protected int [] stackedHitCountsPos;
protected int [] stackedHitCountsNeg;
protected String outName="out";
protected char baseLimit='.'; //Only use tags with this character at baseLimitRelPosition relative to 5' end (. = use all tags)
protected int baseLimitRelPosition=0;
protected StrandedBaseCountFilterByBase sbcFilter;
protected boolean filterByBase=false;
public static void main(String[] args) throws SQLException, NotFoundException {
IDXExporter exporter = new IDXExporter(args);
exporter.execute();
exporter.close();
}
public IDXExporter(String [] args) {
if(args.length==0){
System.err.println("IDXExporter usage:\n" +
GenomeConfig.getArgsList()+"\n"+
ExptConfig.getArgsList()+"n"+
"IDXExporter:\n"+
"\t--out <output file name>\n"+
"\t--baselimit <./A/C/G/T: only use tags with this base at below position>\n" +
"\t--baselimitposition <only use tags with above base at this position>\n");
System.exit(1);
}else{
gcon = new GenomeConfig(args);
econ = new ExptConfig(gcon.getGenome(), args);
manager = new ExperimentManager(econ);
outName = Args.parseString(args,"out",outName);
baseLimit = Args.parseString(args, "baselimit", ".").charAt(0);
baseLimitRelPosition = Args.parseInteger(args, "baselimitposition", 0);
if(baseLimit!='.'){
sbcFilter = new StrandedBaseCountFilterByBase(gcon, baseLimit, baseLimitRelPosition);
filterByBase=true;
}
}
}
public void execute(){
for(ExperimentCondition c : manager.getConditions()){
for(ControlledExperiment rep : c.getReplicates()){
System.err.println("Condition "+c.getName()+":\tRep "+rep.getName());
try {
FileWriter fw = new FileWriter(outName+"."+c.getName()+"."+rep.getName()+".idx");
fw.write("chrom\tindex\tforward\treverse\tvalue\n");
double basesDone=0, printStep=10000000, numPrint=0;
ChromRegionIterator chroms = new ChromRegionIterator(gcon.getGenome());
while(chroms.hasNext()){
NamedRegion currentRegion = chroms.next();
//Split the job up into chunks of 100Mbp
for(int x=currentRegion.getStart(); x<=currentRegion.getEnd(); x+=100000000){
int y = x+100000000;
if(y>currentRegion.getEnd()){y=currentRegion.getEnd();}
Region currSubRegion = new Region(gcon.getGenome(), currentRegion.getChrom(), x, y);
List<StrandedBaseCount> hits = rep.getSignal().getBases(currSubRegion);
if(filterByBase)
hits = sbcFilter.execute(currSubRegion, hits);
double stackedHitCountsPos[] = make5PrimeLandscape(hits, currSubRegion, '+');
double stackedHitCountsNeg[] = make5PrimeLandscape(hits, currSubRegion, '-');
//Scan regions
for(int i=currSubRegion.getStart(); i<currSubRegion.getEnd(); i++){
int offset = i-currSubRegion.getStart();
double posHits=stackedHitCountsPos[offset];
double negHits=stackedHitCountsNeg[offset];
double sum = posHits+negHits;
if(posHits>0 || negHits>0){
fw.write("chr"+currSubRegion.getChrom()+"\t"+i+"\t"+String.format("%.0f\t%.0f\t%.0f", posHits, negHits, sum) +"\n");
}
//Print out progress
basesDone++;
if(basesDone > numPrint*printStep){
if(numPrint%10==0){System.out.print(String.format("(%.0f)", (numPrint*printStep)));}
else{System.out.print(".");}
if(numPrint%50==0 && numPrint!=0){System.out.print("\n");}
numPrint++;
}
}
}
}
System.out.print("\n");
fw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
public void close(){
if(manager!=null)
manager.close();
}
protected double[] make5PrimeLandscape(List<StrandedBaseCount> hits, Region currReg, char strand){
double[] startcounts = new double[(int)currReg.getWidth()+1];
for(int i=0; i<=currReg.getWidth(); i++){startcounts[i]=0;}
for(StrandedBaseCount r : hits){
if(strand=='.' || r.getStrand()==strand){
if(r.getCoordinate()>=currReg.getStart() && r.getCoordinate()<=currReg.getEnd()){
int offset5=inBounds(r.getCoordinate()-currReg.getStart(),0,currReg.getWidth());
startcounts[offset5]+=r.getCount();
}
}
}
return(startcounts);
}
//keep the number in bounds
protected final double inBounds(double x, double min, double max){
if(x<min){return min;}
if(x>max){return max;}
return x;
}
protected final int inBounds(int x, int min, int max){
if(x<min){return min;}
if(x>max){return max;}
return x;
}
}