package org.seqcode.deepseq.utils;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import org.seqcode.data.io.RegionFileUtilities;
import org.seqcode.deepseq.StrandedBaseCount;
import org.seqcode.deepseq.experiments.ControlledExperiment;
import org.seqcode.deepseq.experiments.ExperimentCondition;
import org.seqcode.deepseq.experiments.ExperimentManager;
import org.seqcode.deepseq.experiments.ExptConfig;
import org.seqcode.genome.GenomeConfig;
import org.seqcode.genome.location.NamedRegion;
import org.seqcode.genome.location.Point;
import org.seqcode.genome.location.Region;
import org.seqcode.gsebricks.verbs.location.ChromRegionIterator;
import org.seqcode.gseutils.ArgParser;
import org.seqcode.gseutils.Args;
/**
* This class aims to test the sequencing read coverage at a set of points (e.g. coverage overlapping a set of SNPs).
*
* @author mahony
*
*/
public class PointCoverage {
private GenomeConfig gConfig;
private ExptConfig eConfig;
private List<Point> testSites;
private ExperimentManager manager=null;
private String outName = "out";
private Integer readLen = 36;
public PointCoverage(GenomeConfig gcon, ExptConfig econ, List<Point> sites, int readLength){
gConfig = gcon;
eConfig = econ;
testSites = sites;
manager = new ExperimentManager(eConfig);
readLen = readLength;
}
public void execute(){
for(ExperimentCondition c : manager.getConditions()){
for(ControlledExperiment rep : c.getReplicates()){
System.err.println("Condition "+c.getName()+":\tRep "+rep.getName());
try {
FileWriter fw = new FileWriter(outName+"."+c.getName()+"."+rep.getName()+".point-coverage.txt");
ChromRegionIterator chroms = new ChromRegionIterator(gConfig.getGenome());
while(chroms.hasNext()){
NamedRegion currentRegion = chroms.next();
//Split the job up into chunks of 100Mbp
for(int x=currentRegion.getStart(); x<=currentRegion.getEnd(); x+=100000000){
int y = x+100000000;
if(y>currentRegion.getEnd()){y=currentRegion.getEnd();}
Region currSubRegion = new Region(gConfig.getGenome(), currentRegion.getChrom(), x, y);
List<StrandedBaseCount> hits = rep.getSignal().getBases(currSubRegion);
double stackedReadCoverage[] = makeReadCoverageLandscape(hits, currSubRegion);
//Get coverage of points that lie within the current region
for(Point pt : testSites){
if(currSubRegion.contains(pt)){
int offset = pt.getLocation()-currSubRegion.getStart();
double sum=stackedReadCoverage[offset];
fw.write("chr"+pt.getChrom()+":"+pt.getLocation()+"\t"+String.format("%.0f", sum) +"\n");
}
}
}
}
System.out.print("\n");
fw.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
}
protected double[] makeReadCoverageLandscape(List<StrandedBaseCount> hits, Region currReg){
double[] counts = new double[(int)currReg.getWidth()+1];
for(int i=0; i<=currReg.getWidth(); i++){counts[i]=0;}
for(StrandedBaseCount r : hits){
if(r.getCoordinate()>=currReg.getStart() && r.getCoordinate()<=currReg.getEnd()){
if(r.getStrand()=='+'){
int offsetStart=inBounds(r.getCoordinate()-currReg.getStart(),0,currReg.getWidth());
int offsetEnd=inBounds(r.getCoordinate()+readLen-currReg.getStart(),0,currReg.getWidth());
for(int o=offsetStart; o<offsetEnd; o++)
counts[o]+=r.getCount();
}else{
int offsetStart=inBounds(r.getCoordinate()-readLen+1-currReg.getStart(),0,currReg.getWidth());
int offsetEnd=inBounds(r.getCoordinate()-currReg.getStart()+1,0,currReg.getWidth());
for(int o=offsetStart; o<offsetEnd; o++)
counts[o]+=r.getCount();
}
}
}
return(counts);
}
//keep the number in bounds
protected final double inBounds(double x, double min, double max){
if(x<min){return min;}
if(x>max){return max;}
return x;
}
protected final int inBounds(int x, int min, int max){
if(x<min){return min;}
if(x>max){return max;}
return x;
}
public void close(){
if(manager !=null)
manager.close();
}
/**
* @param args
*/
public static void main(String[] args) {
ArgParser ap = new ArgParser(args);
if(args.length==0 || ap.hasKey("h")){
System.err.println("PointCoverage:");
System.err.println("Genome:" +
"\t--species <Species;Genome>\n" +
"\tOR\n" +
"\t--geninfo <genome info file> AND --seq <fasta seq directory>\n" +
"Coverage Testing:\n" +
"\t--sites <test site coords>\n" +
"\t--readlen <read length>\n"
);
}else{
GenomeConfig gcon = new GenomeConfig(args);
ExptConfig econ = new ExptConfig(gcon.getGenome(), args);
List<Point> testSites = new ArrayList<Point>();
Collection<String> siteFiles = Args.parseStrings(args, "sites");
for(String sf : siteFiles)
testSites.addAll(RegionFileUtilities.loadPointsFromFile(sf, gcon.getGenome()));
Integer readL = Args.parseInteger(args, "readlen", 36);
PointCoverage rct = new PointCoverage(gcon, econ, testSites, readL);
rct.execute();
rct.close();
}
}
}