package org.seqcode.data.seqdata.tools;
import java.util.*;
import java.sql.*;
import java.io.*;
import org.seqcode.genome.Genome;
import org.seqcode.genome.location.ExonicGene;
import org.seqcode.genome.location.Gene;
import org.seqcode.genome.location.Region;
import org.seqcode.genome.location.StrandedRegion;
import org.seqcode.gsebricks.verbs.*;
import org.seqcode.gsebricks.verbs.location.RefGeneGenerator;
import org.seqcode.gseutils.Args;
import org.seqcode.gseutils.NotFoundException;
public class GeneBasedReport {
/**
* java org.seqcode.gse.tools.chipseq.GeneBasedReport --species "$MM;mm9" \
* --genes refGene [--proxup 5000] [--proxdown200] [--up 10000] [--intronlen 10000]
* [--noalias]
* --regions foo.txt
*
* --regions - to read from STDIN
*
* Output columns are
* 0) gene name
* 1) positions of distal binding events
* 2) positions of proximal binding events
* 3) positions of intronic or exonic binding events
*/
private Genome genome;
private List<Region> allRegions;
private List<RefGeneGenerator> geneGenerators;
private int proxup, proxdown, up, intronlen, analysisdbid;
private boolean firstIntron, noAlias;
public static void main(String args[]) throws Exception {
GeneBasedReport report = new GeneBasedReport();
report.parseArgs(args);
report.getRegions(args);
report.report();
}
public Genome getGenome() {return genome;}
public void parseArgs(String args[]) throws SQLException, NotFoundException {
genome = Args.parseGenome(args).cdr();
geneGenerators = Args.parseGenes(args);
proxup = Args.parseInteger(args,"proxup",4000);
up = Args.parseInteger(args,"up",10000);
intronlen = Args.parseInteger(args,"intronlen",10000);
proxdown = Args.parseInteger(args,"proxdown",200);
firstIntron = Args.parseFlags(args).contains("firstintron");
noAlias = Args.parseFlags(args).contains("noalias");
if (intronlen < proxdown) {
intronlen = proxdown + 1;
}
}
public void getRegions (String args[]) throws IOException, NotFoundException{
allRegions = Args.readLocations(args,"regions");
}
public Collection<? extends Region> getOverlappingRegions(Region target) throws Exception {
ArrayList<Region> output = new ArrayList<Region>();
for (Region r : allRegions) {
if (r.overlaps(target)) {
output.add(r);
}
}
return output;
}
public void report() throws Exception {
ArrayList<Region> proxevents = new ArrayList<Region>(),
distalevents= new ArrayList<Region>() , intronevents = new ArrayList<Region>();
for (RefGeneGenerator generator : geneGenerators) {
generator.retrieveExons(firstIntron);
generator.setWantAlias(!noAlias);
generator.setWantSymbol(!noAlias);
Iterator<Gene> all = generator.getAll();
while (all.hasNext()) {
Gene g = all.next();
proxevents.clear(); distalevents.clear(); intronevents.clear();
StrandedRegion wholeRegion = g.expand(up,0);
int thisintronlen = Math.min(intronlen, g.getWidth());
Region distalPromoter = g.getStrand() == '+' ? new Region(g.getGenome(), g.getChrom(), g.getStart() - up, g.getStart() - proxup) :
new Region(g.getGenome(), g.getChrom(), g.getEnd() + proxup, g.getEnd() + up);
Region proximalPromoter = g.getStrand() == '+' ? new Region(g.getGenome(), g.getChrom(), g.getStart() - proxup, g.getStart() + proxdown) :
new Region(g.getGenome(), g.getChrom(), g.getEnd() - proxdown, g.getEnd() + proxup);
Region intronicRegion = null;
if (firstIntron ) {
Iterator<Region> iter = ((ExonicGene)g).getExons();
while (iter.hasNext()) {
Region intron = iter.next();
if (intronicRegion == null || (intron.distance(proximalPromoter) < intronicRegion.distance(proximalPromoter))) {
intronicRegion = intron;
}
}
if (intronicRegion == null) {
intronicRegion = new Region(g.getGenome(), g.getChrom(), 1,1);
}
} else {
intronicRegion = g.getStrand() == '+' ? new Region(g.getGenome(), g.getChrom(), g.getStart() + proxdown, g.getStart() + proxdown + thisintronlen) :
new Region(g.getGenome(), g.getChrom(), g.getEnd() - proxdown - thisintronlen, g.getEnd() - proxdown);
}
for (Region r : getOverlappingRegions(wholeRegion)) {
if (r.overlaps(distalPromoter)) {
distalevents.add(r);
} else if (r.overlaps(proximalPromoter)) {
proxevents.add(r);
} else if (r.overlaps(intronicRegion)) {
intronevents.add(r);
}
}
System.out.println(String.format("%s\t%s\t%s\t%s",
g.getName(),
distalevents.toString(),
proxevents.toString(),
intronevents.toString()));
}
}
}
}