package org.seqcode.tutorials; import java.util.Iterator; import java.util.List; import org.seqcode.deepseq.StrandedBaseCount; import org.seqcode.deepseq.experiments.ControlledExperiment; import org.seqcode.deepseq.experiments.ExperimentManager; import org.seqcode.deepseq.experiments.ExptConfig; import org.seqcode.genome.Genome; import org.seqcode.genome.GenomeConfig; import org.seqcode.genome.location.Region; import org.seqcode.gsebricks.verbs.location.ChromosomeGenerator; /** * Simple package testing class to cache a set of experiments and print total hit counts. * * Unit testing: * java -Xmx2G org.seqcode.deepseq.utils.SeqExperimentLoadingExample --species "Mus musculus;mm9" --rdbexptC1 "ES2MN Day0(iCdx2.V5+Dox) iCdx2.V5 Ainv15_iCdx2.V5;1;bowtie_unique" --fixedpb 100000 (--nocache) * Result: C1:rep1 4700200.0 4700200.0 * * @author mahony * */ public class SeqExperimentLoadingExample { final int MAXSECTION = 50000000; GenomeConfig gconfig; ExptConfig econfig; ExperimentManager manager; public static void main(String[] args){ //GenomeConfig and ExptConfig read various command-line options in order to load requested genome and sequencing datasets (resp.) GenomeConfig gconfig = new GenomeConfig(args); ExptConfig econfig = new ExptConfig(gconfig.getGenome(), args); if(args.length==0 || gconfig.helpWanted()){ System.err.println("TestExptLoading\n"+gconfig.getArgsList()+"\n"+econfig.getArgsList()); } //Everything required to get the data requested from the command-line args is now in the *Config objects SeqExperimentLoadingExample tel = new SeqExperimentLoadingExample(gconfig, econfig); tel.execute(); //Always close connections at the end of processing tel.close(); } public SeqExperimentLoadingExample(GenomeConfig g, ExptConfig e){ this.gconfig = g; this.econfig = e; } /** * The functionality here is trivial - just counts the number of hits in the experiment. * The point is to demonstrate iteration over chromosomes and * to test read loading per chromosome. */ public void execute(){ //ExperimentManager initializes the data structures associated with the experimental data pointed to on cmd line manager = new ExperimentManager(econfig); //Appropriate Genome will already be loaded by GenomeConfig Genome gen = gconfig.getGenome(); //If we have multiple experiments, process one at a time for(ControlledExperiment expt : manager.getReplicates()){ float totalCount = 0; //Iterate over chromosomes in the genome Iterator<Region> chroms = new ChromosomeGenerator().execute(gen); while (chroms.hasNext()) { double chrCount =0; Region currentRegion = chroms.next(); //Split the chromosome up into large chunks for(int x=currentRegion.getStart(); x<=currentRegion.getEnd(); x+=MAXSECTION){ int y = x+MAXSECTION; if(y>currentRegion.getEnd()){y=currentRegion.getEnd();} Region currSubRegion = new Region(gen, currentRegion.getChrom(), x, y); //Get hits in this subregion from the current experiment's signal track List<StrandedBaseCount> ipHits = expt.getSignal().getBases(currSubRegion); for(StrandedBaseCount z : ipHits){ totalCount+=z.getCount(); chrCount += z.getCount(); } } System.out.println("\t"+currentRegion.getLocationString()+"\t"+chrCount); } System.out.println(expt.getName()+"\t"+expt.getSignal().getHitCount()+"\t"+totalCount); } } protected void close(){ //Always close database connections (if applicable) via a call to ExperimentManager.close() manager.close(); } }