package org.seqcode.deepseq.experiments; import java.util.ArrayList; import java.util.Collection; import java.util.List; import org.seqcode.deepseq.ExtReadHit; import org.seqcode.deepseq.ReadHit; import org.seqcode.deepseq.StrandedBaseCount; import org.seqcode.deepseq.StrandedPair; import org.seqcode.deepseq.hitloaders.*; import org.seqcode.genome.Genome; import org.seqcode.genome.location.Region; /** * Sample represents a single experimental sample whose hits are sourced from one or more HitLoaders. * * @author mahony * */ public class Sample { private int index; private Collection<HitLoader> loaders; private HitCache cache=null; private ExptConfig econfig; private Genome gen; protected String name; protected String sourceName=""; //String describing the source files or DBIDs protected double totalHits; //totalHits is the sum of alignment weights protected double totalHitsPos; //totalHitsPos is the sum of alignment weights on the plus strand protected double totalHitsNeg; //totalHitsNeg is the sum of alignment weights on the minus strand protected double uniqueHits; //count of unique mapped positions (just counts the number of bases with non-zero counts - does not treat non-uniquely mapped positions differently) protected double totalPairs=0; //count of the total number of paired hits protected double uniquePairs=0; //count of the total number of unique paired hits protected float maxReadsPerBP=-1; protected boolean isSignal=true; /** * Constructor * @param g Genome (can be null to estimate from data) * @param name String */ public Sample(int index, ExptConfig c, String name, float perBaseReadMax, boolean signal){ this.index = index; this.name = name; econfig = c; gen=c.getGenome(); totalHits=0; loaders = new ArrayList<HitLoader>(); maxReadsPerBP= perBaseReadMax; isSignal = signal; } //Accessors public int getIndex(){return index;} public Genome getGenome(){return(gen);} public String getName(){return name;} public String getSourceName(){return sourceName;} public double getHitCount(){return(totalHits);} public double getStrandedHitCount(char strand){return(strand=='+' ? totalHitsPos : totalHitsNeg);} public double getHitPositionCount(){return(uniqueHits);} public double getPairCount(){return(totalPairs);} public double getUniquePairCount(){return(uniquePairs);} public void setGenome(Genome g){gen=g; cache.setGenome(g);} public boolean isSignal(){return isSignal;} /** * Add a HitLoader to the set * @param h HitLoader */ public void addHitLoader(HitLoader h){ loaders.add(h); sourceName= sourceName.equals("") ? h.getSourceName() : sourceName+";"+h.getSourceName(); } /** * Initialize the cache * @param cacheEntireGenome : boolean to keep the full set of hits cached * @param initialCachedRegions : list of regions to keep cached at the start (can be null) */ public void initializeCache(boolean cacheEntireGenome, List<Region> initialCachedRegions){ cache = new HitCache(econfig.getLoadPairs(), econfig, loaders, maxReadsPerBP, cacheEntireGenome, initialCachedRegions); totalHits = cache.getHitCount(); totalHitsPos = cache.getHitCountPos(); totalHitsNeg = cache.getHitCountNeg(); uniqueHits = cache.getHitPositionCount(); totalPairs = cache.getPairCount(); uniquePairs = cache.getUniquePairCount(); if(gen==null) gen = cache.getGenome(); } /** * Load all base counts in a region, regardless of strand. * If caching in local files, group calls to this method by same chromosome. * @param r Region * @return List of StrandedBaseCounts */ public List<StrandedBaseCount> getBases(Region r) { return cache.getBases(r); } /** * Loads hits from a given strand in the region. * If caching in local files, group calls to this method by same chromosome. * @param r Region * @return List of StrandedBaseCounts */ public List<StrandedBaseCount> getStrandedBases(Region r, char strand) { return cache.getStrandedBases(r, strand); } /** * Load all paired hits that have an R1 read in a region. * If caching in local files, group calls to this method by same chromosome. * @param r Region * @return List of StrandedBaseCounts */ public List<StrandedPair> getPairs(Region r) { return cache.getPairs(r); } /** * Sum of all hit weights in a region. * If caching in local files, group calls to this method by same chromosome. * @param r Region * @return float */ public float countHits(Region r) { return cache.countHits(r); } /** * Sum of hit weights in one strand of a region. * If caching in local files, group calls to this method by same chromosome. * @param r Region * @return float */ public float countStrandedBases(Region r, char strand) { return cache.countStrandedBases(r, strand); } /** * Covert all hits into ReadHits for a given region * @param r * @param readLen */ public List<ReadHit> exportReadHits(Region r, int readLen){ return cache.exportReadHits(r, readLen); } /** * Convert all hits into ReadHits * @param readLen * @return */ public List<ReadHit> exportReadHits(int readLen){ return(cache.exportReadHits(readLen)); } public List<ExtReadHit> exportExtReadHits(Region r, int readLen, int startShift, int fivePrimeExt, int threePrimeExt){ return(cache.exportExtReadHits(r, readLen, startShift, fivePrimeExt, threePrimeExt)); } /** * Simple count correction with a scaling factor and a floor of one. * Beware: only works if all reads are loaded. * @param perBaseScaling float threshold */ public void linearCountCorrection(float perBaseScaling){ cache.linearCountCorrection(perBaseScaling); } /** * Cleanup */ public void close(){ cache.close(); } }