package org.seqcode.deepseq.hitloaders; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import org.seqcode.deepseq.HitPair; import org.seqcode.deepseq.Read; import org.seqcode.deepseq.ReadHit; /** * HitLoaders load alignment hits & pairs from various sources, including ReadDB and various files. * Five-prime positions and associated weight sums are loaded into ArrayLists. * Pairing information is loaded if requested and if it exits. * Where/how those hits & pairs are sourced is implementation-specific. * * Five prime positions and weights are loaded into two collections of ArrayLists, where the collections are indexed by chromosome name. * Within each chromosome's set, a 2D array of ArrayLists collects data for each strand. * However, the ArrayLists are temporary -- once a Sample loads the hits into primitive arrays, the ArrayLists are reset and the * garbage collector is called. * * @author mahony * This class combines functionality from ReadLoaders, AlignmentFileReaders, and ReadCache in the old setup. */ public abstract class HitLoader { protected boolean loadType1=true; //Load type1 reads protected boolean loadType2=false; //Load type2 reads (if exists) protected boolean loadRead2=true; //Load read 2 in paired-end protected boolean loadPairs=false; //Load pair information (if exists) protected boolean hasPairs = false; //Flag to say there are pairs in the sample protected double totalHits; //totalHits is the sum of alignment weights protected String sourceName=""; //String describing the source /** * Five prime ends of the read hits. <br> * HashMap is indexed by chromosome name. <br> * Dimension in the array of ArrayLists represents the strand. 0 for '+', 1 for '-' */ private HashMap<String, ArrayList<Integer>[]> fivePrimePosList = null; /** * Sum of read hit weights that corresponds to the 5' position * HashMap is indexed by chromosome name. <br> * Dimension in the array of ArrayLists represents the strand. 0 for '+', 1 for '-' * Ordering of each ArrayList is the same as fivePrimePosList */ private HashMap<String, ArrayList<Float>[]> fivePrimeCountsList = null; /** * R2 read hit pairing information for each R1 read hit (if pairs exist) * HashMap is indexed by R1 read chromosome name. <br> * Dimension in the array of ArrayLists represents the R1 read strand. 0 for '+', 1 for '-' * Ordering of each ArrayList is the same as fivePrimePosList. * */ private HashMap<String, ArrayList<HitPair>[]> hitPairsList = null; /** * Constructor * @param g Genome */ public HitLoader(boolean loadT1, boolean loadT2, boolean loadRead2, boolean loadPairs){ this.loadType1=loadT1; this.loadType2=loadT2; this.loadRead2 = loadRead2; this.loadPairs=loadPairs; totalHits=0; } //Accessors public boolean hasPairedReads(){return hasPairs;} public double getHitCount(){return(totalHits);} public String getSourceName(){return sourceName;} public HashMap<String, ArrayList<Integer>[]> getFivePrimePositions(){return fivePrimePosList;} public HashMap<String, ArrayList<Float>[]> getFivePrimeCounts(){return fivePrimeCountsList;} public HashMap<String, ArrayList<HitPair>[]> getPairs(){return hitPairsList;} //Abstract methods /** * Get all hits from the appropriate source (implementation-specific). * Loads single end data to the fivePrimePosList and fivePrimeCountsList. * Enforcing which reads to load (Type1 and/or Type2) is also implementation-specific. * Loads pairs to hitPairsList (if requested & if they exist). * */ public abstract void sourceAllHits(); //Shared methods /** * Initialize the genome and data structures. Source hits for the lists */ public void initialize(){ resetLoader(); fivePrimePosList = new HashMap<String, ArrayList<Integer>[]>(); fivePrimeCountsList = new HashMap<String, ArrayList<Float>[]>(); if(loadPairs) hitPairsList = new HashMap<String, ArrayList<HitPair>[]>(); } /** * Reset the loaders -- destroy the lists and call the garbage collector */ public void resetLoader(){ //Free memory if(fivePrimePosList!=null){ for(String chr: fivePrimePosList.keySet()){ fivePrimePosList.get(chr)[0].clear(); fivePrimePosList.get(chr)[1].clear(); } fivePrimePosList.clear(); } if(fivePrimeCountsList!=null){ for(String chr: fivePrimeCountsList.keySet()){ fivePrimeCountsList.get(chr)[0].clear(); fivePrimeCountsList.get(chr)[1].clear(); } fivePrimeCountsList.clear(); } if(loadPairs && hitPairsList!=null){ for(String chr: hitPairsList.keySet()){ hitPairsList.get(chr)[0].clear(); hitPairsList.get(chr)[1].clear(); } hitPairsList.clear(); } System.gc(); } /** * Add hits to the list data structures. * It may be called multiple times to retrieve all the data, then populateArrays() is called */ protected void addHits(String chrom, char strand, Collection<Integer> coords, Collection<Float> counts){ int strandInd = strand == '+' ? 0 : 1; if(!fivePrimePosList.containsKey(chrom)) addChr(chrom); fivePrimePosList.get(chrom)[strandInd].addAll(coords); fivePrimeCountsList.get(chrom)[strandInd].addAll(counts); for (float c: counts) totalHits += c; }//end of addHits method /** * Add hits to the list data structures from a Read * @param r Read */ protected void addHits(Read r){ for(ReadHit h : r.getHits()){ char strand = h.getStrand(); int strandInd = strand == '+' ? 0 : 1; if(!fivePrimePosList.containsKey(h.getChrom())) addChr(h.getChrom()); fivePrimePosList.get(h.getChrom())[strandInd].add(strand == '+' ?h.getStart():h.getEnd()); fivePrimeCountsList.get(h.getChrom())[strandInd].add(h.getWeight()); totalHits++; } }//end of addHits method /** * Add paired hit information to the list data structure * @param HitPair collection */ protected void addPairs(String chrom, char strand, Collection<HitPair> pairs){ if(!hasPairs){ //This is the first pair being added. hasPairs=true; } int strandInd = strand == '+' ? 0 : 1; if(!hitPairsList.containsKey(chrom)) addChr(chrom); hitPairsList.get(chrom)[strandInd].addAll(pairs); } /** * Add paired hit information to the list data structure * @param HitPair */ protected void addPair(String chrom, char strand, HitPair pair){ if(!hasPairs){ //This is the first pair being added. hasPairs=true; } int strandInd = strand == '+' ? 0 : 1; if(!hitPairsList.containsKey(chrom)) addChr(chrom); hitPairsList.get(chrom)[strandInd].add(pair); } /** * Add a chromosome to the hit lists * @param chr String */ protected void addChr(String chr){ ArrayList<Integer>[] currIArrayList = new ArrayList[2]; currIArrayList[0]=new ArrayList<Integer>(); currIArrayList[1]=new ArrayList<Integer>(); fivePrimePosList.put(chr, currIArrayList); ArrayList<Float>[] currFArrayList = new ArrayList[2]; currFArrayList[0]=new ArrayList<Float>(); currFArrayList[1]=new ArrayList<Float>(); fivePrimeCountsList.put(chr, currFArrayList); if(loadPairs){ ArrayList<HitPair>[] currPArrayList = new ArrayList[2]; currPArrayList[0]=new ArrayList<HitPair>(); currPArrayList[1]=new ArrayList<HitPair>(); hitPairsList.put(chr, currPArrayList); } } /** * Perform any necessary cleanup. For ReadDB, this means close the clients. */ public abstract void cleanup(); }