package org.seqcode.deepseq.hitloaders;
import java.io.IOException;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.TreeMap;
import org.seqcode.data.readdb.*;
import org.seqcode.data.seqdata.SeqAlignment;
import org.seqcode.data.seqdata.SeqDataLoader;
import org.seqcode.data.seqdata.SeqLocator;
import org.seqcode.deepseq.HitPair;
import org.seqcode.genome.Genome;
import org.seqcode.genome.location.Region;
import org.seqcode.gseutils.NotFoundException;
import org.seqcode.gseutils.Pair;
/**
* ReadDBHitLoader: load read alignment five primes from a collection of ReadDB alignments (SeqLocators)
* @author mahony
*
*/
public class ReadDBHitLoader extends HitLoader{
private Genome gen=null;
private Client client=null; //ReadDB client
private SeqDataLoader loader=null;
private List<SeqLocator> exptLocs;
private List<String> exptNames =new ArrayList<String>();
private List<SeqAlignment> aligns = new ArrayList<SeqAlignment>();
private Collection<String> alignIDs= new ArrayList<String>();
private HashMap<SeqAlignment, Set<Integer>> availSingleChroms = new HashMap<SeqAlignment, Set<Integer>>();
private HashMap<SeqAlignment, Set<Integer>> availSingleType2Chroms = new HashMap<SeqAlignment, Set<Integer>>();
private HashMap<SeqAlignment, Set<Integer>> availPairedChroms = new HashMap<SeqAlignment, Set<Integer>>();
private boolean hasPairedAligns=false;
private final int MAXRDBLOAD = 1000000; //Maximum number of hits to load from ReadDB in one chunk
/**
* Constructor: initialize the experiments.
* @param g Genome
* @param locs ChipSeqLocator
* @param loadType1Reads boolean (load standard single-end/paired-end reads)
* @param loadType2Reads boolean (load type 2 reads if they exist)
* @param loadPairs boolean
*/
public ReadDBHitLoader(SeqDataLoader loader, Genome g, List<SeqLocator> locs, boolean loadT1Reads, boolean loadT2Reads, boolean loadPairs){
super(loadT1Reads, loadT2Reads, true, loadPairs);
gen=g;
exptLocs = locs;
if(gen==null){
System.err.println("ReadDBHitLoader: null genome provided. ReadDB requires a defined genome. ");
}
if(!loadType1 && !loadType2 && !loadPairs){
System.err.println("ReadDBHitLoader: Error: you didn't request to load any read types. ");
}
if (exptLocs.size() == 0) {
System.err.println("Created a ReadDBHitLoader with no SeqLocators");
}
try {
//Start a new ReadDB client
if(client==null)
client = new Client();
//Process the SeqLocators
for(SeqLocator locator : exptLocs){
String exptName = locator.getExptName(); exptNames.add(exptName);
aligns.addAll(loader.loadAlignments(locator, gen));
}
for(SeqAlignment alignment : aligns) {
if(client.exists(Integer.toString(alignment.getDBID()))){
alignIDs.add(Integer.toString(alignment.getDBID()));
this.sourceName=this.sourceName.equals("") ?
alignment.getExpt().getName()+";"+alignment.getExpt().getReplicate()+";"+alignment.getName() :
this.sourceName+":"+alignment.getExpt().getName()+";"+alignment.getExpt().getReplicate()+";"+alignment.getName();
hasPairedAligns = hasPairedAligns || (alignment.getAlignType().getName().equals("PAIRED") || alignment.getAlignType().getName().equals("MIXED"));
//Find the available chromosomes for each alignment
availSingleChroms.put(alignment, new HashSet<Integer>());
if(alignment.getNumHits()>0){
availSingleChroms.get(alignment).addAll(client.getChroms(Integer.toString(alignment.getDBID()), false,false, null));
}
availSingleType2Chroms.put(alignment, new HashSet<Integer>());
if(alignment.getNumType2Hits()>0){
availSingleType2Chroms.get(alignment).addAll(client.getChroms(Integer.toString(alignment.getDBID()), true,false, null));
}
availPairedChroms.put(alignment, new HashSet<Integer>());
if(alignment.getNumPairs()>0){
availPairedChroms.get(alignment).addAll(client.getChroms(Integer.toString(alignment.getDBID()), false,true, null));
}
}else{
System.err.println("ReadDBHitLoader: Error: "+alignment.getExpt().getName()+";"+alignment.getExpt().getReplicate()+";"+alignment.getName()+"\tRDBID:"+alignment.getDBID()+" does not exist in ReadDB.");
System.exit(1);
}
}
} catch (IOException e) {
e.printStackTrace();
} catch (ClientException e) {
e.printStackTrace();
} catch (SQLException e) {
e.printStackTrace();
} catch (NotFoundException e) {
e.printStackTrace();
}
}
/**
* Load the five primes from ReadDB
*/
public void sourceAllHits(){
this.initialize();
try {
//Start a new ReadDB client
if(client==null)
client = new Client();
//Iterate over each chromosome
for (String chrom: gen.getChromList()){
// load data for this chromosome.
int length = gen.getChromLength(chrom);
Region wholeChrom = new Region(gen, chrom, 1, length);
int count = 0;
for(SeqAlignment alignment : aligns) {
if(loadType1)
if(availSingleChroms.get(alignment).contains(gen.getChromID(wholeChrom.getChrom()))){
count += client.getCount(Integer.toString(alignment.getDBID()),
gen.getChromID(wholeChrom.getChrom()),
false,
false,
wholeChrom.getStart(),
wholeChrom.getEnd(),
null,
null,
null);
}
if(loadType2){
if(availSingleType2Chroms.get(alignment).contains(gen.getChromID(wholeChrom.getChrom()))){
count += client.getCount(Integer.toString(alignment.getDBID()),
gen.getChromID(wholeChrom.getChrom()),
true,
false,
wholeChrom.getStart(),
wholeChrom.getEnd(),
null,
null,
null);
}
}
}
ArrayList<Region> chunks = new ArrayList<Region>();
// if there are too many reads in a chrom, read smaller chunks
if (count>MAXRDBLOAD){
int chunkNum = count/MAXRDBLOAD*2+1;
int chunkLength = length/chunkNum;
int start = 0;
while (start<=length){
int end = Math.min(length, start+chunkLength-1);
Region r = new Region(gen, chrom, start, end);
start = end+1;
chunks.add(r);
}
}else
chunks.add(wholeChrom);
for (Region chunk: chunks){
Pair<ArrayList<Integer>,ArrayList<Float>> hits;
if(loadType1){
hits = loadStrandedBaseCounts(chunk, '+', false);
addHits(chrom, '+', hits.car(), hits.cdr());
hits = loadStrandedBaseCounts(chunk, '-', false);
addHits(chrom, '-', hits.car(), hits.cdr());
}
if(loadType2){
hits = loadStrandedBaseCounts(chunk, '+', true);
addHits(chrom, '+', hits.car(), hits.cdr());
hits = loadStrandedBaseCounts(chunk, '-', true);
addHits(chrom, '-', hits.car(), hits.cdr());
}
if(hasPairedAligns && loadPairs){
ArrayList<HitPair> pairs = loadStrandedPairs(chunk, '+');
addPairs(chrom, '+', pairs);
pairs = loadStrandedPairs(chunk, '-');
addPairs(chrom, '-', pairs);
}
}
}
} catch (IOException e) {
e.printStackTrace();
} catch (ClientException e) {
//Do nothing here: ClientException could be thrown because chromosome doesn't contain any hist.
}
}
/**
* load pairs of read hit 5' coordinates (sorted) and counts
*
*/
private Pair<ArrayList<Integer>,ArrayList<Float>> loadStrandedBaseCounts(Region r, char strand, boolean loadRead2){
TreeMap<Integer,Float> allHits = null;
ArrayList<Integer> coords = new ArrayList<Integer>();
ArrayList<Float> counts = new ArrayList<Float>();
try {
//Start a new ReadDB client
if(client==null)
client = new Client();
allHits = client.getWeightHistogram(alignIDs,
r.getGenome().getChromID(r.getChrom()),
loadRead2,
false,
0,
1,
r.getStart(),
r.getEnd(),
null,
strand == '+');
if (allHits == null) {
if (alignIDs.size() != 0) {
//throw new NullPointerException("ReadDBHitLoader: client.getWeightHistogram returned null");
}
} else {
coords.addAll(allHits.keySet());
counts.addAll(allHits.values());
}
} catch (IOException e) {
e.printStackTrace();
} catch (ClientException e) {
//Do nothing here; ClientException could be thrown because a chromosome doesn't contain any hits
}
return new Pair<ArrayList<Integer>,ArrayList<Float>>(coords, counts);
}
/**
* load paired reads in region
*
*/
private ArrayList<HitPair> loadStrandedPairs(Region r, char strand){
ArrayList<HitPair> pairs = new ArrayList<HitPair>();
try{
//Start a new ReadDB client
if(client==null)
client = new Client();
for (String alignid : alignIDs) {
List<PairedHit> ph = client.getPairedHits(alignid,
r.getGenome().getChromID(r.getChrom()),
true,
r.getStart(),
r.getEnd(),
null,
strand=='+');
for (PairedHit h : ph) {
if(h.pairCode==1)
pairs.add(new HitPair(h.leftPos, r.getGenome().getChromName(h.rightChrom), h.rightPos, h.rightStrand?0:1, h.weight));
}
}
} catch (IOException e) {
e.printStackTrace();
} catch (ClientException e) {
//Do nothing here; ClientException could be thrown because a chromosome doesn't contain any hits
}
return pairs;
}
/**
* Close the client
*/
public void cleanup(){
if(client!=null)
client.close();
if(loader!=null)
loader.close();
}
}