package org.seqcode.deepseq.hitloaders;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.seqcode.deepseq.Read;
import org.seqcode.deepseq.ReadHit;
/**
* BowtieFileHitLoader: a FileHitLoader from Bowtie native format
*
* @author mahony
*
*/
public class BowtieFileHitLoader extends FileHitLoader {

    /**
     * Creates a loader for a Bowtie native-format alignment file.
     *
     * The superclass is always constructed with the fixed flag values
     * {@code true, false, false} between {@code nonUnique} and {@code loadPairs},
     * regardless of {@code loadT1Reads}/{@code loadT2Reads}; those two arguments
     * are only used to decide whether to print a warning to stderr.
     *
     * @param f           the Bowtie native-format file to read
     * @param nonUnique   passed through to the superclass constructor
     * @param loadT1Reads a warning is printed if this is false
     * @param loadT2Reads a warning is printed if this is true
     * @param loadPairs   passed through to the superclass constructor; a warning is printed if true
     */
    public BowtieFileHitLoader(File f, boolean nonUnique, boolean loadT1Reads, boolean loadT2Reads, boolean loadPairs){
        super(f, nonUnique, true, false, false, loadPairs);
        if(!loadT1Reads || loadT2Reads)
            System.err.println("BowtieFileHitLoader: You asked to load only Type1 or Type2 reads, we do not load this information from Bowtie native format.");
        if(loadPairs)
            System.err.println("BowtieFileHitLoader: You asked to load pairs, but we do not load paired read data from Bowtie native format.");
    }

    /**
     * Get the reads from the appropriate source (implementation-specific).
     * Loads data to the fivePrimesList and hitsCountList
     * Nothing loaded to hitPairsList, since we do not load this type of data from Bowtie native
     *
     * Blank lines and lines starting with '#' are skipped. Consecutive lines that share the
     * same ID (first column) are grouped into one Read; when the ID changes (and at end of
     * file) the finished Read is handed to addHits if it has exactly one hit or if
     * non-unique reads are allowed.
     *
     * An IOException is reported via printStackTrace and otherwise swallowed, as before.
     * The reader is closed on every exit path, including when a malformed line triggers
     * a runtime exception.
     */
    public void sourceAllHits() {
        this.initialize();
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line, lastID="";
            Read currRead=null;
            int readLength=-1;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.length()==0 || line.charAt(0)=='#')
                    continue;

                String[] words = line.split("\\t");
                String ID = words[0];

                // NOTE(review): the length of the fifth column on the FIRST alignment line is
                // reused for every subsequent hit. If the file mixes read lengths, the end
                // coordinates of later hits will not reflect their own length - confirm intended.
                if(readLength==-1)
                    readLength = words[4].length();

                boolean newRead = !ID.equals(lastID);
                if(newRead && currRead!=null){
                    // The ID changed: the previous read is complete, so store it (if accepted).
                    storeReadIfAccepted(currRead);
                    currRead=null;
                }

                String chr = cleanChromName(words[2]);
                int start = Integer.parseInt(words[3])+1; //Bowtie raw output is 0-based, we want 1-based
                int end = start+readLength-1;
                char strand = words[1].charAt(0);
                ReadHit currHit = new ReadHit(chr, start, end, strand, 1);

                if(newRead || currRead==null){
                    currRead = new Read();
                }
                currRead.addHit(currHit);
                lastID=ID;
            }
            // Store the final read, which has no following ID change to trigger it.
            if(currRead!=null){
                storeReadIfAccepted(currRead);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }//end of sourceAllHits method

    /**
     * Hands the read to addHits if it has exactly one hit, or if non-unique reads are allowed.
     */
    private void storeReadIfAccepted(Read read){
        if(read.getNumHits()==1 || useNonUnique){
            //Add the hits to the data structure
            addHits(read);
        }
    }

    /**
     * Reduces a reference-name column to a bare chromosome label: keeps the text before the
     * first '.', then strips one leading "chromosome", "chrom", "chr" and '>' (applied in that order).
     */
    private static String cleanChromName(String rawName){
        String[] tmp = rawName.split("\\.");
        String chr = tmp[0].replaceFirst("^chromosome", "").replaceFirst("^chrom", "").replaceFirst("^chr", "");
        return chr.replaceFirst("^>", "");
    }
}//end of BowtieFileHitLoader class