package org.seqcode.deepseq.hitloaders;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.seqcode.deepseq.Read;
import org.seqcode.deepseq.ReadHit;
/**
* NovoFileHitLoader: a FileHitLoader from NovoAlign output.
* This loader has not been tested for a while since we don't use NovoAlign.
*
* @author mahony
*
*/
public class NovoFileHitLoader extends FileHitLoader {

    // Zero-based indices of the tab-separated columns this loader reads.
    // NOTE(review): the meanings are inferred from how each column is used below
    // (presumably the NovoAlign native report layout) -- confirm against the format spec.
    /** Read identifier; consecutive lines sharing it are counted as hits of one read. */
    private static final int COL_ID = 0;
    /** Column whose text length is taken as the read length (presumably the read sequence). */
    private static final int COL_SEQ = 2;
    /** Alignment status: "U" is always loaded, values starting with "R" only with useNonUnique. */
    private static final int COL_STATUS = 4;
    /** Name of the sequence the read aligned to. */
    private static final int COL_CHROM = 7;
    /** Integer offset used as the hit start coordinate. */
    private static final int COL_OFFSET = 8;
    /** Strand flag: "F" maps to '+', anything else to '-'. */
    private static final int COL_STRAND = 9;

    /**
     * Creates a loader for a NovoAlign output file.
     *
     * The Type1/Type2 flags are not forwarded: the superclass is always constructed with the
     * fixed values {@code true, false, false} in the positions after {@code nonUnique}
     * (NOTE(review): presumably "load Type1 only" -- confirm against FileHitLoader). A warning
     * is printed to stderr when the caller's request cannot be honoured.
     *
     * @param f            the NovoAlign output file to read
     * @param nonUnique    whether non-uniquely aligned ("R...") records should be loaded
     * @param loadT1Reads  requested Type1 loading; only used to decide whether to warn
     * @param loadT2Reads  requested Type2 loading; only used to decide whether to warn
     * @param loadPairs    requested pair loading; passed to the superclass, but this loader
     *                     never produces pair data and warns if it is set
     */
    public NovoFileHitLoader(File f, boolean nonUnique, boolean loadT1Reads, boolean loadT2Reads, boolean loadPairs){
        super(f, nonUnique, true, false, false, loadPairs);
        if (!loadT1Reads || loadT2Reads) {
            System.err.println("NovoFileHitLoader: You asked to load only Type1 or Type2 reads, we do not load this information from NovoAlign format.");
        }
        if (loadPairs) {
            System.err.println("NovoFileHitLoader: You asked to load pairs, but we do not load paired read data from NovoAlign format.");
        }
    }

    /**
     * Get the reads from the appropriate source (implementation-specific).
     * Loads data to the fivePrimesList and hitsCountList.
     * Nothing is loaded to hitPairsList since we do not load pairing information from
     * NovoAlign format.
     *
     * Parsing rules:
     * - blank lines, lines starting with '#', and lines with too few columns to carry a
     *   status field are skipped;
     * - consecutive lines with the same ID are grouped into one Read, whose hit count is the
     *   number of such lines (including lines that did not yield a loaded hit);
     * - the read length is taken once, from the first data line, and reused for every hit
     *   (end = start + readLength - 1);
     * - a non-numeric offset column still raises NumberFormatException.
     *
     * An IOException while reading is reported on stderr and ends the load; the reader is
     * always closed.
     */
    public void sourceAllHits() {
        this.initialize();
        // try-with-resources guarantees the file handle is released even if parsing throws.
        try (BufferedReader reader = new BufferedReader(new FileReader(file))) {
            int readLength = -1;
            String line;
            String lastID = "";
            float currReadHitCount = 0;
            Read currRead = null;

            while ((line = reader.readLine()) != null) {
                line = line.trim();
                // Blank lines previously crashed on charAt(0); treat them like comments.
                if (line.isEmpty() || line.charAt(0) == '#') {
                    continue;
                }
                String[] words = line.split("\\t");
                // A line without a status column cannot be interpreted at all.
                if (words.length <= COL_STATUS) {
                    continue;
                }

                String ID = words[COL_ID];
                if (readLength == -1) {
                    readLength = words[COL_SEQ].length();
                }

                if (ID.equals(lastID)) {
                    currReadHitCount++;
                } else {
                    // A new read starts: flush the one collected so far.
                    if (currRead != null) {
                        currRead.setNumHits(currReadHitCount);
                        //Add the hits to the data structure
                        addHits(currRead);
                        currRead = null;
                    }
                    currReadHitCount = 1;
                }

                String tag = words[COL_STATUS];
                boolean wanted = tag.equals("U") || (useNonUnique && tag.startsWith("R"));
                // Alignment columns must be present for both unique and repeat records.
                if (wanted && words.length > COL_STRAND) {
                    String chr = normalizeChrom(words[COL_CHROM]);
                    int start = Integer.parseInt(words[COL_OFFSET]);
                    int end = start + readLength - 1;
                    char strand = words[COL_STRAND].equals("F") ? '+' : '-';
                    ReadHit currHit = new ReadHit(chr, start, end, strand, 1);
                    // currRead is always null here when the ID changed (it was flushed above).
                    if (currRead == null) {
                        currRead = new Read();
                    }
                    currRead.addHit(currHit);
                }
                lastID = ID;
            }

            // Flush the final read.
            if (currRead != null) {
                currRead.setNumHits(currReadHitCount);
                //Add the hits to the data structure
                addHits(currRead);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }//end of sourceAllHits method

    /**
     * Reduces an aligned-sequence name to a bare chromosome name: keeps the text before the
     * first '.', drops a leading '>', then drops a leading "chromosome", "chrom" or "chr".
     * The '>' is removed first because the prefix patterns are anchored at the start of the
     * string and would otherwise never match a name such as ">chr1".
     */
    private static String normalizeChrom(String rawName) {
        String[] tmp = rawName.split("\\.");
        String chr = tmp[0].replaceFirst("^>", "");
        return chr.replaceFirst("^chromosome", "").replaceFirst("^chrom", "").replaceFirst("^chr", "");
    }
}//end of NovoFileHitLoader class