package org.seqcode.deepseq.hitloaders;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import org.seqcode.deepseq.*;
/**
* BEDFileHitLoader: a FileHitLoader for BED files
* Format = http://genome.ucsc.edu/FAQ/FAQformat.html#format1
*
* @author mahony
*
*/
public class BEDFileHitLoader extends FileHitLoader {

    /**
     * Number of whitespace-separated columns a record must have to be loaded.
     * The strand is read from the sixth column (index 5), so shorter records
     * cannot be turned into a stranded hit.
     */
    private static final int MIN_BED_FIELDS = 6;

    /**
     * Creates a loader for a single BED file.
     *
     * The read-type flags are only used to warn the caller: the superclass is
     * always invoked with the fixed flags {@code true, false, false} in their
     * place (presumably "Type1 reads only" -- confirm against FileHitLoader).
     *
     * @param f           the BED file to read
     * @param nonUnique   passed through to the superclass unchanged
     * @param loadT1Reads a warning is printed if this is false
     * @param loadT2Reads a warning is printed if this is true
     * @param loadPairs   passed through to the superclass; a warning is printed
     *                    if this is true, since BED cannot represent pairs
     */
    public BEDFileHitLoader(File f, boolean nonUnique, boolean loadT1Reads, boolean loadT2Reads, boolean loadPairs){
        super(f, nonUnique, true, false, false, loadPairs);
        if(!loadT1Reads || loadT2Reads)
            System.err.println("BEDFileHitLoader: You asked to load only Type1 or Type2 reads, but BED cannot represent different reads.");
        if(loadPairs)
            System.err.println("BEDFileHitLoader: You asked to load pairs, but BED cannot represent paired read data.");
    }

    /**
     * Get the reads from the appropriate source (implementation-specific).
     * Each usable BED record becomes one Read holding a single ReadHit with a
     * hit count of 1, and is handed to addHits().
     * Nothing is loaded as pairs, since BED does not store pairs.
     *
     * Lines that are skipped rather than loaded:
     * blank lines; lines starting with '#'; lines whose start/end columns are
     * not integers (e.g. "track"/"browser" headers); lines with fewer than
     * {@value #MIN_BED_FIELDS} columns (reported once on stderr); and lines
     * whose chromosome column consists only of dots.
     *
     * An IOException is reported via printStackTrace() and ends the load;
     * reads added before the failure are kept. The reader is always closed.
     */
    public void sourceAllHits() {
        this.initialize();
        totalHits=0;
        int shortLines = 0;
        BufferedReader reader = null;
        try {
            reader = new BufferedReader(new FileReader(file));
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                // Guard against blank lines before looking at the first character.
                if (line.length() == 0 || line.charAt(0) == '#')
                    continue;

                String[] words = line.split("\\s+");
                if (words.length < MIN_BED_FIELDS) {
                    // No strand column: count it so the user is told, but keep loading.
                    shortLines++;
                    continue;
                }

                Read read = parseRead(words);
                if (read != null) {
                    read.setNumHits(1f);
                    //Add the hits to the data structure
                    addHits(read);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close on every path, including when reading fails part-way through.
            if (reader != null) {
                try {
                    reader.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
        if (shortLines > 0)
            System.err.println("BEDFileHitLoader: Skipped " + shortLines + " line(s) with fewer than "
                    + MIN_BED_FIELDS + " columns (strand column is required).");
    }//end of sourceAllHits method

    /**
     * Converts the columns of one BED record into a Read with a single hit.
     *
     * @param words the whitespace-split columns; the caller guarantees at
     *              least {@value #MIN_BED_FIELDS} non-empty entries
     * @return the new Read, or null if the line is not a usable record
     *         (non-integer coordinates, or a chromosome column of only dots)
     */
    private static Read parseRead(String[] words) {
        // Keep only the text before the first '.', e.g. "chr1.fa" -> "chr1".
        // split() yields an empty array when the column is nothing but dots.
        String[] nameParts = words[0].split("\\.");
        if (nameParts.length == 0)
            return null;
        String chr = nameParts[0].replaceFirst("^chromosome", "").replaceFirst("^chrom", "").replaceFirst("^chr", "");
        chr = chr.replaceFirst("^>", "");

        try {
            // http://genome.ucsc.edu/FAQ/FAQformat.html#format1
            // BED format is half open - The chromEnd base is not included
            // For example, the first 100 bases of a chromosome are defined as chromStart=0, chromEnd=100, and span the bases numbered 0-99.
            // BED format is also 0-based, and we want 1-based
            int start = Integer.parseInt(words[1]) + 1;
            int end = Integer.parseInt(words[2]);
            char strand = words[5].charAt(0);

            Read read = new Read();
            read.addHit(new ReadHit(chr, start, end, strand));
            return read;
        } catch (NumberFormatException e) {
            // skip reading this line for header or comment lines
            return null;
        }
    }
}//end of BEDFileHitLoader class