package aliview.test;
import java.io.File;
import java.util.ArrayList;
import org.apache.log4j.Logger;
import org.bitbucket.kienerj.io.OptimizedRandomAccessFile;
import aliview.importer.AlignmentImportException;
import aliview.sequences.FastFastaSequence;
import aliview.sequences.Sequence;
public class FastFastaFileTest {
private static final Logger logger = Logger.getLogger(FastFastaFileTest.class);
private int longestSequenceLength;
public static void main(String[] args) {
FastFastaFileTest ffFileTest = new FastFastaFileTest();
try {
ffFileTest.importSequences();
} catch (AlignmentImportException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
public FastFastaFileTest() {
}
public static void importSequences() throws AlignmentImportException {
long startTime = System.currentTimeMillis();
ArrayList<Sequence> sequences = new ArrayList<Sequence>();
try {
StringBuilder sequence = new StringBuilder();
//File seqFile = new File("/home/anders/projekt/ormbunkar/analys/karin_alignment/ssu_pr2-99.fasta.diffenc2");
File seqFile = new File("/vol2/big_data/SSURef_108_filtered_bacteria_pos_5389-24317.fasta");
//RandomAccessFile raf = new RandomAccessFile(seqFile, "r");
OptimizedRandomAccessFile raf = new OptimizedRandomAccessFile(seqFile, "r");
//BufferedReader r = new BufferedReader(this.reader);
String line = "";
String name = null;
int nLine = 0;
long nSeqCount = 0;
byte[] buffer = new byte[10000];
while ((raf.read(buffer)) > 0) {
//while ((line = raf.readLine()) != null) {
line = line.trim();
long filePoint = raf.getFilePointer();
boolean findNextLF = false;
for(int n = 0; n<buffer.length; n++){
if(buffer[n] == '>'){
long startPos = filePoint - n;
nSeqCount ++;
findNextLF = true;
}
/*
if((buffer[n] == '\n' || buffer[n] == '\r') && findNextLF){
long endPos = filePoint - n;
nSeqCount --;
findNextLF = false;
}
*/
n++;
}
/*
if(nLine == 0){
// if not fasta file then break
if(line.length() > 0 && line.charAt(0) != '>'){
// no fasta
throw new AlignmentImportException("Fasta file should start with > character");
}
}
if(line.length() > 0){
if(line.charAt(0) == '>'){
}
else{
}
}
*/
nLine ++;
// System.out.println("nLine" + nLine + "pointer" + raf.getFilePointer());
// if(nLine > 100){
// System.err.println("SystemExit");
// System.exit(1);
// }
// System.out.println(line);
// if(nLine > 5){
// System.err.println("SystemExit");
// System.exit(1);
// }
if(nSeqCount % 100 == 0){
System.err.println("found seq" + nSeqCount);
}
if(nLine % 10000 == 0){
System.out.println("nLine" + nLine + "pointer" + raf.getFilePointer() + "nSecCount" + nSeqCount);
}
}
// add last sequence
if(name != null && name.length() > 0){
String seqAsString = sequence.toString();
seqAsString = seqAsString.replaceAll(" ","");
sequences.add(new FastFastaSequence(name, seqAsString));
name = null;
}
} catch (Exception e) {
logger.error(e);
// TODO Auto-generated catch block
throw new AlignmentImportException("could not import as fasta file because: " + e.getMessage());
}
long endTime = System.currentTimeMillis();
System.out.println("reading sequences took " + (endTime - startTime) + " milliseconds");
}
public int getLongestSequenceLength() {
return longestSequenceLength;
}
}