package aliview.importer;
import java.io.BufferedReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Logger;
import aliview.sequences.FastFastaSequence;
import aliview.sequences.Sequence;
/**
 * Reads FASTA formatted text from a {@link Reader} and turns every record
 * into a {@link FastFastaSequence}.
 *
 * <p>A record is a header line beginning with {@code '>'} followed by any
 * number of residue lines. Lines are trimmed, blank lines are skipped, and
 * space characters inside the residues are removed (some files, e.g. in NCBI
 * style, contain blanks within the sequence data).
 *
 * <p>The supplied reader is wrapped in a {@link BufferedReader} but is not
 * closed by this class.
 */
public class FastFastaImporterSlow {
    private static final Logger logger = Logger.getLogger(FastFastaImporterSlow.class);

    private final Reader reader;

    /** Length of the longest sequence imported so far by this instance. */
    private int longestSequenceLength;

    /**
     * @param reader source of the FASTA text; it is read until exhausted by
     *               {@link #importSequences()}
     */
    public FastFastaImporterSlow(Reader reader) {
        this.reader = reader;
    }

    /**
     * Parses all records available from the reader.
     *
     * @return the imported sequences in file order; empty if the input holds
     *         no header line
     * @throws AlignmentImportException if the first non-blank line does not
     *         start with {@code '>'}, or if anything else goes wrong while
     *         reading (the original failure message is appended to the
     *         exception message)
     */
    public List<Sequence> importSequences() throws AlignmentImportException {
        long startTime = System.currentTimeMillis();
        ArrayList<Sequence> sequences = new ArrayList<Sequence>();
        try {
            StringBuilder residues = new StringBuilder();
            BufferedReader r = new BufferedReader(this.reader);
            String line;
            String name = null;
            boolean seenContent = false;
            while ((line = r.readLine()) != null) {
                line = line.trim();
                if (line.length() == 0) {
                    continue;
                }
                boolean isHeader = line.charAt(0) == '>';
                if (!seenContent) {
                    seenContent = true;
                    // Validate the first non-blank line rather than only the
                    // physical first line; otherwise text preceding the first
                    // header would be prepended to the first record's residues.
                    if (!isHeader) {
                        throw new AlignmentImportException("Fasta file should start with > character");
                    }
                }
                if (isHeader) {
                    // A new header terminates the record collected so far.
                    if (name != null) {
                        addSequence(sequences, name, residues);
                        residues.setLength(0);
                    }
                    // The header is stored as read, including the leading '>'.
                    name = line;
                } else {
                    residues.append(line);
                }
            }
            // The last record has no following header to terminate it.
            if (name != null) {
                addSequence(sequences, name, residues);
            }
        } catch (Exception e) {
            // Deliberately broad: every failure is reported to the caller as an
            // AlignmentImportException carrying the same message text as before.
            logger.error("could not import as fasta file", e);
            throw new AlignmentImportException("could not import as fasta file because: " + e.getMessage());
        }
        long endTime = System.currentTimeMillis();
        logger.info("reading sequences took " + (endTime - startTime) + " milliseconds");
        return sequences;
    }

    /**
     * Builds one sequence from the collected residues, appends it to the
     * target list and updates the longest-sequence bookkeeping.
     */
    private void addSequence(List<Sequence> target, String name, StringBuilder residues) {
        // Literal replacement; no regular expression is needed to drop blanks.
        String seqAsString = residues.toString().replace(" ", "");
        target.add(new FastFastaSequence(name, seqAsString));
        this.longestSequenceLength = Math.max(this.longestSequenceLength, seqAsString.length());
    }

    /**
     * @return the length of the longest sequence imported by this instance,
     *         or 0 if nothing has been imported yet
     */
    public int getLongestSequenceLength() {
        return longestSequenceLength;
    }
}