package aliview.importer;
import java.util.ArrayList;
import org.apache.log4j.Logger;
import it.unimi.dsi.io.ByteBufferInpStream;
import aliview.sequencelist.MemoryMappedSequencesFile;
import aliview.sequences.FastaFileSequence;
import aliview.sequences.FileSequence;
import aliview.sequences.Sequence;
import aliview.subprocesses.SubThreadProgressWindow;
/**
 * Indexes FASTA records in a memory-mapped sequences file.
 *
 * Each record is located by scanning the mapped buffer byte by byte for a
 * '>' that starts a header line. For every record a {@link FastaFileSequence}
 * is created that stores the name and the file positions of the record
 * (start, start of residues after the name line, and end).
 */
public class FastaFileIndexer implements FileIndexer{
	private static final Logger logger = Logger.getLogger(FastaFileIndexer.class);

	/** Running estimate of the number of sequences in the file; only used for progress messages. */
	long estimateTotalSeqInFile = 0;

	/** Length of the mapped buffer as reported at the start of the last indexing call, -1 before the first call. */
	long fileSize = -1;

	/**
	 * Indexes up to {@code nSeqsToRetrieve} consecutive sequences.
	 *
	 * Indexing stops early when no further sequence is found, when the user
	 * interrupts via the progress window, or when this thread is interrupted
	 * while it is yielding the mapped-buffer lock. In the two interruption
	 * cases the interrupt flag of the current thread is set on return.
	 *
	 * NOTE(review): the method releases and re-acquires
	 * {@code sequencesFile.getMappedBuffLock()}, so it presumably expects the
	 * caller to hold that lock on entry - confirm against callers.
	 *
	 * @param sequencesFile the mapped file to index
	 * @param filePointerStart buffer position where scanning starts
	 * @param seqOffset index assigned to the first sequence found (also used in the progress message)
	 * @param nSeqsToRetrieve maximum number of sequences to index in this call
	 * @param progressWin receives progress messages and reports user interruption
	 * @return the sequences indexed in this call, possibly empty, never null
	 */
	public ArrayList<Sequence> findSequencesInFile(MemoryMappedSequencesFile sequencesFile, long filePointerStart, int seqOffset, int nSeqsToRetrieve,
			SubThreadProgressWindow progressWin) {
		ByteBufferInpStream mappedBuff = sequencesFile.getMappedBuff();
		this.fileSize = mappedBuff.length();
		int nSeqCount = 0;
		ArrayList<Sequence> allSeqs = new ArrayList<Sequence>();

		for(int n = 0; n < nSeqsToRetrieve; n++){
			FileSequence seq = findSequenceInFile(sequencesFile, filePointerStart, seqOffset);
			if(seq == null){
				break;
			}
			allSeqs.add(seq);
			seqOffset ++;
			filePointerStart = seq.getEndPointer() + 1;
			nSeqCount ++;

			// Update the progress message less often the larger the file is estimated to be
			int messageFrequency = 1;
			if(estimateTotalSeqInFile > 500){
				messageFrequency = 100;
			}
			if(estimateTotalSeqInFile > 5000){
				messageFrequency = 1000;
			}
			if(nSeqCount % messageFrequency == 0 && nSeqCount > 1){
				int lastSeqIndex = seqOffset;
				long lastSeqEndPointer = seq.getEndPointer();
				long seqCountDivisor = (long) lastSeqIndex + 1;
				// Guard both divisions: a zero divisor would throw ArithmeticException.
				// When no estimate can be computed the previous one is kept.
				if(seqCountDivisor > 0){
					long oneSeqFileSize = (lastSeqEndPointer + 1) / seqCountDivisor;
					if(oneSeqFileSize > 0){
						estimateTotalSeqInFile = fileSize / oneSeqFileSize;
					}
				}
				progressWin.setMessage("Indexing file " + lastSeqIndex + " out of ~" + estimateTotalSeqInFile);
			}

			if(progressWin.wasSubThreadInterruptedByUser()){
				Thread.currentThread().interrupt();
				break;
			}

			// TODO check if window is closed - then kill thread

			// If another thread is waiting for the mapped buffer (e.g. main thread),
			// pause indexing for 200ms by releasing the lock and sleeping a short while.
			if(sequencesFile.getMappedBuffLock().hasQueuedThreads()){
				boolean interruptedWhileYielding = false;
				sequencesFile.getMappedBuffLock().unlock();
				try {
					Thread.sleep(200);
				} catch (InterruptedException e) {
					// Restore the flag so callers can see the interruption
					Thread.currentThread().interrupt();
					interruptedWhileYielding = true;
				} finally {
					// Always leave with the lock held again, as it was on entry to this branch
					sequencesFile.getMappedBuffLock().lock();
				}
				if(interruptedWhileYielding){
					logger.info("Indexing interrupted while yielding the mapped buffer lock");
					break;
				}
			}
		}
		return allSeqs;
	}

	/**
	 * Scans the mapped buffer from {@code filePointerStart} and returns the
	 * first FASTA record found.
	 *
	 * The record ends either just before the next '>' that is not part of a
	 * header line, or at the end of the buffer. The name that is stored is
	 * every byte of the header line up to (not including) the line feed; this
	 * includes the leading '>' and, for CRLF files, a trailing '\r'. Bytes are
	 * converted to chars one by one without charset decoding.
	 *
	 * @param sequencesFile the mapped file to scan
	 * @param filePointerStart buffer position where scanning starts
	 * @param seqOffset not used by this implementation
	 * @return the record found, or null when no '>' occurs before the end of the buffer
	 */
	public FileSequence findSequenceInFile(MemoryMappedSequencesFile sequencesFile, long filePointerStart, int seqOffset){
		StringBuilder name = new StringBuilder();
		FileSequence sequence = null;
		boolean bytesUntilNextLFAreName = false;
		ByteBufferInpStream mappedBuff = sequencesFile.getMappedBuff();
		mappedBuff.position(filePointerStart);

		// Keep the int returned by read() for the end-of-stream test. Narrowing to
		// byte first would make a 0xFF data byte indistinguishable from -1 (EOF).
		int readValue;
		while ((readValue = mappedBuff.read()) != -1) {
			byte nextByte = (byte) readValue;

			// Find name start
			if(nextByte == '>' && !bytesUntilNextLFAreName){
				// A second '>' outside a header line: the current record is complete
				if(sequence != null){
					sequence.setEndPointer(mappedBuff.position() - 2); // remove > and LF
					return sequence;
				}
				// start new one
				name = new StringBuilder(250);
				sequence = new FastaFileSequence(sequencesFile, mappedBuff.position()); // skip >
				bytesUntilNextLFAreName = true;
			}

			// line feed - end of name
			if(nextByte == '\n' && bytesUntilNextLFAreName){
				sequence.setName(name.toString());
				sequence.setSequenceAfterNameStartPointer(mappedBuff.position() + 1); // exclude LF
				bytesUntilNextLFAreName = false;
			}

			// build name (same byte-to-char conversion as before, no charset decoding)
			if(bytesUntilNextLFAreName){
				name.append((char) nextByte);
			}
		}

		// The loop only falls through at end of buffer: close the record in progress, if any
		if(sequence != null){
			logger.info("EOF=" + mappedBuff.position());
			sequence.setEndPointer(mappedBuff.position() - 1); // remove EOF
		}
		return sequence;
	}
}