package aliview.sequencelist; import it.unimi.dsi.io.ByteBufferInpStream; import java.awt.Component; import java.awt.event.WindowEvent; import java.awt.event.WindowListener; import java.awt.event.WindowStateListener; import java.io.BufferedReader; import java.io.EOFException; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileReader; import java.io.IOException; import java.nio.channels.FileChannel; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Iterator; import java.util.List; import java.util.ListIterator; import java.util.concurrent.locks.Lock; import java.util.concurrent.locks.ReentrantLock; import javax.swing.JDialog; import javax.swing.JOptionPane; import javax.swing.JPanel; import javax.swing.SwingUtilities; import javax.swing.event.ListDataEvent; import javax.swing.event.ListDataListener; import org.apache.commons.lang.StringUtils; import org.apache.log4j.Logger; import utils.DialogUtils; import aliview.AliView; import aliview.AliViewWindow; import aliview.externalcommands.ExternalCommandExecutor; import aliview.importer.AlignmentImportException; import aliview.importer.ClustalFileIndexer; import aliview.importer.FastaFileIndexer; import aliview.importer.FileFormat; import aliview.importer.FileImportUtils; import aliview.importer.FileIndexer; import aliview.importer.NotUsed_IndexFileReader; import aliview.importer.MSFFileIndexer; import aliview.importer.NexusFileIndexer; import aliview.importer.PhylipFileIndexer; import aliview.messenges.Messenger; import aliview.sequences.FastaFileSequence; import aliview.sequences.FileSequence; import aliview.sequences.Sequence; import aliview.settings.Settings; import aliview.subprocesses.SubProcessWindow; import aliview.subprocesses.SubThreadProgressWindow; public class MemoryMappedSequencesFile{ private static final Logger logger = Logger.getLogger(MemoryMappedSequencesFile.class); private static final String LF = System.getProperty("line.separator"); private FileFormat fileFormat; private File alignmentFile; private ByteBufferInpStream mappedBuff; private final ReentrantLock mappedBuffLock = new ReentrantLock(); // private FileSequence lastCachedSeq; private long fileSize = -1; // ArrayList<ListDataListener> listeners = new ArrayList<ListDataListener>(); // private ArrayList<FilePage> pages; public MemoryMappedSequencesFile(File aliFile, FileFormat foundFormat) throws IOException { this.alignmentFile = aliFile; this.fileFormat = foundFormat; logger.info("new FileMMSequnceList"); } public ReentrantLock getMappedBuffLock() { return mappedBuffLock; } void indexFileAndAddSequencesToAlignmentModel(FileSequenceAlignmentListModel destinationModel) throws IOException{ // check if index file exists File indexFile = new File(alignmentFile.getAbsolutePath() + ".fai"); // read from index file if exists if(fileFormat == FileFormat.FASTA && indexFile.exists()){ List<Sequence> seqs = createSequencesFromExistingIndexFile(indexFile); // create memory mapped buffer if(mappedBuff == null){ createMemoryMappedBuffer(); } // add sequences to cache addSequencesToDestination(seqs, destinationModel); } // Otherwise index file else{ indexFileAndAddSequencesToListInSubthread(destinationModel, fileFormat); } } public List<Sequence> createSequencesFromExistingIndexFile(File indexFile) { long startTime = System.currentTimeMillis(); ArrayList<Sequence> sequences = new ArrayList<Sequence>(); try { BufferedReader r = new BufferedReader(new FileReader(indexFile)); String line; int nLine = 0; int seqIndex = 0; // String[] splitted = new String[5]; while ((line = r.readLine()) != null) { line = line.trim(); if(line.length() > 0){ String[] splitted = StringUtils.split(line, '\t'); // String[] splitted = line.split("\t");//StringUtils.split(line, '\t'); // int startPos = 0; // int endPos = line.indexOf('\t',startPos); // // splitted[0] = line.substring(startPos, endPos); // startPos = endPos + 1; // endPos = line.indexOf('\t',startPos); // splitted[1] = line.substring(startPos, endPos); // startPos = endPos + 1; // endPos = line.indexOf('\t',startPos); // splitted[2] = line.substring(startPos, endPos); // startPos = endPos + 1; // endPos = line.indexOf('\t',startPos); // splitted[3] = line.substring(startPos, endPos); // startPos = endPos + 1; // endPos = line.indexOf('\t',startPos); // splitted[4] = line.substring(startPos, endPos); String seqName = splitted[0]; int seqWithoutWhitespaceLength = Integer.parseInt(splitted[1]); long seqAfterNameStartPointer = Long.parseLong(splitted[2]); int lineCharLength = Integer.parseInt(splitted[3]); int lineAbsoluteLength = Integer.parseInt(splitted[4]); int nSeqFullLines = (int)Math.floor(seqWithoutWhitespaceLength/lineCharLength); int lineDiff = lineAbsoluteLength - lineCharLength; double partialLine = ((double)seqWithoutWhitespaceLength/(double)lineCharLength) - (double)nSeqFullLines; int extraChars = (int)Math.floor(partialLine * lineDiff); long endPointer = seqAfterNameStartPointer + seqWithoutWhitespaceLength + nSeqFullLines * lineDiff + extraChars; FileSequence seq = new FileSequence(this, seqIndex, seqName, seqWithoutWhitespaceLength, seqAfterNameStartPointer, endPointer, lineCharLength, lineAbsoluteLength); sequences.add(seq); seqIndex ++; } nLine ++; } } catch (Exception e) { logger.error(e); } long endTime = System.currentTimeMillis(); System.out.println("reading index took " + (endTime - startTime) + " milliseconds"); return sequences; } private void indexFileAndAddSequencesToListInSubthread(final FileSequenceAlignmentListModel destinationModel, final FileFormat fileFormat){ final SubThreadProgressWindow progressWin = new SubThreadProgressWindow(); progressWin.setAlwaysOnTop(true); progressWin.setTitle("Background indexing"); progressWin.setInitialMessage("Indexing file: " + 0 + "/" + "number of sequences"); progressWin.show(); progressWin.centerLocationToThisComponentOrScreen(AliView.getActiveWindow()); //progressWin.setTopRightRelativeThisComponent(AliView.getActiveWindow()); progressWin.setBottomRightRelativeThisComponent(AliView.getActiveWindow()); try{ final Thread thread = new Thread(new Runnable(){ public void run(){ try { logger.info("Indexing Thread started"); int nMaxSeqsToRetrieveBeforeDestinationUpdateFirst = 500; int nMaxSeqsToRetrieveBeforeDestinationUpdateAfterFirst = 5000; int nMaxSeqsToRetrieveBeforeDestinationUpdate = nMaxSeqsToRetrieveBeforeDestinationUpdateFirst; // These formats are possibly sequential and it is good to retrieve all seqs at once if(fileFormat == FileFormat.PHYLIP || fileFormat == FileFormat.NEXUS || fileFormat == FileFormat.CLUSTAL){ nMaxSeqsToRetrieveBeforeDestinationUpdate = Integer.MAX_VALUE; } boolean hasMoreSequencesToIndex = true; FileSequence lastCachedSeq = null; int indexOffset = 0; while(hasMoreSequencesToIndex){ // The standard JAVA-MappedFileBuffer, but it is limited to 2GB files // mappedBuff = new FileInputStream(aliFile).getChannel().map(FileChannel.MapMode.READ_ONLY, 0, aliFile.length()); // This is extended version - any size files if(mappedBuff == null){ progressWin.setTitle("Mapping file"); progressWin.setMessage("Mapping file - usually takes about 0-15 sec." ); progressWin.setVisible(true); createMemoryMappedBuffer(); progressWin.setTitle("Background indexing"); progressWin.setMessage("Indexing file: " + 0 + "/" + "number of sequences"); } long startPointer = 0; if(lastCachedSeq != null){ startPointer = lastCachedSeq.getEndPointer(); } List<Sequence> moreSeqs = findSequencesInFile(startPointer,indexOffset,nMaxSeqsToRetrieveBeforeDestinationUpdate, progressWin); logger.info("Thread here moreSeqs.size()" + moreSeqs.size()); if(moreSeqs.size() > 0){ addSequencesToDestination(moreSeqs, destinationModel); lastCachedSeq = (FileSequence) moreSeqs.get(moreSeqs.size() - 1); indexOffset += moreSeqs.size(); }else{ hasMoreSequencesToIndex = false; } if(Thread.interrupted()){ break; } nMaxSeqsToRetrieveBeforeDestinationUpdate = nMaxSeqsToRetrieveBeforeDestinationUpdateAfterFirst; // // sleeep a while so file can be read by other thread // try { // logger.info("Thread sleep"); // Thread.sleep(100); // } catch (InterruptedException e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } } } catch (FileNotFoundException e) { // TODO Auto-generated catch block e.printStackTrace(); Messenger.showOKOnlyMessage(Messenger.FILE_OPEN_NOT_EXISTS, LF + e.getLocalizedMessage()); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); Messenger.showOKOnlyMessage(Messenger.FILE_ERROR, LF + e.getLocalizedMessage()); } // loading is done the new thread should activate GUI again before it is finished SwingUtilities.invokeLater(new Runnable() { public void run(){ boolean wasThreadInterruptedByUser = progressWin.wasSubThreadInterruptedByUser(); progressWin.dispose(); //fireContentsChanged(this); // unlock window // AliViewWindow.getAliViewWindowGlassPane().setVisible(false); } }); } }); // Lock GUI while second thread is working progressWin.setActiveThread(thread); thread.start(); // AliViewWindow.getAliViewWindowGlassPane().setVisible(true); } catch (Exception e) { // unlock window // AliViewWindow.getAliViewWindowGlassPane().setVisible(false); progressWin.dispose(); // fireContentsChanged(this); e.printStackTrace(); } } // TODO close buffer maybe? When alignment is changed? protected void createMemoryMappedBuffer() throws IOException{ try { mappedBuff = ByteBufferInpStream.map(new FileInputStream(alignmentFile).getChannel(),FileChannel.MapMode.READ_ONLY ); } catch (FileNotFoundException e) { e.printStackTrace(); throw e; } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); Messenger.showOKOnlyMessage(Messenger.OPEN_LARGE_FILE_ERROR, LF + e.getLocalizedMessage()); e.getLocalizedMessage(); throw e; } } private List<Sequence> findSequencesInFile(long filePointerStart, int seqOffset, final int nSeqsToRetrieve, final SubThreadProgressWindow progressWin){ long startTime = System.currentTimeMillis(); int nSeqCount = 0; ArrayList<Sequence> allSeqs = new ArrayList<Sequence>(); if(this.fileFormat == FileFormat.PHYLIP){ try { PhylipFileIndexer fileIndexer = new PhylipFileIndexer(); allSeqs = fileIndexer.findSequencesInFile(this, filePointerStart, seqOffset, nSeqsToRetrieve, progressWin); } catch (AlignmentImportException e) { // TODO Auto-generated catch block e.printStackTrace(); } } else if(this.fileFormat == FileFormat.NEXUS){ try { NexusFileIndexer fileIndexer = new NexusFileIndexer(); allSeqs = fileIndexer.findSequencesInFile(this, filePointerStart, seqOffset, nSeqsToRetrieve, progressWin); } catch (AlignmentImportException e) { // TODO Auto-generated catch block e.printStackTrace(); } } else if(this.fileFormat == FileFormat.CLUSTAL){ try { ClustalFileIndexer fileIndexer = new ClustalFileIndexer(); allSeqs = fileIndexer.findSequencesInFile(this, filePointerStart, seqOffset, nSeqsToRetrieve, progressWin); } catch (AlignmentImportException e) { // TODO Auto-generated catch block e.printStackTrace(); } } else if(this.fileFormat == FileFormat.MSF){ try { MSFFileIndexer fileIndexer = new MSFFileIndexer(); allSeqs = fileIndexer.findSequencesInFile(this, filePointerStart, seqOffset, nSeqsToRetrieve, progressWin); } catch (AlignmentImportException e) { // TODO Auto-generated catch block e.printStackTrace(); } }else{ FastaFileIndexer fileIndexer = new FastaFileIndexer(); mappedBuffLock.lock(); allSeqs = fileIndexer.findSequencesInFile(this, filePointerStart, seqOffset, nSeqsToRetrieve, progressWin); mappedBuffLock.unlock(); } long endTime = System.currentTimeMillis(); System.out.println("reading sequences took " + (endTime - startTime) + " milliseconds"); return allSeqs; } private void addSequencesToDestination(final List<Sequence> moreSeqs, final FileSequenceAlignmentListModel destinationModel){ logger.info("addSequencesToDestination"); SwingUtilities.invokeLater(new Runnable() { public void run() { destinationModel.addMoreFileSequences(moreSeqs, false); } }); } /* public List<FilePage> getFilePages() { if(pages == null && lastCachedSeq != null){ pages = new ArrayList<FilePage>(); int nMaxPageSizeInSequences = nSequencesPerPage; int lastCachedIndex = lastCachedSeq.getIndex(); long lastCachedEndPointer = lastCachedSeq.getEndPointer(); long oneSeqFileSizeSize = (lastCachedEndPointer +1) / (lastCachedIndex + 1); long fileSize = getFileSize(); long estimateTotalSeqInFile = getFileSize() / oneSeqFileSizeSize; long pageFileSize = (lastCachedIndex + 1) * oneSeqFileSizeSize; long maxPageFileSize = nSequencesPerPage * oneSeqFileSizeSize; pageFileSize = Math.max(pageFileSize, maxPageFileSize); int nPages = (int)Math.round(fileSize/pageFileSize + 0.5); // add 0.5 to always round up logger.info("nPages" + nPages); long startPointer = 0; int startIndex = 0; long endPointer = startPointer + pageFileSize; int endIndex = startIndex + nSequencesPerPage; for(int n = 0; n < nPages; n++){ pages.add(new FilePage(n, aliFile, new ArrayList<Sequence>(), startIndex, endIndex, startPointer, endPointer, nMaxPageSizeInSequences)); startIndex = endIndex; endIndex = Math.min((int)estimateTotalSeqInFile, startIndex + nSequencesPerPage); startPointer = endPointer + 1; endPointer = Math.min(fileSize, startPointer + pageFileSize -1); } } return pages; } */ public byte readByteInFile(long pos) { return (byte) readInFile(pos); } // boolean firstTime = true; public int readInFile(long pos) { if(pos < 0){ return 0; } mappedBuffLock.lock(); try{ mappedBuff.position(pos); int val = mappedBuff.read(); return val; }finally{ mappedBuffLock.unlock(); } // if(firstTime){ // try { // Thread.sleep(10000); // mappedBuffLock.lock(); // } catch (InterruptedException e) { // // TODO Auto-generated catch block // e.printStackTrace(); // } // firstTime = false; // } // ca 90-95ms // ca 105-110ms // ca 120-130ms // if(! mappedBuffLock.isHeldByCurrentThread()){ // mappedBuffLock.lock(); // // } // mappedBuff.position(pos); // int val = mappedBuff.read(); //// if(mappedBuffLock.hasQueuedThreads()){ // mappedBuffLock.unlock(); //// } // // mappedBuffLock.unlock(); // return val; } public ByteBufferInpStream getMappedBuff() { return mappedBuff; } public int readBytesInFile(long pos, int i, byte[] bytesToDraw) { // synchronized (mappedBuff) { // mappedBuff.position(pos); // return mappedBuff.read(bytesToDraw,0,i); // } mappedBuffLock.lock(); try{ mappedBuff.position(pos); int val = mappedBuff.read(bytesToDraw,0,i); return val; }finally{ mappedBuffLock.unlock(); } } public long getFileSize(){ // synchronized (mappedBuff) { // if(fileSize == -1){ // fileSize = mappedBuff.length(); // } // return fileSize; // } mappedBuffLock.lock(); try{ if(fileSize == -1){ fileSize = mappedBuff.length(); } return fileSize; }finally{ mappedBuffLock.unlock(); } } // public int size() { // // // return 400000; // return seqList.size(); // // // return seqList.size() + extraUnloadedSeqs; // // // /* // if(totalSeqCount == -1){ // return seqList.size() + extraUnloadedSeqs ; // } // else{ // return totalSeqCount; // } // */ // // //return 10000; // // /* // int index = lastCachedSeq.getSeqIndex(); // long pointer = lastCachedSeq.getSeqStartPointer(); // // long seqFileSize = pointer / (index + 1); // // long estimate = getFileSize() / seqFileSize; // // return (int) estimate; // */ // // } } /* private synchronized List<FileSequence> findSequencesInFile(long filePointerStartPos, int seqOffset, final int nSeqsToRetrieve, final SubThreadProgressWindow progressWin){ long startTime = System.currentTimeMillis(); int nSeqCount = 0; ArrayList<FileSequence> allSeqs = new ArrayList<FileSequence>(); long filePointerNextStartPos = filePointerStartPos; for(int n = 0; n < nSeqsToRetrieve; n++){ FileSequence seq = findSequenceInFile(filePointerNextStartPos, seqOffset, progressWin); allSeqs.add(seq); seqOffset ++; filePointerNextStartPos = seq.getEndPointer(); nSeqCount ++; if(nSeqCount % 1000 == 0){ final int current = nSeqCount; progressWin.setMessage("Indexing file " + current + "/" + nSeqsToRetrieve); } if(progressWin.wasSubThreadInterruptedByUser()){ break; } } long endTime = System.currentTimeMillis(); System.out.println("reading sequences took " + (endTime - startTime) + " milliseconds"); return allSeqs; } */ /* private synchronized FileSequence findSequenceInFile(long filePointerStart, int seqOffset, final SubThreadProgressWindow progressWin){ // long startTime = System.currentTimeMillis(); int nSeqCount = 0; StringBuilder name = new StringBuilder(); FileSequence sequence = null; boolean bytesUntilNextLFAreName = false; byte nextByte; readerHelper.setPosition(filePointerStart); long nameStartPos = 0; long endNamePos; long seqStartPos; long seqEndPos; nameStartPos = readerHelper.findNext((byte)'>'); sequence = new FastaFileSequence(this, seqOffset, nameStartPos); // endNamePos = readerHelper.appendBytesUntilNextLF(name); sequence.addName("Hej"); // seqStartPos = readerHelper.findNextNonControlChar(); // sequence.setSequenceAfterNameStartPointer(seqStartPos); seqEndPos = readerHelper.findNextOrEOF((byte)'>'); sequence.setEndPointer(seqEndPos); // long endTime = System.currentTimeMillis(); // System.out.println("reading sequences took " + (endTime - startTime) + " milliseconds"); return sequence; } */ /* private synchronized List<FileSequence> findSequenceInFile(long filePointerStart, int seqOffset, final int nSeqsToRetrieve, final SubThreadProgressWindow progressWin){ long startTime = System.currentTimeMillis(); ArrayList<FileSequence> allSeqs = new ArrayList<FileSequence>(); int nSeqCount = 0; StringBuilder name = new StringBuilder(); FileSequence sequence = null; boolean bytesUntilNextLFAreName = false; //long filePos = filePointerStart; byte nextByte; mappedBuff.position(filePointerStart); logger.info("mapBuffpositionbefore" + mappedBuff.position()); if(mappedBuff.position() != filePointerStart){ logger.info("filePointerStart" + filePointerStart); } while ((nextByte = (byte)mappedBuff.read()) != -1) { boolean findNextLF = false; // Find name start if(nextByte == '>'){ // save last seq and start a new one if(sequence != null){ sequence.setEndPointer(mappedBuff.position() -2); // remove > and LF // sequence.setNextSeqStartPos(mappedBuff.position() - 1); // include > allSeqs.add(sequence); // calculate seek offset if(seekOffset == 0){ seekOffset = sequence.getLength(); } // if(sequence.getLength() > 19000){ // logger.info("error"); // logger.info("filePointerStart" + filePointerStart); // logger.info("mappedBuff.position()" + mappedBuff.position()); // logger.info("index" + sequence.getIndex()); // logger.info("seqstartpoint" + sequence.getStartPointer()); // logger.info("seqendpoint" + sequence.getEndPointer()); // // } // } // name = new StringBuilder('>'); sequence = new FileSequence(this, seqOffset + nSeqCount, mappedBuff.position() -1); // include > bytesUntilNextLFAreName = true; nSeqCount ++; if(nSeqCount % 1000 == 0){ final int current = nSeqCount; progressWin.setMessage("Indexing file " + current + "/" + nSeqsToRetrieve); } } if((nextByte == '\n' || nextByte == '\r')){ if(bytesUntilNextLFAreName){ sequence.addName(name.toString()); sequence.setSequenceAfterNameStartPointer(mappedBuff.position() + 1); // exlude LF bytesUntilNextLFAreName = false; // jump over sequence to next name if possible if(seekOffset > 0){ seekStartPos = mappedBuff.position(); seekToPos = seekStartPos + seekOffset + 1; mappedBuff.position(seekToPos); // if next pos not is newline then sequences are not aligned and we // go back and loop through all positions byte checkByte =(byte) mappedBuff.read(); if(checkByte != '\n' && checkByte != '\r'){ // rewind mappedBuff.position(seekStartPos); seekOffset = 0; } } } } if(bytesUntilNextLFAreName){ name.append((char) nextByte); } if(nSeqCount > nSeqsToRetrieve){ System.out.println("Found " + nSeqCount + " seq, break"); break; } if(progressWin.wasSubThreadInterruptedByUser()){ break; } } // EOF if(nextByte == -1){ if(sequence != null){ System.out.println("EOF"); sequence.setEndPointer(mappedBuff.position() - 1); // remove EOF } } logger.info("mapBuffpositionafter" + mappedBuff.position()); // Skip adding the last seq for now //long endPos = raf.length(); //sequence.addNextSeqStartPos(startPos); long endTime = System.currentTimeMillis(); System.out.println("reading sequences took " + (endTime - startTime) + " milliseconds"); return allSeqs; } */