/* * The MIT License * * Copyright (c) 2009 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package htsjdk.samtools.util; import java.io.DataInputStream; import java.io.DataOutputStream; import java.io.EOFException; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.List; import java.util.NoSuchElementException; import java.util.PriorityQueue; /** * Accumulate a list of longs that can then be sorted in natural order and iterated over. * If there are more values accumulated than a specified maximum, values are spilled to disk. * * Note that because this class returns primitive longs rather than Longs, it does not conform to * any of the Collection iteration interfaces. Use as follows: * * 1. ctor * 2. call add() as many times as desired. * 3. call doneAddingStartIteration(). * 4. call hasNext() and next() until exhausted or had enough. * 5. optionally call cleanup() to free space in temporary directory as soon as possible. * * If there are few enough values so that they all can be kept in RAM, then the array is sorted * and iterated over trivially. * * If there are more values that can fit in RAM, then values are sorted and written to a temp file when the max * number to be stored in RAM is reached. Multiple temp files are then merged during iteration via PriorityQueue. * * c.f. SortingCollection for more details. * * @author alecw@broadinstitute.org */ public class SortingLongCollection { public static final int SIZEOF = 8; public static final int MAX_ITEMS_IN_RAM = (int)Math.floor((Integer.MAX_VALUE/8)*.999); /** * Where files of sorted values go. */ private final File[] tmpDir; private final int maxValuesInRam; private int numValuesInRam = 0; private long[] ramValues; /** * Set to true when done adding and ready to iterate */ private boolean doneAdding = false; /** * Set to true when all temp files have been cleaned up */ private boolean cleanedUp = false; /** * List of files in tmpDir containing sorted values */ private final List<File> files = new ArrayList<File>(); // for in-memory iteration private int iterationIndex = 0; // For disk-based iteration private PriorityQueue<PeekFileValueIterator> priorityQueue; /** * Prepare to accumulate values to be sorted * @param maxValuesInRam how many values to accumulate before spilling to disk * @param tmpDir Where to write files of values that will not fit in RAM */ public SortingLongCollection(final int maxValuesInRam, final File... tmpDir) { if (maxValuesInRam <= 0) { throw new IllegalArgumentException("maxValuesInRam must be > 0"); } this.tmpDir = tmpDir; this.maxValuesInRam = Math.min(maxValuesInRam, MAX_ITEMS_IN_RAM); this.ramValues = new long[maxValuesInRam]; } /** * Add a value to the collection. * @param value */ public void add(final long value) { if (doneAdding) { throw new IllegalStateException("Cannot add after calling doneAddingStartIteration()"); } if (numValuesInRam == maxValuesInRam) { spillToDisk(); } ramValues[numValuesInRam++] = value; } /** * This method must be called after done adding, and before calling hasNext() or next(). */ public void doneAddingStartIteration() { if (cleanedUp || doneAdding) { throw new IllegalStateException("Cannot call doneAddingStartIteration() after cleanup() was called."); } doneAdding = true; if (this.files.isEmpty()) { Arrays.sort(this.ramValues, 0, this.numValuesInRam); return; } if (this.numValuesInRam > 0) { spillToDisk(); } this.priorityQueue = new PriorityQueue<PeekFileValueIterator>(files.size(), new PeekFileValueIteratorComparator()); for (final File f : files) { final FileValueIterator it = new FileValueIterator(f); if (it.hasNext()) { this.priorityQueue.offer(new PeekFileValueIterator(it)); } } // Facilitate GC this.ramValues = null; } /** * Sort the values in memory, write them to a file, and clear the buffer of values in memory. */ private void spillToDisk() { try { Arrays.sort(this.ramValues, 0, this.numValuesInRam); final File f = IOUtil.newTempFile("sortingcollection.", ".tmp", this.tmpDir, IOUtil.FIVE_GBS); DataOutputStream os = null; try { final long numBytes = this.numValuesInRam * SIZEOF; os = new DataOutputStream(IOUtil.maybeBufferOutputStream(new FileOutputStream(f))); f.deleteOnExit(); for (int i = 0; i < this.numValuesInRam; ++i) { os.writeLong(ramValues[i]); } os.flush(); } finally { if (os != null) { os.close(); } } this.numValuesInRam = 0; this.files.add(f); } catch (IOException e) { throw new RuntimeIOException(e); } } /** * Delete any temporary files. After this method is called, no other method calls should be made on this object. */ public void cleanup() { this.doneAdding = true; this.cleanedUp = true; this.ramValues = null; IOUtil.deleteFiles(this.files); } /** * Call only after doneAddingStartIteration() has been called. * * @return true if there is another value to be gotten. */ public boolean hasNext() { if (!doneAdding || cleanedUp) { throw new IllegalStateException(); } if (this.ramValues != null) { // in-memory iteration return this.iterationIndex < numValuesInRam; } else { return !priorityQueue.isEmpty(); } } /** * Call only if hasNext() == true. * @return next value from collection, in natural sort order. */ public long next() { if (!hasNext()) { throw new NoSuchElementException(); } if (this.ramValues != null) { // in-memory iteration return ramValues[iterationIndex++]; } else { final PeekFileValueIterator fileIterator = priorityQueue.poll(); final long ret = fileIterator.next(); if (fileIterator.hasNext()) { this.priorityQueue.offer(fileIterator); } else { fileIterator.close(); } return ret; } } /** * Read a file of longs */ private static class FileValueIterator { private final File file; private final DataInputStream is; private long currentRecord = 0; private boolean isCurrentRecord = true; FileValueIterator(final File file) { this.file = file; try { is = new DataInputStream(IOUtil.maybeBufferInputStream(new FileInputStream(file))); next(); } catch (FileNotFoundException e) { throw new RuntimeIOException(file.getAbsolutePath(), e); } } boolean hasNext() { return isCurrentRecord; } long next() { if (!hasNext()) { throw new NoSuchElementException(); } final long ret = currentRecord; try { currentRecord = is.readLong(); } catch (EOFException eof) { isCurrentRecord = false; currentRecord = 0; } catch(IOException e) { throw new RuntimeException(e); } return ret; } void close() { CloserUtil.close(is); IOUtil.deleteFiles(file); } } /** * Add peek() functionality to FileValueIterator */ private static class PeekFileValueIterator { private FileValueIterator underlyingIterator; private long peekValue; private boolean hasPeekedValue = false; PeekFileValueIterator(final FileValueIterator underlyingIterator) { this.underlyingIterator = underlyingIterator; } boolean hasNext() { return hasPeekedValue || underlyingIterator.hasNext(); } long next() { if (!hasNext()) { throw new NoSuchElementException(); } if (hasPeekedValue) { hasPeekedValue = false; return peekValue; } return underlyingIterator.next(); } long peek() { if (!hasNext()) { throw new NoSuchElementException(); } if (!hasPeekedValue) { peekValue = underlyingIterator.next(); hasPeekedValue = true; } return peekValue; } void close() { underlyingIterator.close(); hasPeekedValue = false; underlyingIterator = null; } } private static class PeekFileValueIteratorComparator implements Comparator<PeekFileValueIterator> { public int compare(final PeekFileValueIterator it1, final PeekFileValueIterator it2) { if (it1.peek() < it2.peek()) { return -1; } if (it1.peek() == it2.peek()) { return 0; } return 1; } } }