/*
* Copyright 2006-2012 Amazon Technologies, Inc. or its affiliates.
* Amazon, Amazon.com and Carbonado are trademarks or registered trademarks
* of Amazon Technologies, Inc. or its affiliates. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.amazon.carbonado.cursor;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.lang.reflect.UndeclaredThrowableException;
import java.util.AbstractCollection;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.PriorityQueue;
import com.amazon.carbonado.FetchException;
import com.amazon.carbonado.FetchInterruptedException;
import com.amazon.carbonado.Query;
import com.amazon.carbonado.Storable;
import com.amazon.carbonado.Storage;
import com.amazon.carbonado.SupportException;
import com.amazon.carbonado.spi.RAFInputStream;
import com.amazon.carbonado.spi.RAFOutputStream;
/**
* Sort buffer implemented via a merge sort algorithm. If there are too many
* storables to fit in the reserved memory buffer, they are sorted and
* serialized to temporary files.
*
* <p>The following system properties can be set to change the default
* performance characteristics of the merge sort. Each property name must be
* prefixed with "com.amazon.carbonado.cursor.MergeSortBuffer."
*
* <pre>
* Property Default Notes
* ------------------- ---------- ----------------------------------------------
* maxArrayCapacity 8192 Larger value greatly improves performance, but
* more memory is used for each running sort.
*
* maxOpenFileCount 100 Larger value may reduce the amount of file
* merges, but there is an increased risk of
* running out of file descriptors.
*
* outputBufferSize 10000 Larger value may improve performance of file
* writing, but not by much.
*
* tmpdir Merge sort files by default are placed in the
* Java temp directory. Override to place them
* somewhere else.
* </pre>
*
* @author Brian S O'Neill
* @see SortedCursor
*/
public class MergeSortBuffer<S extends Storable> extends AbstractCollection<S>
implements SortBuffer<S>
{
private static final int MIN_ARRAY_CAPACITY = 64;
// Bigger means better performance, but more memory is used.
private static final int MAX_ARRAY_CAPACITY;
private static final int DEFAULT_MAX_ARRAY_CAPACITY = 8192;
// Bigger means better performance, but more file handles may be used.
private static final int MAX_OPEN_FILE_COUNT;
private static final int DEFAULT_MAX_OPEN_FILE_COUNT = 100;
// Bigger may improve write performance, but not by much.
private static final int OUTPUT_BUFFER_SIZE;
private static final int DEFAULT_OUTPUT_BUFFER_SIZE = 10000;
private static final String TEMP_DIR;
static {
String prefix = MergeSortBuffer.class.getName() + '.';
MAX_ARRAY_CAPACITY = Integer.getInteger(prefix + "maxArrayCapacity",
DEFAULT_MAX_ARRAY_CAPACITY);
MAX_OPEN_FILE_COUNT = Integer.getInteger(prefix + "maxOpenFileCount",
DEFAULT_MAX_OPEN_FILE_COUNT);
OUTPUT_BUFFER_SIZE = Integer.getInteger(prefix + "outputBufferSize",
DEFAULT_OUTPUT_BUFFER_SIZE);
// Null means use system temp dir.
String tempDir = System.getProperty(prefix + "tmpdir", null);
if (tempDir != null) {
File f = new File(tempDir);
if (!f.exists() || !f.isDirectory() || !f.canRead() || !f.canWrite()) {
tempDir = null;
}
}
TEMP_DIR = tempDir;
}
private final String mTempDir;
private final int mMaxArrayCapacity;
private final Query.Controller mController;
private Preparer<S> mPreparer;
private S[] mElements;
private int mSize;
private int mTotalSize;
private WorkFilePool mWorkFilePool;
private List<RandomAccessFile> mFilesInUse;
private Comparator<S> mComparator;
private volatile boolean mStop;
/**
* @since 1.2
*/
public MergeSortBuffer() {
this(null, TEMP_DIR, MAX_ARRAY_CAPACITY);
}
/**
* @since 1.2
*
* @param controller optional controller which can abort query operation
*/
public MergeSortBuffer(Query.Controller controller) {
this(null, TEMP_DIR, MAX_ARRAY_CAPACITY, controller);
}
/**
* @param storage storage for elements; if null use first Storable to
* prepare reloaded Storables
*/
public MergeSortBuffer(Storage<S> storage) {
this(storage, TEMP_DIR, MAX_ARRAY_CAPACITY);
}
/**
* @param storage storage for elements; if null use first Storable to
* prepare reloaded Storables
* @param controller optional controller which can abort query operation
*/
public MergeSortBuffer(Storage<S> storage, Query.Controller controller) {
this(storage, TEMP_DIR, MAX_ARRAY_CAPACITY, controller);
}
/**
* @param storage storage for elements; if null use first Storable to
* prepare reloaded Storables
* @param tempDir directory to store temp files for merging, or null for default
*/
public MergeSortBuffer(Storage<S> storage, String tempDir) {
this(storage, tempDir, MAX_ARRAY_CAPACITY);
}
/**
* @param storage storage for elements; if null use first Storable to
* prepare reloaded Storables
* @param tempDir directory to store temp files for merging, or null for default
* @param maxArrayCapacity maximum amount of storables to keep in an array
* before serializing to a file
* @throws IllegalArgumentException if storage is null
*/
@SuppressWarnings("unchecked")
public MergeSortBuffer(Storage<S> storage, String tempDir, int maxArrayCapacity) {
this(storage, tempDir, maxArrayCapacity, null);
}
/**
* @param storage storage for elements; if null use first Storable to
* prepare reloaded Storables
* @param tempDir directory to store temp files for merging, or null for default
* @param maxArrayCapacity maximum amount of storables to keep in an array
* before serializing to a file
* @param controller optional controller which can abort query operation
* @throws IllegalArgumentException if storage is null
*/
@SuppressWarnings("unchecked")
public MergeSortBuffer(Storage<S> storage, String tempDir, int maxArrayCapacity,
Query.Controller controller)
{
mTempDir = tempDir;
mMaxArrayCapacity = maxArrayCapacity;
if (storage != null) {
mPreparer = new FromStorage(storage);
}
int cap = Math.min(MIN_ARRAY_CAPACITY, maxArrayCapacity);
mElements = (S[]) new Storable[cap];
if ((mController = controller) != null) {
controller.begin();
}
}
public void prepare(Comparator<S> comparator) {
if (comparator == null) {
throw new IllegalArgumentException();
}
clear();
mComparator = comparator;
}
@Override
public boolean add(S storable) {
if (mPreparer == null) {
mPreparer = new FromStorable(storable);
}
Comparator<S> comparator = comparator();
arrayPrep:
if (mSize >= mElements.length) {
if (mElements.length < mMaxArrayCapacity) {
// Increase array capacity.
int newCap = mElements.length * 2;
if (newCap > mMaxArrayCapacity) {
newCap = mMaxArrayCapacity;
}
S[] newElements = (S[]) new Storable[newCap];
System.arraycopy(mElements, 0, newElements, 0, mElements.length);
mElements = newElements;
break arrayPrep;
}
// Sort current in-memory results and serialize to a temp file.
// Make sure everything is set up to use temp files.
{
if (mWorkFilePool == null) {
mWorkFilePool = WorkFilePool.getInstance(mTempDir);
mFilesInUse = new ArrayList<RandomAccessFile>();
}
}
Arrays.sort(mElements, comparator);
RandomAccessFile raf;
try {
raf = mWorkFilePool.acquireWorkFile(this);
OutputStream out =
new BufferedOutputStream(new RAFOutputStream(raf), OUTPUT_BUFFER_SIZE);
if (mFilesInUse.size() < (MAX_OPEN_FILE_COUNT - 1)) {
mFilesInUse.add(raf);
byte count = 0;
for (S element : mElements) {
// Check every so often if should continue.
continueCheck(++count);
element.writeTo(out);
}
} else {
// Merge files together.
// Determine the average length per file in use.
long totalLength = 0;
int fileCount = mFilesInUse.size();
for (int i=0; i<fileCount; i++) {
totalLength += mFilesInUse.get(i).length();
}
// Compute average with ceiling rounding mode.
long averageLength = (totalLength + fileCount) / fileCount;
// For any file whose length is above average, don't merge
// it. The goal is to evenly distribute file growth.
List<RandomAccessFile> filesToExclude = new ArrayList<RandomAccessFile>();
List<RandomAccessFile> filesToMerge = new ArrayList<RandomAccessFile>();
long mergedLength = 0;
for (int i=0; i<fileCount; i++) {
RandomAccessFile fileInUse = mFilesInUse.get(i);
long fileLength = fileInUse.length();
if (fileLength > averageLength) {
filesToExclude.add(fileInUse);
} else {
filesToMerge.add(fileInUse);
mergedLength += fileLength;
}
}
mFilesInUse.add(raf);
// Pre-allocate space, in an attempt to improve performance
// as well as error out earlier, should the disk be full.
raf.setLength(mergedLength);
byte count = 0;
Iterator<S> it = iterator(filesToMerge);
while (it.hasNext()) {
// Check every so often if should continue.
continueCheck(++count);
S element = it.next();
element.writeTo(out);
}
mWorkFilePool.releaseWorkFiles(filesToMerge);
mFilesInUse = filesToExclude;
mFilesInUse.add(raf);
}
out.flush();
// Truncate any data from last time file was used.
raf.setLength(raf.getFilePointer());
// Reset to start of file in preparation for reading later.
raf.seek(0);
} catch (SupportException e) {
throw new UndeclaredThrowableException(e);
} catch (IOException e) {
throw new UndeclaredThrowableException(e);
}
mSize = 0;
}
mElements[mSize++] = storable;
mTotalSize++;
return true;
}
@Override
public int size() {
return mTotalSize;
}
@Override
public Iterator<S> iterator() {
return iterator(mFilesInUse);
}
private Iterator<S> iterator(List<RandomAccessFile> filesToMerge) {
Comparator<S> comparator = comparator();
if (mWorkFilePool == null) {
return new ObjectArrayIterator<S>(mElements, 0, mSize);
}
// Merge with the files. Use a priority queue to decide which is the
// next buffer to pull an element from.
PriorityQueue<Iter<S>> pq = new PriorityQueue<Iter<S>>(1 + mFilesInUse.size());
pq.add(new ArrayIter<S>(comparator, mElements, mSize));
for (RandomAccessFile raf : filesToMerge) {
try {
raf.seek(0);
} catch (IOException e) {
throw new UndeclaredThrowableException(e);
}
InputStream in = new BufferedInputStream(new RAFInputStream(raf));
pq.add(new InputIter<S>(comparator, mPreparer, in));
}
return new Merger<S>(pq);
}
@Override
public void clear() {
if (mPreparer instanceof FromStorable) {
mPreparer = null;
}
if (mTotalSize > 0) {
mSize = 0;
mTotalSize = 0;
if (mWorkFilePool != null && mFilesInUse != null) {
mWorkFilePool.releaseWorkFiles(mFilesInUse);
mFilesInUse.clear();
}
}
}
public void sort() {
// Sort current in-memory results. Anything residing in files has
// already been sorted.
Arrays.sort(mElements, 0, mSize, comparator());
}
public void close() {
try {
clear();
if (mWorkFilePool != null) {
mWorkFilePool.unregisterWorkFileUser(this);
}
} finally {
Query.Controller controller = mController;
if (controller != null) {
controller.close();
}
}
}
void stop() {
mStop = true;
}
private Comparator<S> comparator() {
Comparator<S> comparator = mComparator;
if (comparator == null) {
throw new IllegalStateException("Buffer was not prepared with a Comparator");
}
return comparator;
}
private void continueCheck(byte count) {
if (count == 0) {
try {
Query.Controller controller = mController;
if (controller != null) {
controller.continueCheck();
}
if (mStop) {
throw new FetchInterruptedException("Shutting down");
}
} catch (FetchException e) {
close();
throw new UndeclaredThrowableException(e);
}
}
}
private static interface Preparer<S extends Storable> {
S prepare();
}
private static class FromStorage<S extends Storable> implements Preparer<S> {
private final Storage<S> mStorage;
FromStorage(Storage<S> storage) {
if (storage == null) {
throw new IllegalArgumentException();
}
mStorage = storage;
}
public S prepare() {
return mStorage.prepare();
}
}
private static class FromStorable<S extends Storable> implements Preparer<S> {
private final S mStorable;
FromStorable(S storable) {
if (storable == null) {
throw new IllegalArgumentException();
}
mStorable = (S) storable.prepare();
}
public S prepare() {
return (S) mStorable.prepare();
}
}
/**
* Simple interator interface that supports peeking at next element.
*/
private abstract static class Iter<S extends Storable> implements Comparable<Iter<S>> {
private final Comparator<S> mComparator;
protected Iter(Comparator<S> comparator) {
mComparator = comparator;
}
/**
* Returns null if iterator is exhausted.
*/
abstract S peek();
/**
* Returns null if iterator is exhausted.
*/
abstract S next();
public int compareTo(Iter<S> iter) {
S thisPeek = peek();
S thatPeek = iter.peek();
if (thisPeek == null) {
if (thatPeek == null) {
return 0;
}
// Null is low in order to rise to top of priority queue. This
// Iter will then be tossed out of the priority queue.
return -1;
} else if (thatPeek == null) {
return 1;
}
return mComparator.compare(thisPeek, thatPeek);
}
}
/**
* Iterator that reads from an array.
*/
private static class ArrayIter<S extends Storable> extends Iter<S> {
private final S[] mArray;
private final int mSize;
private int mPos;
ArrayIter(Comparator<S> comparator, S[] array, int size) {
super(comparator);
mArray = array;
mSize = size;
}
@Override
S peek() {
int pos = mPos;
if (pos >= mSize) {
return null;
}
return mArray[pos];
}
@Override
S next() {
int pos = mPos;
if (pos >= mSize) {
return null;
}
S next = mArray[pos];
mPos = pos + 1;
return next;
}
}
/**
* Iterator that reads from an input stream of serialized Storables.
*/
private static class InputIter<S extends Storable> extends Iter<S> {
private final Preparer<S> mPreparer;
private InputStream mIn;
private S mNext;
InputIter(Comparator<S> comparator, Preparer<S> preparer, InputStream in) {
super(comparator);
mPreparer = preparer;
mIn = in;
}
@Override
S peek() {
if (mNext != null) {
return mNext;
}
if (mIn != null) {
try {
S next = mPreparer.prepare();
next.readFrom(mIn);
mNext = next;
} catch (EOFException e) {
mIn = null;
} catch (SupportException e) {
throw new UndeclaredThrowableException(e);
} catch (IOException e) {
throw new UndeclaredThrowableException(e);
}
}
return mNext;
}
@Override
S next() {
S next = peek();
mNext = null;
return next;
}
}
private static class Merger<S extends Storable> implements Iterator<S> {
private final PriorityQueue<Iter<S>> mPQ;
private S mNext;
Merger(PriorityQueue<Iter<S>> pq) {
mPQ = pq;
}
public boolean hasNext() {
if (mNext == null) {
while (true) {
Iter<S> iter = mPQ.poll();
if (iter == null) {
return false;
}
if ((mNext = iter.next()) != null) {
// Iter is not exhausted, so put it back in to be used
// again. Adding it back causes it to be inserted in
// the proper order, based on the next element it has
// to offer.
mPQ.add(iter);
return true;
}
}
}
return true;
}
public S next() {
if (hasNext()) {
S next = mNext;
mNext = null;
return next;
}
throw new NoSuchElementException();
}
public void remove() {
throw new UnsupportedOperationException();
}
}
private static class ObjectArrayIterator<E> implements Iterator<E> {
private final E[] mElements;
private final int mEnd;
private int mIndex;
public ObjectArrayIterator(E[] elements, int start, int end) {
mElements = elements;
mEnd = end;
mIndex = start;
}
public boolean hasNext() {
return mIndex < mEnd;
}
public E next() {
if (mIndex >= mEnd) {
throw new NoSuchElementException();
}
return mElements[mIndex++];
}
public void remove() {
throw new UnsupportedOperationException();
}
}
}