package ch.unibe.scg.cells;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Ordering;
import com.google.common.collect.UnmodifiableIterator;
import com.google.protobuf.ByteString;
/**
* For running in memory; acts as both sink and source.
*
* <p>
* A shuffler naturally has three states:
* <ol>
* <li>Writable (initially)
* <li>Closing (during execution of close)
* <li>Readable (after close completes)
* </ol>
*
* <p>
* It is never a good idea or safe to read from a shuffler while it is still writable.
* This is especially true in a multi-threading environment.
*
* <p>
* This class is thread-safe in the sense that while it is writable, concurrent
* writes are legal. However, during Closing, it may neither be read nor written to.
* During Readable, writes are forbidden. Concurrent reads are safe.
*
* <p>
* This class is {@link java.io.Serializable}, but there's a caveat.
* Since an InMemoryShuffler is meant to be used locally and in memory, it cannot meaningfully
* be shipped from one machine to another.
* It is legal for a Mapper to hold on to an InMemoryShuffler -- so long as that mapper
* is used locally only.
*
* <p>
* Cells then serializes this class using {@link ShallowSerializingCopy}.
* However, if serialization is attempted using a classical {@link java.io.ObjectOutputStream},
* it will throw an {@link UnsupportedOperationException}.
*/
class InMemoryShuffler<T> implements CellSink<T>, CellSource<T>, CellLookupTable<T>, Iterable<Cell<T>> {
    private static final long serialVersionUID = 1L;

    /** The backing store. When used as a sink, this is mutable. Closing the sink makes the field immutable. */
    private List<Cell<T>> store = new ArrayList<>();

    /** Column index, sorted by (column key, row key). Stays {@code null} until {@link #close()} has run. */
    private List<RowPointer> colIndex;

    InMemoryShuffler() {} // Don't subclass

    /** To look up a row, get a RowPointer, then look up its row in the store. */
    private static class RowPointer implements Comparable<RowPointer> {
        final ByteString colKey;
        final ByteString rowKey;

        RowPointer(ByteString colKey, ByteString rowKey) {
            this.colKey = colKey;
            this.rowKey = rowKey;
        }

        /** Orders by column key first, then by row key; both are compared as read-only byte buffers. */
        @Override
        public int compareTo(RowPointer o) {
            return ComparisonChain
                    .start()
                    .compare(colKey.asReadOnlyByteBuffer(), o.colKey.asReadOnlyByteBuffer())
                    .compare(rowKey.asReadOnlyByteBuffer(), o.rowKey.asReadOnlyByteBuffer())
                    .result();
        }
    }

    /** Return an instance of a shuffler. */
    public static <T> InMemoryShuffler<T> getInstance() {
        return new InMemoryShuffler<>();
    }

    /**
     * Serialize all elements. When the returned shuffler will be read, the elements are
     * re-deserialized.
     *
     * @param elements the elements to encode and write into the new shuffler.
     * @param codec used to encode each element into a cell.
     * @return a new, already closed InMemoryShuffler that will, when read, read all elements.
     */
    public static <T> InMemoryShuffler<T> copyFrom(Iterable<T> elements, Codec<T> codec) {
        InMemoryShuffler<T> ret = getInstance();
        for (T e : elements) {
            ret.write(codec.encode(e));
        }
        ret.close();
        return ret;
    }

    /**
     * Java serialization hook: hands out a {@link ShallowSerializingCopy.SerializableLiveObject}
     * wrapping this instance instead of serializing the shuffler's fields.
     */
    private Object writeReplace() {
        return new ShallowSerializingCopy.SerializableLiveObject(this);
    }

    /**
     * Sorts and de-duplicates the store, freezes it, and builds the column index.
     * This operation is NOT threadsafe.
     */
    @Override
    public void close() {
        // Sort store, discard duplicates, and make immutable.
        store = ImmutableList.copyOf(new TreeSet<>(store));

        // Produce colIndex: one pointer per cell, re-sorted by (column key, row key).
        List<RowPointer> colIndexBuilder = new ArrayList<>(store.size());
        for (Cell<T> c : store) {
            colIndexBuilder.add(new RowPointer(c.getColumnKey(), c.getRowKey()));
        }
        colIndex = Ordering.natural().immutableSortedCopy(colIndexBuilder);
    }

    /**
     * Should be called only in state Writable. Concurrent writes are allowed,
     * but may not be interspersed with reads or closes. See class comment.
     */
    @Override
    public synchronized void write(Cell<T> cell) {
        store.add(cell);
    }

    /** May only be called in state Readable. This is thread-safe with other reads. See class comment. */
    @Override
    public Iterator<Cell<T>> iterator() {
        assert Ordering.natural().isOrdered(store) : "Someone forgot to close the sink.";

        final Iterator<Cell<T>> ret = store.iterator();
        // Wrap so that callers cannot remove elements through the iterator.
        return new UnmodifiableIterator<Cell<T>>() {
            @Override public boolean hasNext() {
                return ret.hasNext();
            }

            @Override public Cell<T> next() {
                return ret.next();
            }
        };
    }

    /**
     * Returns all cells whose row key starts with {@code rowKey}, as a view on the store.
     * An empty {@code rowKey} matches every cell.
     * Only available in state Readable. This operation is threadsafe -- but not during writes.
     * See class comment.
     */
    @Override
    public Iterable<Cell<T>> readRow(ByteString rowKey) {
        assert Ordering.natural().isOrdered(store) : "Someone forgot to close the sink.";

        int startPos = rowStartPos(rowKey);
        return store.subList(startPos, rowEndPos(rowKey, startPos));
    }

    /**
     * Returns all cells whose column key starts with {@code columnKey}, in index order.
     * An empty {@code columnKey} matches every cell.
     * Only available in state Readable. This operation is threadsafe -- but not during writes.
     * See class comment.
     */
    @Override
    public Iterable<Cell<T>> readColumn(ByteString columnKey) {
        assert colIndex != null : "Someone forgot to close the sink.";

        int startPos = indexStartPos(columnKey);
        List<RowPointer> rows = colIndex.subList(startPos, indexEndPos(columnKey, startPos));

        List<Cell<T>> ret = new ArrayList<>(rows.size());
        for (RowPointer r : rows) {
            // The probe carries empty contents; this relies on Cell's ordering ignoring the cell contents.
            int p = Collections.binarySearch(store, new Cell<T>(r.rowKey, r.colKey, ByteString.EMPTY));
            assert p >= 0 : "Index contained incorrect information for row " + r.rowKey.toStringUtf8() + " col: "
                    + columnKey.toStringUtf8() + store;
            ret.add(store.get(p));
        }
        return ret;
    }

    /** @return true iff the first {@code prefix.size()} bytes of {@code key} equal {@code prefix}. */
    private static boolean hasPrefix(ByteString key, ByteString prefix) {
        int prefixLen = prefix.size();
        if (key.size() < prefixLen) {
            return false;
        }
        for (int i = 0; i < prefixLen; i++) {
            if (key.byteAt(i) != prefix.byteAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * @return startPos of the row >= 0. If there is no such row, The insertion point is
     *         returned.
     */
    private int rowStartPos(ByteString rowKey) {
        return retrieveInsertionPoint(Collections.binarySearch(store, new Cell<T>(rowKey, ByteString.EMPTY,
                ByteString.EMPTY)));
    }

    /**
     * Binary search for the end of the run of cells whose row key starts with {@code rowKey}.
     * Assumes the store is sorted lexicographically by row key first (Cell's ordering is defined
     * outside this file), so that from {@code startPos} on, all matching cells are contiguous.
     * Searching for the prefix boundary avoids computing a "key + 1" successor, which is not
     * well-defined via integer arithmetic for keys with leading zero bytes or maximal bytes.
     *
     * @param startPos position returned by {@link #rowStartPos} for the same key.
     * @return the exclusive end position, in the range [startPos, store.size()].
     */
    private int rowEndPos(ByteString rowKey, int startPos) {
        int lo = startPos;
        int hi = store.size();
        while (lo < hi) {
            int mid = (lo + hi) >>> 1;
            if (hasPrefix(store.get(mid).getRowKey(), rowKey)) {
                lo = mid + 1;
            } else {
                hi = mid;
            }
        }
        return lo;
    }

    /** @return position of the first index entry with column key >= colKey, or the insertion point. */
    private int indexStartPos(ByteString colKey) {
        return retrieveInsertionPoint(Collections.binarySearch(colIndex, new RowPointer(colKey, ByteString.EMPTY)));
    }

    /**
     * Binary search for the end of the run of index entries whose column key starts with {@code colKey}.
     * The index is sorted by column key first, so matching entries are contiguous from {@code startPos} on.
     *
     * @param startPos position returned by {@link #indexStartPos} for the same key.
     * @return the exclusive end position, in the range [startPos, colIndex.size()].
     */
    private int indexEndPos(ByteString colKey, int startPos) {
        int lo = startPos;
        int hi = colIndex.size();
        while (lo < hi) {
            int mid = (lo + hi) >>> 1;
            if (hasPrefix(colIndex.get(mid).colKey, colKey)) {
                lo = mid + 1;
            } else {
                hi = mid;
            }
        }
        return lo;
    }

    /** Turns a {@link Collections#binarySearch} result into a non-negative position (match or insertion point). */
    private int retrieveInsertionPoint(int pos) {
        if (pos < 0) {
            pos = -pos - 1;
        }
        assert pos >= 0;
        return pos;
    }

    /** @return 0 if the store is empty, 1 otherwise: all cells live in a single shard. */
    @Override
    public int nShards() {
        if (store.isEmpty()) {
            return 0;
        }
        return 1;
    }

    /**
     * @return the whole store, wrapped as a one-shot iterable.
     * @throws IndexOutOfBoundsException if {@code shard} is negative or not smaller than {@link #nShards()}.
     */
    @Override
    public OneShotIterable<Cell<T>> getShard(int shard) {
        if (shard < 0 || nShards() <= shard) {
            throw new IndexOutOfBoundsException(
                    String.format("You asked for shard %s, but there are only %s.", shard, nShards()));
        }
        return new AdapterOneShotIterable<>(store);
    }
}