package ch.unibe.scg.cells;

import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;

import com.google.common.base.Supplier;
import com.google.common.collect.ComparisonChain;
import com.google.common.collect.Iterables;
import com.google.common.collect.Ordering;
import com.google.protobuf.ByteString;

/**
 * A cell source and lookup table that keeps all of its cells in memory, split into sorted shards.
 *
 * <p>Rows are located through {@link #splitIndex}; columns are located through a column index that
 * is built on first use.
 *
 * <p>NOTE(review): prefix reads assume that both {@code LexicographicalComparator} and the natural
 * ordering of {@code Cell} compare keys byte-wise as unsigned values, row key first and column key
 * second, ignoring cell contents. Confirm against those classes.
 */
class InMemorySource<T> implements CellSource<T>, CellLookupTable<T>, Iterable<Cell<T>> {
    private static final long serialVersionUID = 1L;

    private static final Comparator<ByteString> cmp = new LexicographicalComparator();

    /** Shards are immutable. Does not contain empty shards. */
    private final List<List<Cell<T>>> store;

    /**
     * A sorted index of the row keys at the end of each shard, to enable fast shard retrieval by
     * row key. Entry {@code i} belongs to shard {@code i} of {@link #store}.
     */
    private final List<ByteString> splitIndex;

    /** Lazily initialized column index. */
    private final ColIndexSupplier colIndex = new ColIndexSupplier();

    /** To look up a cell, get a RowPointer, then look up its row in the store. */
    private static class RowPointer implements Comparable<RowPointer> {
        final ByteString colKey;
        final ByteString rowKey;

        RowPointer(ByteString colKey, ByteString rowKey) {
            this.colKey = colKey;
            this.rowKey = rowKey;
        }

        /** Orders by column key first, then by row key. */
        @Override
        public int compareTo(RowPointer o) {
            return ComparisonChain
                    .start()
                    .compare(colKey, o.colKey, cmp)
                    .compare(rowKey, o.rowKey, cmp)
                    .result();
        }
    }

    /**
     * Supplies a column index, enabling a fast way to read a column.
     * The first call to {@link #get()} creates the index; later calls return the same list.
     */
    private class ColIndexSupplier implements Supplier<List<RowPointer>> {
        private List<RowPointer> columnIndex;

        @Override
        public synchronized List<RowPointer> get() {
            if (columnIndex != null) {
                return columnIndex;
            }

            List<RowPointer> colIndexBuilder = new ArrayList<>();
            for (List<Cell<T>> shard : store) {
                for (Cell<T> c : shard) {
                    colIndexBuilder.add(new RowPointer(c.getColumnKey(), c.getRowKey()));
                }
            }

            // TODO: parallel sorting?
            columnIndex = Ordering.natural().immutableSortedCopy(colIndexBuilder);
            return columnIndex;
        }
    }

    private InMemorySource(List<List<Cell<T>>> store, List<ByteString> splitIndex) {
        this.store = store;
        this.splitIndex = splitIndex;
    }

    /** Serialization hook: substitutes a {@code SerializableLiveObject} wrapping this instance. */
    private Object writeReplace() {
        return new ShallowSerializingCopy.SerializableLiveObject(this);
    }

    /**
     * Creates a source from the given shards. Empty shards are dropped.
     *
     * @param store shards; each must be sorted, shards must be in ascending order, and no cell may
     *     be directly followed by an equal one (checked by assertion only). The shard lists are
     *     not copied.
     */
    static <T> InMemorySource<T> make(List<List<Cell<T>>> store) {
        assert isStoreOk(store);

        List<List<Cell<T>>> nonEmptyStore = new ArrayList<>(store.size());
        List<ByteString> splitIndex = new ArrayList<>(store.size());
        for (List<Cell<T>> shard : store) {
            if (shard.isEmpty()) {
                continue;
            }
            nonEmptyStore.add(shard);
            splitIndex.add(shard.get(shard.size() - 1).getRowKey());
        }

        return new InMemorySource<>(nonEmptyStore, splitIndex);
    }

    /** @return an iterator over all cells, shard by shard. */
    @Override
    public Iterator<Cell<T>> iterator() {
        return Iterables.concat(store).iterator();
    }

    @Override
    public int nShards() {
        return store.size();
    }

    @Override
    public void close() {
        // TODO: disallow getting more shards to follow the contract in comments.
        // Nothing to do.
    }

    @Override
    public OneShotIterable<Cell<T>> getShard(int shard) {
        return new AdapterOneShotIterable<>(store.get(shard));
    }

    /**
     * Reads all cells whose row key starts with {@code rowKeyPrefix}, in store order.
     *
     * @param rowKeyPrefix prefix to match; the empty prefix matches every row.
     * @return the matching cells; empty if there are none.
     */
    @Override
    public Iterable<Cell<T>> readRow(ByteString rowKeyPrefix) {
        int fromShard = findShard(rowKeyPrefix);
        if (fromShard < 0) {
            return Collections.emptyList();
        }
        int from = insertionPoint(store.get(fromShard), rowKeyPrefix);

        int lastShard = store.size() - 1;
        int endOfLastShard = store.get(lastShard).size();

        if (isKeyAllFF(rowKeyPrefix)) {
            // No key sorts after this prefix without also starting with it (this includes the
            // empty prefix), so the match extends to the very end of the store.
            return cellRange(fromShard, from, lastShard, endOfLastShard);
        }

        ByteString toKey = keyPlusOne(rowKeyPrefix);
        int toShard = findShard(toKey);
        if (toShard < 0) {
            // Every stored row key sorts before toKey, so the match runs to the end of the store.
            // Matching rows may span several shards; fromShard need not be the last one.
            return cellRange(fromShard, from, lastShard, endOfLastShard);
        }

        // toKey itself does not carry the prefix, so a cell found exactly at this position is
        // excluded: the insertion point already is the exclusive end.
        int to = insertionPoint(store.get(toShard), toKey);
        return cellRange(fromShard, from, toShard, to);
    }

    /**
     * Reads all cells whose column key starts with {@code columnKeyPrefix}, ordered by column key,
     * then row key.
     *
     * @param columnKeyPrefix prefix to match; the empty prefix matches every column.
     * @return a newly allocated list of the matching cells.
     */
    @Override
    public Iterable<Cell<T>> readColumn(ByteString columnKeyPrefix) {
        List<RowPointer> index = colIndex.get();

        int startPos = colIndexStartPos(columnKeyPrefix);
        int endPos = index.size();
        if (!isKeyAllFF(columnKeyPrefix)) {
            // Empty and all-0xff prefixes have no successor key; for them the match runs to the
            // end of the index.
            endPos = colIndexStartPos(keyPlusOne(columnKeyPrefix));
        }

        List<RowPointer> rows = index.subList(startPos, endPos);
        List<Cell<T>> ret = new ArrayList<>(rows.size());
        for (RowPointer r : rows) {
            ret.add(lookUp(r));
        }
        return ret;
    }

    /**
     * Resolves a column index entry to the cell it points to.
     *
     * @throws IllegalStateException if the store holds no such cell, i.e. the index is inconsistent.
     */
    private Cell<T> lookUp(RowPointer ptr) {
        Cell<T> probe = new Cell<T>(ptr.rowKey, ptr.colKey, ByteString.EMPTY);

        int shard = findShard(ptr.rowKey);
        while (shard >= 0 && shard < store.size()) {
            List<Cell<T>> cells = store.get(shard);
            int pos = Collections.binarySearch(cells, probe);
            if (pos >= 0) {
                return cells.get(pos);
            }
            // The row continues in the next shard only if this shard ends with that very row.
            if (cmp.compare(splitIndex.get(shard), ptr.rowKey) != 0) {
                break;
            }
            shard++;
        }

        throw new IllegalStateException("Index contained incorrect information for row "
                + ptr.rowKey.toStringUtf8() + " col: " + ptr.colKey.toStringUtf8());
    }

    /**
     * @return the cells from position {@code from} of shard {@code fromShard} (inclusive) up to
     *     position {@code to} of shard {@code toShard} (exclusive), as views on the store.
     */
    private Iterable<Cell<T>> cellRange(int fromShard, int from, int toShard, int to) {
        List<Cell<T>> first = store.get(fromShard);
        if (fromShard == toShard) {
            return first.subList(from, to);
        }

        List<List<Cell<T>>> parts = new ArrayList<>(toShard - fromShard + 1);
        parts.add(first.subList(from, first.size()));
        parts.addAll(store.subList(fromShard + 1, toShard));
        parts.add(store.get(toShard).subList(0, to));
        return Iterables.concat(parts);
    }

    /**
     * @return the position in {@code shard} of the cell with the given row key and an empty column
     *     key if there is one, otherwise the position where it would be inserted.
     */
    private int insertionPoint(List<Cell<T>> shard, ByteString rowKey) {
        int pos = Collections.binarySearch(shard,
                new Cell<T>(rowKey, ByteString.EMPTY, ByteString.EMPTY));
        return pos < 0 ? ~pos : pos;
    }

    /**
     * @return true if key consists only of bytes 0xff, which includes the empty key.
     *     False otherwise.
     */
    private boolean isKeyAllFF(ByteString key) {
        for (int i = 0; i < key.size(); i++) {
            if (key.byteAt(i) != -1) { // -1 signed == 0xff unsigned.
                return false;
            }
        }
        return true;
    }

    /**
     * @return the index of the first shard that could contain the key prefix, i.e. the first shard
     *     whose last row key is not smaller than {@code needle}. -1 if there's none.
     */
    private int findShard(ByteString needle) {
        if (splitIndex.isEmpty()
                || cmp.compare(needle, splitIndex.get(splitIndex.size() - 1)) > 0) {
            return -1;
        }

        int pos = Collections.binarySearch(splitIndex, needle, cmp);
        if (pos < 0) {
            return ~pos;
        }
        // Several shards may end with the same row key, and binarySearch may hit any of them.
        // Step back to the first one.
        while (pos > 0 && cmp.compare(splitIndex.get(pos - 1), needle) == 0) {
            pos--;
        }
        return pos;
    }

    /**
     * Checks the preconditions of {@link #make}: every shard is sorted, each non-empty shard
     * starts strictly after the end of the previous non-empty shard, and no cell is directly
     * followed by an equal cell.
     */
    private static <T> boolean isStoreOk(List<List<Cell<T>>> store) {
        for (List<Cell<T>> shard : store) {
            if (!Ordering.<Cell<T>> natural().isOrdered(shard)) {
                return false;
            }
        }

        List<Cell<T>> prevShard = null;
        for (List<Cell<T>> cur : store) {
            if (cur.isEmpty()) {
                continue;
            }

            if (prevShard != null
                    && prevShard.get(prevShard.size() - 1).compareTo(cur.get(0)) >= 0) {
                return false;
            }

            prevShard = cur;
        }

        Cell<T> prevCell = null;
        for (Cell<T> c : Iterables.concat(store)) {
            if (c.equals(prevCell)) {
                return false;
            }
            prevCell = c;
        }

        return true;
    }

    /**
     * Computes the smallest key that sorts after every key starting with {@code key}: trailing
     * 0xff bytes are dropped and the last remaining byte is incremented.
     *
     * <p>The bytes are treated as unsigned and the result is never longer than {@code key}, e.g.
     * {@code {0x7f}} becomes {@code {0x80}} and {@code {0x01, 0xff}} becomes {@code {0x02}}.
     *
     * @param key a non-empty key with at least one byte other than 0xff.
     * @throws IllegalArgumentException if {@code key} consists only of 0xff bytes and therefore
     *     has no successor.
     */
    private ByteString keyPlusOne(ByteString key) {
        assert !key.isEmpty() : "This case needs special treatment on caller level.";

        int last = key.size() - 1;
        while (last >= 0 && key.byteAt(last) == -1) { // Skip trailing 0xff bytes.
            last--;
        }
        if (last < 0) {
            throw new IllegalArgumentException(
                    "Key has no successor: it is empty or consists only of 0xff bytes.");
        }

        byte[] successor = key.substring(0, last + 1).toByteArray();
        successor[last]++; // Cannot overflow: this byte is not 0xff.
        return ByteString.copyFrom(successor);
    }

    /** @return the index of the first row pointer that could contain the column key prefix. */
    private int colIndexStartPos(ByteString colKeyPrefix) {
        int pos = Collections.binarySearch(colIndex.get(),
                new RowPointer(colKeyPrefix, ByteString.EMPTY));
        if (pos < 0) {
            pos = ~pos;
        }

        return pos;
    }
}