package ch.unibe.scg.cells.hadoop;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.util.Iterator;
import java.util.Map.Entry;
import java.util.NoSuchElementException;
import javax.inject.Inject;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import ch.unibe.scg.cells.AdapterOneShotIterable;
import ch.unibe.scg.cells.Cell;
import ch.unibe.scg.cells.CellSource;
import ch.unibe.scg.cells.OneShotIterable;
import ch.unibe.scg.cells.hadoop.HBaseStorage.FamilyName;
import com.google.common.base.Charsets;
import com.google.common.collect.UnmodifiableIterator;
import com.google.common.io.Closer;
import com.google.protobuf.ByteString;
/** A cell source reading from a HTable. Don't forget to close it when you're done. */
public class HBaseCellSource<T> implements CellSource<T>{
final private static long serialVersionUID = 1L;
/** Do not modify. */
final private byte[] family;
final private SerializableHTable hTable;
private transient Closer closer = Closer.create();
@Inject
HBaseCellSource(@FamilyName ByteString family, SerializableHTable hTable) {
this.family = family.toByteArray();
this.hTable = hTable;
}
private void readObject(ObjectInputStream in) throws ClassNotFoundException, IOException {
in.defaultReadObject();
closer = Closer.create();
}
private class ResultScannerIterator extends UnmodifiableIterator<Cell<T>> {
/** The current row's column keys and cell contents. */
private Iterator<Entry<byte[], byte[]>> curRow;
/** The current row's key. */
private ByteString curRowKey;
final private Iterator<Result> nextRows;
/** {@code next} may be null. */
ResultScannerIterator(Iterator<Result> nextRows) {
this.nextRows = nextRows;
}
@Override
public boolean hasNext() {
if (curRow != null && curRow.hasNext()) {
return true;
}
return nextRows.hasNext();
}
@Override
public Cell<T> next() {
if (!hasNext()) { // Demanded by Iterator contract.
throw new NoSuchElementException();
}
if (curRow == null || !curRow.hasNext()) {
// Read next row.
Result r = nextRows.next();
curRowKey = ByteString.copyFrom(r.getRow());
curRow = r.getFamilyMap(family).entrySet().iterator();
}
Entry<byte[], byte[]> c = curRow.next();
return Cell.<T> make(curRowKey,
ByteString.copyFrom(c.getKey()),
ByteString.copyFrom(c.getValue()));
}
}
/** Provides scans appropriate for reading tables sequentially, as in a Mapper. */
static Scan makeScan() {
Scan scan = new Scan();
// HBase book says 500, see chapter 12.9.1. Hbase Book
// This is 10 times faster than the default value.
scan.setCaching(1000);
scan.setCacheBlocks(false); // HBase book 12.9.5. Block Cache
return scan;
}
private ResultScanner openScanner() throws IOException {
Scan scan = makeScan();
scan.addFamily(family);
try {
return hTable.hTable.getScanner(scan);
} catch (IOException e) {
throw new IOException("Couldn't open table "
+ new String(hTable.hTable.getTableName(), Charsets.UTF_8), e);
}
}
@Override
public void close() throws IOException {
if (hTable != null) {
closer.register(hTable.hTable);
}
closer.close();
}
@Override
public int nShards() throws IOException {
try (ResultScanner scan = openScanner()) {
if (scan.iterator().hasNext()) {
return 1;
}
return 0;
}
}
@SuppressWarnings("resource") // scan is added to the closer, and therefore closed.
@Override
public OneShotIterable<Cell<T>> getShard(int shard) throws IOException {
if (shard < 0 || nShards() <= shard) {
throw new IndexOutOfBoundsException(
String.format("You asked for shard %s, but there are only %s.", shard, nShards()));
}
ResultScanner scan = openScanner();
closer.register(scan);
return new AdapterOneShotIterable<>(new ResultScannerIterator(scan.iterator()));
}
}