/*
* Copyright (C) 2010-2012 The Async HBase Authors. All rights reserved.
* This file is part of Async HBase.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
* - Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* - Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* - Neither the name of the StumbleUpon nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
* LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
* CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*/
package org.hbase.async;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Arrays;
import org.jboss.netty.buffer.ChannelBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.stumbleupon.async.Callback;
import com.stumbleupon.async.Deferred;
import org.hbase.async.generated.ClientPB.Column;
import org.hbase.async.generated.ClientPB.Scan;
import org.hbase.async.generated.ClientPB.ScanRequest;
import org.hbase.async.generated.ClientPB.ScanResponse;
import org.hbase.async.generated.FilterPB;
import org.hbase.async.generated.HBasePB.TimeRange;
import static org.hbase.async.HBaseClient.EMPTY_ARRAY;
/**
* Creates a scanner to read data sequentially from HBase.
* <p>
* This class is <strong>not synchronized</strong> as it's expected to be
* used from a single thread at a time. It's rarely (if ever?) useful to
* scan concurrently from a shared scanner using multiple threads. If you
* want to optimize large table scans using extra parallelism, create a few
* scanners and give each of them a partition of the table to scan. Or use
* MapReduce.
* <p>
* Unlike HBase's traditional client, there's no method in this class to
* explicitly open the scanner. It will open itself automatically when you
* start scanning by calling {@link #nextRows()}. Also, the scanner will
* automatically call {@link #close} when it reaches the end key. If, however,
* you would like to stop scanning <i>before reaching the end key</i>, you
* <b>must</b> call {@link #close} before disposing of the scanner. Note that
* it's always safe to call {@link #close} on a scanner.
* <p>
* If you keep your scanner open and idle for too long, the RegionServer will
* close the scanner automatically for you after a timeout configured on the
* server side. When this happens, you'll get an
* {@link UnknownScannerException} when you attempt to use the scanner again.
* Also, if you scan too slowly (e.g. you take a long time between each call
* to {@link #nextRows()}), you may prevent HBase from splitting the region if
* the region is also actively being written to while you scan. For heavy
* processing you should consider using MapReduce.
* <p>
* A {@code Scanner} is not re-usable. Should you want to scan the same rows
* or the same table again, you must create a new one.
*
* <h1>A note on passing {@code byte} arrays in argument</h1>
* None of the method that receive a {@code byte[]} in argument will copy it.
* For more info, please refer to the documentation of {@link HBaseRpc}.
* <h1>A note on passing {@code String}s in argument</h1>
* All strings are assumed to use the platform's default charset.
*/
public final class Scanner {
private static final Logger LOG = LoggerFactory.getLogger(Scanner.class);
/**
* The default maximum number of {@link KeyValue}s the server is allowed
* to return in a single RPC response to a {@link Scanner}.
* <p>
* This default value is exposed only as a hint but the value itself
* is not part of the API and is subject to change without notice.
* @see #setMaxNumKeyValues
*/
public static final int DEFAULT_MAX_NUM_KVS = 4096;
/**
* The default maximum number of rows to scan per RPC.
* <p>
* This default value is exposed only as a hint but the value itself
* is not part of the API and is subject to change without notice.
* @see #setMaxNumRows
*/
public static final int DEFAULT_MAX_NUM_ROWS = 128;
/** Special reference we use to indicate we're done scanning. */
private static final RegionInfo DONE =
new RegionInfo(EMPTY_ARRAY, EMPTY_ARRAY, EMPTY_ARRAY);
private final HBaseClient client;
private final byte[] table;
/**
* The key to start scanning from. An empty array means "start from the
* first key in the table". This key is updated each time we move on to
* another row, so that in the event of a failure, we know what was the
* last key previously returned. Note that this doesn't entail that the
* full row was returned. Depending on the failure, we may not know if
* the last key returned was only a subset of a row or a full row, so it
* may not be possible to gracefully recover from certain errors without
* re-scanning and re-returning the same data twice.
*/
private byte[] start_key = EMPTY_ARRAY;
/**
* The last key to scan up to (exclusive).
* An empty array means "scan until the last key in the table".
*/
private byte[] stop_key = EMPTY_ARRAY;
private byte[][] families;
private byte[][][] qualifiers;
/** Filter to apply on the scanner. */
private ScanFilter filter;
/** Minimum {@link KeyValue} timestamp to scan. */
private long min_timestamp = 0;
/** Maximum {@link KeyValue} timestamp to scan. */
private long max_timestamp = Long.MAX_VALUE;
/** @see #setServerBlockCache */
private boolean populate_blockcache = true;
/**
* Maximum number of rows to fetch at a time.
* @see #setMaxNumRows
*/
private int max_num_rows = DEFAULT_MAX_NUM_ROWS;
/**
* Maximum number of KeyValues to fetch at a time.
* @see #setMaxNumKeyValues
*/
private int max_num_kvs = DEFAULT_MAX_NUM_KVS;
/**
* Maximum number of bytes to fetch at a time.
* Except that HBase won't truncate a row in the middle or what,
* so we could potentially go a bit above that.
* Only used when talking to HBase 0.95 and up.
* @see #setMaxNumBytes
*/
private long max_num_bytes = ~HBaseRpc.MAX_BYTE_ARRAY_MASK;
/**
* How many versions of each cell to retrieve.
*/
private int versions = 1;
/**
* The region currently being scanned.
* If null, we haven't started scanning.
* If == DONE, then we're done scanning.
* Otherwise it contains a proper region name, and we're currently scanning.
*/
private RegionInfo region;
/**
* This is the scanner ID we got from the RegionServer.
* It's generated randomly so any {@code long} value is possible.
*/
private long scanner_id;
/**
* Request object we re-use to avoid generating too much garbage.
* @see #getNextRowsRequest
*/
private GetNextRowsRequest get_next_rows_request;
/**
* Constructor.
* <strong>This byte array will NOT be copied.</strong>
* @param table The non-empty name of the table to use.
*/
Scanner(final HBaseClient client, final byte[] table) {
KeyValue.checkTable(table);
this.client = client;
this.table = table;
}
/**
* Returns the row key this scanner is currently at.
* <strong>Do not modify the byte array returned.</strong>
*/
public byte[] getCurrentKey() {
return start_key;
}
/**
* Specifies from which row key to start scanning (inclusive).
* @param start_key The row key to start scanning from. If you don't invoke
* this method, scanning will begin from the first row key in the table.
* <strong>This byte array will NOT be copied.</strong>
* @throws IllegalStateException if scanning already started.
*/
public void setStartKey(final byte[] start_key) {
KeyValue.checkKey(start_key);
checkScanningNotStarted();
this.start_key = start_key;
}
/**
* Specifies from which row key to start scanning (inclusive).
* @see #setStartKey(byte[])
* @throws IllegalStateException if scanning already started.
*/
public void setStartKey(final String start_key) {
setStartKey(start_key.getBytes());
}
/**
* Specifies up to which row key to scan (exclusive).
* @param stop_key The row key to scan up to. If you don't invoke
* this method, or if the array is empty ({@code stop_key.length == 0}),
* every row up to and including the last one will be scanned.
* <strong>This byte array will NOT be copied.</strong>
* @throws IllegalStateException if scanning already started.
*/
public void setStopKey(final byte[] stop_key) {
KeyValue.checkKey(stop_key);
checkScanningNotStarted();
this.stop_key = stop_key;
}
/**
* Specifies up to which row key to scan (exclusive).
* @see #setStopKey(byte[])
* @throws IllegalStateException if scanning already started.
*/
public void setStopKey(final String stop_key) {
setStopKey(stop_key.getBytes());
}
/**
* Specifies a particular column family to scan.
* @param family The column family.
* <strong>This byte array will NOT be copied.</strong>
* @throws IllegalStateException if scanning already started.
*/
public void setFamily(final byte[] family) {
KeyValue.checkFamily(family);
checkScanningNotStarted();
families = new byte[][] { family };
}
/** Specifies a particular column family to scan. */
public void setFamily(final String family) {
setFamily(family.getBytes());
}
/**
* Specifies multiple column families to scan.
* <p>
* If {@code qualifiers} is not {@code null}, then {@code qualifiers[i]}
* is assumed to be the list of qualifiers to scan in the family
* {@code families[i]}. If {@code qualifiers[i]} is {@code null}, then
* all the columns in the family {@code families[i]} will be scanned.
* @param families Array of column families names.
* @param qualifiers Array of column qualifiers. Can be {@code null}.
* <strong>This array of byte arrays will NOT be copied.</strong>
* @throws IllegalStateException if scanning already started.
* @since 1.5
*/
public void setFamilies(byte[][] families, byte[][][] qualifiers) {
checkScanningNotStarted();
for (int i = 0; i < families.length; i++) {
KeyValue.checkFamily(families[i]);
if (qualifiers != null && qualifiers[i] != null) {
for (byte[] qualifier : qualifiers[i]) {
KeyValue.checkQualifier(qualifier);
}
}
}
this.families = families;
this.qualifiers = qualifiers;
}
/**
* Specifies multiple column families to scan.
* <p>
* NOTE: This will null out the qualifiers list if it was set previously as
* well as replace any families that were already set.
* @param families A list of one or more family names.
* @throws IllegalStateException if scanning already started.
* @since 1.5
*/
public void setFamilies(final String... families) {
checkScanningNotStarted();
this.families = new byte[families.length][];
for (int i = 0; i < families.length; i++) {
this.families[i] = families[i].getBytes();
KeyValue.checkFamily(this.families[i]);
}
qualifiers = null;
}
/**
* Specifies a particular column qualifier to scan.
* <p>
* Note that specifying a qualifier without a family has no effect.
* You need to call {@link #setFamily(byte[])} too.
* @param qualifier The column qualifier.
* <strong>This byte array will NOT be copied.</strong>
* @throws IllegalStateException if scanning already started.
*/
public void setQualifier(final byte[] qualifier) {
KeyValue.checkQualifier(qualifier);
checkScanningNotStarted();
this.qualifiers = new byte[][][] { { qualifier } };
}
/** Specifies a particular column qualifier to scan. */
public void setQualifier(final String qualifier) {
setQualifier(qualifier.getBytes());
}
/**
* Specifies one or more column qualifiers to scan.
* <p>
* Note that specifying qualifiers without a family has no effect.
* You need to call {@link #setFamily(byte[])} too.
* @param qualifiers The column qualifiers.
* <strong>These byte arrays will NOT be copied.</strong>
* @throws IllegalStateException if scanning already started.
* @since 1.4
*/
public void setQualifiers(final byte[][] qualifiers) {
checkScanningNotStarted();
for (final byte[] qualifier : qualifiers) {
KeyValue.checkQualifier(qualifier);
}
this.qualifiers = new byte[][][] { qualifiers };
}
/**
* Specifies the filter to apply to this scanner.
* @param filter The filter. If {@code null}, then no filter will be used.
* @since 1.5
*/
public void setFilter(final ScanFilter filter) {
this.filter = filter;
}
/**
* Returns the possibly-{@code null} filter applied to this scanner.
* @since 1.5
*/
public ScanFilter getFilter() {
return filter;
}
/**
* Clears any filter that was previously set on this scanner.
* <p>
* This is a shortcut for {@link #setFilter}{@code (null)}
* @since 1.5
*/
public void clearFilter() {
filter = null;
}
/**
* Sets a regular expression to filter results based on the row key.
* <p>
* This is equivalent to calling
* {@link #setFilter setFilter}{@code (new }{@link
* KeyRegexpFilter}{@code (regexp))}
* @param regexp The regular expression with which to filter the row keys.
*/
public void setKeyRegexp(final String regexp) {
filter = new KeyRegexpFilter(regexp);
}
/**
* Sets a regular expression to filter results based on the row key.
* <p>
* This is equivalent to calling
* {@link #setFilter setFilter}{@code (new }{@link
* KeyRegexpFilter}{@code (regexp, charset))}
* @param regexp The regular expression with which to filter the row keys.
* @param charset The charset used to decode the bytes of the row key into a
* string. The RegionServer must support this charset, otherwise it will
* unexpectedly close the connection the first time you attempt to use this
* scanner.
*/
public void setKeyRegexp(final String regexp, final Charset charset) {
filter = new KeyRegexpFilter(regexp, charset);
}
/**
* Sets whether or not the server should populate its block cache.
* @param populate_blockcache if {@code false}, the block cache of the server
* will not be populated as the rows are being scanned. If {@code true} (the
* default), the blocks loaded by the server in order to feed the scanner
* <em>may</em> be added to the block cache, which will make subsequent read
* accesses to the same rows and other neighbouring rows faster. Whether or
* not blocks will be added to the cache depend on the table's configuration.
* <p>
* If you scan a sequence of keys that is unlikely to be accessed again in
* the near future, you can help the server improve its cache efficiency by
* setting this to {@code false}.
* @throws IllegalStateException if scanning already started.
*/
public void setServerBlockCache(final boolean populate_blockcache) {
checkScanningNotStarted();
this.populate_blockcache = populate_blockcache;
}
/**
* Sets the maximum number of rows to scan per RPC (for better performance).
* <p>
* Every time {@link #nextRows()} is invoked, up to this number of rows may
* be returned. The default value is {@link #DEFAULT_MAX_NUM_ROWS}.
* <p>
* <b>This knob has a high performance impact.</b> If it's too low, you'll
* do too many network round-trips, if it's too high, you'll spend too much
* time and memory handling large amounts of data. The right value depends
* on the size of the rows you're retrieving.
* <p>
* If you know you're going to be scanning lots of small rows (few cells, and
* each cell doesn't store a lot of data), you can get better performance by
* scanning more rows by RPC. You probably always want to retrieve at least
* a few dozen kilobytes per call.
* <p>
* If you want to err on the safe side, it's better to use a value that's a
* bit too high rather than a bit too low. Avoid extreme values (such as 1
* or 1024) unless you know what you're doing.
* <p>
* Note that unlike many other methods, it's fine to change this value while
* scanning. Changing it will take affect all the subsequent RPCs issued.
* This can be useful you want to dynamically adjust how much data you want
* to receive at once (provided that you can estimate the size of your rows).
* @param max_num_rows A strictly positive integer.
* @throws IllegalArgumentException if the argument is zero or negative.
*/
public void setMaxNumRows(final int max_num_rows) {
if (max_num_rows <= 0) {
throw new IllegalArgumentException("zero or negative argument: "
+ max_num_rows);
}
this.max_num_rows = max_num_rows;
}
/**
* Sets the maximum number of {@link KeyValue}s the server is allowed to
* return in a single RPC response.
* <p>
* If you're dealing with wide rows, in which you have many cells, you may
* want to limit the number of cells ({@code KeyValue}s) that the server
* returns in a single RPC response.
* <p>
* The default is {@link #DEFAULT_MAX_NUM_KVS}, unlike in HBase's client
* where the default is {@code -1}. If you set this to a negative value,
* the server will always return full rows, no matter how wide they are. If
* you request really wide rows, this may cause increased memory consumption
* on the server side as the server has to build a large RPC response, even
* if it tries to avoid copying data. On the client side, the consequences
* on memory usage are worse due to the lack of framing in RPC responses.
* The client will have to buffer a large RPC response and will have to do
* several memory copies to dynamically grow the size of the buffer as more
* and more data comes in.
* @param max_num_kvs A non-zero value.
* @throws IllegalArgumentException if the argument is zero.
* @throws IllegalStateException if scanning already started.
*/
public void setMaxNumKeyValues(final int max_num_kvs) {
if (max_num_kvs == 0) {
throw new IllegalArgumentException("batch size can't be zero");
}
checkScanningNotStarted();
this.max_num_kvs = max_num_kvs;
}
/**
* Maximum number of {@link KeyValue}s the server is allowed to return.
* @see #setMaxNumKeyValues
* @since 1.5
*/
public int getMaxNumKeyValues() {
return max_num_kvs;
}
/**
* Sets the maximum number of versions to return for each cell scanned.
* <p>
* By default a scanner will only return the most recent version of
* each cell. If you want to get all possible versions available,
* pass {@link Integer#MAX_VALUE} in argument.
* @param versions A strictly positive number of versions to return.
* @since 1.4
* @throws IllegalStateException if scanning already started.
* @throws IllegalArgumentException if {@code versions <= 0}
*/
public void setMaxVersions(final int versions) {
if (versions <= 0) {
throw new IllegalArgumentException("Need a strictly positive number: "
+ versions);
}
checkScanningNotStarted();
this.versions = versions;
}
/**
* Returns the maximum number of versions to return for each cell scanned.
* @return A strictly positive integer.
* @since 1.4
*/
public int getMaxVersions() {
return versions;
}
/**
* Sets the maximum number of bytes returned at once by the scanner.
* <p>
* HBase may actually return more than this many bytes because it will not
* truncate a row in the middle.
* <p>
* This value is only used when communicating with HBase 0.95 and newer.
* For older versions of HBase this value is silently ignored.
* @param max_num_bytes A strictly positive number of bytes.
* @since 1.5
* @throws IllegalStateException if scanning already started.
* @throws IllegalArgumentException if {@code max_num_bytes <= 0}
*/
public void setMaxNumBytes(final long max_num_bytes) {
if (max_num_bytes <= 0) {
throw new IllegalArgumentException("Need a strictly positive number of"
+ " bytes, got " + max_num_bytes);
}
checkScanningNotStarted();
this.max_num_bytes = max_num_bytes;
}
/**
* Returns the maximum number of bytes returned at once by the scanner.
* @see #setMaxNumBytes
* @since 1.5
*/
public long getMaxNumBytes() {
return max_num_bytes;
}
/**
* Sets the minimum timestamp to scan (inclusive).
* <p>
* {@link KeyValue}s that have a timestamp strictly less than this one
* will not be returned by the scanner. HBase has internal optimizations to
* avoid loading in memory data filtered out in some cases.
* @param timestamp The minimum timestamp to scan (inclusive).
* @throws IllegalArgumentException if {@code timestamp < 0}.
* @throws IllegalArgumentException if {@code timestamp > getMaxTimestamp()}.
* @see #setTimeRange
* @since 1.3
*/
public void setMinTimestamp(final long timestamp) {
if (timestamp < 0) {
throw new IllegalArgumentException("Negative timestamp: " + timestamp);
} else if (timestamp > max_timestamp) {
throw new IllegalArgumentException("New minimum timestamp (" + timestamp
+ ") is greater than the maximum"
+ " timestamp: " + max_timestamp);
}
checkScanningNotStarted();
min_timestamp = timestamp;
}
/**
* Returns the minimum timestamp to scan (inclusive).
* @return A positive integer.
* @since 1.3
*/
public long getMinTimestamp() {
return min_timestamp;
}
/**
* Sets the maximum timestamp to scan (exclusive).
* <p>
* {@link KeyValue}s that have a timestamp greater than or equal to this one
* will not be returned by the scanner. HBase has internal optimizations to
* avoid loading in memory data filtered out in some cases.
* @param timestamp The maximum timestamp to scan (exclusive).
* @throws IllegalArgumentException if {@code timestamp < 0}.
* @throws IllegalArgumentException if {@code timestamp < getMinTimestamp()}.
* @see #setTimeRange
* @since 1.3
*/
public void setMaxTimestamp(final long timestamp) {
if (timestamp < 0) {
throw new IllegalArgumentException("Negative timestamp: " + timestamp);
} else if (timestamp < min_timestamp) {
throw new IllegalArgumentException("New maximum timestamp (" + timestamp
+ ") is greater than the minimum"
+ " timestamp: " + min_timestamp);
}
checkScanningNotStarted();
max_timestamp = timestamp;
}
/**
* Returns the maximum timestamp to scan (exclusive).
* @return A positive integer.
* @since 1.3
*/
public long getMaxTimestamp() {
return max_timestamp;
}
/**
* Sets the time range to scan.
* <p>
* {@link KeyValue}s that have a timestamp that do not fall in the range
* {@code [min_timestamp; max_timestamp[} will not be returned by the
* scanner. HBase has internal optimizations to avoid loading in memory
* data filtered out in some cases.
* @param min_timestamp The minimum timestamp to scan (inclusive).
* @param max_timestamp The maximum timestamp to scan (exclusive).
* @throws IllegalArgumentException if {@code min_timestamp < 0}
* @throws IllegalArgumentException if {@code max_timestamp < 0}
* @throws IllegalArgumentException if {@code min_timestamp > max_timestamp}
* @since 1.3
*/
public void setTimeRange(final long min_timestamp, final long max_timestamp) {
if (min_timestamp > max_timestamp) {
throw new IllegalArgumentException("New minimum timestamp (" + min_timestamp
+ ") is greater than the new maximum"
+ " timestamp: " + max_timestamp);
} else if (min_timestamp < 0) {
throw new IllegalArgumentException("Negative minimum timestamp: "
+ min_timestamp);
}
checkScanningNotStarted();
// We now have the guarantee that max_timestamp >= 0, no need to check it.
this.min_timestamp = min_timestamp;
this.max_timestamp = max_timestamp;
}
/**
* Scans a number of rows. Calling this method is equivalent to:
* <pre>
* this.{@link #setMaxNumRows setMaxNumRows}(nrows);
* this.{@link #nextRows() nextRows}();
* </pre>
* @param nrows The maximum number of rows to retrieve.
* @return A deferred list of rows.
* @see #setMaxNumRows
* @see #nextRows()
*/
public Deferred<ArrayList<ArrayList<KeyValue>>> nextRows(final int nrows) {
setMaxNumRows(nrows);
return nextRows();
}
/**
* Scans a number of rows.
* <p>
* The last row returned may be partial if it's very wide and
* {@link #setMaxNumKeyValues} wasn't called with a negative value in
* argument.
* <p>
* Once this method returns {@code null} once (which indicates that this
* {@code Scanner} is done scanning), calling it again leads to an undefined
* behavior.
* @return A deferred list of rows. Each row is a list of {@link KeyValue}
* and each element in the list returned represents a different row. Rows
* are returned in sequential order. {@code null} is returned if there are
* no more rows to scan. Otherwise its {@link ArrayList#size size} is
* guaranteed to be less than or equal to the value last given to
* {@link #setMaxNumRows}.
* @see #setMaxNumRows
* @see #setMaxNumKeyValues
*/
public Deferred<ArrayList<ArrayList<KeyValue>>> nextRows() {
if (region == DONE) { // We're already done scanning.
return Deferred.fromResult(null);
} else if (region == null) { // We need to open the scanner first.
return client.openScanner(this).addCallbackDeferring(
new Callback<Deferred<ArrayList<ArrayList<KeyValue>>>, Object>() {
public Deferred<ArrayList<ArrayList<KeyValue>>> call(final Object arg) {
final Response resp;
if (arg instanceof Long) {
scanner_id = (Long) arg;
resp = null;
} else if (arg instanceof Response) {
resp = (Response) arg;
scanner_id = resp.scanner_id;
} else {
throw new IllegalStateException("WTF? Scanner open callback"
+ " invoked with impossible"
+ " argument: " + arg);
}
if (LOG.isDebugEnabled()) {
LOG.debug("Scanner " + Bytes.hex(scanner_id) + " opened on " + region);
}
if (resp != null) {
if (resp.rows == null) {
return scanFinished(resp);
}
return Deferred.fromResult(resp.rows);
}
return nextRows(); // Restart the call.
}
public String toString() {
return "scanner opened";
}
});
}
// Need to silence this warning because the callback `got_next_row'
// declares its return type to be Object, because its return value
// may or may not be deferred.
@SuppressWarnings("unchecked")
final Deferred<ArrayList<ArrayList<KeyValue>>> d = (Deferred)
client.scanNextRows(this).addCallbacks(got_next_row, nextRowErrback());
return d;
}
/**
* Singleton callback to handle responses of "next" RPCs.
* This returns an {@code ArrayList<ArrayList<KeyValue>>} (possibly inside a
* deferred one).
*/
private final Callback<Object, Object> got_next_row =
new Callback<Object, Object>() {
public Object call(final Object response) {
ArrayList<ArrayList<KeyValue>> rows = null;
Response resp = null;
if (response instanceof Response) { // HBase 0.95 and up
resp = (Response) response;
rows = resp.rows;
} else if (response instanceof ArrayList) { // HBase 0.94 and before.
@SuppressWarnings("unchecked") // I 3>> generics.
final ArrayList<ArrayList<KeyValue>> r =
(ArrayList<ArrayList<KeyValue>>) response;
rows = r;
} else if (response != null) {
throw new InvalidResponseException(ArrayList.class, response);
}
if (rows == null) { // We're done scanning this region.
return scanFinished(resp);
}
final ArrayList<KeyValue> lastrow = rows.get(rows.size() - 1);
start_key = lastrow.get(0).key();
return rows;
}
public String toString() {
return "get nextRows response";
}
};
/**
* Creates a new errback to handle errors while trying to get more rows.
*/
private final Callback<Object, Object> nextRowErrback() {
return new Callback<Object, Object>() {
public Object call(final Object error) {
final RegionInfo old_region = region; // Save before invalidate().
invalidate(); // If there was an error, don't assume we're still OK.
if (error instanceof NotServingRegionException) {
// We'll resume scanning on another region, and we want to pick up
// right after the last key we successfully returned. Padding the
// last key with an extra 0 gives us the next possible key.
// TODO(tsuna): If we get 2 NSRE in a row, well pad the key twice!
start_key = Arrays.copyOf(start_key, start_key.length + 1);
return nextRows(); // XXX dangerous endless retry
} else if (error instanceof UnknownScannerException) {
// This can happen when our scanner lease expires. Unfortunately
// there's no way for us to distinguish between an expired lease
// and a real problem, for 2 reasons: the server doesn't keep track
// of recently expired scanners and the lease time is only known by
// the server and never communicated to the client. The normal
// HBase client assumes that the client will share the same
// hbase-site.xml configuration so that both the client and the
// server will know the same lease time, but this assumption is bad
// as nothing guarantees that the client's configuration will be in
// sync with the server's. This unnecessarily increases deployment
// complexity and it's brittle.
final Scanner scnr = Scanner.this;
LOG.warn(old_region + " pretends to not know " + scnr + ". I will"
+ " retry to open a scanner but this is typically because you've"
+ " been holding the scanner open and idle for too long (possibly"
+ " due to a long GC pause on your side or in the RegionServer)",
error);
// Let's re-open ourselves and keep scanning.
return nextRows(); // XXX dangerous endless retry
}
return error; // Let the error propagate.
}
public String toString() {
return "NextRow errback";
}
};
}
/**
* Closes this scanner (don't forget to call this when you're done with it!).
* <p>
* Closing a scanner already closed has no effect. The deferred returned
* will be called back immediately.
* @return A deferred object that indicates the completion of the request.
* The {@link Object} has not special meaning and can be {@code null}.
*/
public Deferred<Object> close() {
if (region == null || region == DONE) {
return Deferred.fromResult(null);
}
return client.closeScanner(this).addBoth(closedCallback());
}
/** Callback+Errback invoked when the RegionServer closed our scanner. */
private Callback<Object, Object> closedCallback() {
return new Callback<Object, Object>() {
public Object call(Object arg) {
if (arg instanceof Exception) {
final Exception error = (Exception) arg;
// NotServingRegionException:
// If the region isn't serving, then our scanner is already broken
// somehow, because while it's open it holds a read lock on the
// region, which prevents it from splitting (among other things).
// So if we get this error, our scanner is already dead anyway.
// UnknownScannerException:
// If this region doesn't know anything about this scanner then we
// don't have anything to do to close it!
if (error instanceof NotServingRegionException
|| error instanceof UnknownScannerException) {
if (LOG.isDebugEnabled()) {
LOG.debug("Ignoring exception when closing " + Scanner.this,
error);
}
arg = null; // Clear the error.
} // else: the `return arg' below will propagate the error.
} else if (LOG.isDebugEnabled()) {
LOG.debug("Scanner " + Bytes.hex(scanner_id) + " closed on "
+ region);
}
region = DONE;
scanner_id = 0xDEAD000CC000DEADL; // Make debugging easier.
return arg;
}
public String toString() {
return "scanner closed";
}
};
}
private Deferred<ArrayList<ArrayList<KeyValue>>> scanFinished(final Response resp) {
final byte[] region_stop_key = region.stopKey();
// Check to see if this region is the last we should scan (either
// because (1) it's the last region or (3) because its stop_key is
// greater than or equal to the stop_key of this scanner provided
// that (2) we're not trying to scan until the end of the table).
if (region_stop_key == EMPTY_ARRAY // (1)
|| (stop_key != EMPTY_ARRAY // (2)
&& Bytes.memcmp(stop_key, region_stop_key) <= 0)) { // (3)
get_next_rows_request = null; // free();
families = null; // free();
qualifiers = null; // free();
start_key = stop_key = EMPTY_ARRAY; // free() but mustn't be null.
if (resp != null && !resp.more) {
return null; // The server already closed the scanner for us.
}
return close() // Auto-close the scanner.
.addCallback(new Callback<ArrayList<ArrayList<KeyValue>>, Object>() {
public ArrayList<ArrayList<KeyValue>> call(final Object arg) {
return null; // Tell the user there's nothing more to scan.
}
public String toString() {
return "auto-close scanner " + Bytes.hex(scanner_id);
}
});
}
return continueScanOnNextRegion();
}
/**
* Continues scanning on the next region.
* <p>
* This method is called when we tried to get more rows but we reached the
* end of the current region and need to move on to the next region.
* <p>
* This method closes the scanner on the current region, updates the start
* key of this scanner and resumes scanning on the next region.
* @return The deferred results from the next region.
*/
private Deferred<ArrayList<ArrayList<KeyValue>>> continueScanOnNextRegion() {
// Copy those into local variables so we can still refer to them in the
// "closure" below even after we've changed them.
final long old_scanner_id = scanner_id;
final RegionInfo old_region = region;
if (LOG.isDebugEnabled()) {
LOG.debug("Scanner " + Bytes.hex(old_scanner_id) + " done scanning "
+ old_region);
}
client.closeScanner(this).addCallback(new Callback<Object, Object>() {
public Object call(final Object arg) {
if (LOG.isDebugEnabled()) {
LOG.debug("Scanner " + Bytes.hex(old_scanner_id) + " closed on "
+ old_region);
}
return arg;
}
public String toString() {
return "scanner moved";
}
});
// Continue scanning from the next region's start key.
start_key = region.stopKey();
scanner_id = 0xDEAD000AA000DEADL; // Make debugging easier.
invalidate();
return nextRows();
}
public String toString() {
final String region = this.region == null ? "null"
: this.region == DONE ? "none" : this.region.toString();
final String filter = this.filter == null ? "null"
: this.filter.toString();
int fam_length = 0;
if (families == null) {
fam_length = 4;
} else {
for (byte[] family : families) {
fam_length += family.length + 2 + 2;
}
}
int qual_length = 0;
if (qualifiers == null) {
qual_length = 4;
} else {
for (byte[][] qualifier : qualifiers) {
if (qualifier != null) {
for (byte[] qual : qualifier) {
qual_length += qual.length + 2 + 1;
}
}
}
}
final StringBuilder buf = new StringBuilder(14 + 1 + table.length + 1 + 12
+ 1 + start_key.length + 1 + 11 + 1 + stop_key.length + 1
+ 11 + 1 + fam_length + qual_length + 1
+ 23 + 5 + 15 + 5 + 14 + 6
+ 9 + 1 + region.length() + 1
+ 9 + 1 + filter.length() + 1
+ 13 + 18 + 1);
buf.append("Scanner(table=");
Bytes.pretty(buf, table);
buf.append(", start_key=");
Bytes.pretty(buf, start_key);
buf.append(", stop_key=");
Bytes.pretty(buf, stop_key);
buf.append(", columns={");
familiesToString(buf);
buf.append("}, populate_blockcache=").append(populate_blockcache)
.append(", max_num_rows=").append(max_num_rows)
.append(", max_num_kvs=").append(max_num_kvs)
.append(", region=").append(region)
.append(", filter=").append(filter);
buf.append(", scanner_id=").append(Bytes.hex(scanner_id))
.append(')');
return buf.toString();
}
/** Helper method for {@link toString}. */
private void familiesToString(final StringBuilder buf) {
if (families == null) {
return;
}
for (int i = 0; i < families.length; i++) {
Bytes.pretty(buf, families[i]);
if (qualifiers != null && qualifiers[i] != null) {
buf.append(':');
Bytes.pretty(buf, qualifiers[i]);
}
buf.append(", ");
}
buf.setLength(buf.length() - 2); // Remove the extra ", ".
}
// ---------------------- //
// Package private stuff. //
// ---------------------- //
byte[] table() {
return table;
}
byte[] startKey() {
return start_key;
}
/**
* Sets the name of the region that's hosting {@code this.start_key}.
* @param region The region we're currently supposed to be scanning.
*/
void setRegionName(final RegionInfo region) {
this.region = region;
}
/**
* Invalidates this scanner and makes it assume it's no longer opened.
* When a RegionServer goes away while we're scanning it, or some other type
* of access problem happens, this method should be called so that the
* scanner will have to re-locate the RegionServer and re-open itself.
*/
void invalidate() {
region = null;
}
/**
* Returns the region currently being scanned, if any.
*/
RegionInfo currentRegion() {
return region;
}
/**
* Returns an RPC to fetch the next rows.
*/
HBaseRpc getNextRowsRequest() {
if (get_next_rows_request == null) {
get_next_rows_request = new GetNextRowsRequest();
}
return get_next_rows_request;
}
/**
* Returns an RPC to open this scanner.
*/
HBaseRpc getOpenRequest() {
return new OpenScannerRequest();
}
/**
* Returns an RPC to close this scanner.
*/
HBaseRpc getCloseRequest() {
return new CloseScannerRequest(scanner_id);
}
/**
* Throws an exception if scanning already started.
* @throws IllegalStateException if scanning already started.
*/
private void checkScanningNotStarted() {
if (region != null) {
throw new IllegalStateException("scanning already started");
}
}
/**
* Wraps the RPC response for scan requests from HBase 0.95+.
* When de-serializing a response to a "Scan" RPC from HBase 0.95+ we
* create this temporarily object at the time of the de-serialization
* of the RPC so that our callback can access a few more fields along
* with the actual payload of the response.
*/
final static class Response {
/** The ID associated with the scanner that issued the request. */
private final long scanner_id;
/** The actual payload of the response. */
private final ArrayList<ArrayList<KeyValue>> rows;
/**
* If false, the filter we use decided there was no more data to scan.
* In this case, the server has automatically closed the scanner for us,
* so we don't need to explicitly close it.
*/
private final boolean more;
Response(final long scanner_id,
final ArrayList<ArrayList<KeyValue>> rows,
final boolean more) {
this.scanner_id = scanner_id;
this.rows = rows;
this.more = more;
}
public String toString() {
return "Scanner$Response(scanner_id=" + Bytes.hex(scanner_id)
+ ", rows=" + rows + ", more=" + more + ")";
}
}
/**
* Extracts the rows from the given {@link ScanResponse}.
* @param resp The protobuf of the RPC response.
* @param buf The buffer the response was read from. The actual KeyValues
* of the response will be read from there if cell blocks are in use.
* @param cell_size The number of bytes of the cell block that follows,
* in the buffer.
*/
private ArrayList<ArrayList<KeyValue>> getRows(final ScanResponse resp,
final ChannelBuffer buf,
final int cell_size) {
final int nrows = (cell_size == 0
? resp.getResultsCount()
: resp.getCellsPerResultCount());
if (nrows == 0) {
return null;
}
HBaseRpc.checkArrayLength(buf, nrows);
final ArrayList<ArrayList<KeyValue>> rows =
new ArrayList<ArrayList<KeyValue>>(nrows);
if (cell_size != 0) {
KeyValue kv = null;
for (int i = 0; i < nrows; i++) {
final int nkvs = resp.getCellsPerResult(i);
HBaseRpc.checkArrayLength(buf, nkvs);
final ArrayList<KeyValue> row = new ArrayList<KeyValue>(nkvs);
for (int j = 0; j < nkvs; j++) {
final int kv_length = buf.readInt();
kv = KeyValue.fromBuffer(buf, kv);
row.add(kv);
}
rows.add(row);
}
} else {
for (int i = 0; i < nrows; i++) {
rows.add(GetRequest.convertResult(resp.getResults(i), buf, cell_size));
}
}
return rows;
}
/** RPC method name to use with HBase 0.95+. */
private static final byte[] SCAN = new byte[] { 'S', 'c', 'a', 'n' };
private static final byte[] OPEN_SCANNER = new byte[] {
'o', 'p', 'e', 'n', 'S', 'c', 'a', 'n', 'n', 'e', 'r'
};
/**
* RPC sent out to open a scanner on a RegionServer.
*/
private final class OpenScannerRequest extends HBaseRpc {
public OpenScannerRequest() {
super(Scanner.this.table, start_key);
}
@Override
byte[] method(final byte server_version) {
return (server_version >= RegionClient.SERVER_VERSION_095_OR_ABOVE
? SCAN
: OPEN_SCANNER);
}
/**
* Predicts a lower bound on the serialized size of this RPC.
* This is to avoid using a dynamic buffer, to avoid re-sizing the buffer.
* Since we use a static buffer, if the prediction is wrong and turns out
* to be less than what we need, there will be an exception which will
* prevent the RPC from being serialized. That'd be a severe bug.
*/
private int predictSerializedSize() {
int size = 0;
size += 4; // int: Number of parameters.
size += 1; // byte: Type of the 1st parameter.
size += 3; // vint: region name length (3 bytes => max length = 32768).
size += super.region.name().length; // The region name.
size += 1; // byte: Type of the 2nd parameter.
size += 1; // byte: Type again (see HBASE-2877).
size += 1; // byte: Version of Scan.
size += 3; // vint: start key length (3 bytes => max length = 32768).
size += start_key.length; // The start key.
size += 3; // vint: stop key length (3 bytes => max length = 32768).
size += stop_key.length; // The stop key.
size += 4; // int: Max number of versions to return.
size += 4; // int: Max number of KeyValues to get per RPC.
size += 4; // int: Unused field only used by HBase's client.
size += 1; // bool: Whether or not to populate the blockcache.
size += 1; // byte: Whether or not to use a filter.
if (filter != null) {
size += filter.predictSerializedSize();
}
size += 8; // long: Minimum timestamp.
size += 8; // long: Maximum timestamp.
size += 1; // byte: Boolean: "all time".
size += 4; // int: Number of families.
if (families != null) {
// vint: Family length (guaranteed on 1 byte) for each family.
size += families.length;
for (int i = 0; i < families.length; i++) {
byte[] family = families[i];
size += family.length; // The family.
size += 4; // int: How many qualifiers follow?
if (qualifiers != null && qualifiers[i] != null) {
// vint: Qualifier length times number of qualifiers.
size += 3 * qualifiers[i].length;
for (byte[] qualifier : qualifiers[i]) {
size += qualifier.length; // The qualifier.
}
}
}
}
return size;
}
/** Serializes this request. */
ChannelBuffer serialize(final byte server_version) {
// Save the region in the Scanner. This kind of a kludge but it really
// is the easiest way to give the Scanner the RegionInfo it needs.
Scanner.this.region = super.region;
if (server_version < RegionClient.SERVER_VERSION_095_OR_ABOVE) {
return serializeOld(server_version);
}
final Scan.Builder scan = Scan.newBuilder()
.setStartRow(Bytes.wrap(start_key))
.setStopRow(Bytes.wrap(stop_key));
if (families != null) {
for (int i = 0; i < families.length; i++) {
final Column.Builder columns = Column.newBuilder();
columns.setFamily(Bytes.wrap(families[i]));
if (qualifiers != null && qualifiers[i] != null) {
for (byte[] qualifier : qualifiers[i]) {
columns.addQualifier(Bytes.wrap(qualifier));
}
}
scan.addColumn(columns);
}
}
if (filter != null) {
scan.setFilter(FilterPB.Filter.newBuilder()
.setNameBytes(Bytes.wrap(filter.name()))
.setSerializedFilter(Bytes.wrap(filter.serialize()))
.build());
}
if (min_timestamp != 0 || max_timestamp != Long.MAX_VALUE) {
final TimeRange.Builder time = TimeRange.newBuilder();
if (min_timestamp != 0) {
time.setFrom(min_timestamp);
}
if (max_timestamp != Long.MAX_VALUE) {
time.setTo(max_timestamp);
}
scan.setTimeRange(time.build());
}
if (versions != 1) {
scan.setMaxVersions(versions);
}
if (!populate_blockcache) {
scan.setCacheBlocks(false);
}
if (max_num_kvs > 0) {
scan.setBatchSize(max_num_kvs);
}
scan.setMaxResultSize(max_num_bytes);
final ScanRequest req = ScanRequest.newBuilder()
.setRegion(region.toProtobuf())
.setScan(scan.build())
.setNumberOfRows(max_num_rows)
.build();
return toChannelBuffer(SCAN, req);
}
/** Serializes this request for HBase 0.94 and before. */
private ChannelBuffer serializeOld(final byte server_version) {
final ChannelBuffer buf = newBuffer(server_version,
predictSerializedSize());
buf.writeInt(2); // Number of parameters.
// 1st param: byte array containing region name
writeHBaseByteArray(buf, super.region.name());
// 2nd param: Scan object
buf.writeByte(39); // Code for a `Scan' parameter.
buf.writeByte(39); // Code again (see HBASE-2877).
buf.writeByte(1); // Manual versioning of Scan.
writeByteArray(buf, start_key);
writeByteArray(buf, stop_key);
buf.writeInt(versions); // Max number of versions to return.
// Max number of KeyValues to get per RPC.
buf.writeInt(max_num_kvs);
// Unused field, only used by HBase's client. This value should represent
// how many rows per call the client will fetch, but the server doesn't
// care about this value, neither do we, because we use a different API.
buf.writeInt(0xDEADBA5E);
// Whether or not to populate the blockcache.
buf.writeByte(populate_blockcache ? 0x01 : 0x00);
if (filter == null) {
buf.writeByte(0x00); // boolean (false): don't use a filter.
} else {
buf.writeByte(0x01); // boolean (true): use a filter.
filter.serializeOld(buf);
}
// TimeRange
buf.writeLong(min_timestamp); // Minimum timestamp.
buf.writeLong(max_timestamp); // Maximum timestamp.
// Boolean: "all time".
buf.writeByte(min_timestamp != 0 || max_timestamp != Long.MAX_VALUE
? 0x00 : 0x01);
// Families.
if (families != null) {
buf.writeInt(families.length); // Number of families that follow.
for (int i = 0; i < families.length; i++) {
// Each family is then written like so:
writeByteArray(buf, families[i]); // Column family name.
if (qualifiers != null && qualifiers[i] != null) {
buf.writeInt(qualifiers[i].length); // How many qualifiers do we want?
for (byte[] qualifier : qualifiers[i]) {
writeByteArray(buf, qualifier); // Column qualifier name.
}
} else {
buf.writeInt(0); // No qualifiers.
}
}
} else {
buf.writeInt(0); // No families.
}
return buf;
}
@Override
Response deserialize(final ChannelBuffer buf, final int cell_size) {
final ScanResponse resp = readProtobuf(buf, ScanResponse.PARSER);
if (!resp.hasScannerId()) {
throw new InvalidResponseException("Scan RPC response doesn't contain a"
+ " scanner ID", resp);
}
return new Response(resp.getScannerId(),
getRows(resp, buf, cell_size),
resp.getMoreResults());
}
public String toString() {
return "OpenScannerRequest(scanner=" + Scanner.this.toString() + ')';
}
}
private static final byte[] NEXT = new byte[] { 'n', 'e', 'x', 't' };
/**
* RPC sent out to fetch the next rows from the RegionServer.
*/
private final class GetNextRowsRequest extends HBaseRpc {
@Override
byte[] method(final byte server_version) {
return (server_version >= RegionClient.SERVER_VERSION_095_OR_ABOVE
? SCAN
: NEXT); // "next"... Great method name!
}
/** Serializes this request. */
ChannelBuffer serialize(final byte server_version) {
if (server_version < RegionClient.SERVER_VERSION_095_OR_ABOVE) {
final ChannelBuffer buf = newBuffer(server_version,
4 + 1 + 8 + 1 + 4);
buf.writeInt(2); // Number of parameters.
writeHBaseLong(buf, scanner_id);
writeHBaseInt(buf, max_num_rows);
return buf;
}
final ScanRequest req = ScanRequest.newBuilder()
.setScannerId(scanner_id)
.setNumberOfRows(max_num_rows)
.build();
return toChannelBuffer(SCAN, req);
}
@Override
Response deserialize(final ChannelBuffer buf, final int cell_size) {
final ScanResponse resp = readProtobuf(buf, ScanResponse.PARSER);
final long id = resp.getScannerId();
if (scanner_id != id) {
throw new InvalidResponseException("Scan RPC response was for scanner"
+ " ID " + id + " but we expected"
+ scanner_id, resp);
}
final ArrayList<ArrayList<KeyValue>> rows = getRows(resp, buf, cell_size);
if (rows == null) {
return null;
}
return new Response(resp.getScannerId(), rows, resp.getMoreResults());
}
public String toString() {
return "GetNextRowsRequest(scanner_id=" + Bytes.hex(scanner_id)
+ ", max_num_rows=" + max_num_rows
+ ", region=" + region
+ ", attempt=" + attempt + ')';
}
}
/**
* RPC sent out to close a scanner on a RegionServer.
*/
private static final class CloseScannerRequest extends HBaseRpc {
private static final byte[] CLOSE = new byte[] { 'c', 'l', 'o', 's', 'e' };
private final long scanner_id;
public CloseScannerRequest(final long scanner_id) {
this.scanner_id = scanner_id;
}
@Override
byte[] method(final byte server_version) {
return (server_version >= RegionClient.SERVER_VERSION_095_OR_ABOVE
? SCAN
: CLOSE); // "close"... Great method name!
}
/** Serializes this request. */
ChannelBuffer serialize(final byte server_version) {
if (server_version < RegionClient.SERVER_VERSION_095_OR_ABOVE) {
final ChannelBuffer buf = newBuffer(server_version,
4 + 1 + 8);
buf.writeInt(1); // Number of parameters.
writeHBaseLong(buf, scanner_id);
return buf;
}
final ScanRequest req = ScanRequest.newBuilder()
.setScannerId(scanner_id)
.setCloseScanner(true)
.setNumberOfRows(0)
.build();
return toChannelBuffer(SCAN, req);
}
@Override
Object deserialize(final ChannelBuffer buf, final int cell_size) {
HBaseRpc.ensureNoCell(cell_size);
final ScanResponse resp = readProtobuf(buf, ScanResponse.PARSER);
final long id = resp.getScannerId();
if (scanner_id != id) {
throw new InvalidResponseException("Scan RPC response was for scanner"
+ " ID " + id + " but we expected"
+ scanner_id, resp);
}
return null;
}
public String toString() {
return "CloseScannerRequest(scanner_id=" + Bytes.hex(scanner_id)
+ ", attempt=" + attempt + ')';
}
}
}