/** * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.client; import java.io.IOException; import java.io.InterruptedIOException; import java.util.LinkedList; import java.util.concurrent.ExecutorService; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellUtil; import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.client.RpcRetryingCallerFactory; import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException; import org.apache.hadoop.hbase.ipc.RpcControllerFactory; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos; import org.apache.hadoop.hbase.regionserver.RegionServerStoppedException; import org.apache.hadoop.hbase.util.Bytes; /** * Implements the scanner interface for the HBase client. * If there are multiple regions in a table, this scanner will iterate * through them all. */ @InterfaceAudience.Private public class ClientScanner extends AbstractClientScanner { private final Log LOG = LogFactory.getLog(this.getClass()); protected Scan scan; protected boolean closed = false; // Current region scanner is against. Gets cleared if current region goes // wonky: e.g. if it splits on us. protected HRegionInfo currentRegion = null; protected ScannerCallableWithReplicas callable = null; protected final LinkedList<Result> cache = new LinkedList<Result>(); protected final int caching; protected long lastNext; // Keep lastResult returned successfully in case we have to reset scanner. protected Result lastResult = null; protected final long maxScannerResultSize; private final ClusterConnection connection; private final TableName tableName; protected final int scannerTimeout; protected boolean scanMetricsPublished = false; protected RpcRetryingCaller<Result []> caller; protected RpcControllerFactory rpcControllerFactory; protected Configuration conf; //The timeout on the primary. Applicable if there are multiple replicas for a region //In that case, we will only wait for this much timeout on the primary before going //to the replicas and trying the same scan. Note that the retries will still happen //on each replica and the first successful results will be taken. A timeout of 0 is //disallowed. protected final int primaryOperationTimeout; private int retries; protected final ExecutorService pool; /** * Create a new ClientScanner for the specified table Note that the passed {@link Scan}'s start * row maybe changed changed. * @param conf The {@link Configuration} to use. * @param scan {@link Scan} to use in this scanner * @param tableName The table that we wish to scan * @param connection Connection identifying the cluster * @throws IOException */ public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName, ClusterConnection connection, RpcRetryingCallerFactory rpcFactory, RpcControllerFactory controllerFactory, ExecutorService pool, int primaryOperationTimeout) throws IOException { if (LOG.isTraceEnabled()) { LOG.trace("Scan table=" + tableName + ", startRow=" + Bytes.toStringBinary(scan.getStartRow())); } this.scan = scan; this.tableName = tableName; this.lastNext = System.currentTimeMillis(); this.connection = connection; this.pool = pool; this.primaryOperationTimeout = primaryOperationTimeout; this.retries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER); if (scan.getMaxResultSize() > 0) { this.maxScannerResultSize = scan.getMaxResultSize(); } else { this.maxScannerResultSize = conf.getLong( HConstants.HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE_KEY, HConstants.DEFAULT_HBASE_CLIENT_SCANNER_MAX_RESULT_SIZE); } this.scannerTimeout = HBaseConfiguration.getInt(conf, HConstants.HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD, HConstants.HBASE_REGIONSERVER_LEASE_PERIOD_KEY, HConstants.DEFAULT_HBASE_CLIENT_SCANNER_TIMEOUT_PERIOD); // check if application wants to collect scan metrics initScanMetrics(scan); // Use the caching from the Scan. If not set, use the default cache setting for this table. if (this.scan.getCaching() > 0) { this.caching = this.scan.getCaching(); } else { this.caching = conf.getInt( HConstants.HBASE_CLIENT_SCANNER_CACHING, HConstants.DEFAULT_HBASE_CLIENT_SCANNER_CACHING); } this.caller = rpcFactory.<Result[]> newCaller(); this.rpcControllerFactory = controllerFactory; this.conf = conf; initializeScannerInConstruction(); } protected void initializeScannerInConstruction() throws IOException{ // initialize the scanner nextScanner(this.caching, false); } protected ClusterConnection getConnection() { return this.connection; } /** * @return Table name * @deprecated Since 0.96.0; use {@link #getTable()} */ @Deprecated protected byte [] getTableName() { return this.tableName.getName(); } protected TableName getTable() { return this.tableName; } protected int getRetries() { return this.retries; } protected int getScannerTimeout() { return this.scannerTimeout; } protected Configuration getConf() { return this.conf; } protected Scan getScan() { return scan; } protected ExecutorService getPool() { return pool; } protected int getPrimaryOperationTimeout() { return primaryOperationTimeout; } protected int getCaching() { return caching; } protected long getTimestamp() { return lastNext; } // returns true if the passed region endKey protected boolean checkScanStopRow(final byte [] endKey) { if (this.scan.getStopRow().length > 0) { // there is a stop row, check to see if we are past it. byte [] stopRow = scan.getStopRow(); int cmp = Bytes.compareTo(stopRow, 0, stopRow.length, endKey, 0, endKey.length); if (cmp <= 0) { // stopRow <= endKey (endKey is equals to or larger than stopRow) // This is a stop. return true; } } return false; //unlikely. } private boolean possiblyNextScanner(int nbRows, final boolean done) throws IOException { // If we have just switched replica, don't go to the next scanner yet. Rather, try // the scanner operations on the new replica, from the right point in the scan // Note that when we switched to a different replica we left it at a point // where we just did the "openScanner" with the appropriate startrow if (callable != null && callable.switchedToADifferentReplica()) return true; return nextScanner(nbRows, done); } /* * Gets a scanner for the next region. If this.currentRegion != null, then * we will move to the endrow of this.currentRegion. Else we will get * scanner at the scan.getStartRow(). We will go no further, just tidy * up outstanding scanners, if <code>currentRegion != null</code> and * <code>done</code> is true. * @param nbRows * @param done Server-side says we're done scanning. */ protected boolean nextScanner(int nbRows, final boolean done) throws IOException { // Close the previous scanner if it's open if (this.callable != null) { this.callable.setClose(); call(scan, callable, caller, scannerTimeout); this.callable = null; } // Where to start the next scanner byte [] localStartKey; // if we're at end of table, close and return false to stop iterating if (this.currentRegion != null) { byte [] endKey = this.currentRegion.getEndKey(); if (endKey == null || Bytes.equals(endKey, HConstants.EMPTY_BYTE_ARRAY) || checkScanStopRow(endKey) || done) { close(); if (LOG.isTraceEnabled()) { LOG.trace("Finished " + this.currentRegion); } return false; } localStartKey = endKey; if (LOG.isTraceEnabled()) { LOG.trace("Finished " + this.currentRegion); } } else { localStartKey = this.scan.getStartRow(); } if (LOG.isDebugEnabled() && this.currentRegion != null) { // Only worth logging if NOT first region in scan. LOG.debug("Advancing internal scanner to startKey at '" + Bytes.toStringBinary(localStartKey) + "'"); } try { callable = getScannerCallable(localStartKey, nbRows); // Open a scanner on the region server starting at the // beginning of the region call(scan, callable, caller, scannerTimeout); this.currentRegion = callable.getHRegionInfo(); if (this.scanMetrics != null) { this.scanMetrics.countOfRegions.incrementAndGet(); } } catch (IOException e) { close(); throw e; } return true; } static Result[] call(Scan scan, ScannerCallableWithReplicas callable, RpcRetryingCaller<Result[]> caller, int scannerTimeout) throws IOException, RuntimeException { if (Thread.interrupted()) { throw new InterruptedIOException(); } // callWithoutRetries is at this layer. Within the ScannerCallableWithReplicas, // we do a callWithRetries return caller.callWithoutRetries(callable, scannerTimeout); } //获取一个新的ScannerCallableWithReplicas @InterfaceAudience.Private protected ScannerCallableWithReplicas getScannerCallable(byte [] localStartKey, int nbRows) { scan.setStartRow(localStartKey); ScannerCallable s = new ScannerCallable(getConnection(), getTable(), scan, this.scanMetrics, this.rpcControllerFactory); s.setCaching(nbRows); ScannerCallableWithReplicas sr = new ScannerCallableWithReplicas(tableName, getConnection(), s, pool, primaryOperationTimeout, scan, retries, scannerTimeout, caching, conf, caller); return sr; } /** * Publish the scan metrics. For now, we use scan.setAttribute to pass the metrics back to the * application or TableInputFormat.Later, we could push it to other systems. We don't use metrics * framework because it doesn't support multi-instances of the same metrics on the same machine; * for scan/map reduce scenarios, we will have multiple scans running at the same time. * * By default, scan metrics are disabled; if the application wants to collect them, this * behavior can be turned on by calling calling {@link Scan#setScanMetricsEnabled(boolean)} * * <p>This invocation clears the scan metrics. Metrics are aggregated in the Scan instance. */ protected void writeScanMetrics() { if (this.scanMetrics == null || scanMetricsPublished) { return; } MapReduceProtos.ScanMetrics pScanMetrics = ProtobufUtil.toScanMetrics(scanMetrics); scan.setAttribute(Scan.SCAN_ATTRIBUTES_METRICS_DATA, pScanMetrics.toByteArray()); scanMetricsPublished = true; } @Override public Result next() throws IOException { // If the scanner is closed and there's nothing left in the cache, next is a no-op. if (cache.size() == 0 && this.closed) { return null; } if (cache.size() == 0) { Result [] values = null; long remainingResultSize = maxScannerResultSize; int countdown = this.caching; // We need to reset it if it's a new callable that was created // with a countdown in nextScanner callable.setCaching(this.caching); // This flag is set when we want to skip the result returned. We do // this when we reset scanner because it split under us. boolean skipFirst = false; boolean retryAfterOutOfOrderException = true; do { try { if (skipFirst) { // Skip only the first row (which was the last row of the last // already-processed batch). callable.setCaching(1); values = call(scan, callable, caller, scannerTimeout); // When the replica switch happens, we need to do certain operations // again. The scannercallable will openScanner with the right startkey // but we need to pick up from there. Bypass the rest of the loop // and let the catch-up happen in the beginning of the loop as it // happens for the cases where we see exceptions. Since only openScanner // would have happened, values would be null if (values == null && callable.switchedToADifferentReplica()) { if (this.lastResult != null) { //only skip if there was something read earlier skipFirst = true; } this.currentRegion = callable.getHRegionInfo(); continue; } callable.setCaching(this.caching); skipFirst = false; } // Server returns a null values if scanning is to stop. Else, // returns an empty array if scanning is to go on and we've just // exhausted current region. values = call(scan, callable, caller, scannerTimeout); if (skipFirst && values != null && values.length == 1) { skipFirst = false; // Already skipped, unset it before scanning again values = call(scan, callable, caller, scannerTimeout); } // When the replica switch happens, we need to do certain operations // again. The callable will openScanner with the right startkey // but we need to pick up from there. Bypass the rest of the loop // and let the catch-up happen in the beginning of the loop as it // happens for the cases where we see exceptions. Since only openScanner // would have happened, values would be null if (values == null && callable.switchedToADifferentReplica()) { if (this.lastResult != null) { //only skip if there was something read earlier skipFirst = true; } this.currentRegion = callable.getHRegionInfo(); continue; } retryAfterOutOfOrderException = true; } catch (DoNotRetryIOException e) { if (e instanceof UnknownScannerException) { long timeout = lastNext + scannerTimeout; // If we are over the timeout, throw this exception to the client wrapped in // a ScannerTimeoutException. Else, it's because the region moved and we used the old // id against the new region server; reset the scanner. if (timeout < System.currentTimeMillis()) { long elapsed = System.currentTimeMillis() - lastNext; ScannerTimeoutException ex = new ScannerTimeoutException( elapsed + "ms passed since the last invocation, " + "timeout is currently set to " + scannerTimeout); ex.initCause(e); throw ex; } } else { // If exception is any but the list below throw it back to the client; else setup // the scanner and retry. Throwable cause = e.getCause(); if ((cause != null && cause instanceof NotServingRegionException) || (cause != null && cause instanceof RegionServerStoppedException) || e instanceof OutOfOrderScannerNextException) { // Pass // It is easier writing the if loop test as list of what is allowed rather than // as a list of what is not allowed... so if in here, it means we do not throw. } else { throw e; } } // Else, its signal from depths of ScannerCallable that we need to reset the scanner. if (this.lastResult != null) { // The region has moved. We need to open a brand new scanner at // the new location. // Reset the startRow to the row we've seen last so that the new // scanner starts at the correct row. Otherwise we may see previously // returned rows again. // (ScannerCallable by now has "relocated" the correct region) this.scan.setStartRow(this.lastResult.getRow()); // Skip first row returned. We already let it out on previous // invocation. skipFirst = true; } if (e instanceof OutOfOrderScannerNextException) { if (retryAfterOutOfOrderException) { retryAfterOutOfOrderException = false; } else { // TODO: Why wrap this in a DNRIOE when it already is a DNRIOE? throw new DoNotRetryIOException("Failed after retry of " + "OutOfOrderScannerNextException: was there a rpc timeout?", e); } } // Clear region. this.currentRegion = null; // Set this to zero so we don't try and do an rpc and close on remote server when // the exception we got was UnknownScanner or the Server is going down. callable = null; // This continue will take us to while at end of loop where we will set up new scanner. continue; } long currentTime = System.currentTimeMillis(); if (this.scanMetrics != null ) { this.scanMetrics.sumOfMillisSecBetweenNexts.addAndGet(currentTime-lastNext); } lastNext = currentTime; if (values != null && values.length > 0) { for (Result rs : values) { cache.add(rs); // We don't make Iterator here for (Cell cell : rs.rawCells()) { remainingResultSize -= CellUtil.estimatedHeapSizeOf(cell); } countdown--; this.lastResult = rs; } } // Values == null means server-side filter has determined we must STOP } while (remainingResultSize > 0 && countdown > 0 && possiblyNextScanner(countdown, values == null)); } if (cache.size() > 0) { return cache.poll(); } // if we exhausted this scanner before calling close, write out the scan metrics writeScanMetrics(); return null; } @Override public void close() { if (!scanMetricsPublished) writeScanMetrics(); if (callable != null) { callable.setClose(); try { call(scan, callable, caller, scannerTimeout); } catch (UnknownScannerException e) { // We used to catch this error, interpret, and rethrow. However, we // have since decided that it's not nice for a scanner's close to // throw exceptions. Chances are it was just due to lease time out. } catch (IOException e) { /* An exception other than UnknownScanner is unexpected. */ LOG.warn("scanner failed to close. Exception follows: " + e); } callable = null; } closed = true; } }