/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ServerCallable;
import org.apache.hadoop.hbase.filter.Filter;

import java.io.IOException;
import java.util.concurrent.ExecutorService;

/**
 * {@inheritDoc}
 * <p/>
 * This extension of the {@link HTable} class provides a mechanism to initiate
 * and run scanners for each region in parallel. This is achieved using an
 * {@link ExecutorService} and a {@link ServerCallable} for each region. Each
 * callable is responsible for fetching 'hbase.client.scanner.caching' rows
 * from its region per invocation.
 * <p/>
 * In order to limit the number of rows pulled into memory on the client, the
 * region callable is not resubmitted to the {@link ExecutorService} until its
 * previous set of results has been consumed. As a result the order of the rows
 * will not necessarily be in key order. This has several side effects:
 * <ul>
 * <li>The provided {@link Scan} cannot specify a
 * {@link Scan#setStartRow(byte[]) start row} or a
 * {@link Scan#setStopRow(byte[]) stop row}.
 * <li>The provided {@link Scan#getFilter()} cannot abort the result set
 * processing using the {@link Filter#filterAllRemaining()} method.
 * </ul>
 * <p/>
 * If the 'hbase.client.scanner.caching' setting is 1, the value is overridden
 * with the {@link #DEFAULT_SCANNER_CACHING} value.
 */
public class ParallelHTable extends HTable {

  /**
   * Default scanner caching value.
   */
  public static final int DEFAULT_SCANNER_CACHING = 2000;

  private ExecutorService executorService;

  /**
   * Constructor.
   *
   * @param tableName the table name
   * @param executorService the executor service
   *
   * @throws IOException if an error occurs
   */
  public ParallelHTable(String tableName, ExecutorService executorService)
      throws IOException {
    super(tableName);
    this.executorService = executorService;
  }

  /**
   * Constructor.
   *
   * @param tableName the table name
   * @param executorService the executor service
   *
   * @throws IOException if an error occurs
   */
  public ParallelHTable(byte[] tableName, ExecutorService executorService)
      throws IOException {
    super(tableName);
    this.executorService = executorService;
  }
  /**
   * Constructor.
   *
   * @param conf the config
   * @param tableName the table name
   * @param executorService the executor service
   *
   * @throws IOException if an error occurs
   */
  public ParallelHTable(HBaseConfiguration conf, String tableName,
      ExecutorService executorService) throws IOException {
    super(conf, tableName);
    this.executorService = executorService;
  }

  /**
   * Constructor.
   *
   * @param conf the config
   * @param tableName the table name
   * @param executorService the executor service
   *
   * @throws IOException if an error occurs
   */
  public ParallelHTable(Configuration conf, byte[] tableName,
      ExecutorService executorService) throws IOException {
    super(conf, tableName);
    this.executorService = executorService;
  }

  /**
   * Get a scanner on the current table as specified by the {@link Scan} object.
   * Also note that if the {@link ParallelClientScanner} is used then region
   * splits will NOT be handled. A NotServingRegionException will be thrown and
   * the query should be re-tried by the client.
   *
   * @param scan a configured {@link Scan} object
   * @param scanInParallel if true, multiple threads will be used to perform the scan
   * @return scanner
   *
   * @throws IOException if an error occurs
   */
  public ResultScanner getScanner(final Scan scan, boolean scanInParallel)
      throws IOException {
    if (scanInParallel) {
      return new ParallelClientScanner(this, scan, defaultScannerCaching());
    } else {
      return super.getScanner(scan);
    }
  }

  /**
   * The default scanner caching (pre-fetch count in our code) is set to 1 in
   * HTable. That's not really suitable for the parallel scanner, so instead we
   * use {@link #DEFAULT_SCANNER_CACHING}.
   *
   * @return the value of {@link HTable#scannerCaching} if it's not set to 1,
   *         otherwise {@link #DEFAULT_SCANNER_CACHING}
   */
  private int defaultScannerCaching() {
    return (super.scannerCaching != 1 ? super.scannerCaching : DEFAULT_SCANNER_CACHING);
  }

  /**
   * Returns the {@link java.util.concurrent.ExecutorService} used to process
   * the parallel region scans.
   *
   * @return the executor service
   */
  public ExecutorService getExecutorService() {
    return executorService;
  }
}
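
/*
 * A minimal usage sketch, not part of the original class: it shows how a client
 * might obtain a parallel scanner from ParallelHTable. The table name "webtable",
 * the column family "content", and the pool size of 8 are illustrative assumptions,
 * and the sketch assumes an HBase release of the same vintage where the no-arg
 * HBaseConfiguration constructor is available.
 */
class ParallelHTableUsageSketch {

  public static void main(String[] args) throws IOException {
    // The executor service drives one callable per region.
    ExecutorService pool = java.util.concurrent.Executors.newFixedThreadPool(8);
    try {
      ParallelHTable table =
          new ParallelHTable(new HBaseConfiguration(), "webtable", pool);

      // Note: no start row, stop row, or filterAllRemaining()-based filter may be
      // used when scanning in parallel.
      Scan scan = new Scan();
      scan.addFamily(org.apache.hadoop.hbase.util.Bytes.toBytes("content"));

      ResultScanner scanner = table.getScanner(scan, true);
      try {
        // Rows arrive in no particular key order.
        for (Result result : scanner) {
          System.out.println(
              org.apache.hadoop.hbase.util.Bytes.toString(result.getRow()));
        }
      } finally {
        scanner.close();
      }
    } finally {
      pool.shutdown();
    }
  }
}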