/**
 * Copyright 2009 The Apache Software Foundation
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.ServerCallable;
import org.apache.hadoop.hbase.filter.Filter;

import java.io.IOException;
import java.util.concurrent.ExecutorService;

/**
 * {@inheritDoc}
 * <p/>
 * This extension of the {@link HTable} class provides a mechanism to initiate
 * and run scanners for each region in parallel. This is achieved using an
 * {@link ExecutorService} and a {@link ServerCallable} for each region. Each
 * callable is responsible for fetching 'hbase.client.scanner.caching' rows
 * from its region per invocation.
 * <p/>
 * In order to limit the number of rows pulled into memory on the client, the
 * region callable is not resubmitted to the {@link ExecutorService} until its
 * previous set of results has been consumed. As a result the order of the rows
 * will not necessarily be in key order. This has several side effects:
 * <ul>
 * <li>The provided {@link Scan} cannot specify a
 * {@link Scan#setStartRow(byte[]) start row} or a
 * {@link Scan#setStopRow(byte[]) stop row}.
 * <li>The provided {@link Scan#getFilter()} cannot abort the result set
 * processing using the {@link Filter#filterAllRemaining()} method.
 * </ul>
 * <p/>
 * If the 'hbase.client.scanner.caching' setting is 1, the value is overridden
 * with the {@link #DEFAULT_SCANNER_CACHING} value.
 */
public class ParallelHTable extends HTable {

  /**
   * Default scanner caching value.
   */
  public static final int DEFAULT_SCANNER_CACHING = 2000;

  private ExecutorService executorService;

  /**
   * Constructor.
   *
   * @param tableName the table name
   * @param executorService the executor service
   *
   * @throws IOException if an error occurs
   */
  public ParallelHTable(String tableName, ExecutorService executorService)
      throws IOException {
    super(tableName);
    this.executorService = executorService;
  }

  /**
   * Constructor.
   *
   * @param tableName the table name
   * @param executorService the executor service
   *
   * @throws IOException if an error occurs
   */
  public ParallelHTable(byte[] tableName, ExecutorService executorService)
      throws IOException {
    super(tableName);
    this.executorService = executorService;
  }
  /**
   * Constructor.
   *
   * @param conf the config
   * @param tableName the table name
   * @param executorService the executor service
   *
   * @throws IOException if an error occurs
   */
  public ParallelHTable(HBaseConfiguration conf, String tableName,
      ExecutorService executorService) throws IOException {
    super(conf, tableName);
    this.executorService = executorService;
  }

  /**
   * Constructor.
   *
   * @param conf the config
   * @param tableName the table name
   * @param executorService the executor service
   *
   * @throws IOException if an error occurs
   */
  public ParallelHTable(Configuration conf, byte[] tableName,
      ExecutorService executorService) throws IOException {
    super(conf, tableName);
    this.executorService = executorService;
  }

  /**
   * Get a scanner on the current table as specified by the {@link Scan} object.
   * Also note that if the {@link ParallelClientScanner} is used then region
   * splits will NOT be handled. A NotServingRegionException will be thrown and
   * the query should be re-tried by the client.
   *
   * @param scan a configured {@link Scan} object
   * @param scanInParallel if true, multiple threads will be used to perform the scan
   * @return scanner
   *
   * @throws IOException if an error occurs
   */
  public ResultScanner getScanner(final Scan scan, boolean scanInParallel)
      throws IOException {
    if (scanInParallel) {
      return new ParallelClientScanner(this, scan, defaultScannerCaching());
    } else {
      return super.getScanner(scan);
    }
  }

  /**
   * The default scanner caching (pre-fetch count in our code) is set to 1 in
   * HTable. That's not really suitable for the parallel scanner, so instead we
   * use {@link #DEFAULT_SCANNER_CACHING}.
   *
   * @return the value of {@link HTable#scannerCaching} if it's not set to 1,
   *         otherwise {@link #DEFAULT_SCANNER_CACHING}
   */
  private int defaultScannerCaching() {
    return (super.scannerCaching != 1 ? super.scannerCaching : DEFAULT_SCANNER_CACHING);
  }

  /**
   * Returns the {@link java.util.concurrent.ExecutorService} used to process
   * the parallel region scans.
   *
   * @return the executor service
   */
  public ExecutorService getExecutorService() {
    return executorService;
  }
}
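
/*
 * A minimal usage sketch, not part of the original class: it shows how a client
 * might obtain a parallel scanner from ParallelHTable. The table name "webtable",
 * the column family "content", and the pool size of 8 are illustrative assumptions,
 * and the sketch assumes an HBase release of the same vintage where the no-arg
 * HBaseConfiguration constructor is available.
 */
class ParallelHTableUsageSketch {

  public static void main(String[] args) throws IOException {
    // The executor service drives one callable per region.
    ExecutorService pool = java.util.concurrent.Executors.newFixedThreadPool(8);
    try {
      ParallelHTable table =
          new ParallelHTable(new HBaseConfiguration(), "webtable", pool);

      // Note: no start row, stop row, or filterAllRemaining()-based filter may be
      // used when scanning in parallel.
      Scan scan = new Scan();
      scan.addFamily(org.apache.hadoop.hbase.util.Bytes.toBytes("content"));

      ResultScanner scanner = table.getScanner(scan, true);
      try {
        // Rows arrive in no particular key order.
        for (Result result : scanner) {
          System.out.println(
              org.apache.hadoop.hbase.util.Bytes.toString(result.getRow()));
        }
      } finally {
        scanner.close();
      }
    } finally {
      pool.shutdown();
    }
  }
}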