/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.hbase.HRegionLocation;

/**
 * Utility class for HTable.
 */
@InterfaceAudience.Public
@InterfaceStability.Stable
public class HTableUtil {

  private static final int INITIAL_LIST_SIZE = 250;

  /**
   * Processes a List of Puts and writes them to an HTable instance in RegionServer buckets via the htable.put method.
   * This utilizes the writeBuffer, so the flush frequency may be tuned via htable.setWriteBufferSize.
   * <br><br>
   * The benefit of submitting Puts in this manner is to minimize the number of RegionServer RPCs in each flush.
   * <br><br>
   * Assumption #1: Regions have been pre-created for the table. If they haven't, then all of the Puts will go to the
   * same region, defeating the purpose of this utility method. See the Apache HBase book for an explanation of how
   * to do this.
   * <br>
   * Assumption #2: Row-keys are not monotonically increasing. See the Apache HBase book for an explanation of this
   * problem.
   * <br>
   * Assumption #3: The input list of Puts is big enough to be useful (in the thousands or more). The intent of this
   * method is to process larger chunks of data.
   * <br>
   * Assumption #4: htable.setAutoFlush(false) has been set. This is a requirement to use the writeBuffer.
   * <br><br>
   * @param htable HTable instance for target HBase table
   * @param puts List of Put instances
   * @throws IOException if a remote or network exception occurs
   */
  public static void bucketRsPut(HTable htable, List<Put> puts) throws IOException {
    Map<String, List<Put>> putMap = createRsPutMap(htable, puts);
    for (List<Put> rsPuts : putMap.values()) {
      htable.put(rsPuts);
    }
    htable.flushCommits();
  }
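  /*
   * Usage sketch for bucketRsPut (illustrative only; the Configuration "conf" and
   * the table name "myTable" are hypothetical, and the table is assumed to be
   * pre-split into multiple regions):
   *
   *   HTable htable = new HTable(conf, "myTable");
   *   htable.setAutoFlush(false);                  // required; enables the writeBuffer (Assumption #4)
   *   htable.setWriteBufferSize(4 * 1024 * 1024);  // optional; tunes flush frequency
   *
   *   List<Put> puts = new ArrayList<Put>();
   *   for (int i = 0; i < 10000; i++) {
   *     // UUID row keys keep the inserts non-monotonic (Assumption #2)
   *     Put put = new Put(Bytes.toBytes(UUID.randomUUID().toString()));
   *     put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("value-" + i));
   *     puts.add(put);
   *   }
   *   HTableUtil.bucketRsPut(htable, puts);        // one put() per RegionServer bucket, then flushCommits()
   */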
  /**
   * Processes a List of Rows (Put, Delete) and writes them to an HTable instance in RegionServer buckets via the
   * htable.batch method.
   * <br><br>
   * The benefit of submitting Rows in this manner is to minimize the number of RegionServer RPCs: this will produce
   * one RPC per RegionServer.
   * <br><br>
   * Assumption #1: Regions have been pre-created for the table. If they haven't, then all of the Rows will go to the
   * same region, defeating the purpose of this utility method. See the Apache HBase book for an explanation of how
   * to do this.
   * <br>
   * Assumption #2: Row-keys are not monotonically increasing. See the Apache HBase book for an explanation of this
   * problem.
   * <br>
   * Assumption #3: The input list of Rows is big enough to be useful (in the thousands or more). The intent of this
   * method is to process larger chunks of data.
   * <br><br>
   * This method accepts a list of Row objects because the underlying .batch method accepts a list of Row objects.
   * <br><br>
   * @param htable HTable instance for target HBase table
   * @param rows List of Row instances
   * @throws IOException if a remote or network exception occurs
   */
  public static void bucketRsBatch(HTable htable, List<Row> rows) throws IOException {
    try {
      Map<String, List<Row>> rowMap = createRsRowMap(htable, rows);
      for (List<Row> rsRows : rowMap.values()) {
        htable.batch(rsRows);
      }
    } catch (InterruptedException e) {
      // Surface the interrupt to the caller as an IOException, per this method's contract
      throw new IOException(e);
    }
  }

  /**
   * Groups the Puts by the hostname of the RegionServer currently hosting each Put's target region.
   */
  private static Map<String, List<Put>> createRsPutMap(HTable htable, List<Put> puts) throws IOException {
    Map<String, List<Put>> putMap = new HashMap<String, List<Put>>();
    for (Put put : puts) {
      HRegionLocation rl = htable.getRegionLocation(put.getRow());
      String hostname = rl.getHostname();
      List<Put> recs = putMap.get(hostname);
      if (recs == null) {
        recs = new ArrayList<Put>(INITIAL_LIST_SIZE);
        putMap.put(hostname, recs);
      }
      recs.add(put);
    }
    return putMap;
  }

  /**
   * Groups the Rows by the hostname of the RegionServer currently hosting each Row's target region.
   */
  private static Map<String, List<Row>> createRsRowMap(HTable htable, List<Row> rows) throws IOException {
    Map<String, List<Row>> rowMap = new HashMap<String, List<Row>>();
    for (Row row : rows) {
      HRegionLocation rl = htable.getRegionLocation(row.getRow());
      String hostname = rl.getHostname();
      List<Row> recs = rowMap.get(hostname);
      if (recs == null) {
        recs = new ArrayList<Row>(INITIAL_LIST_SIZE);
        rowMap.put(hostname, recs);
      }
      recs.add(row);
    }
    return rowMap;
  }
}
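/*
 * Usage sketch for bucketRsBatch (illustrative only; "conf" and "myTable" are
 * hypothetical). Puts and Deletes both extend Row, so they can be mixed in a
 * single bucketed batch:
 *
 *   HTable htable = new HTable(conf, "myTable");
 *   List<Row> rows = new ArrayList<Row>();
 *
 *   Put put = new Put(Bytes.toBytes("row-to-write"));
 *   put.add(Bytes.toBytes("cf"), Bytes.toBytes("q"), Bytes.toBytes("v"));
 *   rows.add(put);
 *   rows.add(new Delete(Bytes.toBytes("row-to-remove")));
 *
 *   HTableUtil.bucketRsBatch(htable, rows);  // one batch() call per RegionServer
 */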