/*
 * Copyright © 2015-2016 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package co.cask.cdap.data.tools;

import co.cask.cdap.common.conf.CConfiguration;
import co.cask.cdap.common.guice.ConfigModule;
import co.cask.cdap.common.guice.DiscoveryRuntimeModule;
import co.cask.cdap.common.guice.IOModule;
import co.cask.cdap.common.guice.KafkaClientModule;
import co.cask.cdap.common.guice.LocationRuntimeModule;
import co.cask.cdap.common.guice.ZKClientModule;
import co.cask.cdap.data.runtime.DataFabricModules;
import co.cask.cdap.data.runtime.DataSetsModules;
import co.cask.cdap.data.stream.StreamAdminModules;
import co.cask.cdap.data.view.ViewAdminModules;
import co.cask.cdap.explore.guice.ExploreClientModule;
import co.cask.cdap.logging.guice.LoggingModules;
import co.cask.cdap.metrics.guice.MetricsClientRuntimeModule;
import co.cask.cdap.notifications.feeds.guice.NotificationFeedServiceRuntimeModule;
import co.cask.tephra.Transaction;
import co.cask.tephra.TransactionCodec;
import co.cask.tephra.TransactionSystemClient;
import co.cask.tephra.TxConstants;
import co.cask.tephra.distributed.TransactionService;
import com.google.common.util.concurrent.Service;
import com.google.inject.Guice;
import com.google.inject.Injector;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.KeyValueUtil;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.hbase.mapreduce.TableMapper;
import org.apache.hadoop.mapreduce.Job;
import org.apache.twill.zookeeper.ZKClientService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Random;

/**
 * Tool to export an HBase table to HFiles. The tool accepts the HBase table name as an input parameter
 * and outputs the HDFS path where the corresponding HFiles are exported.
 */
public class HBaseTableExporter {

  private static final Logger LOG = LoggerFactory.getLogger(HBaseTableExporter.class);

  private final Configuration hConf;
  private final TransactionService txService;
  private final ZKClientService zkClientService;
  private final TransactionSystemClient txClient;
  private Path bulkloadDir = null;

  public HBaseTableExporter() throws Exception {
    this.hConf = HBaseConfiguration.create();
    Injector injector = createInjector(CConfiguration.create(), hConf);
    this.txClient = injector.getInstance(TransactionSystemClient.class);
    this.txService = injector.getInstance(TransactionService.class);
    this.zkClientService = injector.getInstance(ZKClientService.class);

    Runtime.getRuntime().addShutdownHook(new Thread() {
      @Override
      public void run() {
        try {
          HBaseTableExporter.this.stop();
        } catch (Throwable e) {
          LOG.error("Failed to stop the tool.", e);
        }
      }
    });
  }

  private static Injector createInjector(CConfiguration cConf, Configuration hConf) {
    return Guice.createInjector(
      new ConfigModule(cConf, hConf),
      new IOModule(),
      new ZKClientModule(),
      new KafkaClientModule(),
      new LocationRuntimeModule().getDistributedModules(),
      new DiscoveryRuntimeModule().getDistributedModules(),
      new DataFabricModules().getDistributedModules(),
      new DataSetsModules().getDistributedModules(),
      new MetricsClientRuntimeModule().getDistributedModules(),
      new LoggingModules().getDistributedModules(),
      new ExploreClientModule(),
      new ViewAdminModules().getDistributedModules(),
      new StreamAdminModules().getDistributedModules(),
      new NotificationFeedServiceRuntimeModule().getDistributedModules()
    );
  }

  /**
   * A mapper that just writes KeyValues.
   */
  static class KeyValueImporter extends TableMapper<ImmutableBytesWritable, KeyValue> {
    @Override
    public void map(ImmutableBytesWritable row, Result value, Context context)
      throws IOException, InterruptedException {
      for (Cell kv : value.rawCells()) {
        context.write(row, KeyValueUtil.ensureKeyValue(kv));
      }
    }
  }

  /**
   * Sets up the actual MapReduce job.
   *
   * @param tx the transaction to be passed to the {@link Scan} instance. This transaction is used by
   *           coprocessors to filter out the data corresponding to invalid transactions.
   * @param tableName name of the table which needs to be exported as HFiles
   * @return the configured job
   * @throws IOException if the job or its output directory cannot be set up
   */
  public Job createSubmittableJob(Transaction tx, String tableName) throws IOException {
    Job job = Job.getInstance(hConf, "HBaseTableExporter");
    job.setJarByClass(HBaseTableExporter.class);
    Scan scan = new Scan();
    scan.setCacheBlocks(false);
    // Set the transaction attribute for the scan.
    scan.setAttribute(TxConstants.TX_OPERATION_ATTRIBUTE_KEY, new TransactionCodec().encode(tx));
    job.setNumReduceTasks(0);
    TableMapReduceUtil.initTableMapperJob(tableName, scan, KeyValueImporter.class, null, null, job);

    FileSystem fs = FileSystem.get(hConf);
    Random rand = new Random();
    Path root = new Path(fs.getWorkingDirectory(), "hbasetableexporter");
    fs.mkdirs(root);
    while (true) {
      bulkloadDir = new Path(root, "" + rand.nextLong());
      if (!fs.exists(bulkloadDir)) {
        break;
      }
    }

    HFileOutputFormat2.setOutputPath(job, bulkloadDir);
    HTable hTable = new HTable(hConf, tableName);
    HFileOutputFormat2.configureIncrementalLoad(job, hTable);
    return job;
  }

  private void startUp() throws Exception {
    zkClientService.startAndWait();
    txService.startAndWait();
  }

  /**
   * Stops a guava {@link Service}. No exception will be thrown even if stopping fails.
   */
  private void stopQuietly(Service service) {
    try {
      service.stopAndWait();
    } catch (Exception e) {
      LOG.warn("Exception when stopping service {}", service, e);
    }
  }

  private void stop() throws Exception {
    stopQuietly(txService);
    stopQuietly(zkClientService);
  }

  private void printHelp() {
    System.out.println();
    System.out.println("Usage: /opt/cdap/master/bin/svc-master " +
                         "run co.cask.cdap.data.tools.HBaseTableExporter <tablename>");
    System.out.println("Args:");
    System.out.println(" tablename    Name of the table to copy");
  }

  public void doMain(String[] args) throws Exception {
    if (args.length < 1) {
      printHelp();
      return;
    }

    String tableName = args[0];

    try {
      startUp();
      Transaction tx = txClient.startLong();
      Job job = createSubmittableJob(tx, tableName);
      if (!job.waitForCompletion(true)) {
        LOG.error("MapReduce job failed!");
        throw new RuntimeException("Failed to run the MapReduce job.");
      }
      // Always commit the transaction, since we are not doing any data update
      // operation in this tool.
      txClient.commit(tx);
      System.out.println("Export operation complete. HFiles are stored at location " + bulkloadDir);
    } finally {
      stop();
    }
  }

  public static void main(String[] args) throws Exception {
    try {
      HBaseTableExporter hBaseTableExporter = new HBaseTableExporter();
      hBaseTableExporter.doMain(args);
    } catch (Throwable t) {
      LOG.error("Failed to export the HBase table.", t);
    }
  }
}
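
// Usage note (a hedged sketch, not part of the tool itself): the exact svc-master wrapper path and
// the table name below depend on the installation and are placeholders.
//
//   /opt/cdap/master/bin/svc-master run co.cask.cdap.data.tools.HBaseTableExporter <tablename>
//
// The tool prints the HDFS directory holding the generated HFiles. Assuming a standard HBase
// distribution is available on the node, those HFiles can typically be bulk-loaded into a table
// with HBase's LoadIncrementalHFiles (completebulkload) tool, for example:
//
//   hbase org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles <hfiles-dir> <tablename>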