/** * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase.tool; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.util.Tool; import org.apache.hadoop.util.ToolRunner; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.HColumnDescriptor; import org.apache.hadoop.hbase.HBaseConfiguration; import org.apache.hadoop.hbase.TableNotFoundException; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.HTable; import org.apache.hadoop.hbase.client.HBaseAdmin; /** * HBase Canary tool that can be used to do * "canary monitoring" of a running HBase cluster. * * For each region, it tries to get one row per column family * and outputs some information about failure or latency.
*/ public final class Canary implements Tool { // Sink interface used by the canary to outputs information public interface Sink { public void publishReadFailure(HRegionInfo region); public void publishReadFailure(HRegionInfo region, HColumnDescriptor column); public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime); } // Simple implementation of canary sink that allows to plot on // file or standard output timings or failures. public static class StdOutSink implements Sink { @Override public void publishReadFailure(HRegionInfo region) { LOG.error(String.format("read from region %s failed", region.getRegionNameAsString())); } @Override public void publishReadFailure(HRegionInfo region, HColumnDescriptor column) { LOG.error(String.format("read from region %s column family %s failed", region.getRegionNameAsString(), column.getNameAsString())); } @Override public void publishReadTiming(HRegionInfo region, HColumnDescriptor column, long msTime) { LOG.info(String.format("read from region %s column family %s in %dms", region.getRegionNameAsString(), column.getNameAsString(), msTime)); } } private static final long DEFAULT_INTERVAL = 6000; private static final Log LOG = LogFactory.getLog(Canary.class); private Configuration conf = null; private HBaseAdmin admin = null; private long interval = 0; private Sink sink = null; public Canary() { this(new StdOutSink()); } public Canary(Sink sink) { this.sink = sink; } @Override public Configuration getConf() { return conf; } @Override public void setConf(Configuration conf) { this.conf = conf; } @Override public int run(String[] args) throws Exception { int tables_index = -1; // Process command line args for (int i = 0; i < args.length; i++) { String cmd = args[i]; if (cmd.startsWith("-")) { if (tables_index >= 0) { // command line args must be in the form: [opts] [table 1 [table 2 ...]] System.err.println("Invalid command line options"); printUsageAndExit(); } if (cmd.equals("-help")) { // user 
asked for help, print the help and quit. printUsageAndExit(); } else if (cmd.equals("-daemon") && interval == 0) { // user asked for daemon mode, set a default interval between checks interval = DEFAULT_INTERVAL; } else if (cmd.equals("-interval")) { // user has specified an interval for canary breaths (-interval N) i++; if (i == args.length) { System.err.println("-interval needs a numeric value argument."); printUsageAndExit(); } try { interval = Long.parseLong(args[i]) * 1000; } catch (NumberFormatException e) { System.err.println("-interval needs a numeric value argument."); printUsageAndExit(); } } else { // no options match System.err.println(cmd + " options is invalid."); printUsageAndExit(); } } else if (tables_index < 0) { // keep track of first table name specified by the user tables_index = i; } } // initialize HBase conf and admin if (conf == null) conf = HBaseConfiguration.create(); admin = new HBaseAdmin(conf); // lets the canary monitor the cluster do { if (admin.isAborted()) { LOG.error("HBaseAdmin aborted"); return(1); } if (tables_index >= 0) { for (int i = tables_index; i < args.length; i++) { sniff(args[i]); } } else { sniff(); } Thread.sleep(interval); } while (interval > 0); return(0); } private void printUsageAndExit() { System.err.printf("Usage: bin/hbase %s [opts] [table 1 [table 2...]]\n", getClass().getName()); System.err.println(" where [opts] are:"); System.err.println(" -help Show this help and exit."); System.err.println(" -daemon Continuous check at defined intervals."); System.err.println(" -interval <N> Interval between checks (sec)"); System.exit(1); } /* * canary entry point to monitor all the tables. */ private void sniff() throws Exception { for (HTableDescriptor table : admin.listTables()) { sniff(table); } } /* * canary entry point to monitor specified table. 
*/ private void sniff(String tableName) throws Exception { if (admin.isTableAvailable(tableName)) { sniff(admin.getTableDescriptor(tableName.getBytes())); } else { LOG.warn(String.format("Table %s is not available", tableName)); } } /* * Loops over regions that owns this table, * and output some information abouts the state. */ private void sniff(HTableDescriptor tableDesc) throws Exception { HTable table = null; try { table = new HTable(admin.getConfiguration(), tableDesc.getName()); } catch (TableNotFoundException e) { return; } for (HRegionInfo region : admin.getTableRegions(tableDesc.getName())) { try { sniffRegion(region, table); } catch (Exception e) { sink.publishReadFailure(region); } } } /* * For each column family of the region tries to get one row * and outputs the latency, or the failure. */ private void sniffRegion(HRegionInfo region, HTable table) throws Exception { HTableDescriptor tableDesc = table.getTableDescriptor(); for (HColumnDescriptor column : tableDesc.getColumnFamilies()) { Get get = new Get(region.getStartKey()); get.addFamily(column.getName()); try { long startTime = System.currentTimeMillis(); table.get(get); long time = System.currentTimeMillis() - startTime; sink.publishReadTiming(region, column, time); } catch (Exception e) { sink.publishReadFailure(region, column); } } } public static void main(String[] args) throws Exception { int exitCode = ToolRunner.run(new Canary(), args); System.exit(exitCode); } }