HBaseRpc.java example

Explorer
asynchbase-master
- src
- test
/*
 * Copyright (C) 2010-2012  The Async HBase Authors.  All rights reserved.
 * This file is part of Async HBase.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *   - Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   - Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *   - Neither the name of the StumbleUpon nor the names of its contributors
 *     may be used to endorse or promote products derived from this software
 *     without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
package org.hbase.async;

import java.io.IOException;
import java.util.concurrent.TimeUnit;

import com.google.protobuf.AbstractMessageLite;
import com.google.protobuf.CodedOutputStream;
import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.Parser;

import org.jboss.netty.buffer.ChannelBuffer;
import org.jboss.netty.buffer.ChannelBuffers;
import org.jboss.netty.util.CharsetUtil;
import org.jboss.netty.util.Timeout;
import org.jboss.netty.util.TimerTask;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.stumbleupon.async.Deferred;

/**
 * Abstract base class for all RPC requests going out to HBase.
 * <p>
 * Implementations of this class are <b>not</b> expected to be synchronized.
 *
 * <h1>A note on passing {@code byte} arrays in argument</h1>
 * None of the methods that receive a {@code byte[]} in argument will copy it.
 * If you change the contents of any byte array you give to an instance of
 * this class, you <em>may</em> affect the behavior of the request in an
 * <strong>unpredictable</strong> way.  If you need to change the byte array,
 * {@link Object#clone() clone} it before giving it to this class.  For those
 * familiar with the term "defensive copy", we don't do it in order to avoid
 * unnecessary memory copies when you know you won't be changing (or even
 * holding a reference to) the byte array, which is frequently the case.
 */
public abstract class HBaseRpc {
  private static final Logger LOG = LoggerFactory.getLogger(HBaseRpc.class);
  
  /**
   * Implemented by RPCs that can tell which table they are for.
   * @since 1.1
   */
  public interface HasTable {
    /**
     * Gives access to the name of the table targeted by this RPC.
     * <p>
     * <strong>DO NOT MODIFY THE CONTENTS OF THE ARRAY RETURNED.</strong>
     * @return the name of the table this RPC is for.
     */
    byte[] table();
  }

  /**
   * Implemented by RPCs that can tell which row key they are for.
   * @since 1.1
   */
  public interface HasKey {
    /**
     * Gives access to the row key targeted by this RPC.
     * <p>
     * <strong>DO NOT MODIFY THE CONTENTS OF THE ARRAY RETURNED.</strong>
     * @return the row key this RPC is for.
     */
    byte[] key();
  }

  /**
   * Implemented by RPCs that can tell which column family they are for.
   * @since 1.1
   */
  public interface HasFamily {
    /**
     * Gives access to the family targeted by this RPC.
     * <p>
     * <strong>DO NOT MODIFY THE CONTENTS OF THE ARRAY RETURNED.</strong>
     * @return the family this RPC is for.
     */
    byte[] family();
  }

  /**
   * Implemented by RPCs that can tell which column qualifier they are for.
   * @since 1.1
   */
  public interface HasQualifier {
    /**
     * Gives access to the column qualifier targeted by this RPC.
     * <p>
     * <strong>DO NOT MODIFY THE CONTENTS OF THE ARRAY RETURNED.</strong>
     * @return the column qualifier this RPC is for.
     */
    byte[] qualifier();
  }

  /**
   * Implemented by RPCs that can tell which column qualifiers they are for.
   * @since 1.1
   */
  public interface HasQualifiers {
    /**
     * Gives access to the column qualifiers targeted by this RPC.
     * <p>
     * <strong>DO NOT MODIFY THE CONTENTS OF THE ARRAY RETURNED.</strong>
     * @return the column qualifiers this RPC is for.
     */
    byte[][] qualifiers();
  }

  /**
   * Implemented by RPCs that carry a value.
   * @since 1.1
   */
  public interface HasValue {
    /**
     * Gives access to the value carried by this RPC.
     * <p>
     * <strong>DO NOT MODIFY THE CONTENTS OF THE ARRAY RETURNED.</strong>
     * @return the value contained in this RPC.
     */
    byte[] value();
  }

  /**
   * Implemented by RPCs that carry several values.
   * @since 1.3
   */
  public interface HasValues {
    /**
     * Gives access to the values carried by this RPC.
     * <p>
     * <strong>DO NOT MODIFY THE CONTENTS OF THE ARRAY RETURNED.</strong>
     * @return the values contained in this RPC.
     */
    byte[][] values();
  }

  /**
   * Implemented by RPCs that carry a timestamp.
   * @since 1.2
   */
  public interface HasTimestamp {
    /**
     * Gives access to the strictly positive timestamp carried by this RPC.
     * @return the strictly positive timestamp contained in this RPC.
     */
    long timestamp();
  }

  /**
   * Package-private marker for the RPCs that change data in HBase.
   * @since 1.4
   */
  interface IsEdit {
    /** RPC method name to use with HBase 0.95+.  */
    byte[] MUTATE = { 'M', 'u', 't', 'a', 't', 'e' };
  }
  /**
   * Tracing flag of this RPC, read through {@link #isTraceRPC} and written
   * through {@link #setTraceRPC}.  Defaults to {@code false}.
   */
  private boolean trace_rpc;

  /**
   * Tells whether the tracing flag is set on this RPC.
   * @return the value last given to {@link #setTraceRPC}.
   */
  public boolean isTraceRPC() {
    return this.trace_rpc;
  }

  /**
   * Sets or clears the tracing flag of this RPC.
   * @param trace_rpc The new value of the flag.
   */
  public void setTraceRPC(final boolean trace_rpc) {
    this.trace_rpc = trace_rpc;
  }
  
  /*
   * This class, although it's part of the public API, is mostly here to make
   * it easier for this library to manipulate the HBase RPC protocol.
   *
   *
   * Unofficial Hadoop / HBase RPC protocol documentation
   * ****************************************************
   *
   * HBase uses a modified version of the Hadoop RPC protocol.  They took
   * Hadoop's RPC code, copy-pasted it into HBase, and tweaked it a little
   * bit (mostly in a desperate attempt to try to make it less inefficient).
   *
   * RPCs are numbered with an arbitrary 32-bit ID.  It is customary, but not
   * mandatory, to start at 0 and increment by 1 every time you send out an
   * RPC.  The ID is allowed to wrap around and become negative.  As long as
   * no 2 RPCs share the same ID at the same time, we're fine.
   *
   * When requests are written out to the wire, they're framed.  Meaning, a
   * 4 byte integer value is first written in order to specify how many bytes
   * are in the request (excluding the first 4 bytes themselves).  The size -1
   * is special.  The client uses it to send a "ping" to the server at regular
   * intervals, and the server specifically ignores any RPC with size -1.  We
   * don't do this in this client, because it's mostly useless, and we rely on
   * TCP keepalive instead.
   *
   * Then the RPC ID is written (4 bytes).  BTW, all integer values are
   * encoded in big endian, as it's the default in Java world (Sun, SPARC...).
   *
   * Then the length of the method name is written on 2 bytes (I guess 1 byte
   * wasn't enough in case you wanted to have 32768 byte long method names).
   *
   * Then the method name itself is written as-is (as a byte array).
   *
   * The last 4 fields are what constitute the "RPC header".  The remaining
   * bytes are the parameters of the request.  First, there is a 4-byte int
   * that specifies how many parameters follow (this way you can have up to
   * 2 147 483 648 parameters, which may come in handy in a few centuries).
   *
   * In HBase 0.92 and above, 3 more fields have been added in the header as
   * previously described.  The first is a one byte version number that comes
   * right before the method name, indicating how the parameters of the RPC
   * have been serialized.  Then there is a 8 byte (!) client version that's
   * right after the method name, followed by a 4 byte "fingerprint", which
   * is a sort of hash code of the method's signature (name, return type, and
   * parameters types).  Note that the client version seems to be always set
   * to zero...
   *
   * In Hadoop RPC, the name of the class is first serialized (2 bytes
   * specifying the length of the string, followed by that number of bytes
   * of a UTF-8 encoded string in case you name your classes with Kanjis).
   * In HBase RPC, a 1-byte ID representing the class name is written instead
   * of writing the full class name.  Those IDs are hard-coded in a central
   * location (`HbaseObjectWritable', HBase's copy-pasted-hacked version of
   * Hadoop's `ObjectWritable').
   *
   * The way each parameter is serialized depends on the object type being
   * serialized.  Since Hadoop doesn't use any automatic serialization
   * framework, every class is free to serialize itself however it wants.
   * The way it works is that for built-in types, they'll handle the
   * serialization manually, and for other objects, they require that those
   * objects implement their `Writable' interface which requires that a method
   * named `readFields' and a method named `write' be implemented to
   * de-serialize / serialize the object.  So since the RPC layer knows the
   * name of the class of the parameter, it will grab its `Class' using the
   * Java Classloader and then `newInstance' it and then use `readFields' to
   * populate the newly created instance.  Thankfully most objects use a
   * common library to serialize their own fields recursively, however things
   * aren't always consistent, particularly when HBase chose to diverge from
   * Hadoop in certain (but not all) code paths.
   *
   * The way RPC responses are encoded is as follows.  First comes the 4-byte
   * RPC ID.  Then 1 byte containing flags indicating whether or not the
   * request failed (0x01) on the remote side, and whether the response is
   * framed (0x02).  If flags are only 0x00, this is an old-style (pre 0.92)
   * successful response that is not framed.  Framed responses contain a
   * 4-byte integer with the length of the entire response, including the
   * leading RPC ID, flags, and the length itself.  If there is a length, it
   * is always followed by a 4-byte integer with the state of the RPC.
   * As of 0.92, this state is mostly useless.  If the request failed (flag 0x01
   * is set), the rest of the response is just 2 Hadoop-encoded
   * strings (2-byte length, followed by a UTF-8 string).  The first string is
   * the name of the class of the exception and the second is the message of
   * the exception (which typically includes some of the server-side stack
   * trace).  Note that if the response is NOT framed, it's not easy to tell
   * ahead of time how many bytes to expect or where the next response starts.
   *
   * If the RPC was successful, the remaining of the payload is serialized
   * using the same method as the RPC parameters are serialized (see above).
   *
   * Before the very first RPC, the server expects a "hello" message that
   * starts with 4-byte magic number, followed by the RPC version (1 byte).
   * Then comes 4 bytes to specify the rest of the length of the "hello"
   * message.  The remaining is a `Writable' instance serialized that
   * specifies which authentication provider to use and give our credentials.
   * In HBase 0.92 and above, the `Writable' should represent what protocol
   * the client wants to speak, which should be the name of an interface.
   * "org.apache.hadoop.hbase.ipc.HRegionInterface" should be used.
   * The "hello" message is implemented in `RegionClient#helloRpc'.  In order
   * to support HBase 0.92, we always piggy back a `getProtocolVersion' RPC
   * right after the header, so we can tell what version the server is using
   * and how to serialize RPCs and read its responses.
   */

  // ------ //
  // Flags. //
  // ------ //
  // 5th byte into the response.
  // See ipc/ResponseFlag.java in HBase's source code.

  /** Flags value of a successful response when no other flag is set. */
  static final byte RPC_SUCCESS = 0x00;
  /** Flag set when the request failed on the remote side. */
  static final byte RPC_ERROR = 0x01;
  /**
   * Indicates that the response is framed, i.e. that the flags are followed
   * by a 4-byte integer with the length of the response.
   * This can be found on both successful ({@link #RPC_SUCCESS}) or failed
   * ({@link #RPC_ERROR}) responses.
   * @since HBase 0.92
   */
  static final byte RPC_FRAMED = 0x02;

  // ----------- //
  // RPC Status. //
  // ----------- //
  // 4 byte integer (on wire), located 9 byte into the response, only if
  // {@link RPC_FRAMED} is set.
  // See ipc/Status.java in HBase's source code.

  /**
   * RPC status indicating that an error prevented the RPC from being
   * executed.
   * <p>
   * This is a somewhat misleading name.  It indicates that the RPC couldn't
   * be executed, typically because of a protocol version mismatch, an
   * incorrectly encoded RPC (or possibly corrupted on-wire such that the
   * server couldn't deserialize it), or an authentication error (unsure about
   * that one).
   */
  static final byte RPC_FATAL = -1;

  /**
   * To be implemented by the concrete sub-type.
   * This method is expected to instantiate a {@link ChannelBuffer} using
   * {@link #newBuffer} and return it properly populated so it's ready to be
   * written out to the wire (except for the "RPC header" that contains the
   * RPC ID and method name and such, which is going to be populated
   * automatically just before sending the RPC out, see
   * {@link RegionClient#encode}).
   * <p>
   * Notice that this method is package-private, so only classes within this
   * package can use this as a base class.
   *
   * @param server_version What RPC protocol version the server is running.
   * @return The buffer holding the serialized RPC.
   */
  abstract ChannelBuffer serialize(byte server_version);

  /**
   * To be implemented by the concrete sub-type.
   * This method is expected to de-serialize a response received for the
   * current RPC, when communicating with HBase 0.95 and newer.
   * <p>
   * Notice that this method is package-private, so only classes within this
   * package can use this as a base class.
   *
   * @param buf The buffer from which to de-serialize the response.
   * @param cell_size The size, in bytes, of the "cell block" that follows the
   * protobuf of the RPC response.  If 0, then there is just the protobuf.
   * The value is guaranteed to be both positive and of a "reasonable" size.
   * NOTE(review): "positive" presumably means non-negative here, since 0 is
   * explicitly allowed -- confirm against the caller.
   * @return The de-serialized response.
   */
   abstract Object deserialize(ChannelBuffer buf, int cell_size);

  /**
   * Verifies that no cell block follows the protobuf of a response.
   * @param cell_size The number of bytes that follow the protobuf response.
   * @throws InvalidResponseException if {@code cell_size} is not zero.
   */
  static void ensureNoCell(final int cell_size) {
    if (cell_size == 0) {
      return;  // Nothing follows the protobuf, which is what we expect.
    }
    final String complaint =
      "Should not have gotten any cell blocks, yet there are "
      + cell_size + " bytes that follow the protobuf response."
      + "  This should never happen."
      + "  Are you using an incompatible version of HBase?";
    throw new InvalidResponseException(complaint, null);
  }

  /**
   * The Deferred that will be invoked when this RPC completes or fails.
   * In case of a successful completion, this Deferred's first callback
   * will be invoked with an {@link Object} containing the de-serialized
   * RPC response in argument.
   * <p>
   * The instance is created lazily by {@link #getDeferred} and reset to
   * {@code null} by {@link #callback}, so that a fresh Deferred gets created
   * if the user wants to re-use the RPC.
   */
  private Deferred<Object> deferred;

  // The next 3 fields are package-private so subclasses can access them
  // without them being part of the interface (unlike with `protected').

  /**
   * The table for which this RPC is.
   * {@code null} if this RPC isn't for a particular table.
   * Assigned once by the constructor.
   * Invariants:
   *   table == null  =>  key == null
   *   table != null  =>  key != null
   */
  final byte[] table;  // package-private for subclasses, not other classes.

  /**
   * The row key for which this RPC is.
   * {@code null} if this RPC isn't for a particular row key.
   * Assigned once by the constructor.
   * Invariants:
   *   table == null  =>  key == null
   *   table != null  =>  key != null
   */
  final byte[] key;  // package-private for subclasses, not other classes.

  /**
   * The region for which this RPC is.
   * {@code null} if this RPC isn't for a single specific region.
   * Assigned through {@link #setRegion}, which refuses to run when
   * {@link #table} is {@code null}.
   * Invariants:
   *   table == null  =>  region == null
   *   table != null  =>  region != null before {@link #serialize} gets called
   */
  RegionInfo region;  // package-private for subclasses, not other classes.

  /**
   * How many times have we retried this RPC?
   * Only used by the low-level retry logic in {@link RegionClient} in order
   * to detect broken META tables (e.g. we keep getting an NSRE but META keeps
   * sending us to the same RegionServer that sends us the NSREs, or we keep
   * looking up the same row in META because of a "hole" in META).
   * <p>
   * Reset to 0 by {@link #callback} when the RPC is called back.
   * <p>
   * Proper synchronization is required, although in practice most of the code
   * that access this attribute will have a happens-before relationship with
   * the rest of the code, due to other existing synchronization.
   */
  byte attempt;  // package-private for RegionClient and HBaseClient only.

  /** An optional timeout in milliseconds for the RPC. -1 means use the
   * default from the config. 0 means don't timeout.
   * {@link #setTimeout} rejects negative values, so -1 can only be the
   * initial value; {@link #enqueueTimeout} replaces it with the client's
   * default RPC timeout. */
  private int timeout = -1;

  /** If the RPC has a timeout set this will be set on submission to the
   * timer thread.  Assigned by {@link #enqueueTimeout}; cancelled and
   * cleared by {@link #callback}. */
  Timeout timeout_handle; // package-private for RegionClient and HBaseClient only.

  /** Task set if a timeout has been requested. The task will be executed only
   * if the RPC did timeout, then we mark {@link #has_timedout} as true and
   * remove the RPC from the region client as well as calling it back with
   * a {@link RpcTimedOutException}.
   * Created on demand by {@link #enqueueTimeout}.
   */
  private TimerTask timeout_task;

  /** Whether or not this RPC has timed out already.  Set to {@code true} by
   * the timeout task while holding this RPC's monitor and read through
   * {@link #hasTimedOut}. */
  private boolean has_timedout;
  
  /**
   * If true, this RPC should fail-fast as soon as we know we have a problem.
   * Changed through {@link #setFailfast}; off by default.
   */
  boolean failfast = false;

  /** The ID of this RPC as set by the last region client that handled it.
   * Reported in the message of the timeout exception. */
  int rpc_id;

  /** A reference to the last region client that handled this RPC.
   * Recorded by {@link #enqueueTimeout} when a timeout gets scheduled, so
   * that the timeout task can remove the RPC from that client. */
  private RegionClient region_client;
  
  /**
   * Sets whether the RPC should not be retried upon encountering a problem.
   * <p>
   * RPCs can be retried for various legitimate reasons (e.g. NSRE due to a
   * region moving), but under certain failure circumstances (such as a node
   * going down) we want to give up and be alerted as soon as possible.
   * @param failfast If {@code true}, this RPC should fail-fast as soon as
   * we know we have a problem.
   * @return the value that was just set, i.e. whether the RPC should not be
   * retried upon encountering a problem.
   * @since 1.5
   */
  public final boolean setFailfast(final boolean failfast) {
    this.failfast = failfast;
    return failfast;
  }

  /**
   * Tells whether the RPC should not be retried upon encountering a problem.
   * @return {@code true} if this RPC fails fast instead of being retried.
   * @see #setFailfast
   * @since 1.5
   */
  public final boolean failfast() {
    return this.failfast;
  }

  /**
   * If true, this RPC is a probe which checks if the destination region is
   * online.  Off by default; see {@link #isProbe} and {@link #setProbe}.
   */
  boolean probe = false;

  /**
   * Tells whether this RPC is a probe checking that the destination region
   * is online.
   * @return {@code true} if this RPC is flagged as a probe.
   */
  public boolean isProbe() {
    return this.probe;
  }

  /**
   * Flags (or un-flags) this RPC as a probe checking that the destination
   * region is online.
   * @param probe The new value of the flag.
   */
  public void setProbe(final boolean probe) {
    this.probe = probe;
  }

  /**
   * Whether or not this RPC is a probe that is suspended by an NSRE.
   * Off by default; see {@link #isSuspendedProbe} and
   * {@link #setSuspendedProbe}.
   */
  private boolean suspended_probe = false;

  /**
   * Tells whether this RPC is a probe that is suspended by an NSRE.
   * @return the value last given to {@link #setSuspendedProbe}.
   */
  boolean isSuspendedProbe() {
    return this.suspended_probe;
  }

  /**
   * Records whether this RPC is a probe that is suspended by an NSRE.
   * @param suspended_probe The new value of the flag.
   */
  void setSuspendedProbe(final boolean suspended_probe) {
    this.suspended_probe = suspended_probe;
  }

  /**
   * Package private constructor for RPCs that aren't for any region.
   * Both the table and the key are left {@code null}.
   */
  HBaseRpc() {
    this.table = null;
    this.key = null;
  }

  /**
   * Package private constructor for RPCs that are for a region.
   * Both arguments are validated (table first, then key) before being
   * stored as-is, without being copied.
   * @param table The name of the table this RPC is for.
   * @param key The row key this RPC is for.
   */
  HBaseRpc(final byte[] table, final byte[] key) {
    // Validate everything before touching any field.
    KeyValue.checkTable(table);
    KeyValue.checkKey(key);
    this.key = key;
    this.table = table;
  }

  /**
   * Sets a timeout, in milliseconds, for this RPC. If the RPC cannot be
   * sent and processed by HBase within this period then a
   * {@link RpcTimedOutException} will be returned in the deferred.
   * <p>
   * If no timeout is set, then "hbase.rpc.timeout" will be used by default.
   * However if a value of "0" is supplied as the timeout, then the RPC will
   * not be timed out.
   * @param timeout The timeout in milliseconds.
   * @throws IllegalArgumentException if the value is less than zero
   * @since 1.7
   */
  public void setTimeout(final int timeout) {
    final boolean negative = timeout < 0;
    if (negative) {
      throw new IllegalArgumentException("The timeout cannot be negative");
    }
    this.timeout = timeout;
  }
  
  /**
   * Returns the optional timeout of this RPC, in milliseconds.
   * Note that the initial value is -1, meaning the RPC will use the default
   * value configured in hbase.rpc.timeout.
   * @return The timeout value in milliseconds.
   */
  public int getTimeout() {
    return this.timeout;
  }
  
  // ---------------------- //
  // Package private stuff. //
  // ---------------------- //

  /**
   * Package private way of getting the name of the RPC method.
   * To be implemented by the concrete sub-type.
   * @param server_version What RPC protocol version the server is running.
   * @return The name of the RPC method, as a byte array.
   */
  abstract byte[] method(byte server_version);

  /**
   * Sets the region this RPC is going to.
   * <p>
   * This method is invoked by {@link HBaseClient} once the user gives it
   * their {@code HBaseRpc}, because {@link HBaseClient} is the only thing
   * that knows about and keeps track of regions.  Users don't care about
   * regions.
   * @param region The target region of this RPC.
   * @throws AssertionError if this RPC was created without a table.
   */
  final void setRegion(final RegionInfo region) {
    if (table != null) {
      this.region = region;
      return;
    }
    throw new AssertionError("Can't use setRegion if no table was given.");
  }

  /**
   * Returns the region this RPC is supposed to go to.
   * @return The target region, or {@code null} if none was set.
   */
  final RegionInfo getRegion() {
    return this.region;
  }

  /**
   * Package private way of accessing / creating the Deferred of this RPC.
   * @return The current Deferred, created on the spot if there was none.
   */
  final Deferred<Object> getDeferred() {
    if (deferred != null) {
      return deferred;
    }
    deferred = new Deferred<Object>();
    return deferred;
  }

  /**
   * A timeout task that is scheduled as soon as the RPC is about to go out on
   * the wire. If this is called then the RPC has timed out and we return a
   * {@link RpcTimedOutException} to the user. This class is also responsible
   * for removing the RPC from the proper region client map and incrementing
   * its timeout counter.
   * <p>
   * If this run method throws an exception or the Deferred callback does, then
   * it will be caught and logged by Netty's timer executor.
   */
  private final class TimeoutTask implements TimerTask {
    /**
     * Marks the enclosing RPC as timed out, removes it from its region
     * client and calls it back with a {@link RpcTimedOutException}.
     * @param time_out The handle of the timeout that fired.
     * @throws IllegalStateException if the RPC was already marked as timed
     * out.
     */
    @Override
    public void run(final Timeout time_out) throws Exception {
      // The flag is flipped under the RPC's monitor, the same one that
      // hasTimedOut() uses to read it.
      synchronized (HBaseRpc.this) {
        if (has_timedout) {
          throw new IllegalStateException(
              "This RPC has already timed out " + HBaseRpc.this);
        }
        has_timedout = true;
      }

      // Inside this inner class a bare `this' is the TimeoutTask, which has
      // no useful toString().  Log the enclosing RPC instead.
      if (timeout_handle == null) {
        LOG.error("Received a timeout handle " + time_out
            + " but this RPC did not have one " + HBaseRpc.this);
      } else if (time_out != timeout_handle) {
        // Only reported when we do have a handle: a missing handle was
        // already reported above and would otherwise be logged twice.
        LOG.error("Receieved a timeout handle " + time_out +
            " that doesn't match our own " + HBaseRpc.this);
      }
      if (region_client == null) {
        LOG.error("Somehow the region client was null when timing out RPC "
            + HBaseRpc.this);
      } else {
        region_client.removeRpc(HBaseRpc.this, true);
      }

      callback(new RpcTimedOutException("RPC ID [" + rpc_id +
          "] timed out waiting for response from HBase on region client [" +
          region_client + " ] for over " + timeout + "ms"));
      // callback() only clears these when a handle was present, so make sure
      // they're cleared in every case.
      timeout_task = null;
      timeout_handle = null;
    }
  }
  
  /**
   * Registers this RPC with the RPC timeout timer of the HBaseClient, using
   * the timeout of this RPC as the interval.  An RPC that still has the
   * initial timeout of -1 first adopts the default RPC timeout of the client.
   * If the resulting timeout isn't strictly positive then no timeout is
   * scheduled.
   * If the RPC has already been timed out then we won't allow another attempt.
   * If the timer has shutdown (due to the client shutting down) then we
   * don't do anything and let the region client expire the RPCs in its queue.
   * @param region_client The region client that sent the RPC over the wire.
   * @throws IllegalStateException if the RPC has already timed out.
   */
  void enqueueTimeout(final RegionClient region_client) {
    // TODO - it's possible that we may actually retry a timed out RPC in which
    // case we want to allow this.
    if (has_timedout) {
      throw new IllegalStateException("This RPC has already timed out " + this);
    }
    if (timeout == -1) {
      // No explicit timeout was requested: adopt the client-wide default.
      timeout = region_client.getHBaseClient().getDefaultRpcTimeout();
    }
    if (timeout <= 0) {
      return;  // Timeouts are disabled for this RPC.
    }

    this.region_client = region_client;
    // The task can be re-used if this RPC is sent to another region server.
    if (timeout_task == null) {
      timeout_task = new TimeoutTask();
    }
    try {
      if (timeout_handle != null) {
        LOG.warn("RPC " + this + " had a previous timeout task");
      }
      timeout_handle = region_client.getHBaseClient().getRpcTimeoutTimer()
          .newTimeout(timeout_task, timeout, TimeUnit.MILLISECONDS);
    } catch (IllegalStateException e) {
      // The timer may have been stopped by a concurrent shutdown() right
      // before we tried to schedule the timeout.  Region clients take care
      // of their RPCs on shutdown, so there is nothing more to do here.
      LOG.warn("Failed to schedule RPC timeout: " + this
               + "  Ignore this if we're shutting down.", e);
      timeout_handle = null;
    }
  }
  
  /**
   * Tells whether this RPC has timed out, reading the flag while holding
   * the monitor of this RPC.
   * @return Whether or not this particular RPC has timed out and should not
   * be retried
   */
  final boolean hasTimedOut() {
    synchronized (this) {
      return has_timedout;
    }
  }
  
  /**
   * Package private way of making an RPC complete by giving it its result.
   * Any pending timeout is cancelled first.  If this RPC has no
   * {@link Deferred} associated to it, nothing else will happen.  This may
   * happen if the RPC was already called back.
   * <p>
   * Once this call to this method completes, this object can be re-used to
   * re-send the same RPC, provided that no other thread still believes this
   * RPC to be in-flight (guaranteeing this may be hard in error cases).
   * @param result The result (or exception) to give to the Deferred.
   */
  final void callback(final Object result) {
    if (timeout_handle != null) {
      // The RPC is done: the timer must not fire for it anymore.
      timeout_handle.cancel();
      timeout_task = null;
      timeout_handle = null;
    }

    final Deferred<Object> pending = deferred;
    if (pending != null) {
      // Reset our state before invoking the callback chain, so the RPC
      // can be re-used from within a callback.
      deferred = null;
      attempt = 0;
      pending.callback(result);
    }
  }

  /**
   * Checks whether or not this RPC has a Deferred without creating one.
   * @return {@code true} if a Deferred is currently attached to this RPC.
   */
  final boolean hasDeferred() {
    return null != deferred;
  }

  /**
   * Returns a description of this RPC made of its method name, table, key,
   * region, attempt count, timeout and timed-out flag.
   * @return A string of the form {@code HBaseRpc(method=..., ...)}.
   */
  @Override
  public String toString() {
    // NOTE(review): this decodes with the platform's default charset; the
    // method names are presumably plain ASCII -- confirm.
    final String method = new String(this.method((byte) 0));
    // Try to rightsize the buffer.
    final StringBuilder buf = new StringBuilder(16 + method.length() + 2
      + 8 + (table == null ? 4 : table.length + 2)  // Assumption: ASCII => +2
      + 6 + (key == null ? 4 : key.length * 2)      // Assumption: binary => *2
      + 9 + (region == null ? 4 : region.stringSizeHint())
      + 10 + 1    // ", attempt=" followed by (typically) a single digit.
      + 10 + 5    // ", timeout=" followed by a typical timeout value.
      + 14 + 5    // ", hasTimedout=" followed by at most "false".
      + 1);       // The closing parenthesis.
    buf.append("HBaseRpc(method=");
    buf.append(method);
    buf.append(", table=");
    Bytes.pretty(buf, table);
    buf.append(", key=");
    Bytes.pretty(buf, key);
    buf.append(", region=");
    if (region == null) {
      buf.append("null");
    } else {
      region.toStringbuf(buf);
    }
    buf.append(", attempt=").append(attempt)
       .append(", timeout=").append(timeout)
       .append(", hasTimedout=").append(has_timedout);
    buf.append(')');
    return buf.toString();
  }

  /**
   * Helper for subclass's {@link #toString} implementations.
   * <p>
   * This is used by subclasses such as {@link DeleteRequest}
   * or {@link GetRequest}, to avoid code duplication.  It produces the same
   * output as the 5-argument overload called without values and with no
   * additional fields.
   * @param classname The name of the class of the caller.
   * @param family A possibly null family name.
   * @param qualifiers A non-empty list of qualifiers or null.
   * @return The string representation.
   */
  final String toStringWithQualifiers(final String classname,
                                      final byte[] family,
                                      final byte[][] qualifiers) {
    final byte[][] no_values = null;
    final String no_extra_fields = "";
    return toStringWithQualifiers(classname, family, qualifiers,
                                  no_values, no_extra_fields);
  }


  /**
   * Helper for subclass's {@link #toString} implementations.
   * <p>
   * This is used by subclasses such as {@link DeleteRequest}
   * or {@link GetRequest}, to avoid code duplication.  The output lists, in
   * this order: the table, key, family, qualifiers, values (only if not
   * null), the additional fields, the attempt count and the region.
   * @param classname The name of the class of the caller.
   * @param family A possibly null family name.
   * @param qualifiers A non-empty list of qualifiers or null.
   * @param values A non-empty list of values or null.
   * @param fields Additional fields to include in the output.
   * @return The string representation.
   */
  final String toStringWithQualifiers(final String classname,
                                      final byte[] family,
                                      final byte[][] qualifiers,
                                      final byte[][] values,
                                      final String fields) {
    final int size_hint = 256 + fields.length();  // min=182
    final StringBuilder out = new StringBuilder(size_hint);
    out.append(classname);
    out.append("(table=");
    Bytes.pretty(out, table);
    out.append(", key=");
    Bytes.pretty(out, key);
    out.append(", family=");
    Bytes.pretty(out, family);
    out.append(", qualifiers=");
    Bytes.pretty(out, qualifiers);
    if (values != null) {
      out.append(", values=");
      Bytes.pretty(out, values);
    }
    out.append(fields);
    out.append(", attempt=");
    out.append(attempt);
    out.append(", region=");
    if (region != null) {
      region.toStringbuf(out);
    } else {
      out.append("null");
    }
    out.append(')');
    return out.toString();
  }

  /**
   * Helper for subclass's {@link #toString} implementations.
   * <p>
   * This is used by subclasses such as {@link DeleteRequest}
   * or {@link GetRequest}, to avoid code duplication.  The output lists, in
   * this order: the table, key, family, qualifier, the additional fields,
   * the attempt count and the region.
   * @param classname The name of the class of the caller.
   * @param family A possibly null family name.
   * @param qualifier A possibly null column qualifier.
   * @param fields Additional fields to include in the output.
   * @return The string representation.
   */
  final String toStringWithQualifier(final String classname,
                                     final byte[] family,
                                     final byte[] qualifier,
                                     final String fields) {
    final int size_hint = 256 + fields.length();  // min=181
    final StringBuilder out = new StringBuilder(size_hint);
    out.append(classname);
    out.append("(table=");
    Bytes.pretty(out, table);
    out.append(", key=");
    Bytes.pretty(out, key);
    out.append(", family=");
    Bytes.pretty(out, family);
    out.append(", qualifier=");
    Bytes.pretty(out, qualifier);
    out.append(fields);
    out.append(", attempt=");
    out.append(attempt);
    out.append(", region=");
    if (region != null) {
      region.toStringbuf(out);
    } else {
      out.append("null");
    }
    out.append(')');
    return out.toString();
  }

  // --------------------- //
  // RPC utility functions //
  // --------------------- //

  /*
   * The remainder of this file is just a whole bunch of functions to make
   * it easier to deal with the absolutely horrible Hadoop RPC protocol.
   *
   * One should assume that all the following functions can throw an
   * IndexOutOfBoundsException when reading past the end of a buffer
   * or writing past the end of a fixed-length buffer.
   *
   * A number of functions, particularly those reading something, will
   * throw an IllegalArgumentException if the buffer they're asked to
   * parse contains junk or otherwise corrupted or suspicious data.
   */

  /**
   * Allocates a fixed-length heap buffer with room reserved for the RPC
   * header, and positions the writer index right after that reserved room.
   * @param server_version What RPC protocol version the server is running.
   * @param max_payload_size A good approximation of the size of the payload.
   * The approximation must be an upper bound on the expected size of the
   * payload: the buffer is fixed-length, so trying to store more than
   * {@code max_payload_size} bytes in it will cause an
   * {@link IndexOutOfBoundsException}.
   * @return A new buffer whose reader index is 0 and whose writer index
   * points just past the space reserved for the header.
   */
  final ChannelBuffer newBuffer(final byte server_version,
                                final int max_payload_size) {
    // Space needed by the RPC header common to all versions:
    //   4 bytes: Payload size (always present, even in HBase 0.95+).
    //   4 bytes: RPC ID.
    //   2 bytes: Length of the method name.
    //   N bytes: The method name.
    int header_size = 4 + 4 + 2 + method(server_version).length;
    if (server_version >= RegionClient.SERVER_VERSION_092_OR_ABOVE) {
      // Extra header fields used in HBase 0.92 and above:
      //   1 byte:  RPC header version.
      //   8 bytes: Client version.
      //   4 bytes: Method fingerprint.
      header_size += 1 + 8 + 4;
    }
    // Note: with HBase 0.95 and up, the size of the protobuf header varies.
    // It is currently made of (see RequestHeader in RPC.proto):
    //   - uint32 callId: varint 1 to 5 bytes
    //   - RPCTInfo traceInfo: two uint64 varint so 4 to 20 bytes.
    //   - string methodName: varint length (1 byte) and method name.
    //   - bool requestParam: 1 byte
    //   - CellBlockMeta cellBlockMeta: one uint32 varint so 2 to 6 bytes.
    // Additionally each field costs an extra 1 byte, and there is a varint
    // prior to the header for the size of the header.  We don't set traceInfo
    // right now so that leaves us with 4 fields for a total maximum size of
    // 1 varint + 4 fields + 5 + 1 + N + 1 + 6 = 18 bytes max + method name.
    // Since for HBase 0.92 we reserve 19 bytes, we're good, we over-allocate
    // at most 1 byte.  So the sizing above doesn't need to change for 0.95+.
    final ChannelBuffer rpc_buf =
      ChannelBuffers.buffer(header_size + max_payload_size);
    // Leave the header area untouched for now: start writing right after it.
    rpc_buf.setIndex(0, header_size);
    return rpc_buf;
  }

  /**
   * Serializes the given protobuf object into a Netty {@link ChannelBuffer}.
   * <p>
   * The protobuf is written at the end of the backing array, prefixed by a
   * varint holding its serialized size.  The first
   * {@code 4 + 19 + method.length} bytes are left untouched.
   * @param method The name of the method of the RPC we're going to send.
   * @param pb The protobuf to serialize.
   * @return A new channel buffer containing the serialized protobuf, with
   * enough free space at the beginning to tack on the RPC header.
   */
  static final ChannelBuffer toChannelBuffer(final byte[] method,
                                             final AbstractMessageLite pb) {
    final int pb_size = pb.getSerializedSize();
    final int varint_size = CodedOutputStream.computeRawVarint32Size(pb_size);
    // Room left at the front of the array for the RPC header.
    final int reserved = 4 + 19 + method.length;
    final int body_size = varint_size + pb_size;
    final byte[] bytes = new byte[reserved + body_size];
    final CodedOutputStream out =
      CodedOutputStream.newInstance(bytes, reserved, body_size);
    try {
      out.writeRawVarint32(pb_size);
      pb.writeTo(out);
      // Sanity check: we must have filled exactly the space we sized.
      out.checkNoSpaceLeft();
    } catch (IOException e) {
      throw new RuntimeException("Should never happen", e);
    }
    return ChannelBuffers.wrappedBuffer(bytes);
  }

  /**
   * Writes a {@link Boolean boolean} as an HBase RPC parameter.
   * <p>
   * Two bytes are written: a type code followed by 1 for {@code true}
   * or 0 for {@code false}.
   * @param buf The buffer to serialize the boolean to.
   * @param b The boolean value to serialize.
   */
  static void writeHBaseBool(final ChannelBuffer buf, final boolean b) {
    final int type_code = 1;  // Code for Boolean.class in HbaseObjectWritable
    final int value;
    if (b) {
      value = 0x01;
    } else {
      value = 0x00;
    }
    buf.writeByte(type_code);
    buf.writeByte(value);
  }

  /**
   * Writes an {@link Integer int} as an HBase RPC parameter.
   * <p>
   * A 1-byte type code is written first, followed by the 4-byte value.
   * @param buf The buffer to serialize the integer to.
   * @param v The value to serialize.
   */
  static void writeHBaseInt(final ChannelBuffer buf, final int v) {
    final int type_code = 5;  // Code for Integer.class in HbaseObjectWritable
    buf.writeByte(type_code);
    buf.writeInt(v);
  }

  /**
   * Writes a {@link Long long} as an HBase RPC parameter.
   * <p>
   * A 1-byte type code is written first, followed by the 8-byte value.
   * @param buf The buffer to serialize the long to.
   * @param v The value to serialize.
   */
  static void writeHBaseLong(final ChannelBuffer buf, final long v) {
    final int type_code = 6;  // Code for Long.class in HbaseObjectWritable
    buf.writeByte(type_code);
    buf.writeLong(v);
  }

  /**
   * Writes a {@link String} as an HBase RPC parameter.
   * <p>
   * A 1-byte type code is written first, followed by the UTF-8 encoding of
   * the string, itself prefixed by its length in bytes as a VLong.
   * @param buf The buffer to serialize the string to.
   * @param s The string to serialize.
   */
  static void writeHBaseString(final ChannelBuffer buf, final String s) {
    final int type_code = 10;  // Code for String.class in HbaseObjectWritable
    buf.writeByte(type_code);
    final byte[] utf8 = s.getBytes(CharsetUtil.UTF_8);
    final int length = utf8.length;
    writeVLong(buf, length);
    buf.writeBytes(utf8);
  }

  /**
   * Writes a byte array as an HBase RPC parameter.
   * <p>
   * A 1-byte type code is written first, followed by the length of the
   * array as a VLong and then the bytes of the array.
   * @param buf The buffer to serialize the byte array to.
   * @param b The byte array to serialize.
   */
  static void writeHBaseByteArray(final ChannelBuffer buf, final byte[] b) {
    final int type_code = 11;  // Code for byte[].class in HbaseObjectWritable
    buf.writeByte(type_code);
    // Length-prefixed array, same layout as produced by writeByteArray.
    writeVLong(buf, b.length);
    buf.writeBytes(b);
  }

  /**
   * Writes a length-prefixed byte array.
   * <p>
   * The length is written as a VLong, followed by the bytes themselves.
   * @param buf The buffer to serialize the byte array to.
   * @param b The byte array to serialize.
   */
  static void writeByteArray(final ChannelBuffer buf, final byte[] b) {
    final int length = b.length;
    writeVLong(buf, length);
    buf.writeBytes(b);
  }

  /**
   * Serializes a `null' reference.
   * <p>
   * Three type-code bytes are written: `Writable', `NullInstance' and
   * `Writable' again.
   * @param buf The buffer to write to.
   */
  static void writeHBaseNull(final ChannelBuffer buf) {
    final int writable_code = 14;       // Code type for `Writable'.
    final int null_instance_code = 17;  // Code type for `NullInstance'.
    buf.writeByte(writable_code);
    buf.writeByte(null_instance_code);
    buf.writeByte(writable_code);
  }

  /**
   * Upper bound on the size of a byte array we de-serialize.
   * <p>
   * This is to prevent HBase from OOM'ing us, should there be a bug or
   * undetected corruption of an RPC on the network, which would turn
   * an innocuous RPC into something allocating a ton of memory.
   * The Hadoop RPC protocol doesn't do any checksumming as they probably
   * assumed that TCP checksums would be sufficient (they're not).
   * <p>
   * This is a bit mask, not a length: a length is acceptable only if none
   * of the bits set in this mask are set in the length.  The largest
   * acceptable length is therefore {@code ~MAX_BYTE_ARRAY_MASK}.
   */
  static final long MAX_BYTE_ARRAY_MASK =
    0xFFFFFFFFF0000000L;  // => max = 2^28 - 1 == 268435455 (just under 256MB)

  /**
   * Verifies that the given length looks like a reasonable array length.
   * <p>
   * A length is reasonable when it is between 0 and
   * {@code ~MAX_BYTE_ARRAY_MASK}, both inclusive.
   * @param buf The buffer from which the length was read.  Only used to
   * build the error message.
   * @param length The length to validate.
   * @throws IllegalArgumentException if the length is negative or
   * suspiciously large.
   */
  static void checkArrayLength(final ChannelBuffer buf, final long length) {
    // One test covers both failure modes: a high bit being set means the
    // value is either too large, or negative (most-significant bit set).
    if ((length & MAX_BYTE_ARRAY_MASK) == 0) {
      return;
    }
    if (length >= 0) {
      throw new IllegalArgumentException("Read byte array length that's too"
        + " large: " + length + " > " + ~MAX_BYTE_ARRAY_MASK + " in buf="
        + buf + '=' + Bytes.pretty(buf));
    }
    throw new IllegalArgumentException("Read negative byte array length: "
      + length + " in buf=" + buf + '=' + Bytes.pretty(buf));
  }

  /**
   * Verifies that the given array isn't unreasonably big.
   * <p>
   * Empty arrays are accepted.  The largest accepted length is
   * {@code ~MAX_BYTE_ARRAY_MASK}.
   * @param array The array to check.
   * @throws IllegalArgumentException if the length of the array is
   * suspiciously large.
   * @throws NullPointerException if the array is {@code null}.
   */
  static void checkArrayLength(final byte[] array) {
    if ((array.length & MAX_BYTE_ARRAY_MASK) == 0) {
      return;  // None of the forbidden high bits are set: length is fine.
    }
    if (array.length >= 0) {
      // Don't dump the gigantic byte array in the exception message.
      throw new IllegalArgumentException("Byte array length too big: "
        + array.length + " > " + ~MAX_BYTE_ARRAY_MASK);
    }
    // Not possible unless there's a JVM bug.
    throw new AssertionError("Negative byte array length: "
                             + array.length + ' ' + Bytes.pretty(array));
  }

  /**
   * Verifies that the given length looks like a reasonable, non-zero array
   * length.
   * <p>
   * Same as {@link #checkArrayLength(ChannelBuffer, long)} except that 0
   * is rejected too.
   * @param buf The buffer from which the length was read.  Only used to
   * build the error message.
   * @param length The length to validate.
   * @throws IllegalArgumentException if the length is zero, negative or
   * suspiciously large.
   */
  static void checkNonEmptyArrayLength(final ChannelBuffer buf,
                                       final long length) {
    if (length != 0) {
      // Non-zero: defer to the regular negative / too-large checks.
      checkArrayLength(buf, length);
      return;
    }
    throw new IllegalArgumentException("Read zero-length byte array "
      + " in buf=" + buf + '=' + Bytes.pretty(buf));
  }

  /**
   * Reads a length-prefixed byte array.
   * <p>
   * The length is read as a VLong and validated before the array is
   * allocated, then that many bytes are read from the buffer.
   * @param buf The buffer from which to read the array.
   * @return A possibly empty but guaranteed non-{@code null} byte array.
   * @throws IllegalArgumentException if the length we read for the byte array
   * is out of reasonable bounds.
   */
  static byte[] readByteArray(final ChannelBuffer buf) {
    final long size = readVLong(buf);
    // Validate before allocating: the length comes straight off the wire.
    checkArrayLength(buf, size);
    final byte[] array = new byte[(int) size];
    buf.readBytes(array);
    return array;
  }

  /**
   * Reads a string in the format handled by
   * {@code hadoop.io.WritableUtils#readString}: a 4-byte length followed
   * by that many bytes, which are decoded as UTF-8.
   * @param buf The buffer from which to read the string.
   * @return The string read.
   * @throws IllegalArgumentException if the length we read for the string
   * is out of reasonable bounds.
   */
  static String readHadoopString(final ChannelBuffer buf) {
    final int size = buf.readInt();
    // Validate before allocating: the length comes straight off the wire.
    checkArrayLength(buf, size);
    final byte[] utf8 = new byte[size];
    buf.readBytes(utf8);
    return new String(utf8, CharsetUtil.UTF_8);
  }

  /**
   * De-serializes a protobuf from the given buffer.
   * <p>
   * The protobuf is assumed to be prefixed by a varint indicating its size.
   * The reader index of the buffer is advanced past the protobuf before
   * parsing is attempted.
   * @param buf The buffer to de-serialize the protobuf from.
   * @param parser The protobuf parser to use for this type of protobuf.
   * @return An instance of the de-serialized type.
   * @throws IllegalArgumentException if the size read is out of reasonable
   * bounds.
   * @throws InvalidResponseException if the buffer contained an invalid
   * protobuf that couldn't be de-serialized.
   */
  static <T> T readProtobuf(final ChannelBuffer buf, final Parser<T> parser) {
    final int size = HBaseRpc.readProtoBufVarint(buf);
    HBaseRpc.checkArrayLength(buf, size);
    final byte[] bytes;
    final int start;
    if (!buf.hasArray()) {
      // No accessible backing array: we have to copy the entire payload
      // out of the buffer :(
      bytes = new byte[size];
      buf.readBytes(bytes);
      start = 0;
    } else {
      // Zero copy: parse straight out of the backing array.
      final int reader_index = buf.readerIndex();
      bytes = buf.array();
      start = buf.arrayOffset() + reader_index;
      buf.readerIndex(reader_index + size);  // Consume the payload.
    }
    try {
      return parser.parseFrom(bytes, start, size);
    } catch (InvalidProtocolBufferException e) {
      final String msg = "Invalid RPC response: length=" + size
        + ", payload=" + Bytes.pretty(bytes);
      LOG.error("Invalid RPC from buffer: " + buf);
      throw new InvalidResponseException(msg, e);
    }
  }

  // -------------------------------------- //
  // Variable-length integer value encoding //
  // -------------------------------------- //

  /*
   * Unofficial documentation of the Hadoop VLong encoding
   * *****************************************************
   *
   * The notation used to refer to binary numbers here is `0b' followed by
   * the bits, as is printed by Python's built-in `bin' function for example.
   *
   * Values between
   *   -112 0b10010000
   * and
   *    127 0b01111111
   * (inclusive) are encoded on a single byte using their normal
   * representation.  The boundary "-112" sounds weird at first (and it is)
   * but it'll become clearer once you understand the format.
   *
   * Values outside of the boundaries above are encoded by first having
   * 1 byte of meta-data followed by a variable number of bytes that make up
   * the value being encoded.
   *
   * The "meta-data byte" always starts with the prefix 0b1000.  Its format
   * is as follows:
   *   1 0 0 0 | S | L L L
   * The bit `S' is the sign bit (1 = positive value, 0 = negative, yes
   * that's weird, I would've done it the other way around).
   * The 3 bits labeled `L' indicate how many bytes make up this variable
   * length value.  They're encoded like so:
   *   1 1 1 = 1 byte follows
   *   1 1 0 = 2 bytes follow
   *   1 0 1 = 3 bytes follow
   *   1 0 0 = 4 bytes follow
   *   0 1 1 = 5 bytes follow
   *   0 1 0 = 6 bytes follow
   *   0 0 1 = 7 bytes follow
   *   0 0 0 = 8 bytes follow
   * Yes, this is weird too, it goes backwards, requires more operations to
   * convert the length into something human readable, and makes sorting the
   * numbers unnecessarily complicated.
   * Notice that the prefix wastes 3 bits.  Also, there's no "VInt", all
   * variable length encoded values are eventually transformed to `long'.
   *
   * The remaining bytes are just the original number, as-is, without the
   * unnecessary leading bytes (that are all zeroes).
   *
   * Examples:
   *   42 is encoded as                   00101010 (as-is, 1 byte)
   *  127 is encoded as                   01111111 (as-is, 1 byte)
   *  128 is encoded as          10001111 10000000 (2 bytes)
   *  255 is encoded as          10001111 11111111 (2 bytes)
   *  256 is encoded as 10001110 00000001 00000000 (3 bytes)
   *   -1 is encoded as                   11111111 (as-is, 1 byte)
   *  -42 is encoded as                   11010110 (as-is, 1 byte)
   * -112 is encoded as                   10010000 (as-is, 1 byte)
   * -113 is encoded as          10000111 01110000 (2 bytes)
   * -256 is encoded as          10000111 11111111 (2 bytes)
   * -257 is encoded as 10000110 00000001 00000000 (3 bytes)
   *
   * The implementations of writeVLong and readVLong below are on average
   * 14% faster than Hadoop's implementation given a uniformly distributed
   * input (lots of values of all sizes), and can be up to 40% faster on
   * certain input sizes (e.g. big values that fit on 8 bytes).  This is due
   * to two main things: fewer arithmetic and logic operations, and processing
   * multiple bytes together when possible.
   * Reading is about 6% faster than writing (negligible difference).
   * My MacBook Pro with a 2.66 GHz Intel Core 2 Duo easily does 5000 calls to
   * readVLong or writeVLong per millisecond.
   *
   * However, since we use Netty, we don't have to deal with the stupid Java
   * I/O library, so unlike Hadoop we don't use DataOutputStream and
   * ByteArrayOutputStream, instead we use ChannelBuffer.  This gives us a
   * significant extra performance boost over Hadoop.  The 14%-60% difference
   * above becomes a 70% to 80% difference!  Yes, that's >4 times faster!  With
   * the code below my MacBook Pro with a 2.66 GHz Intel Core 2 Duo easily
   * does 11000 writeVLong/ms or 13500 readVLong/ms (notice that reading is
   * 18% faster) when using a properly sized dynamicBuffer.  When using a
   * fixed-size buffer, writing (14200/ms) is almost as fast as reading
   * (14500/ms).
   *
   * So there's really no reason on Earth to use java.io.  Its API is horrible
   * and so is its performance.
   */

  /**
   * Writes a variable-length {@link Long} value.
   * <p>
   * Values from -112 to 127 (inclusive) are written as-is on one byte.
   * Any other value is written as one meta-data byte, which carries the
   * sign and the number of bytes that follow, and then the 1 to 8 bytes of
   * the magnitude, most significant byte first.  For negative values the
   * magnitude written is the one's complement of the value.
   * @param buf The buffer to write to.
   * @param n The value to write.
   */
  static void writeVLong(final ChannelBuffer buf, long n) {
    // All those values can be encoded on 1 byte.
    if (-112 <= n && n <= 127) {
      buf.writeByte((byte) n);
      return;
    }

    // Meta-data byte before the length is subtracted from it.  Both start
    // values have the high bit set.  Subtracting the number of bytes (1-8)
    // leaves the sign bit (0x08) set for positive values and cleared for
    // negative ones, and leaves "8 - number of bytes" in the low 3 bits.
    final int meta_base;
    if (n < 0) {
      n = ~n;            // Encode the one's complement, which is >= 0.
      meta_base = 0x88;  // 0b10001000
    } else {
      meta_base = 0x90;  // 0b10010000
    }

    // `n' can't be 0 here (it's at least 112), so it has at least one
    // significant bit.  Round the number of significant bits up to bytes.
    final int significant_bits = 64 - Long.numberOfLeadingZeros(n);
    final int nbytes = (significant_bits + 7) >>> 3;

    buf.writeByte((byte) (meta_base - nbytes));
    switch (nbytes) {
      case 8:
        buf.writeLong(n);
        break;
      case 7:
        buf.writeInt((int) (n >>> 24));
        buf.writeMedium((int) n);
        break;
      case 6:
        buf.writeMedium((int) (n >>> 24));
        buf.writeMedium((int) n);
        break;
      case 5:
        buf.writeByte((byte) (n >>> 32));
        buf.writeInt((int) n);
        break;
      case 4:
        buf.writeInt((int) n);
        break;
      case 3:
        buf.writeMedium((int) n);
        break;
      case 2:
        buf.writeShort((short) n);
        break;
      default:  // nbytes == 1
        buf.writeByte((byte) n);
    }
  }

  /**
   * Reads a variable-length {@link Long} value.
   * <p>
   * This is the inverse of {@link #writeVLong}: either a single byte
   * holding the value as-is, or a meta-data byte (high nibble
   * {@code 0b1000}) followed by 1 to 8 bytes of magnitude, most significant
   * byte first.
   * @param buf The buffer to read from.
   * @return The value read.
   */
  @SuppressWarnings("fallthrough")
  static long readVLong(final ChannelBuffer buf) {
    byte b = buf.readByte();
    // Unless the first half of the first byte is 0b1000, we're
    // dealing with a single-byte value.
    if ((b & 0xF0) != 0x80) {  // 0xF0 = 0b11110000, 0x80 = 0b10000000
      return b;
    }

    // The value is negative if the 5th bit is 0.
    final boolean negate = (b & 0x08) == 0;    // 0x08 = 0b00001000
    long result = 0;
    switch (b & 0x07) {  // Look at the low 3 bits (the length).
      case 0x00:  // 8 bytes follow.
        result = buf.readLong();
        break;
      case 0x01:  // 7 bytes follow: 4 high bytes, then 3 low bytes.
        result = buf.readUnsignedInt();
        // Make room for the 3 low bytes only.  Shifting by 32 here would
        // leave a one-byte hole and corrupt every 7-byte value.
        result <<= 24;
        result |= buf.readUnsignedMedium();
        break;
      case 0x02:  // 6 bytes follow: 3 high bytes, then 3 low bytes.
        result = buf.readUnsignedMedium();
        result <<= 24;
        result |= buf.readUnsignedMedium();
        break;
      case 0x03:  // 5 bytes follow: 1 high byte, then fall through for 4.
        b = buf.readByte();
        result = b & 0xFF;
      case 0x04:  // 4 bytes follow (or remain, when coming from above).
        result <<= 32;
        result |= buf.readUnsignedInt();
        break;
      case 0x05:  // 3 bytes follow.
        result = buf.readUnsignedMedium();
        break;
      case 0x06:  // 2 bytes follow.
        result = buf.readUnsignedShort();
        break;
      case 0x07:  // 1 byte follows.
        b = buf.readByte();
        result = b & 0xFF;
    }
    // Negative values are stored as their one's complement.
    return negate ? ~result : result;
  }

  /**
   * Reads a 32-bit variable-length integer value as used in Protocol Buffers.
   * <p>
   * Each byte contributes its low 7 bits, least significant group first,
   * and has its high bit set when another byte follows.  At most 5 bytes
   * are read.
   * @param buf The buffer to read from.
   * @return The integer read.
   * @throws IllegalArgumentException if the 5th byte read still has its
   * high bit set, meaning the value doesn't fit on 32 bits.
   */
  static int readProtoBufVarint(final ChannelBuffer buf) {
    int value = 0;
    // The first 4 bytes each provide 7 bits, at bit offsets 0, 7, 14 and 21.
    for (int shift = 0; shift < 28; shift += 7) {
      final byte group = buf.readByte();
      value |= (group & 0x7F) << shift;
      if (group >= 0) {  // High bit clear: this was the last byte.
        return value;
      }
    }
    // The 5th byte provides the remaining top 4 bits and must be the last.
    final byte b = buf.readByte();
    value |= b << 28;
    if (b >= 0) {
      return value;
    }
    throw new IllegalArgumentException("Not a 32 bit varint: " + value
                                       + " (5th byte: " + b + ")");
  }

}