/*
 * Copyright (C) 2010-2012  The Async HBase Authors.  All rights reserved.
 * This file is part of Async HBase.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *   - Redistributions of source code must retain the above copyright notice,
 *     this list of conditions and the following disclaimer.
 *   - Redistributions in binary form must reproduce the above copyright notice,
 *     this list of conditions and the following disclaimer in the documentation
 *     and/or other materials provided with the distribution.
 *   - Neither the name of the StumbleUpon nor the names of its contributors
 *     may be used to endorse or promote products derived from this software
 *     without specific prior written permission.
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
package org.hbase.async;

import java.util.Comparator;
import java.util.Arrays;

import com.google.protobuf.InvalidProtocolBufferException;
import com.google.protobuf.ByteString;
import org.jboss.netty.buffer.ChannelBuffer;
import org.jboss.netty.buffer.ChannelBuffers;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.hbase.async.generated.HBasePB;
import static org.hbase.async.HBaseClient.EMPTY_ARRAY;

/**
 * Stores basic information about a region.
 * <p>
 * Two instances are considered equal (and compare as equal) when they have
 * the same region name; the table and stop key do not take part in
 * {@link #equals}, {@link #hashCode} or {@link #compareTo}.
 */
final class RegionInfo implements Comparable<RegionInfo> {

  private static final Logger LOG = LoggerFactory.getLogger(RegionInfo.class);

  private final byte[] table;

  // The region name is of the form:
  //   table_name,start_key,timestamp[.MD5.]
  // So it contains the start_key.
  private final byte[] region_name;

  private final byte[] stop_key;

  /**
   * Constructor.
   * @param table The name of the table this region belongs to.
   * @param region_name The name of the region.
   * @param stop_key The stop key of the region.  An empty array is replaced
   * by the shared {@link HBaseClient#EMPTY_ARRAY} instance.
   */
  public RegionInfo(final byte[] table, final byte[] region_name,
                    final byte[] stop_key) {
    this.table = table;
    this.region_name = region_name;
    // Canonicalize empty stop keys onto the shared empty array.
    this.stop_key = stop_key.length == 0 ? EMPTY_ARRAY : stop_key;
  }

  /** Returns the name of the table this region belongs to. */
  public byte[] table() {
    return table;
  }

  /** Returns the name of the region. */
  public byte[] name() {
    return region_name;
  }

  /** Returns the stop key (exclusive) of this region. */
  public byte[] stopKey() {
    return stop_key;
  }

  /**
   * Returns the protobuf representation of this region.
   * The specifier is of type {@code REGION_NAME} and carries a copy of the
   * region name.
   */
  HBasePB.RegionSpecifier toProtobuf() {
    return HBasePB.RegionSpecifier.newBuilder()
      .setType(HBasePB.RegionSpecifier.RegionSpecifierType.REGION_NAME)
      .setValue(ByteString.copyFrom(region_name))
      .build();
  }

  /**
   * Creates a new {@link RegionInfo} from a META {@link KeyValue}.
   * @param kv The {@link KeyValue} to use, which is assumed to be from
   * the cell {@code info:regioninfo} of a {@code .META.} region.
   * @param out_start_key A {@code new byte[1][]}.
   * The start row of the region will be stored in {@code out_start_key[0]}.
   * Think "pointer-to-pointer" in Java (yeah!).
   * @return A newly created {@link RegionInfo}.
   * If calling {@link #table} on the object returned gives a reference to
   * {@link HBaseClient#EMPTY_ARRAY}, then the META entry indicates that the
   * region has been split (and thus this entry shouldn't be used).
   * @throws RegionOfflineException if the META entry indicates that the
   * region is offline.
   * @throws BrokenMetaException if the {@link KeyValue} seems invalid.
   * @throws IllegalStateException if the first byte of the value isn't a
   * supported region info version.
   */
  static RegionInfo fromKeyValue(final KeyValue kv,
                                 final byte[][] out_start_key) {
    final byte[] value = kv.value();
    if (value.length == 0) {
      // Without this guard the version lookup below would fail with an
      // ArrayIndexOutOfBoundsException instead of the documented exception.
      throw BrokenMetaException.badKV(null, "an `info:regioninfo' cell"
                                      + " has an empty value", kv);
    }
    final byte version = value[0];
    switch (version) {
      case 0:   // pre 0.92 -- fall through.
      case 1:   // 0.92 to 0.94
        return deserializeOldRegionInfo(kv, out_start_key);
      case 80:  // 0.95+
        return deserializeProtobufRegionInfo(kv, out_start_key);
      default:
        throw new IllegalStateException("Unsupported region info version: "
                                        + version + " in .META. entry: " + kv);
    }
  }

  /**
   * Creates a new {@link RegionInfo} from a pre-0.95 META {@link KeyValue}.
   * @param kv The {@link KeyValue} whose value holds the serialized region.
   * @param out_start_key Receives the start key in {@code out_start_key[0]}.
   * @throws RegionOfflineException if the region is marked offline and not
   * split.
   * @throws BrokenMetaException if the region name has no comma in it.
   */
  private static RegionInfo deserializeOldRegionInfo(final KeyValue kv,
                                                     final byte[][] out_start_key) {
    final ChannelBuffer buf = ChannelBuffers.wrappedBuffer(kv.value());
    buf.readByte();  // Skip the version.
    // version 1 was introduced in HBase 0.92 (see HBASE-451).
    // The differences between v0 and v1 are irrelevant to us,
    // as we only look at the first few fields, and they didn't
    // change across these 2 versions.
    final byte[] stop_key = HBaseRpc.readByteArray(buf);
    final boolean offline = buf.readByte() != 0;
    final long region_id = buf.readLong();
    final byte[] region_name = HBaseRpc.readByteArray(buf);
    // TODO(tsuna): Can we easily de-dup this array with another RegionInfo?
    final byte[] table;
    try {
      table = tableFromRegionName(region_name);
    } catch (IllegalArgumentException e) {
      throw BrokenMetaException.badKV(null, "an `info:regioninfo' cell"
                                      + " has a " + e.getMessage(), kv);
    }
    final boolean split = buf.readByte() != 0;
    final byte[] start_key = HBaseRpc.readByteArray(buf);
    // Table description and hash code are left, but we don't care.

    if (LOG.isDebugEnabled()) {
      LOG.debug("Got " + Bytes.pretty(table) + "'s region ["
                + Bytes.pretty(start_key) + '-' + Bytes.pretty(stop_key)
                + ") offline=" + offline + ", region_id=" + region_id
                + ", region_name=" + Bytes.pretty(region_name)
                + ", split=" + split);
    }

    // RegionServers set both `offline' and `split' to `false' on the parent
    // region after it's been split.  We normally don't expect to ever observe
    // such regions as any META lookup should find the new daughter regions.
    // But just in case, we make sure to not throw an exception in this case.
    if (offline && !split) {
      throw new RegionOfflineException(region_name);
    }

    // If the region has been split, we put a special marker instead of
    // the table name to indicate that this region has been split.
    final RegionInfo region = new RegionInfo(split ? EMPTY_ARRAY : table,
                                             region_name, stop_key);
    out_start_key[0] = start_key;
    return region;
  }

  /**
   * Creates a new {@link RegionInfo} from a 0.95+ META {@link KeyValue}.
   * The value is expected to start with the 4-byte
   * {@link HBaseClient#PBUF_MAGIC} followed by a serialized
   * {@code HBasePB.RegionInfo}.  The region name is taken from the key of
   * the {@link KeyValue}.
   * @param kv The {@link KeyValue} whose value holds the serialized region.
   * @param out_start_key Receives the start key in {@code out_start_key[0]}.
   * @throws RegionOfflineException if the region is marked offline and not
   * split.
   * @throws BrokenMetaException if the magic number is missing or wrong, or
   * if the protobuf can't be decoded.
   */
  private static RegionInfo deserializeProtobufRegionInfo(final KeyValue kv,
                                                          final byte[][] out_start_key) {
    final byte[] value = kv.value();
    if (value.length < 4) {
      // Not enough bytes to even hold the magic number read below.
      throw BrokenMetaException.badKV(null, "the value is too short to"
                                      + " contain the magic number", kv);
    }
    final int magic = Bytes.getInt(value);
    if (magic != HBaseClient.PBUF_MAGIC) {
      throw BrokenMetaException.badKV(null, "the magic number is invalid", kv);
    }
    final HBasePB.RegionInfo pb;
    try {
      // Skip the 4 bytes of magic number.
      pb = HBasePB.RegionInfo.PARSER.parseFrom(value, 4, value.length - 4);
    } catch (InvalidProtocolBufferException e) {
      throw new BrokenMetaException("Failed to decode " + Bytes.pretty(value), e);
    }
    final byte[] table = Bytes.get(pb.getTableName().getQualifier());
    final byte[] start_key = Bytes.get(pb.getStartKey());
    final byte[] stop_key = Bytes.get(pb.getEndKey());
    final byte[] region_name = kv.key();
    final boolean offline = pb.getOffline();
    final boolean split = pb.getSplit();
    // XXX what to do with the `recovering' field?

    // Same rule as for the pre-0.95 format: a region that is both offline
    // and split is tolerated and reported through the split marker instead.
    if (offline && !split) {
      throw new RegionOfflineException(region_name);
    }
    out_start_key[0] = start_key;
    return new RegionInfo(split ? EMPTY_ARRAY : table, region_name, stop_key);
  }

  /**
   * Given the name of a region, returns the name of the table it belongs to.
   * @param region_name The name of the region.
   * @return A new array containing everything before the first comma found
   * at index 1 or later.
   * @throws IllegalArgumentException if the name of the region is malformed.
   */
  static byte[] tableFromRegionName(final byte[] region_name) {
    int comma = 1;  // Can't be at the beginning.
    for (/**/; comma < region_name.length; comma++) {
      if (region_name[comma] == ',') {
        break;
      }
    }
    if (comma == region_name.length) {
      throw new IllegalArgumentException("Malformed region name, contains no"
        + " comma: " + Bytes.pretty(region_name));
    }
    return Arrays.copyOf(region_name, comma);
  }

  /** Orders regions by comparing their region names byte by byte. */
  @Override
  public int compareTo(final RegionInfo other) {
    return Bytes.memcmp(region_name, other.region_name);
  }

  /** Two regions are equal if and only if their region names are equal. */
  @Override
  public boolean equals(final Object other) {
    if (!(other instanceof RegionInfo)) {  // Also covers `null'.
      return false;
    }
    return compareTo((RegionInfo) other) == 0;
  }

  /**
   * Returns a hash code derived from the region name only.
   * {@link #equals} only looks at the region name, so hashing anything else
   * (such as the table, which is replaced by a marker for split regions)
   * would let two equal instances have different hash codes.
   */
  @Override
  public int hashCode() {
    return Arrays.hashCode(region_name);
  }

  /** Returns a hint as to how many bytes are needed for {@link #toString}. */
  int stringSizeHint() {
    return 48  // boilerplate
      + table.length + 2
      // region_name and stop_key are likely to contain non-ascii characters,
      // so let's multiply its length by 2 to avoid re-allocations.
      + region_name.length * 2 + stop_key.length * 2;
  }

  @Override
  public String toString() {
    final StringBuilder buf = new StringBuilder(stringSizeHint());
    toStringbuf(buf);
    return buf.toString();
  }

  /** Like {@link #toString} but puts the output in the given buffer. */
  void toStringbuf(final StringBuilder buf) {
    buf.append("RegionInfo(table=");
    // Identity comparison on purpose: the shared empty array is the marker
    // stored in place of the table name for split regions.
    if (table == EMPTY_ARRAY) {
      buf.append("<NSRE marker>");
    } else {
      Bytes.pretty(buf, table);
    }
    buf.append(", region_name=");
    Bytes.pretty(buf, region_name);
    buf.append(", stop_key=");
    Bytes.pretty(buf, stop_key);
    buf.append(')');
  }

  /** Singleton to compare region names. */
  static final RegionNameCmp REGION_NAME_CMP = new RegionNameCmp();

  /**
   * Comparator for region names.
   * We can't just use {@link Bytes.MEMCMP} because it doesn't play nicely
   * with the way META keys are built as the first region has an empty start
   * key.  Let's assume we know about those 2 regions in our cache:
   * <pre>
   *   .META.,,1
   *   tableA,,1273018455182
   * </pre>
   * We're given an RPC to execute on {@code tableA}, row {@code \000} (1 byte
   * row key containing a 0).  If we use {@code memcmp} to sort the entries in
   * the cache, when we search for the entry right before {@code tableA,\000,:}
   * we'll erroneously find {@code .META.,,1} instead of the entry for first
   * region of {@code tableA}.
   * <p>
   * Since this scheme breaks natural ordering, we need this comparator to
   * implement a special version of {@code memcmp} to handle this scenario.
   */
  private static final class RegionNameCmp implements Comparator<byte[]> {

    private RegionNameCmp() {  // Can't instantiate outside of this class.
    }

    /**
     * Compares two region names: first by table name, then by start key,
     * then by whatever follows the last comma.
     * @throws NullPointerException if either argument is {@code null}.
     * @throws IllegalArgumentException if either name has no comma after
     * the position where the table name comparison stopped.
     */
    @Override
    public int compare(final byte[] a, final byte[] b) {
      final int length = Math.min(a.length, b.length);
      if (a == b) {  // Do this after accessing a.length and b.length
        return 0;    // in order to NPE if either a or b is null.
      }
      // Reminder: region names are of the form:
      //   table_name,start_key,timestamp[.MD5.]
      // First compare the table names.
      int i;
      for (i = 0; i < length; i++) {
        final byte ai = a[i];  // Saves one pointer dereference every iteration.
        final byte bi = b[i];  // Saves one pointer dereference every iteration.
        if (ai != bi) {  // The name of the tables differ.
          if (ai == ',') {
            return -1001;  // `a' has a smaller table name.  a < b
          } else if (bi == ',') {
            return 1001;   // `b' has a smaller table name.  a > b
          }
          return (ai & 0xFF) - (bi & 0xFF);  // "promote" to unsigned.
        }
        if (ai == ',') {  // Remember: at this point ai == bi.
          break;  // We're done comparing the table names.  They're equal.
        }
      }

      // Now find the last comma in both `a' and `b'.  We need to start the
      // search from the end as the row key could have an arbitrary number of
      // commas and we don't know its length.
      final int a_comma = findCommaFromEnd(a, i);
      final int b_comma = findCommaFromEnd(b, i);
      // If either `a' or `b' is followed immediately by another comma, then
      // they are the first region (it's the empty start key).
      i++;  // No need to check against `length', there MUST be more bytes.

      // Compare keys.
      final int first_comma = Math.min(a_comma, b_comma);
      for (/*nothing*/; i < first_comma; i++) {
        final byte ai = a[i];
        final byte bi = b[i];
        if (ai != bi) {  // The keys differ.
          return (ai & 0xFF) - (bi & 0xFF);  // "promote" to unsigned.
        }
      }
      if (a_comma < b_comma) {
        return -1002;  // `a' has a shorter key.  a < b
      } else if (b_comma < a_comma) {
        return 1002;   // `b' has a shorter key.  a > b
      }

      // Keys have the same length and have compared identical.  Compare the
      // rest, which essentially means: use start code as a tie breaker.
      for (/*nothing*/; i < length; i++) {
        final byte ai = a[i];
        final byte bi = b[i];
        if (ai != bi) {  // The start codes differ.
          return (ai & 0xFF) - (bi & 0xFF);  // "promote" to unsigned.
        }
      }
      return a.length - b.length;
    }

    /**
     * Returns the index of the last comma in {@code b} that is strictly
     * after {@code offset}.
     * @throws IllegalArgumentException if there's no such comma.
     */
    private static int findCommaFromEnd(final byte[] b, final int offset) {
      for (int i = b.length - 1; i > offset; i--) {
        if (b[i] == ',') {
          return i;
        }
      }
      throw new IllegalArgumentException("No comma found in " + Bytes.pretty(b)
                                         + " after offset " + offset);
    }

  }

}