/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mapreduce;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.mapreduce.InputSplit;

/**
 * A table split corresponds to a key range (low, high) and an optional scanner.
 * All references to row below refer to the key of the row.
 */
@InterfaceAudience.Public
public class TableSplit extends InputSplit
    implements Writable, Comparable<TableSplit> {
  /** @deprecated LOG variable would be made private. fix in hbase 3.0 */
  @Deprecated
  public static final Log LOG = LogFactory.getLog(TableSplit.class);

  // should be < 0 (@see #readFields(DataInput))
  // version 1 supports Scan data member
  enum Version {
    UNVERSIONED(0),
    // Initial number we put on TableSplit when we introduced versioning.
    INITIAL(-1),
    // Added an encoded region name field for easier identification of split -> region
    WITH_ENCODED_REGION_NAME(-2);

    final int code;
    static final Version[] byCode;
    static {
      byCode = Version.values();
      // fromCode() indexes byCode with the negated code, so the codes must be 0, -1, -2, ...
      // in declaration order.
      for (int i = 0; i < byCode.length; i++) {
        if (byCode[i].code != -1 * i) {
          throw new AssertionError("Values in this enum should be descending by one");
        }
      }
    }

    Version(int code) {
      this.code = code;
    }

    /**
     * @param other The version to compare against.
     * @return true if this version is the same as or newer than {@code other}. Codes decrease
     *         as versions get newer, hence the {@code <=}.
     */
    boolean atLeast(Version other) {
      return code <= other.code;
    }

    /**
     * Looks up the version for a serialized version code.
     *
     * @param code The serialized code (0 or negative).
     * @return The matching version.
     * @throws IllegalArgumentException If no version is declared for the code.
     */
    static Version fromCode(int code) {
      // Negating Integer.MIN_VALUE stays negative, so the lower-bound check covers it too.
      int index = -code;
      if (index < 0 || index >= byCode.length) {
        throw new IllegalArgumentException("Unknown TableSplit version code: " + code);
      }
      return byCode[index];
    }
  }

  private static final Version VERSION = Version.WITH_ENCODED_REGION_NAME;
  private TableName tableName;
  private byte [] startRow;
  private byte [] endRow;
  private String regionLocation;
  private String encodedRegionName = "";
  private String scan = ""; // stores the serialized form of the Scan
  private long length; // Contains estimation of region size in bytes

  /** Default constructor. */
  public TableSplit() {
    this((TableName)null, null, HConstants.EMPTY_BYTE_ARRAY,
      HConstants.EMPTY_BYTE_ARRAY, "");
  }

  /**
   * Creates a new instance while assigning all variables.
   * Length of region is set to 0
   * Encoded name of the region is set to blank
   *
   * @param tableName  The name of the current table.
   * @param scan The scan associated with this split.
   * @param startRow  The start row of the split.
   * @param endRow  The end row of the split.
   * @param location  The location of the region.
   */
  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
      final String location) {
    this(tableName, scan, startRow, endRow, location, 0L);
  }

  /**
   * Creates a new instance while assigning all variables.
   * Encoded name of region is set to blank
   *
   * @param tableName  The name of the current table.
   * @param scan The scan associated with this split.
   * @param startRow  The start row of the split.
   * @param endRow  The end row of the split.
   * @param location  The location of the region.
   * @param length  Size of region in bytes
   */
  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
      final String location, long length) {
    this(tableName, scan, startRow, endRow, location, "", length);
  }

  /**
   * Creates a new instance while assigning all variables.
   * If the scan cannot be converted to its string form, a warning is logged and the
   * split is created without a scan.
   *
   * @param tableName  The name of the current table.
   * @param scan The scan associated with this split.
   * @param startRow  The start row of the split.
   * @param endRow  The end row of the split.
   * @param location  The location of the region.
   * @param encodedRegionName The region ID.
   * @param length  Size of region in bytes
   */
  public TableSplit(TableName tableName, Scan scan, byte [] startRow, byte [] endRow,
      final String location, final String encodedRegionName, long length) {
    this.tableName = tableName;
    try {
      this.scan =
        (null == scan) ? "" : TableMapReduceUtil.convertScanToString(scan);
    } catch (IOException e) {
      // Best effort: the scan field keeps its "" initial value.
      LOG.warn("Failed to convert Scan to String", e);
    }
    this.startRow = startRow;
    this.endRow = endRow;
    this.regionLocation = location;
    this.encodedRegionName = encodedRegionName;
    this.length = length;
  }

  /**
   * Creates a new instance without a scanner.
   * Length of region is set to 0
   *
   * @param tableName The name of the current table.
   * @param startRow The start row of the split.
   * @param endRow The end row of the split.
   * @param location The location of the region.
   */
  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
      final String location) {
    this(tableName, null, startRow, endRow, location);
  }

  /**
   * Creates a new instance without a scanner.
   *
   * @param tableName The name of the current table.
   * @param startRow The start row of the split.
   * @param endRow The end row of the split.
   * @param location The location of the region.
   * @param length Size of region in bytes
   */
  public TableSplit(TableName tableName, byte[] startRow, byte[] endRow,
      final String location, long length) {
    this(tableName, null, startRow, endRow, location, length);
  }

  /**
   * Returns a Scan object from the stored string representation.
   *
   * @return Returns a Scan object based on the stored scanner.
   * @throws IOException When the stored string cannot be converted back to a Scan.
   */
  public Scan getScan() throws IOException {
    return TableMapReduceUtil.convertStringToScan(this.scan);
  }

  /**
   * Returns the table name converted to a byte array.
   * @see #getTable()
   * @return The table name.
   */
  public byte [] getTableName() {
    return tableName.getName();
  }

  /**
   * Returns the table name.
   *
   * @return The table name.
   */
  public TableName getTable() {
    // It is ugly that usually to get a TableName, the method is called getTableName.  We can't do
    // that in here though because there was an existing getTableName in place already since
    // deprecated.
    return tableName;
  }

  /**
   * Returns the start row.
   *
   * @return The start row.
   */
  public byte [] getStartRow() {
    return startRow;
  }

  /**
   * Returns the end row.
   *
   * @return The end row.
   */
  public byte [] getEndRow() {
    return endRow;
  }

  /**
   * Returns the region location.
   *
   * @return The region's location.
   */
  public String getRegionLocation() {
    return regionLocation;
  }

  /**
   * Returns the region's location as an array.
   *
   * @return The array containing the region location.
   * @see org.apache.hadoop.mapreduce.InputSplit#getLocations()
   */
  @Override
  public String[] getLocations() {
    return new String[] {regionLocation};
  }

  /**
   * Returns the region's encoded name.
   *
   * @return The region's encoded name.
   */
  public String getEncodedRegionName() {
    return encodedRegionName;
  }

  /**
   * Returns the length of the split.
   *
   * @return The length of the split.
   * @see org.apache.hadoop.mapreduce.InputSplit#getLength()
   */
  @Override
  public long getLength() {
    return length;
  }

  /**
   * Reads the values of each field.
   * Fields that the serialized version does not carry (scan, encoded region name) are
   * reset to blank so that a reused instance does not keep values from an earlier read.
   *
   * @param in  The input to read from.
   * @throws IOException When reading the input fails, or when the input carries an
   *   unknown version code or a negative table name length.
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    Version version = Version.UNVERSIONED;
    // TableSplit was not versioned in the beginning.
    // In order to introduce it now, we make use of the fact
    // that tableName was written with Bytes.writeByteArray,
    // which encodes the array length as a vint which is >= 0.
    // Hence if the vint is >= 0 we have an old version and the vint
    // encodes the length of tableName.
    // If < 0 we just read the version and the next vint is the length.
    // @see Bytes#readByteArray(DataInput)
    int len = WritableUtils.readVInt(in);
    if (len < 0) {
      // what we just read was the version
      try {
        version = Version.fromCode(len);
      } catch (IllegalArgumentException e) {
        // Report an unrecognised version as a read failure instead of an index error.
        throw new IOException("Unsupported TableSplit version code: " + len, e);
      }
      len = WritableUtils.readVInt(in);
    }
    if (len < 0) {
      throw new IOException("Negative table name length: " + len);
    }
    byte[] tableNameBytes = new byte[len];
    in.readFully(tableNameBytes);
    tableName = TableName.valueOf(tableNameBytes);
    startRow = Bytes.readByteArray(in);
    endRow = Bytes.readByteArray(in);
    regionLocation = Bytes.toString(Bytes.readByteArray(in));
    if (version.atLeast(Version.INITIAL)) {
      scan = Bytes.toString(Bytes.readByteArray(in));
    } else {
      scan = "";
    }
    length = WritableUtils.readVLong(in);
    if (version.atLeast(Version.WITH_ENCODED_REGION_NAME)) {
      encodedRegionName = Bytes.toString(Bytes.readByteArray(in));
    } else {
      encodedRegionName = "";
    }
  }

  /**
   * Writes the field values to the output, prefixed with the current version code.
   *
   * @param out  The output to write to.
   * @throws IOException When writing the values to the output fails.
   */
  @Override
  public void write(DataOutput out) throws IOException {
    WritableUtils.writeVInt(out, VERSION.code);
    Bytes.writeByteArray(out, tableName.getName());
    Bytes.writeByteArray(out, startRow);
    Bytes.writeByteArray(out, endRow);
    Bytes.writeByteArray(out, Bytes.toBytes(regionLocation));
    Bytes.writeByteArray(out, Bytes.toBytes(scan));
    WritableUtils.writeVLong(out, length);
    Bytes.writeByteArray(out, Bytes.toBytes(encodedRegionName));
  }

  /**
   * Returns the details about this instance as a string.
   *
   * @return The values of this instance as a string.
   * @see java.lang.Object#toString()
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("HBase table split(");
    sb.append("table name: ").append(tableName);
    // null scan input is represented by ""
    String printScan = "";
    if (!scan.equals("")) {
      try {
        // get the real scan here in toString, not the Base64 string
        printScan = TableMapReduceUtil.convertStringToScan(scan).toString();
      }
      catch (IOException e) {
        // toString must not fail; print a blank scan when it cannot be decoded.
        printScan = "";
      }
    }
    sb.append(", scan: ").append(printScan);
    sb.append(", start row: ").append(Bytes.toStringBinary(startRow));
    sb.append(", end row: ").append(Bytes.toStringBinary(endRow));
    sb.append(", region location: ").append(regionLocation);
    sb.append(", encoded region name: ").append(encodedRegionName);
    sb.append(")");
    return sb.toString();
  }

  /**
   * Compares this split against the given one.
   * Only the table name and the start row take part in the ordering.
   *
   * @param split  The split to compare to.
   * @return The result of the comparison.
   * @see java.lang.Comparable#compareTo(java.lang.Object)
   */
  @Override
  public int compareTo(TableSplit split) {
    // If The table name of the two splits is the same then compare start row
    // otherwise compare based on table names
    int tableNameComparison =
        getTable().compareTo(split.getTable());
    return tableNameComparison != 0 ? tableNameComparison : Bytes.compareTo(
        getStartRow(), split.getStartRow());
  }

  /**
   * Two splits are equal when their table name, start row, end row and region location
   * are equal. The scan, encoded region name and length are not compared.
   *
   * @param o The object to compare to.
   * @return true if {@code o} is a TableSplit with the same table, row range and location.
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof TableSplit)) {
      return false;
    }
    TableSplit other = (TableSplit) o;
    // tableName is null on a default-constructed split, so compare null-safely.
    boolean sameTable = (tableName == null)
        ? other.tableName == null : tableName.equals(other.tableName);
    boolean sameLocation = (regionLocation == null)
        ? other.regionLocation == null : regionLocation.equals(other.regionLocation);
    return sameTable &&
      Bytes.equals(startRow, other.startRow) &&
      Bytes.equals(endRow, other.endRow) &&
      sameLocation;
  }

  /**
   * Hashes exactly the fields that {@link #equals(Object)} compares, so that equal splits
   * always have equal hash codes.
   *
   * @return The hash code of this split.
   */
  @Override
  public int hashCode() {
    int result = tableName != null ? tableName.hashCode() : 0;
    result = 31 * result + (startRow != null ? Arrays.hashCode(startRow) : 0);
    result = 31 * result + (endRow != null ? Arrays.hashCode(endRow) : 0);
    result = 31 * result + (regionLocation != null ? regionLocation.hashCode() : 0);
    return result;
  }
}