/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.hadoop.hbase; import java.io.Serializable; import java.util.Comparator; import org.apache.hadoop.hbase.KeyValue.Type; import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.classification.InterfaceStability; import org.apache.hadoop.hbase.util.Bytes; import com.google.common.primitives.Longs; /** * Compare two HBase cells. Do not use this method comparing <code>-ROOT-</code> or * <code>hbase:meta</code> cells. Cells from these tables need a specialized comparator, one that * takes account of the special formatting of the row where we have commas to delimit table from * regionname, from row. See KeyValue for how it has a special comparator to do hbase:meta cells * and yet another for -ROOT-. */ @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="UNKNOWN", justification="Findbugs doesn't like the way we are negating the result of a compare in below") @InterfaceAudience.Private @InterfaceStability.Evolving public class CellComparator implements Comparator<Cell>, Serializable { private static final long serialVersionUID = -8760041766259623329L; @Override public int compare(Cell a, Cell b) { return compare(a, b, false); } /** * Compare cells. * TODO: Replace with dynamic rather than static comparator so can change comparator * implementation. * @param a * @param b * @param ignoreSequenceid True if we are to compare the key portion only and ignore * the sequenceid. Set to false to compare key and consider sequenceid. * @return 0 if equal, -1 if a < b, and +1 if a > b. */ public static int compare(final Cell a, final Cell b, boolean ignoreSequenceid) { // row int c = compareRows(a, b); if (c != 0) return c; c = compareWithoutRow(a, b); if(c != 0) return c; if (!ignoreSequenceid) { // Negate following comparisons so later edits show up first // mvccVersion: later sorts first return Longs.compare(b.getMvccVersion(), a.getMvccVersion()); } else { return c; } } public static int findCommonPrefixInRowPart(Cell left, Cell right, int rowCommonPrefix) { return findCommonPrefix(left.getRowArray(), right.getRowArray(), left.getRowLength() - rowCommonPrefix, right.getRowLength() - rowCommonPrefix, left.getRowOffset() + rowCommonPrefix, right.getRowOffset() + rowCommonPrefix); } private static int findCommonPrefix(byte[] left, byte[] right, int leftLength, int rightLength, int leftOffset, int rightOffset) { int length = Math.min(leftLength, rightLength); int result = 0; while (result < length && left[leftOffset + result] == right[rightOffset + result]) { result++; } return result; } public static int findCommonPrefixInFamilyPart(Cell left, Cell right, int familyCommonPrefix) { return findCommonPrefix(left.getFamilyArray(), right.getFamilyArray(), left.getFamilyLength() - familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix, left.getFamilyOffset() + familyCommonPrefix, right.getFamilyOffset() + familyCommonPrefix); } public static int findCommonPrefixInQualifierPart(Cell left, Cell right, int qualifierCommonPrefix) { return findCommonPrefix(left.getQualifierArray(), right.getQualifierArray(), left.getQualifierLength() - qualifierCommonPrefix, right.getQualifierLength() - qualifierCommonPrefix, left.getQualifierOffset() + qualifierCommonPrefix, right.getQualifierOffset() + qualifierCommonPrefix); } /**************** equals ****************************/ public static boolean equals(Cell a, Cell b){ return equalsRow(a, b) && equalsFamily(a, b) && equalsQualifier(a, b) && equalsTimestamp(a, b) && equalsType(a, b); } public static boolean equalsRow(Cell a, Cell b){ return Bytes.equals( a.getRowArray(), a.getRowOffset(), a.getRowLength(), b.getRowArray(), b.getRowOffset(), b.getRowLength()); } public static boolean equalsFamily(Cell a, Cell b){ return Bytes.equals( a.getFamilyArray(), a.getFamilyOffset(), a.getFamilyLength(), b.getFamilyArray(), b.getFamilyOffset(), b.getFamilyLength()); } public static boolean equalsQualifier(Cell a, Cell b){ return Bytes.equals( a.getQualifierArray(), a.getQualifierOffset(), a.getQualifierLength(), b.getQualifierArray(), b.getQualifierOffset(), b.getQualifierLength()); } public static boolean equalsTimestamp(Cell a, Cell b){ return a.getTimestamp() == b.getTimestamp(); } public static boolean equalsType(Cell a, Cell b){ return a.getTypeByte() == b.getTypeByte(); } public static int compareColumns(final Cell left, final Cell right) { int lfoffset = left.getFamilyOffset(); int rfoffset = right.getFamilyOffset(); int lclength = left.getQualifierLength(); int rclength = right.getQualifierLength(); int lfamilylength = left.getFamilyLength(); int rfamilylength = right.getFamilyLength(); int diff = compare(left.getFamilyArray(), lfoffset, lfamilylength, right.getFamilyArray(), rfoffset, rfamilylength); if (diff != 0) { return diff; } else { return compare(left.getQualifierArray(), left.getQualifierOffset(), lclength, right.getQualifierArray(), right.getQualifierOffset(), rclength); } } public static int compareFamilies(Cell left, Cell right) { return Bytes.compareTo(left.getFamilyArray(), left.getFamilyOffset(), left.getFamilyLength(), right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength()); } public static int compareQualifiers(Cell left, Cell right) { return Bytes.compareTo(left.getQualifierArray(), left.getQualifierOffset(), left.getQualifierLength(), right.getQualifierArray(), right.getQualifierOffset(), right.getQualifierLength()); } public int compareFlatKey(Cell left, Cell right) { int compare = compareRows(left, right); if (compare != 0) { return compare; } return compareWithoutRow(left, right); } /** * Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash) * so can't be treated as plain byte arrays as this method does. */ public static int compareRows(final Cell left, final Cell right) { return Bytes.compareTo(left.getRowArray(), left.getRowOffset(), left.getRowLength(), right.getRowArray(), right.getRowOffset(), right.getRowLength()); } /** * Do not use comparing rows from hbase:meta. Meta table Cells have schema (table,startrow,hash) * so can't be treated as plain byte arrays as this method does. */ public static int compareRows(byte[] left, int loffset, int llength, byte[] right, int roffset, int rlength) { return Bytes.compareTo(left, loffset, llength, right, roffset, rlength); } public static int compareWithoutRow(final Cell leftCell, final Cell rightCell) { // If the column is not specified, the "minimum" key type appears the // latest in the sorted order, regardless of the timestamp. This is used // for specifying the last key/value in a given row, because there is no // "lexicographically last column" (it would be infinitely long). The // "maximum" key type does not need this behavior. // Copied from KeyValue. This is bad in that we can't do memcmp w/ special rules like this. // TODO if (leftCell.getFamilyLength() + leftCell.getQualifierLength() == 0 && leftCell.getTypeByte() == Type.Minimum.getCode()) { // left is "bigger", i.e. it appears later in the sorted order return 1; } if (rightCell.getFamilyLength() + rightCell.getQualifierLength() == 0 && rightCell.getTypeByte() == Type.Minimum.getCode()) { return -1; } boolean sameFamilySize = (leftCell.getFamilyLength() == rightCell.getFamilyLength()); if (!sameFamilySize) { // comparing column family is enough. return Bytes.compareTo(leftCell.getFamilyArray(), leftCell.getFamilyOffset(), leftCell.getFamilyLength(), rightCell.getFamilyArray(), rightCell.getFamilyOffset(), rightCell.getFamilyLength()); } int diff = compareColumns(leftCell, rightCell); if (diff != 0) return diff; diff = compareTimestamps(leftCell, rightCell); if (diff != 0) return diff; // Compare types. Let the delete types sort ahead of puts; i.e. types // of higher numbers sort before those of lesser numbers. Maximum (255) // appears ahead of everything, and minimum (0) appears after // everything. return (0xff & rightCell.getTypeByte()) - (0xff & leftCell.getTypeByte()); } public static int compareTimestamps(final Cell left, final Cell right) { long ltimestamp = left.getTimestamp(); long rtimestamp = right.getTimestamp(); return compareTimestamps(ltimestamp, rtimestamp); } /********************* hashCode ************************/ /** * Returns a hash code that is always the same for two Cells having a matching equals(..) result. * Currently does not guard against nulls, but it could if necessary. */ public static int hashCode(Cell cell){ if (cell == null) {// return 0 for empty Cell return 0; } int hash = calculateHashForKeyValue(cell); hash = 31 * hash + (int)cell.getMvccVersion(); return hash; } /** * Returns a hash code that is always the same for two Cells having a matching * equals(..) result. Currently does not guard against nulls, but it could if * necessary. Note : Ignore mvcc while calculating the hashcode * * @param cell * @return hashCode */ public static int hashCodeIgnoreMvcc(Cell cell) { if (cell == null) {// return 0 for empty Cell return 0; } int hash = calculateHashForKeyValue(cell); return hash; } private static int calculateHashForKeyValue(Cell cell) { //pre-calculate the 3 hashes made of byte ranges int rowHash = Bytes.hashCode(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength()); int familyHash = Bytes.hashCode(cell.getFamilyArray(), cell.getFamilyOffset(), cell.getFamilyLength()); int qualifierHash = Bytes.hashCode(cell.getQualifierArray(), cell.getQualifierOffset(), cell.getQualifierLength()); //combine the 6 sub-hashes int hash = 31 * rowHash + familyHash; hash = 31 * hash + qualifierHash; hash = 31 * hash + (int)cell.getTimestamp(); hash = 31 * hash + cell.getTypeByte(); return hash; } /******************** lengths *************************/ public static boolean areKeyLengthsEqual(Cell a, Cell b) { return a.getRowLength() == b.getRowLength() && a.getFamilyLength() == b.getFamilyLength() && a.getQualifierLength() == b.getQualifierLength(); } public static boolean areRowLengthsEqual(Cell a, Cell b) { return a.getRowLength() == b.getRowLength(); } /*********************common prefixes*************************/ private static int compare(byte[] left, int leftOffset, int leftLength, byte[] right, int rightOffset, int rightLength) { return Bytes.compareTo(left, leftOffset, leftLength, right, rightOffset, rightLength); } public static int compareCommonRowPrefix(Cell left, Cell right, int rowCommonPrefix) { return compare(left.getRowArray(), left.getRowOffset() + rowCommonPrefix, left.getRowLength() - rowCommonPrefix, right.getRowArray(), right.getRowOffset() + rowCommonPrefix, right.getRowLength() - rowCommonPrefix); } public static int compareCommonFamilyPrefix(Cell left, Cell right, int familyCommonPrefix) { return compare(left.getFamilyArray(), left.getFamilyOffset() + familyCommonPrefix, left.getFamilyLength() - familyCommonPrefix, right.getFamilyArray(), right.getFamilyOffset() + familyCommonPrefix, right.getFamilyLength() - familyCommonPrefix); } public static int compareCommonQualifierPrefix(Cell left, Cell right, int qualCommonPrefix) { return compare(left.getQualifierArray(), left.getQualifierOffset() + qualCommonPrefix, left.getQualifierLength() - qualCommonPrefix, right.getQualifierArray(), right.getQualifierOffset() + qualCommonPrefix, right.getQualifierLength() - qualCommonPrefix); } /***************** special cases ****************************/ /** * special case for KeyValue.equals */ public static boolean equalsIgnoreMvccVersion(Cell a, Cell b){ return 0 == compareStaticIgnoreMvccVersion(a, b); } private static int compareStaticIgnoreMvccVersion(Cell a, Cell b) { // row int c = compareRows(a, b); if (c != 0) return c; // family c = compareColumns(a, b); if (c != 0) return c; // timestamp: later sorts first c = compareTimestamps(a, b); if (c != 0) return c; //type c = (0xff & b.getTypeByte()) - (0xff & a.getTypeByte()); return c; } private static int compareTimestamps(final long ltimestamp, final long rtimestamp) { // The below older timestamps sorting ahead of newer timestamps looks // wrong but it is intentional. This way, newer timestamps are first // found when we iterate over a memstore and newer versions are the // first we trip over when reading from a store file. if (ltimestamp < rtimestamp) { return 1; } else if (ltimestamp > rtimestamp) { return -1; } return 0; } /** * Counter part for the KeyValue.RowOnlyComparator */ public static class RowComparator extends CellComparator { @Override public int compare(Cell a, Cell b) { return compareRows(a, b); } } /** * Try to return a Cell that falls between <code>left</code> and <code>right</code> but that is * shorter; i.e. takes up less space. This trick is used building HFile block index. * Its an optimization. It does not always work. In this case we'll just return the * <code>right</code> cell. * @param comparator Comparator to use. * @param left * @param right * @return A cell that sorts between <code>left</code> and <code>right</code>. */ public static Cell getMidpoint(final KeyValue.KVComparator comparator, final Cell left, final Cell right) { // TODO: Redo so only a single pass over the arrays rather than one to compare and then a // second composing midpoint. if (right == null) { throw new IllegalArgumentException("right cell can not be null"); } if (left == null) { return right; } // If Cells from meta table, don't mess around. meta table Cells have schema // (table,startrow,hash) so can't be treated as plain byte arrays. Just skip out without // trying to do this optimization. if (comparator != null && comparator instanceof KeyValue.MetaComparator) { return right; } int diff = compareRows(left, right); if (diff > 0) { throw new IllegalArgumentException("Left row sorts after right row; left=" + CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right)); } if (diff < 0) { // Left row is < right row. byte [] midRow = getMinimumMidpointArray(left.getRowArray(), left.getRowOffset(), left.getRowLength(), right.getRowArray(), right.getRowOffset(), right.getRowLength()); // If midRow is null, just return 'right'. Can't do optimization. if (midRow == null) return right; return CellUtil.createCell(midRow); } // Rows are same. Compare on families. diff = compareFamilies(left, right); if (diff > 0) { throw new IllegalArgumentException("Left family sorts after right family; left=" + CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right)); } if (diff < 0) { byte [] midRow = getMinimumMidpointArray(left.getFamilyArray(), left.getFamilyOffset(), left.getFamilyLength(), right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength()); // If midRow is null, just return 'right'. Can't do optimization. if (midRow == null) return right; // Return new Cell where we use right row and then a mid sort family. return CellUtil.createCell(right.getRowArray(), right.getRowOffset(), right.getRowLength(), midRow, 0, midRow.length, HConstants.EMPTY_BYTE_ARRAY, 0, HConstants.EMPTY_BYTE_ARRAY.length); } // Families are same. Compare on qualifiers. diff = compareQualifiers(left, right); if (diff > 0) { throw new IllegalArgumentException("Left qualifier sorts after right qualifier; left=" + CellUtil.getCellKeyAsString(left) + ", right=" + CellUtil.getCellKeyAsString(right)); } if (diff < 0) { byte [] midRow = getMinimumMidpointArray(left.getQualifierArray(), left.getQualifierOffset(), left.getQualifierLength(), right.getQualifierArray(), right.getQualifierOffset(), right.getQualifierLength()); // If midRow is null, just return 'right'. Can't do optimization. if (midRow == null) return right; // Return new Cell where we use right row and family and then a mid sort qualifier. return CellUtil.createCell(right.getRowArray(), right.getRowOffset(), right.getRowLength(), right.getFamilyArray(), right.getFamilyOffset(), right.getFamilyLength(), midRow, 0, midRow.length); } // No opportunity for optimization. Just return right key. return right; } /** * @param leftArray * @param leftOffset * @param leftLength * @param rightArray * @param rightOffset * @param rightLength * @return Return a new array that is between left and right and minimally sized else just return * null as indicator that we could not create a mid point. */ private static byte [] getMinimumMidpointArray(final byte [] leftArray, final int leftOffset, final int leftLength, final byte [] rightArray, final int rightOffset, final int rightLength) { // rows are different int minLength = leftLength < rightLength ? leftLength : rightLength; short diffIdx = 0; while (diffIdx < minLength && leftArray[leftOffset + diffIdx] == rightArray[rightOffset + diffIdx]) { diffIdx++; } byte [] minimumMidpointArray = null; if (diffIdx >= minLength) { // leftKey's row is prefix of rightKey's. minimumMidpointArray = new byte[diffIdx + 1]; System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1); } else { int diffByte = leftArray[leftOffset + diffIdx]; if ((0xff & diffByte) < 0xff && (diffByte + 1) < (rightArray[rightOffset + diffIdx] & 0xff)) { minimumMidpointArray = new byte[diffIdx + 1]; System.arraycopy(leftArray, leftOffset, minimumMidpointArray, 0, diffIdx); minimumMidpointArray[diffIdx] = (byte) (diffByte + 1); } else { minimumMidpointArray = new byte[diffIdx + 1]; System.arraycopy(rightArray, rightOffset, minimumMidpointArray, 0, diffIdx + 1); } } return minimumMidpointArray; } }