// This file is part of OpenTSDB.
// Copyright (C) 2011-2012 The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version. This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details. You should have received a copy
// of the GNU Lesser General Public License along with this program. If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.core;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.PriorityQueue;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

import com.stumbleupon.async.Callback;
import com.stumbleupon.async.Deferred;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.hbase.async.Bytes;
import org.hbase.async.HBaseRpc;
import org.hbase.async.KeyValue;
import org.hbase.async.PleaseThrottleException;

import net.opentsdb.meta.Annotation;
import net.opentsdb.stats.StatsCollector;
import net.opentsdb.utils.JSON;

/**
 * "Queue" of rows to compact.
 * <p>
 * Whenever we write a data point to HBase, the row key we write to is added
 * to this queue, which is effectively a sorted set. There is a separate
 * thread that periodically goes through the queue and looks for "old rows" to
 * compact. A row is considered "old" if the timestamp in the row key is
 * older than a certain threshold.
 * <p>
 * The compaction process consists of reading all the cells within a given row
 * and writing them back out as a single big cell. Once that write succeeds,
 * we delete all the individual little cells.
 * <p>
 * This process is effective because in HBase the row key is repeated for
 * every single cell, and because there is no way to efficiently append bytes
 * to the end of a cell, we have to do this instead.
 */
final class CompactionQueue extends ConcurrentSkipListMap<byte[], Boolean> {

  private static final Logger LOG =
    LoggerFactory.getLogger(CompactionQueue.class);

  /**
   * How many items are currently in the queue.
   * Because {@link ConcurrentSkipListMap#size} has O(N) complexity.
   */
  private final AtomicInteger size = new AtomicInteger();

  private final AtomicLong duplicates_different = new AtomicLong();
  private final AtomicLong duplicates_same = new AtomicLong();
  private final AtomicLong compaction_count = new AtomicLong();
  private final AtomicLong written_cells = new AtomicLong();
  private final AtomicLong deleted_cells = new AtomicLong();

  /** The {@code TSDB} instance we belong to. */
  private final TSDB tsdb;

  /** On how many bytes do we encode metrics IDs. */
  private final short metric_width;

  /** How frequently the compaction thread wakes up to flush stuff. */
  private final int flush_interval;  // seconds

  /** Minimum number of rows we'll attempt to compact at once. */
  private final int min_flush_threshold;  // rows

  /** Maximum number of rows we'll compact concurrently. */
  private final int max_concurrent_flushes;  // rows

  /** If this is X then we'll flush X times faster than we really need. */
  private final int flush_speed;  // multiplicative factor
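
  // For reference, the row keys held in this queue follow the usual OpenTSDB
  // layout (widths shown are the common defaults; salt and UID widths are
  // configurable):
  //
  //   [salt][metric UID: 3 bytes][base_time: 4 bytes][tagk UID][tagv UID]...
  //
  // base_time is an unsigned UNIX timestamp in seconds aligned on a
  // Const.MAX_TIMESPAN boundary, which is why flush() below can read it at
  // offset Const.SALT_WIDTH() + metric_width and compare it against a cutoff.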

  /**
   * Constructor.
   * @param tsdb The TSDB we belong to.
   */
  public CompactionQueue(final TSDB tsdb) {
    super(new Cmp(tsdb));
    this.tsdb = tsdb;
    metric_width = tsdb.metrics.width();
    flush_interval = tsdb.config.getInt(
        "tsd.storage.compaction.flush_interval");
    min_flush_threshold = tsdb.config.getInt(
        "tsd.storage.compaction.min_flush_threshold");
    max_concurrent_flushes = tsdb.config.getInt(
        "tsd.storage.compaction.max_concurrent_flushes");
    flush_speed = tsdb.config.getInt("tsd.storage.compaction.flush_speed");
    if (tsdb.config.enable_compactions()) {
      startCompactionThread();
    }
  }

  @Override
  public int size() {
    return size.get();
  }

  public void add(final byte[] row) {
    if (super.put(row, Boolean.TRUE) == null) {
      size.incrementAndGet();  // We added a new entry, count it.
    }
  }

  /**
   * Forces a flush of all the old entries in the compaction queue.
   * @return A deferred that will be called back once everything has been
   * flushed (or something failed, in which case the deferred will carry the
   * exception). In case of success, the kind of object returned is
   * unspecified.
   */
  public Deferred<ArrayList<Object>> flush() {
    final int size = size();
    if (size > 0) {
      LOG.info("Flushing all old outstanding rows out of " + size + " rows");
    }
    final long now = System.currentTimeMillis();
    return flush(now / 1000 - Const.MAX_TIMESPAN - 1, Integer.MAX_VALUE);
  }

  /**
   * Collects the stats and metrics tracked by this instance.
   * @param collector The collector to use.
   */
  void collectStats(final StatsCollector collector) {
    collector.record("compaction.count", compaction_count);
    collector.record("compaction.duplicates", duplicates_same, "type=identical");
    collector.record("compaction.duplicates", duplicates_different, "type=variant");
    if (!tsdb.config.enable_compactions()) {
      return;
    }
    // The remaining stats only make sense with compactions enabled.
    collector.record("compaction.queue.size", size);
    collector.record("compaction.errors", handle_read_error.errors, "rpc=read");
    collector.record("compaction.errors", handle_write_error.errors, "rpc=put");
    collector.record("compaction.errors", handle_delete_error.errors, "rpc=delete");
    collector.record("compaction.writes", written_cells);
    collector.record("compaction.deletes", deleted_cells);
  }
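
  // Both the public flush() above and the background compaction thread use
  // "now - Const.MAX_TIMESPAN - 1" (in seconds) as the cutoff passed to the
  // private flush() below, i.e. a row only becomes eligible for compaction
  // once its entire MAX_TIMESPAN-wide time span has passed.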

  /**
   * Flushes all the rows in the compaction queue older than the cutoff time.
   * @param cut_off A UNIX timestamp in seconds (unsigned 32-bit integer).
   * @param maxflushes How many rows to flush off the queue at once.
   * This integer is expected to be strictly positive.
   * @return A deferred that will be called back once everything has been
   * flushed.
   */
  private Deferred<ArrayList<Object>> flush(final long cut_off, int maxflushes) {
    assert maxflushes > 0: "maxflushes must be > 0, but I got " + maxflushes;
    // We can't possibly flush more entries than size().
    maxflushes = Math.min(maxflushes, size());
    if (maxflushes == 0) {  // Because size() might be 0.
      return Deferred.fromResult(new ArrayList<Object>(0));
    }
    final ArrayList<Deferred<Object>> ds =
      new ArrayList<Deferred<Object>>(Math.min(maxflushes,
                                               max_concurrent_flushes));
    int nflushes = 0;
    int seed = (int) (System.nanoTime() % 3);
    for (final byte[] row : this.keySet()) {
      if (maxflushes == 0) {
        break;
      }
      if (seed == row.hashCode() % 3) {
        continue;
      }
      final long base_time = Bytes.getUnsignedInt(row,
          Const.SALT_WIDTH() + metric_width);
      if (base_time > cut_off) {
        break;
      } else if (nflushes == max_concurrent_flushes) {
        // We kicked off the compaction of too many rows already, let's wait
        // until they're done before kicking off more.
        break;
      }
      // You'd think that it would be faster to grab an iterator on the map
      // and then call remove() on the iterator to "unlink" the element
      // directly from where the iterator is at, but no, the JDK implements
      // it by calling remove(key) so it has to lookup the key again anyway.
      if (super.remove(row) == null) {  // We didn't remove anything.
        continue;  // So someone else already took care of this entry.
      }
      nflushes++;
      maxflushes--;
      size.decrementAndGet();
      ds.add(tsdb.get(row).addCallbacks(compactcb, handle_read_error));
    }
    final Deferred<ArrayList<Object>> group = Deferred.group(ds);
    if (nflushes == max_concurrent_flushes && maxflushes > 0) {
      // We're not done yet. Once this group of flushes completes, we need
      // to kick off more.
      tsdb.getClient().flush();  // Speed up this batch by telling the client to flush.
      final int maxflushez = maxflushes;  // Make it final for closure.
      final class FlushMoreCB implements Callback<Deferred<ArrayList<Object>>,
                                                  ArrayList<Object>> {
        @Override
        public Deferred<ArrayList<Object>> call(final ArrayList<Object> arg) {
          return flush(cut_off, maxflushez);
        }
        @Override
        public String toString() {
          return "Continue flushing with cut_off=" + cut_off
            + ", maxflushes=" + maxflushez;
        }
      }
      group.addCallbackDeferring(new FlushMoreCB());
    }
    return group;
  }

  private final CompactCB compactcb = new CompactCB();

  /**
   * Callback to compact a row once it's been read.
   * <p>
   * This is used once the "get" completes, to actually compact the row and
   * write back the compacted version.
   */
  private final class CompactCB implements Callback<Object, ArrayList<KeyValue>> {
    @Override
    public Object call(final ArrayList<KeyValue> row) {
      return compact(row, null);
    }
    @Override
    public String toString() {
      return "compact";
    }
  }

  /**
   * Compacts a row into a single {@link KeyValue}.
   * @param row The row containing all the KVs to compact.
   * Must contain at least one element.
   * @param annotations A supplied list to which any annotations encountered
   * in the row will be added.
   * @return A compacted version of this row.
   */
  KeyValue compact(final ArrayList<KeyValue> row,
                   List<Annotation> annotations) {
    final KeyValue[] compacted = { null };
    compact(row, compacted, annotations);
    return compacted[0];
  }

  /**
   * Maintains state for a single compaction; exists to break the steps down
   * into manageable pieces without having to worry about returning multiple
   * values and passing many parameters around.
   *
   * @since 2.1
   */
  private class Compaction {

    // parameters for the compaction
    private final ArrayList<KeyValue> row;
    private final KeyValue[] compacted;
    private final List<Annotation> annotations;
    private final int nkvs;

    // keeps a list of KeyValues to be deleted
    private final List<KeyValue> to_delete;

    // heap of columns, ordered by increasing timestamp
    private PriorityQueue<ColumnDatapointIterator> heap;

    // true if any ms-resolution datapoints have been seen in the row
    private boolean ms_in_row;
    // true if any s-resolution datapoints have been seen in the row
    private boolean s_in_row;

    // KeyValue containing the longest datapoint qualifier in the row, used to
    // optimize checking whether the compacted qualifier already exists.
    private KeyValue longest;

    // the latest append column. If set then we don't want to re-write the row
    // and if we only had a single column with a single value, we return this.
    private KeyValue last_append_column;

    public Compaction(ArrayList<KeyValue> row, KeyValue[] compacted,
                      List<Annotation> annotations) {
      nkvs = row.size();
      this.row = row;
      this.compacted = compacted;
      this.annotations = annotations;
      to_delete = new ArrayList<KeyValue>(nkvs);
    }

    /**
     * Check if there are no fixups or merges required. This will be the case
     * when:
     * <ul>
     *  <li>there are no columns in the heap</li>
     *  <li>there is only one single-valued column needing no fixups</li>
     * </ul>
     *
     * @return true if we know no additional work is required
     */
    private boolean noMergesOrFixups() {
      switch (heap.size()) {
        case 0:
          // no data points, nothing to do
          return true;
        case 1:
          // only one column, check to see if it needs fixups
          ColumnDatapointIterator col = heap.peek();
          // either a single 2-byte (second) qualifier or a single 4-byte (ms)
          // qualifier, and no fixups required
          return (col.qualifier.length == 2 || (col.qualifier.length == 4
              && Internal.inMilliseconds(col.qualifier))) && !col.needsFixup();
        default:
          // more than one column, need to merge
          return false;
      }
    }

    /**
     * Perform the compaction.
     *
     * @return A {@link Deferred} if the compaction process required a write
     * to HBase, otherwise {@code null}.
     */
    public Deferred<Object> compact() {
      // no columns in row, nothing to do
      if (nkvs == 0) {
        return null;
      }

      // go through all the columns, process annotations, and build a heap of
      // the columns that contain data points
      heap = new PriorityQueue<ColumnDatapointIterator>(nkvs);
      int tot_values = buildHeapProcessAnnotations();

      // if there are no datapoints or only one that needs no fixup, we are done
      if (noMergesOrFixups()) {
        // return the single non-annotation entry if requested
        if (compacted != null && heap.size() == 1) {
          compacted[0] = findFirstDatapointColumn();
        }
        return null;
      }

      // merge the datapoints, ordered by timestamp and removing duplicates
      final ByteBufferList compacted_qual = new ByteBufferList(tot_values);
      final ByteBufferList compacted_val = new ByteBufferList(tot_values);
      compaction_count.incrementAndGet();
      mergeDatapoints(compacted_qual, compacted_val);

      // if we wound up with no data in the compacted column, we are done
      if (compacted_qual.segmentCount() == 0) {
        return null;
      }

      // build the compacted column
      final KeyValue compact = buildCompactedColumn(compacted_qual, compacted_val);

      final boolean write = updateDeletesCheckForWrite(compact);

      if (compacted != null) {  // Caller is interested in the compacted form.
        compacted[0] = compact;
        final long base_time = Bytes.getUnsignedInt(compact.key(),
            Const.SALT_WIDTH() + metric_width);
        final long cut_off = System.currentTimeMillis() / 1000
            - Const.MAX_TIMESPAN - 1;
        if (base_time > cut_off) {  // If row is too recent...
          return null;              // ... Don't write back compacted.
        }
      }

      // if compactions aren't enabled or there is nothing to write, we're done
      if (!tsdb.config.enable_compactions() || (!write && to_delete.isEmpty())) {
        return null;
      }

      final byte[] key = compact.key();
      //LOG.debug("Compacting row " + Arrays.toString(key));
      deleted_cells.addAndGet(to_delete.size());  // We're going to delete this.
      if (write) {
        written_cells.incrementAndGet();
        Deferred<Object> deferred = tsdb.put(key, compact.qualifier(), compact.value());
        if (!to_delete.isEmpty()) {
          deferred = deferred.addCallbacks(new DeleteCompactedCB(to_delete),
              handle_write_error);
        }
        return deferred;
      } else if (last_append_column == null) {
        // We had nothing to write, because one of the cells is already the
        // correctly compacted version, so we can go ahead and delete the
        // individual cells directly.
        new DeleteCompactedCB(to_delete).call(null);
        return null;
      } else {
        return null;
      }
    }

    /**
     * Find the first datapoint column in a row. It may be an append column.
     *
     * @return the first found datapoint column in the row, or null if none
     */
    private KeyValue findFirstDatapointColumn() {
      if (last_append_column != null) {
        return last_append_column;
      }
      for (final KeyValue kv : row) {
        if (isDatapoint(kv)) {
          return kv;
        }
      }
      return null;
    }
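
    // Qualifier formats assumed below (standard OpenTSDB encoding):
    //  - 2 bytes: a second-resolution data point (offset from base_time plus
    //    format flags);
    //  - 4 bytes starting with the 0xF nibble: a millisecond-resolution point;
    //  - other even lengths: an already (partially) compacted column, i.e. a
    //    concatenation of the above;
    //  - odd lengths: extended formats such as annotations or append columns,
    //    distinguished by their first (prefix) byte.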

    /**
     * Build a heap of columns containing datapoints. Assumes that
     * non-datapoint columns are never merged. Adds datapoint columns to the
     * list of cells to be deleted.
     *
     * @return an estimate of the number of total values present, which may be high
     */
    private int buildHeapProcessAnnotations() {
      int tot_values = 0;

      for (final KeyValue kv : row) {
        byte[] qual = kv.qualifier();
        int len = qual.length;
        if ((len & 1) != 0) {
          // process annotations and other extended formats
          if (qual[0] == Annotation.PREFIX()) {
            annotations.add(JSON.parseToObject(kv.value(), Annotation.class));
          } else if (qual[0] == AppendDataPoints.APPEND_COLUMN_PREFIX) {
            final AppendDataPoints adp = new AppendDataPoints();
            tot_values += adp.parseKeyValue(tsdb, kv).size();
            last_append_column = new KeyValue(kv.key(), kv.family(),
                adp.qualifier(), kv.timestamp(), adp.value());
            if (longest == null ||
                longest.qualifier().length < last_append_column.qualifier().length) {
              longest = last_append_column;
            }
            final ColumnDatapointIterator col =
                new ColumnDatapointIterator(last_append_column);
            if (col.hasMoreData()) {
              heap.add(col);
            }
          } else {
            LOG.warn("Ignoring unexpected extended format type " + qual[0]);
          }
          continue;
        }
        // estimate number of points based on the size of the first entry
        // in the column; if ms/sec datapoints are mixed, this will be
        // incorrect, which will cost a reallocation/copy
        final int entry_size = Internal.inMilliseconds(qual) ? 4 : 2;
        tot_values += (len + entry_size - 1) / entry_size;
        if (longest == null || longest.qualifier().length < kv.qualifier().length) {
          longest = kv;
        }
        ColumnDatapointIterator col = new ColumnDatapointIterator(kv);
        if (col.hasMoreData()) {
          heap.add(col);
        }
        to_delete.add(kv);
      }
      return tot_values;
    }
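
    // The merge below works like a k-way merge: the column whose current
    // datapoint has the smallest timestamp offset is popped from the heap,
    // its datapoint is appended to the output buffers, and the column is
    // pushed back onto the heap if it still has data left.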

    /**
     * Process datapoints from the heap in order, merging them into a sorted
     * list. Duplicate timestamps are handled by keeping the value that was
     * merged first and discarding later ones; if the values differ, this is
     * either logged or treated as an error depending on the
     * tsd.storage.fix_duplicates setting.
     *
     * @param compacted_qual qualifiers for sorted datapoints
     * @param compacted_val values for sorted datapoints
     */
    private void mergeDatapoints(ByteBufferList compacted_qual,
                                 ByteBufferList compacted_val) {
      int prevTs = -1;
      while (!heap.isEmpty()) {
        final ColumnDatapointIterator col = heap.remove();
        final int ts = col.getTimestampOffsetMs();
        if (ts == prevTs) {
          // check to see if it is a complete duplicate, or if the value changed
          final byte[] existingVal = compacted_val.getLastSegment();
          final byte[] discardedVal = col.getCopyOfCurrentValue();
          if (!Arrays.equals(existingVal, discardedVal)) {
            duplicates_different.incrementAndGet();
            if (!tsdb.config.fix_duplicates()) {
              throw new IllegalDataException("Duplicate timestamp for key="
                  + Arrays.toString(row.get(0).key()) + ", ms_offset=" + ts
                  + ", older=" + Arrays.toString(existingVal)
                  + ", newer=" + Arrays.toString(discardedVal)
                  + "; set tsd.storage.fix_duplicates=true to fix automatically"
                  + " or run Fsck");
            }
            LOG.warn("Duplicate timestamp for key="
                + Arrays.toString(row.get(0).key()) + ", ms_offset=" + ts
                + ", kept=" + Arrays.toString(existingVal)
                + ", discarded=" + Arrays.toString(discardedVal));
          } else {
            duplicates_same.incrementAndGet();
          }
        } else {
          prevTs = ts;
          col.writeToBuffers(compacted_qual, compacted_val);
          ms_in_row |= col.isMilliseconds();
          s_in_row |= !col.isMilliseconds();
        }
        if (col.advance()) {
          // there is still more data in this column, so add it back to the heap
          heap.add(col);
        }
      }
    }
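
    // The compacted column produced below is simply the concatenation of all
    // the individual qualifiers followed by the concatenation of all the
    // individual values, plus (when there is more than one value) a trailing
    // metadata byte whose only flag currently indicates whether second- and
    // millisecond-resolution datapoints are mixed in the same column.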

    /**
     * Build the compacted column from the list of byte buffers that were
     * merged together.
     *
     * @param compacted_qual list of merged qualifiers
     * @param compacted_val list of merged values
     *
     * @return {@link KeyValue} instance for the compacted column
     */
    private KeyValue buildCompactedColumn(ByteBufferList compacted_qual,
                                          ByteBufferList compacted_val) {
      // metadata is a single byte for a multi-value column, otherwise nothing
      final int metadata_length = compacted_val.segmentCount() > 1 ? 1 : 0;
      final byte[] cq = compacted_qual.toBytes(0);
      final byte[] cv = compacted_val.toBytes(metadata_length);

      // add the metadata flag, which right now only includes whether we mix
      // s/ms datapoints
      if (metadata_length > 0) {
        byte metadata_flag = 0;
        if (ms_in_row && s_in_row) {
          metadata_flag |= Const.MS_MIXED_COMPACT;
        }
        cv[cv.length - 1] = metadata_flag;
      }

      final KeyValue first = row.get(0);
      return new KeyValue(first.key(), first.family(), cq, cv);
    }

    /**
     * Make sure we don't delete the cell that is the result of the compaction,
     * so we remove the compacted value from the list of cells to delete if it
     * is there. Also, if one or more columns were appends then we don't want
     * to mess with the row for now.
     *
     * @param compact the compacted column
     * @return true if we need to write the compacted value
     */
    private boolean updateDeletesCheckForWrite(KeyValue compact) {
      if (last_append_column != null) {
        // TODO appends are involved so we may want to squash dps into the
        // append or vice-versa.
        return false;
      }

      // if the longest entry isn't as long as the compacted one, obviously the
      // compacted one can't have already existed
      if (longest != null
          && longest.qualifier().length >= compact.qualifier().length) {
        final Iterator<KeyValue> deleteIterator = to_delete.iterator();
        while (deleteIterator.hasNext()) {
          final KeyValue cur = deleteIterator.next();
          if (Arrays.equals(cur.qualifier(), compact.qualifier())) {
            // the compacted column already existed, so remove it from the list
            // to delete
            deleteIterator.remove();
            // if the key and value are the same, we don't need to write it
            return !Arrays.equals(cur.value(), compact.value());
          }
        }
      }
      return true;
    }
  }

  /**
   * Check if a particular column is a datapoint column (as opposed to
   * annotation or other extended formats).
   *
   * @param kv column to check
   * @return true if the column represents one or more datapoints
   */
  protected static boolean isDatapoint(KeyValue kv) {
    return (kv.qualifier().length & 1) == 0;
  }

  /**
   * Compacts a row into a single {@link KeyValue}.
   * <p>
   * If the {@code row} is empty, this function does literally nothing.
   * If {@code compacted} is not {@code null}, then the compacted form of this
   * {@code row} will be stored in {@code compacted[0]}. Obviously, if the
   * {@code row} contains a single cell, then that cell is the compacted form.
   * Otherwise the compaction process takes place.
   * @param row The row containing all the KVs to compact. Must be non-null.
   * @param compacted If non-null, the first item in the array will be set to
   * a {@link KeyValue} containing the compacted form of this row.
   * If non-null, we will also not write the compacted form back to HBase
   * unless the timestamp in the row key is old enough.
   * @param annotations A supplied list to which all encountered annotations
   * will be added.
   * @return A {@link Deferred} if the compaction process required a write
   * to HBase, otherwise {@code null}.
   */
  Deferred<Object> compact(final ArrayList<KeyValue> row,
                           final KeyValue[] compacted,
                           List<Annotation> annotations) {
    return new Compaction(row, compacted, annotations).compact();
  }

  /**
   * Callback to delete a row that's been successfully compacted.
   */
  private final class DeleteCompactedCB implements Callback<Object, Object> {

    /** What we're going to delete. */
    private final byte[] key;
    private final byte[][] qualifiers;

    public DeleteCompactedCB(final List<KeyValue> cells) {
      final KeyValue first = cells.get(0);
      key = first.key();
      qualifiers = new byte[cells.size()][];
      for (int i = 0; i < qualifiers.length; i++) {
        qualifiers[i] = cells.get(i).qualifier();
      }
    }

    @Override
    public Object call(final Object arg) {
      return tsdb.delete(key, qualifiers).addErrback(handle_delete_error);
    }

    @Override
    public String toString() {
      return "delete compacted cells";
    }
  }

  private final HandleErrorCB handle_read_error = new HandleErrorCB("read");
  private final HandleErrorCB handle_write_error = new HandleErrorCB("write");
  private final HandleErrorCB handle_delete_error = new HandleErrorCB("delete");

  /**
   * Callback to handle exceptions during the compaction process.
   */
  private final class HandleErrorCB implements Callback<Object, Exception> {

    private volatile int errors;

    private final String what;

    /**
     * Constructor.
     * @param what String describing what kind of operation (e.g. "read").
     */
    public HandleErrorCB(final String what) {
      this.what = what;
    }

    @Override
    public Object call(final Exception e) {
      if (e instanceof PleaseThrottleException) {  // HBase isn't keeping up.
        final HBaseRpc rpc = ((PleaseThrottleException) e).getFailedRpc();
        if (rpc instanceof HBaseRpc.HasKey) {
          // We failed to compact this row. Whether it's because of a failed
          // get, put or delete, we should re-schedule this row for a future
          // compaction.
          add(((HBaseRpc.HasKey) rpc).key());
          return Boolean.TRUE;  // We handled it, so don't return an exception.
        } else {
          // Should never get in this clause.
          LOG.error("WTF? Cannot retry this RPC, and this shouldn't happen: " + rpc);
        }
      }
      // `++' is not atomic but it doesn't matter if we miss some increments.
      if (++errors % 100 == 1) {  // Basic rate-limiting to not flood logs.
        LOG.error("Failed to " + what + " a row to re-compact", e);
      }
      return e;
    }

    @Override
    public String toString() {
      return "handle " + what + " error";
    }
  }

  static final long serialVersionUID = 1307386642;

  /** Starts a compaction thread. Only one such thread is needed. */
  private void startCompactionThread() {
    final Thrd thread = new Thrd();
    thread.setDaemon(true);
    thread.start();
  }

  /**
   * Background thread to trigger periodic compactions.
   */
  final class Thrd extends Thread {
    public Thrd() {
      super("CompactionThread");
    }

    @Override
    public void run() {
      while (true) {
        try {
          final int size = size();
          // Flush if we have too many rows to recompact.
          // Note that we might not be able to actually flush anything if the
          // rows aren't old enough.
          if (size > min_flush_threshold) {
            // How much should we flush during this iteration? This scheme is
            // adaptive and flushes at a rate that is proportional to the size
            // of the queue, so we flush more aggressively if the queue is big.
            // Let's suppose MAX_TIMESPAN = 1h. We have `size' rows to compact,
            // and we better compact them all in less than 1h, otherwise we're
            // going to "fall behind" after a new hour starts (as we'll be
            // inserting a ton of new rows then). So slice MAX_TIMESPAN using
            // FLUSH_INTERVAL to compute what fraction of `size' we need to
            // flush at each iteration. Note that `size' will usually account
            // for many rows that can't be flushed yet (not old enough) so we're
            // overshooting a bit (flushing more aggressively than necessary).
            // This isn't a problem at all. The only thing that matters is that
            // the rate at which we flush stuff is proportional to how much work
            // is sitting in the queue. The multiplicative factor FLUSH_SPEED
            // is added to make flushing even faster than we need. For example,
            // if FLUSH_SPEED is 2, then instead of taking 1h to flush what we
            // have for the previous hour, we'll take only 30m. This is
            // desirable so that we evict old entries from the queue a bit
            // faster.
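            // For example (using illustrative numbers), with size=10000 rows,
            // flush_interval=10s, flush_speed=2 and MAX_TIMESPAN=3600s, this
            // works out to max(min_flush_threshold, 10000 * 10 * 2 / 3600),
            // i.e. about 55 rows per wake-up, or roughly 20000 rows per hour:
            // twice the queue size, as intended.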
            final int maxflushes = Math.max(min_flush_threshold,
              size * flush_interval * flush_speed / Const.MAX_TIMESPAN);
            final long now = System.currentTimeMillis();
            flush(now / 1000 - Const.MAX_TIMESPAN - 1, maxflushes);
            if (LOG.isDebugEnabled()) {
              final int newsize = size();
              LOG.debug("flush() took " + (System.currentTimeMillis() - now)
                        + "ms, new queue size=" + newsize
                        + " (" + (newsize - size) + ')');
            }
          }
        } catch (Exception e) {
          LOG.error("Uncaught exception in compaction thread", e);
        } catch (OutOfMemoryError e) {
          // Let's free up some memory by throwing away the compaction queue.
          final int sz = size.get();
          CompactionQueue.super.clear();
          size.set(0);
          LOG.error("Discarded the compaction queue, size=" + sz, e);
        } catch (Throwable e) {
          LOG.error("Uncaught *Throwable* in compaction thread", e);
          // Catching this kind of error is totally unexpected and is really
          // bad. If we do nothing and let this thread die, we'll run out of
          // memory as new entries are added to the queue. We could always
          // commit suicide, but it's kind of drastic and nothing else in the
          // code does this. If `enable_compactions' wasn't final, we could
          // always set it to false, but that's not an option. So in order to
          // try to get a fresh start, let this compaction thread terminate
          // and spin off a new one instead.
          try {
            Thread.sleep(1000);  // Avoid busy looping creating new threads.
          } catch (InterruptedException i) {
            LOG.error("Compaction thread interrupted in error handling", i);
            return;  // Don't flush, we're truly hopeless.
          }
          startCompactionThread();
          return;
        }
        try {
          Thread.sleep(flush_interval * 1000);
        } catch (InterruptedException e) {
          LOG.error("Compaction thread interrupted, doing one last flush", e);
          flush();
          return;
        }
      }
    }
  }

  /**
   * Helper to sort the byte arrays in the compaction queue.
   * <p>
   * This comparator sorts things by timestamp first, so that we can find
   * all the rows of the same age at once.
   */
  private static final class Cmp implements Comparator<byte[]> {

    /** The offset in the row key at which the timestamp starts. */
    private final short timestamp_pos;

    public Cmp(final TSDB tsdb) {
      timestamp_pos = (short) (Const.SALT_WIDTH() + tsdb.metrics.width());
    }

    @Override
    public int compare(final byte[] a, final byte[] b) {
      final int c = Bytes.memcmp(a, b, timestamp_pos, Const.TIMESTAMP_BYTES);
      // If the timestamps are equal, sort according to the entire row key.
      return c != 0 ? c : Bytes.memcmp(a, b);
    }
  }

}