// This file is part of OpenTSDB.
// Copyright (C) 2015 The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version. This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details. You should have received a copy
// of the GNU Lesser General Public License along with this program. If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.core;

import java.util.Collection;
import java.util.Map;
import java.util.TreeMap;

import net.opentsdb.core.Internal.Cell;
import net.opentsdb.utils.DateTime;

import org.hbase.async.Bytes;
import org.hbase.async.KeyValue;
import org.hbase.async.PutRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.stumbleupon.async.Deferred;

/**
 * A class that deals with serializing/deserializing appended data point
 * columns. In busy TSDB installs, appends can save on storage at write time
 * and on network bandwidth, as TSD compactions are no longer necessary. Each
 * data point is concatenated to a byte array in storage. At query time, the
 * values are ordered and de-duped. Optionally the column can be re-written
 * when out of order or duplicate data points are detected.
 * NOTE: This will increase CPU usage on your HBase servers as it has to
 * perform the atomic read-modify-write operation on the column.
 * @since 2.2
 */
public class AppendDataPoints {
  private static final Logger LOG = 
      LoggerFactory.getLogger(AppendDataPoints.class);

  /** The prefix ID of append columns */
  public static final byte APPEND_COLUMN_PREFIX = 0x05;

  /** The full column qualifier for append columns */
  public static final byte[] APPEND_COLUMN_QUALIFIER = new byte[] {
      APPEND_COLUMN_PREFIX, 0x00, 0x00 };

  /** A threshold in seconds: repairs are only written for rows whose base
   * time is older than this. */
  public static final int REPAIR_THRESHOLD = 3600;

  /** Filled with the qualifiers in the compacted data points format after
   * parsing */
  private byte[] qualifier;

  /** Filled with the values in the compacted data points format after
   * parsing */
  private byte[] value;

  /** A deferred that is set if a repaired column was sent to storage */
  private Deferred<Object> repaired_deferred = null;

  /**
   * Default empty ctor
   */
  public AppendDataPoints() {
  }

  /**
   * Creates a new AppendDataPoints object from a qualifier and value. You can
   * then call {@link #getBytes()} to get the payload to write to TSDB.
   * @param qualifier The qualifier with the time offset, type and length flags
   * @param value The value to append
   * @throws IllegalArgumentException if the qualifier or value is null or
   * empty
   */
  public AppendDataPoints(final byte[] qualifier, final byte[] value) {
    if (qualifier == null || qualifier.length < 1) {
      throw new IllegalArgumentException("Qualifier cannot be null or empty");
    }
    if (value == null || value.length < 1) {
      throw new IllegalArgumentException("Value cannot be null or empty");
    }
    this.qualifier = qualifier;
    this.value = value;
  }

  /**
   * Concatenates the qualifier and value for appending to a column in the
   * backing data store.
   * @return A byte array to append to the value of a column.
   */
  public byte[] getBytes() {
    final byte[] bytes = new byte[qualifier.length + value.length];
    System.arraycopy(qualifier, 0, bytes, 0, qualifier.length);
    System.arraycopy(value, 0, bytes, qualifier.length, value.length);
    return bytes;
  }
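
  /*
   * Usage sketch (illustrative only; "timestamp", "flags", "table", "row_key"
   * and "client" are placeholders, not part of this class): TSDB ships the
   * concatenated payload from getBytes() with an asynchbase AppendRequest
   * against the fixed APPEND_COLUMN_QUALIFIER, letting HBase perform the
   * read-modify-write.
   *
   *   final byte[] qualifier = Internal.buildQualifier(timestamp, flags);
   *   final AppendDataPoints adp = new AppendDataPoints(qualifier, value);
   *   final AppendRequest append = new AppendRequest(table, row_key,
   *       TSDB.FAMILY(), AppendDataPoints.APPEND_COLUMN_QUALIFIER,
   *       adp.getBytes());
   *   client.append(append);  // org.hbase.async.HBaseClient
   */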
  /**
   * Parses a column from storage, orders the data points and drops duplicates
   * (keeping the value appended last for a given time offset).
   * The parsing will return a Cell collection for debugging and will populate
   * the concatenated qualifier and value arrays in the compacted data point
   * format so that the results can be merged with other non-append columns or
   * rows.
   * <p>
   * WARNING: If the "tsd.core.repair_appends" config is set to true then this
   * method will issue puts against the database, overwriting the column with
   * sorted and de-duplicated data. It will only do this for rows that are at
   * least an hour old so as to avoid pounding current rows.
   * <p>
   * TODO (CL) - allow for newer or older data points depending on a config.
   * @param tsdb The TSDB to which we belong
   * @param kv The key value to parse
   * @return A collection of the individual cells that were parsed
   * @throws IllegalArgumentException if the given KV is not an append column
   * @throws IllegalDataException if we were unable to parse the value
   */
  public final Collection<Cell> parseKeyValue(final TSDB tsdb, 
      final KeyValue kv) {
    if (kv.qualifier().length != 3 || 
        kv.qualifier()[0] != APPEND_COLUMN_PREFIX) {
      // It's really not an issue if the offset is not 0; maybe in the future
      // we'll support appends at different offsets.
      throw new IllegalArgumentException("Cannot parse cell, it is not "
          + "an appended cell. It has a different qualifier " 
          + Bytes.pretty(kv.qualifier()) + ", row key " 
          + Bytes.pretty(kv.key()));
    }
    final boolean repair = tsdb.getConfig().repair_appends();
    final long base_time;
    try {
      base_time = Internal.baseTime(tsdb, kv.key());
    } catch (ArrayIndexOutOfBoundsException oob) {
      throw new IllegalDataException("Corrupted value: invalid row key: " 
          + kv, oob);
    }

    int val_idx = 0;
    int val_length = 0;
    int qual_length = 0;
    int last_delta = -1; // Time delta, extracted from the qualifier.

    final Map<Integer, Cell> deltas = new TreeMap<Integer, Cell>();
    boolean has_duplicates = false;
    boolean out_of_order = false;
    boolean needs_repair = false;

    try {
      while (val_idx < kv.value().length) {
        // Each data point is self-describing: pull out the qualifier first,
        // then as many value bytes as its length flags call for.
        byte[] q = Internal.extractQualifier(kv.value(), val_idx);
        System.arraycopy(kv.value(), val_idx, q, 0, q.length);
        val_idx += q.length;

        int vlen = Internal.getValueLengthFromQualifier(q, 0);
        byte[] v = new byte[vlen];
        System.arraycopy(kv.value(), val_idx, v, 0, vlen);
        val_idx += vlen;

        int delta = Internal.getOffsetFromQualifier(q);
        final Cell duplicate = deltas.get(delta);
        if (duplicate != null) {
          // This is a duplicate cell: drop the earlier one and keep the one
          // appended later.
          has_duplicates = true;
          qual_length -= duplicate.qualifier.length;
          val_length -= duplicate.value.length;
        }

        qual_length += q.length;
        val_length += vlen;
        final Cell cell = new Cell(q, v);
        deltas.put(delta, cell);

        if (!out_of_order) {
          // Data points need to be sorted if we find at least one
          // out-of-order data point.
          if (delta <= last_delta) {
            out_of_order = true;
          }
          last_delta = delta;
        }
      }
    } catch (ArrayIndexOutOfBoundsException oob) {
      throw new IllegalDataException("Corrupted value: couldn't break down"
          + " into individual values (consumed " + val_idx + " bytes, but was"
          + " expecting to consume " + (kv.value().length) + "): " + kv
          + ", cells so far: " + deltas.values(), oob);
    }

    if (has_duplicates || out_of_order) {
      if ((DateTime.currentTimeMillis() / 1000) - base_time > 
          REPAIR_THRESHOLD) {
        needs_repair = true;
      }
    }
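
    // Worked example (illustrative, assuming second-resolution 2-byte
    // qualifiers): an append value holding 8-byte longs at offsets 10s, 0s
    // and 10s again is laid out as
    //   [q(10)][v1][q(0)][v2][q(10)][v3]
    // The loop above leaves deltas = {0 => v2, 10 => v3}; the second q(10)
    // replaced v1 in the TreeMap, so has_duplicates and out_of_order are both
    // true and the column becomes a repair candidate once the row is older
    // than REPAIR_THRESHOLD.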
    // Check that we consumed all the bytes of the value.
    if (val_idx != kv.value().length) {
      throw new IllegalDataException("Corrupted value: couldn't break down"
          + " into individual values (consumed " + val_idx + " bytes, but was"
          + " expecting to consume " + (kv.value().length) + "): " + kv
          + ", cells so far: " + deltas.values());
    }

    val_idx = 0;
    int qual_idx = 0;
    byte[] healed_cell = null;
    int healed_index = 0;
    this.value = new byte[val_length];
    this.qualifier = new byte[qual_length];
    if (repair && needs_repair) {
      healed_cell = new byte[val_length + qual_length];
    }

    // Rebuild the compacted qualifier and value arrays from the now sorted
    // and de-duplicated cells; optionally build the healed column to write
    // back to storage.
    for (final Cell cell : deltas.values()) {
      System.arraycopy(cell.qualifier, 0, this.qualifier, qual_idx, 
          cell.qualifier.length);
      qual_idx += cell.qualifier.length;
      System.arraycopy(cell.value, 0, this.value, val_idx, cell.value.length);
      val_idx += cell.value.length;

      if (repair && needs_repair) {
        System.arraycopy(cell.qualifier, 0, healed_cell, healed_index, 
            cell.qualifier.length);
        healed_index += cell.qualifier.length;
        System.arraycopy(cell.value, 0, healed_cell, healed_index, 
            cell.value.length);
        healed_index += cell.value.length;
      }
    }

    if (repair && needs_repair) {
      LOG.debug("Repairing appended data column " + kv);
      final PutRequest put = new PutRequest(tsdb.table, kv.key(), 
          TSDB.FAMILY(), kv.qualifier(), healed_cell);
      repaired_deferred = tsdb.getClient().put(put);
    }
    return deltas.values();
  }

  /** @return the sorted qualifiers in the compacted data point format after 
   * {@link #parseKeyValue(TSDB, KeyValue)} has been called */
  public byte[] qualifier() {
    return qualifier;
  }

  /** @return the sorted values in the compacted data point format after 
   * {@link #parseKeyValue(TSDB, KeyValue)} has been called */
  public byte[] value() {
    return value;
  }

  /** @return a deferred to wait on if the call to 
   * {@link #parseKeyValue(TSDB, KeyValue)} triggered a put to storage, null
   * otherwise */
  public Deferred<Object> repairedDeferred() {
    return repaired_deferred;
  }

  /** @return whether or not the given qualifier is in the append data points
   * format */
  public static boolean isAppendDataPoints(final byte[] qualifier) {
    return qualifier != null && qualifier.length == 3 
        && qualifier[0] == APPEND_COLUMN_PREFIX;
  }
}
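
// Reader-side sketch (illustrative, not part of the class above): parsing an
// append column fetched from storage; "tsdb" and "kv" are assumed to come
// from a scan of the data table.
//
//   final AppendDataPoints adp = new AppendDataPoints();
//   if (AppendDataPoints.isAppendDataPoints(kv.qualifier())) {
//     adp.parseKeyValue(tsdb, kv);
//     // adp.qualifier() / adp.value() now hold the compacted, sorted and
//     // de-duplicated data points.
//     if (adp.repairedDeferred() != null) {
//       adp.repairedDeferred().join();  // block on the repair put, if any
//     }
//   }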