// This file is part of OpenTSDB.
// Copyright (C) 2010-2012  The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version.  This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
// General Public License for more details.  You should have received a copy
// of the GNU Lesser General Public License along with this program.  If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.core;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.List;
import java.util.Map;

import com.stumbleupon.async.Callback;
import com.stumbleupon.async.Deferred;

import org.hbase.async.AppendRequest;
import org.hbase.async.Bytes;
import org.hbase.async.PutRequest;
import org.hbase.async.Bytes.ByteMap;

import net.opentsdb.meta.Annotation;
import net.opentsdb.stats.Histogram;

/**
 * Receives new data points and stores them in HBase.
 */
final class IncomingDataPoints implements WritableDataPoints {

  /** For how long to buffer edits when doing batch imports (in ms). */
  private static final short DEFAULT_BATCH_IMPORT_BUFFER_INTERVAL = 5000;

  /**
   * Keeps track of the latency (in ms) we perceive sending edits to HBase.
   * We want buckets up to 16s, with a 2 ms interval between each bucket up
   * to 100 ms, after which we switch to exponential buckets.
   */
  static final Histogram putlatency = new Histogram(16000, (short) 2, 100);

  /** The {@code TSDB} instance we belong to. */
  private final TSDB tsdb;

  /**
   * The row key. Optional salt + 3 bytes for the metric name, 4 bytes for
   * the base timestamp, 6 bytes per tag (3 for the name, 3 for the value).
   */
  private byte[] row;

  /**
   * Qualifiers for individual data points. The last Const.FLAG_BITS bits are
   * used to store flags (the type of the data point - integer or floating
   * point - and the size of the data point in bytes). The remaining MSBs
   * store a delta in seconds from the base timestamp stored in the row key.
   */
  private short[] qualifiers;

  /** Each value in the row. */
  private long[] values;

  /** Tracks the last timestamp written for this series. */
  private long last_ts;

  /** Number of data points in this row. */
  private short size;

  /** Are we doing a batch import? */
  private boolean batch_import;

  /** The metric for this time series. */
  private String metric;

  /** Copy of the tags given to us by the caller. */
  private Map<String, String> tags;

  /**
   * Constructor.
   *
   * @param tsdb
   *          The TSDB we belong to.
   */
  IncomingDataPoints(final TSDB tsdb) {
    this.tsdb = tsdb;
    // The qualifiers and values were meant for pre-compacting the rows. We
    // could implement this later, but for now we don't need to track the
    // values as they'll just consume space during an import.
    // this.qualifiers = new short[3];
    // this.values = new long[3];
  }

  /**
   * Validates the given metric and tags.
   *
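   * <p>Illustrative example (the metric and tag names are hypothetical):
   * <pre>{@code
   *   final Map<String, String> tags = new HashMap<String, String>();
   *   tags.put("host", "web01");
   *   checkMetricAndTags("sys.cpu.user", tags);  // passes validation
   *   // An empty tag map fails: at least one tag is required.
   *   checkMetricAndTags("sys.cpu.user", new HashMap<String, String>());
   * }</pre>
   *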
   * @throws IllegalArgumentException
   *           if any of the arguments aren't valid.
   */
  static void checkMetricAndTags(final String metric,
      final Map<String, String> tags) {
    if (tags.size() <= 0) {
      throw new IllegalArgumentException("Need at least one tag (metric="
          + metric + ", tags=" + tags + ')');
    } else if (tags.size() > Const.MAX_NUM_TAGS()) {
      throw new IllegalArgumentException("Too many tags: " + tags.size()
          + " maximum allowed: " + Const.MAX_NUM_TAGS() + ", tags: " + tags);
    }

    Tags.validateString("metric name", metric);
    for (final Map.Entry<String, String> tag : tags.entrySet()) {
      Tags.validateString("tag name", tag.getKey());
      Tags.validateString("tag value", tag.getValue());
    }
  }

  /**
   * Returns a partially initialized row key for this metric and these tags.
   * The only thing left to fill in is the base timestamp.
   */
  static byte[] rowKeyTemplate(final TSDB tsdb, final String metric,
      final Map<String, String> tags) {
    final short metric_width = tsdb.metrics.width();
    final short tag_name_width = tsdb.tag_names.width();
    final short tag_value_width = tsdb.tag_values.width();
    final short num_tags = (short) tags.size();

    int row_size = (Const.SALT_WIDTH() + metric_width + Const.TIMESTAMP_BYTES
        + tag_name_width * num_tags + tag_value_width * num_tags);
    final byte[] row = new byte[row_size];

    short pos = (short) Const.SALT_WIDTH();

    copyInRowKey(row, pos, (tsdb.config.auto_metric()
        ? tsdb.metrics.getOrCreateId(metric) : tsdb.metrics.getId(metric)));
    pos += metric_width;

    pos += Const.TIMESTAMP_BYTES;

    for (final byte[] tag : Tags.resolveOrCreateAll(tsdb, tags)) {
      copyInRowKey(row, pos, tag);
      pos += tag.length;
    }
    return row;
  }

  /**
   * Returns a partially initialized row key for this metric and these tags.
   * The only thing left to fill in is the base timestamp.
   *
   * @since 2.0
   */
  static Deferred<byte[]> rowKeyTemplateAsync(final TSDB tsdb,
      final String metric, final Map<String, String> tags) {
    final short metric_width = tsdb.metrics.width();
    final short tag_name_width = tsdb.tag_names.width();
    final short tag_value_width = tsdb.tag_values.width();
    final short num_tags = (short) tags.size();

    int row_size = (Const.SALT_WIDTH() + metric_width + Const.TIMESTAMP_BYTES
        + tag_name_width * num_tags + tag_value_width * num_tags);
    final byte[] row = new byte[row_size];

    // Lookup or create the metric ID.
    final Deferred<byte[]> metric_id;
    if (tsdb.config.auto_metric()) {
      metric_id = tsdb.metrics.getOrCreateIdAsync(metric, metric, tags);
    } else {
      metric_id = tsdb.metrics.getIdAsync(metric);
    }

    // Copy the metric ID at the beginning of the row key.
    class CopyMetricInRowKeyCB implements Callback<byte[], byte[]> {
      public byte[] call(final byte[] metricid) {
        copyInRowKey(row, (short) Const.SALT_WIDTH(), metricid);
        return row;
      }
    }

    // Copy the tag IDs in the row key.
    class CopyTagsInRowKeyCB
        implements Callback<Deferred<byte[]>, ArrayList<byte[]>> {
      public Deferred<byte[]> call(final ArrayList<byte[]> tags) {
        short pos = (short) (Const.SALT_WIDTH() + metric_width);
        pos += Const.TIMESTAMP_BYTES;
        for (final byte[] tag : tags) {
          copyInRowKey(row, pos, tag);
          pos += tag.length;
        }
        // Once we've resolved all the tags, schedule the copy of the metric
        // ID and return the row key we produced.
        return metric_id.addCallback(new CopyMetricInRowKeyCB());
      }
    }

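    // The finished key (once the base time is filled in) is laid out as
    // [salt][metric UID][base_time][tagk1][tagv1]... with the widths
    // described on the `row` field. CopyTagsInRowKeyCB copies the tag UIDs
    // first; CopyMetricInRowKeyCB then fills in the metric UID and hands
    // back the completed template.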
    // Kick off the resolution of all tags.
    return Tags.resolveOrCreateAllAsync(tsdb, metric, tags)
        .addCallbackDeferring(new CopyTagsInRowKeyCB());
  }

  public void setSeries(final String metric, final Map<String, String> tags) {
    checkMetricAndTags(metric, tags);
    try {
      row = rowKeyTemplate(tsdb, metric, tags);
      RowKey.prefixKeyWithSalt(row);
    } catch (RuntimeException e) {
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("Should never happen", e);
    }
    this.metric = metric;
    this.tags = tags;
    size = 0;
  }

  /**
   * Copies the specified byte array at the specified offset in the row key.
   *
   * @param row
   *          The row key into which to copy the bytes.
   * @param offset
   *          The offset in the row key to start writing at.
   * @param bytes
   *          The bytes to copy.
   */
  private static void copyInRowKey(final byte[] row, final short offset,
      final byte[] bytes) {
    System.arraycopy(bytes, 0, row, offset, bytes.length);
  }

  /**
   * Updates the base time in the row key.
   *
   * @param timestamp
   *          The timestamp from which to derive the new base time.
   * @return The updated base time.
   */
  private long updateBaseTime(final long timestamp) {
    // We force the starting timestamp to be on a MAX_TIMESPAN boundary
    // so that all TSDs create rows with the same base time. Otherwise
    // we'd need to coordinate TSDs to avoid creating rows that cover
    // overlapping time periods.
    final long base_time = timestamp - (timestamp % Const.MAX_TIMESPAN);
    // Clone the row key since we're going to change it. We must clone it
    // because the HBase client may still hold a reference to it in its
    // internal datastructures.
    row = Arrays.copyOf(row, row.length);
    Bytes.setInt(row, (int) base_time,
        Const.SALT_WIDTH() + tsdb.metrics.width());
    // Re-salt in case the timestamp is involved in salting.
    RowKey.prefixKeyWithSalt(row);
    tsdb.scheduleForCompaction(row, (int) base_time);
    return base_time;
  }

  /**
   * Implements {@link #addPoint} by storing a value with a specific flag.
   *
   * @param timestamp
   *          The timestamp to associate with the value.
   * @param value
   *          The value to store.
   * @param flags
   *          Flags to store in the qualifier (size and type of the data
   *          point).
   * @return A deferred object that indicates the completion of the request.
   */
  private Deferred<Object> addPointInternal(final long timestamp,
      final byte[] value, final short flags) {
    if (row == null) {
      throw new IllegalStateException("setSeries() never called!");
    }
    final boolean ms_timestamp = (timestamp & Const.SECOND_MASK) != 0;

    // We only accept unix epoch timestamps in seconds or milliseconds.
    if (timestamp < 0 || (ms_timestamp && timestamp > 9999999999999L)) {
      throw new IllegalArgumentException((timestamp < 0 ? "negative" : "bad")
          + " timestamp=" + timestamp + " when trying to add value="
          + Arrays.toString(value) + " to " + this);
    }

    // Always maintain last_ts in milliseconds.
    if ((ms_timestamp ? timestamp : timestamp * 1000) <= last_ts) {
      throw new IllegalArgumentException("New timestamp=" + timestamp
          + " is less than or equal to previous=" + last_ts
          + " when trying to add value=" + Arrays.toString(value)
          + " to " + this);
    }

    /**
     * Callback executed for chaining filter calls to see if the value
     * should be written or not.
     */
    final class WriteCB implements Callback<Deferred<Object>, Boolean> {
      @Override
      public Deferred<Object> call(final Boolean allowed) throws Exception {
        if (!allowed) {
          return Deferred.fromResult(null);
        }

        last_ts = (ms_timestamp ? timestamp : timestamp * 1000);
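        // Rows are aligned on Const.MAX_TIMESPAN boundaries (3600s by
        // default). For example, a timestamp of 1356998415 lands in the row
        // with base_time 1356998400, since
        // 1356998415 - (1356998415 % 3600) = 1356998400.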
        long base_time = baseTime();
        long incoming_base_time;
        if (ms_timestamp) {
          // Drop the ms timestamp to seconds to calculate the base timestamp.
          incoming_base_time = ((timestamp / 1000)
              - ((timestamp / 1000) % Const.MAX_TIMESPAN));
        } else {
          incoming_base_time = (timestamp - (timestamp % Const.MAX_TIMESPAN));
        }

        if (incoming_base_time - base_time >= Const.MAX_TIMESPAN) {
          // Need to start a new row as we've exceeded Const.MAX_TIMESPAN.
          base_time = updateBaseTime(
              ms_timestamp ? timestamp / 1000 : timestamp);
        }

        // Java is so stupid with its auto-promotion of int to float.
        final byte[] qualifier = Internal.buildQualifier(timestamp, flags);

        // TODO(tsuna): The following timing is rather useless. First of all,
        // the histogram never resets, so it tends to converge to a certain
        // distribution and never changes. What we really want is a moving
        // histogram so we can see how the latency distribution varies over
        // time. The other problem is that the Histogram class isn't
        // thread-safe and here we access it from a callback that runs in an
        // unknown thread, so we might miss some increments. So let's comment
        // this out until we have a proper thread-safe moving histogram.
        // final long start_put = System.nanoTime();
        // final Callback<Object, Object> cb = new Callback<Object, Object>() {
        //   public Object call(final Object arg) {
        //     putlatency.add((int) ((System.nanoTime() - start_put) / 1000000));
        //     return arg;
        //   }
        //   public String toString() {
        //     return "time put request";
        //   }
        // };

        // TODO(tsuna): Add an errback to handle some error cases here.
        if (tsdb.getConfig().enable_appends()) {
          final AppendDataPoints kv = new AppendDataPoints(qualifier, value);
          final AppendRequest point = new AppendRequest(tsdb.table, row,
              TSDB.FAMILY, AppendDataPoints.APPEND_COLUMN_QUALIFIER,
              kv.getBytes());
          point.setDurable(!batch_import);
          return tsdb.client.append(point); /* .addBoth(cb) */
        } else {
          final PutRequest point = new PutRequest(tsdb.table, row,
              TSDB.FAMILY, qualifier, value);
          point.setDurable(!batch_import);
          return tsdb.client.put(point) /* .addBoth(cb) */;
        }
      }

      @Override
      public String toString() {
        return "IncomingDataPoints.addPointInternal Write Callback";
      }
    }

    if (tsdb.getTSfilter() != null && tsdb.getTSfilter().filterDataPoints()) {
      return tsdb.getTSfilter()
          .allowDataPoint(metric, timestamp, value, tags, flags)
          .addCallbackDeferring(new WriteCB());
    }
    return Deferred.fromResult(true).addCallbackDeferring(new WriteCB());
  }

  private void grow() {
    // We can't have more than 1 value per second, so MAX_TIMESPAN values.
    final int new_size = Math.min(size * 2, Const.MAX_TIMESPAN);
    if (new_size == size) {
      throw new AssertionError("Can't grow " + this + " larger than " + size);
    }
    values = Arrays.copyOf(values, new_size);
    qualifiers = Arrays.copyOf(qualifiers, new_size);
  }

  /** Extracts the base timestamp from the row key. */
  private long baseTime() {
    return Bytes.getUnsignedInt(row,
        Const.SALT_WIDTH() + tsdb.metrics.width());
  }

  public Deferred<Object> addPoint(final long timestamp, final long value) {
    final byte[] v;
    if (Byte.MIN_VALUE <= value && value <= Byte.MAX_VALUE) {
      v = new byte[] { (byte) value };
    } else if (Short.MIN_VALUE <= value && value <= Short.MAX_VALUE) {
      v = Bytes.fromShort((short) value);
    } else if (Integer.MIN_VALUE <= value && value <= Integer.MAX_VALUE) {
      v = Bytes.fromInt((int) value);
    } else {
      v = Bytes.fromLong(value);
    }
    final short flags = (short) (v.length - 1);  // Just the length.
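    // For integers the float bit stays clear, so the flags encode only the
    // value's width: e.g. a value of 42 fits in one byte (flags 0x0) while
    // 100000 needs four bytes (flags 0x3).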
    return addPointInternal(timestamp, v, flags);
  }

  public Deferred<Object> addPoint(final long timestamp, final float value) {
    if (Float.isNaN(value) || Float.isInfinite(value)) {
      throw new IllegalArgumentException("value is NaN or Infinite: " + value
          + " for timestamp=" + timestamp);
    }
    final short flags = Const.FLAG_FLOAT | 0x3;  // A float stored on 4 bytes.
    return addPointInternal(timestamp,
        Bytes.fromInt(Float.floatToRawIntBits(value)), flags);
  }

  public void setBufferingTime(final short time) {
    if (time < 0) {
      throw new IllegalArgumentException("negative time: " + time);
    }
    tsdb.client.setFlushInterval(time);
  }

  public void setBatchImport(final boolean batchornot) {
    if (batch_import == batchornot) {
      return;
    }
    final long current_interval = tsdb.client.getFlushInterval();
    if (batchornot) {
      batch_import = true;
      // If we were already given a larger interval, don't override it.
      if (DEFAULT_BATCH_IMPORT_BUFFER_INTERVAL > current_interval) {
        setBufferingTime(DEFAULT_BATCH_IMPORT_BUFFER_INTERVAL);
      }
    } else {
      batch_import = false;
      // If we're using the default batch import buffer interval,
      // revert back to 0.
      if (current_interval == DEFAULT_BATCH_IMPORT_BUFFER_INTERVAL) {
        setBufferingTime((short) 0);
      }
    }
  }

  public String metricName() {
    try {
      return metricNameAsync().joinUninterruptibly();
    } catch (RuntimeException e) {
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("Should never be here", e);
    }
  }

  public Deferred<String> metricNameAsync() {
    if (row == null) {
      throw new IllegalStateException(
          "The row key was null, setSeries was not called.");
    }
    final byte[] id = Arrays.copyOfRange(row, Const.SALT_WIDTH(),
        tsdb.metrics.width() + Const.SALT_WIDTH());
    return tsdb.metrics.getNameAsync(id);
  }

  @Override
  public byte[] metricUID() {
    return Arrays.copyOfRange(row, Const.SALT_WIDTH(),
        Const.SALT_WIDTH() + TSDB.metrics_width());
  }

  public Map<String, String> getTags() {
    try {
      return getTagsAsync().joinUninterruptibly();
    } catch (RuntimeException e) {
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("Should never be here", e);
    }
  }

  @Override
  public ByteMap<byte[]> getTagUids() {
    return Tags.getTagUids(row);
  }

  public Deferred<Map<String, String>> getTagsAsync() {
    return Tags.getTagsAsync(tsdb, row);
  }

  public List<String> getAggregatedTags() {
    return Collections.emptyList();
  }

  public Deferred<List<String>> getAggregatedTagsAsync() {
    final List<String> empty = Collections.emptyList();
    return Deferred.fromResult(empty);
  }

  @Override
  public List<byte[]> getAggregatedTagUids() {
    return Collections.emptyList();
  }

  public List<String> getTSUIDs() {
    return Collections.emptyList();
  }

  public List<Annotation> getAnnotations() {
    return null;
  }

  public int size() {
    return size;
  }

  public int aggregatedSize() {
    return 0;
  }

  public SeekableView iterator() {
    return new DataPointsIterator(this);
  }

  /**
   * @throws IndexOutOfBoundsException
   *           if {@code i} is out of bounds.
   */
  private void checkIndex(final int i) {
    if (i >= size) {
      throw new IndexOutOfBoundsException("index " + i + " >= " + size
          + " for this=" + this);
    }
    if (i < 0) {
      throw new IndexOutOfBoundsException("negative index " + i
          + " for this=" + this);
    }
  }
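  /**
   * Extracts the time delta (in seconds) from a qualifier by dropping its
   * low {@code Const.FLAG_BITS} flag bits. Illustrative decoding, assuming
   * 4 flag bits: a qualifier of {@code 0x07B3} yields a delta of 123
   * ({@code 0x7B}) with flags {@code 0x3}, i.e. a 4-byte integer value.
   */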
  private static short delta(final short qualifier) {
    return (short) ((qualifier & 0xFFFF) >>> Const.FLAG_BITS);
  }

  public long timestamp(final int i) {
    checkIndex(i);
    return baseTime() + (delta(qualifiers[i]) & 0xFFFF);
  }

  public boolean isInteger(final int i) {
    checkIndex(i);
    return (qualifiers[i] & Const.FLAG_FLOAT) == 0x0;
  }

  public long longValue(final int i) {
    // Don't call checkIndex(i) because isInteger(i) already calls it.
    if (isInteger(i)) {
      return values[i];
    }
    throw new ClassCastException("value #" + i + " is not a long in " + this);
  }

  public double doubleValue(final int i) {
    // Don't call checkIndex(i) because isInteger(i) already calls it.
    if (!isInteger(i)) {
      return Float.intBitsToFloat((int) values[i]);
    }
    throw new ClassCastException("value #" + i + " is not a float in " + this);
  }

  /** Returns a human readable string representation of the object. */
  public String toString() {
    // The argument passed to StringBuilder is a pretty good estimate of the
    // length of the final string based on the row key and number of elements.
    final String metric = metricName();
    final StringBuilder buf = new StringBuilder(80 + metric.length()
        + row.length * 4 + size * 16);
    final long base_time = baseTime();
    buf.append("IncomingDataPoints(")
        .append(row == null ? "<null>" : Arrays.toString(row))
        .append(" (metric=").append(metric).append("), base_time=")
        .append(base_time).append(" (")
        .append(base_time > 0 ? new Date(base_time * 1000) : "no date")
        .append("), [");
    for (short i = 0; i < size; i++) {
      buf.append('+').append(delta(qualifiers[i]));
      if (isInteger(i)) {
        buf.append(":long(").append(longValue(i));
      } else {
        buf.append(":float(").append(doubleValue(i));
      }
      buf.append(')');
      if (i != size - 1) {
        buf.append(", ");
      }
    }
    buf.append("])");
    return buf.toString();
  }

  @Override
  public Deferred<Object> persist() {
    return Deferred.fromResult((Object) null);
  }

  public int getQueryIndex() {
    throw new UnsupportedOperationException("Not mapped to a query");
  }
}