// This file is part of OpenTSDB.
// Copyright (C) 2010-2012 The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version. This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details. You should have received a copy
// of the GNU Lesser General Public License along with this program. If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.core;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import com.stumbleupon.async.Callback;
import com.stumbleupon.async.Deferred;
import com.stumbleupon.async.DeferredGroupException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.hbase.async.Bytes;
import org.hbase.async.ClientStats;
import org.hbase.async.DeleteRequest;
import org.hbase.async.GetRequest;
import org.hbase.async.HBaseClient;
import org.hbase.async.HBaseException;
import org.hbase.async.KeyValue;
import org.hbase.async.PutRequest;

import net.opentsdb.uid.UniqueId;
import net.opentsdb.stats.Histogram;
import net.opentsdb.stats.StatsCollector;
/**
* Thread-safe implementation of the TSDB client.
* <p>
* This class is the central class of OpenTSDB. You use it to add new data
* points or query the database.
*/
public final class TSDB {
static final byte[] FAMILY = { 't' };
private static final String METRICS_QUAL = "metrics";
private static final short METRICS_WIDTH = 3;
private static final String TAG_NAME_QUAL = "tagk";
private static final short TAG_NAME_WIDTH = 3;
private static final String TAG_VALUE_QUAL = "tagv";
private static final short TAG_VALUE_WIDTH = 3;
static final boolean enable_compactions;
static {
final String compactions = System.getProperty("tsd.feature.compactions");
enable_compactions = compactions != null && !"false".equals(compactions);
}
/** Client for the HBase cluster to use. */
final HBaseClient client;
/** Name of the table in which timeseries are stored. */
final byte[] table;
/** Unique IDs for the metric names. */
public final UniqueId metrics;
/** Unique IDs for the tag names. */
public final UniqueId tag_names;
/** Unique IDs for the tag values. */
public final UniqueId tag_values;
/**
* Row keys that need to be compacted.
* Whenever we write a new data point to a row, we add the row key to this
* set. Every once in a while, the compaction thread will go through old
* row keys and will read re-compact them.
*/
private final CompactionQueue compactionq;
/**
* Constructor.
* @param client The HBase client to use.
* @param timeseries_table The name of the HBase table where time series
* data is stored.
* @param uniqueids_table The name of the HBase table where the unique IDs
* are stored.
*/
public TSDB(final HBaseClient client,
final String timeseries_table,
final String uniqueids_table) {
this.client = client;
table = timeseries_table.getBytes();
final byte[] uidtable = uniqueids_table.getBytes();
metrics = new UniqueId(client, uidtable, METRICS_QUAL, METRICS_WIDTH);
tag_names = new UniqueId(client, uidtable, TAG_NAME_QUAL, TAG_NAME_WIDTH);
tag_values = new UniqueId(client, uidtable, TAG_VALUE_QUAL,
TAG_VALUE_WIDTH);
compactionq = new CompactionQueue(this);
}
/** Number of cache hits during lookups involving UIDs. */
public int uidCacheHits() {
return (metrics.cacheHits() + tag_names.cacheHits()
+ tag_values.cacheHits());
}
/** Number of cache misses during lookups involving UIDs. */
public int uidCacheMisses() {
return (metrics.cacheMisses() + tag_names.cacheMisses()
+ tag_values.cacheMisses());
}
/** Number of cache entries currently in RAM for lookups involving UIDs. */
public int uidCacheSize() {
return (metrics.cacheSize() + tag_names.cacheSize()
+ tag_values.cacheSize());
}
/**
* Collects the stats and metrics tracked by this instance.
* @param collector The collector to use.
*/
public void collectStats(final StatsCollector collector) {
collectUidStats(metrics, collector);
collectUidStats(tag_names, collector);
collectUidStats(tag_values, collector);
{
final Runtime runtime = Runtime.getRuntime();
collector.record("jvm.ramfree", runtime.freeMemory());
collector.record("jvm.ramused", runtime.totalMemory());
}
collector.addExtraTag("class", "IncomingDataPoints");
try {
collector.record("hbase.latency", IncomingDataPoints.putlatency, "method=put");
} finally {
collector.clearExtraTag("class");
}
collector.addExtraTag("class", "TsdbQuery");
try {
collector.record("hbase.latency", TsdbQuery.scanlatency, "method=scan");
} finally {
collector.clearExtraTag("class");
}
final ClientStats stats = client.stats();
collector.record("hbase.root_lookups", stats.rootLookups());
collector.record("hbase.meta_lookups",
stats.uncontendedMetaLookups(), "type=uncontended");
collector.record("hbase.meta_lookups",
stats.contendedMetaLookups(), "type=contended");
collector.record("hbase.rpcs",
stats.atomicIncrements(), "type=increment");
collector.record("hbase.rpcs", stats.deletes(), "type=delete");
collector.record("hbase.rpcs", stats.gets(), "type=get");
collector.record("hbase.rpcs", stats.puts(), "type=put");
collector.record("hbase.rpcs", stats.rowLocks(), "type=rowLock");
collector.record("hbase.rpcs", stats.scannersOpened(), "type=openScanner");
collector.record("hbase.rpcs", stats.scans(), "type=scan");
collector.record("hbase.rpcs.batched", stats.numBatchedRpcSent());
collector.record("hbase.flushes", stats.flushes());
collector.record("hbase.connections.created", stats.connectionsCreated());
collector.record("hbase.nsre", stats.noSuchRegionExceptions());
collector.record("hbase.nsre.rpcs_delayed",
stats.numRpcDelayedDueToNSRE());
compactionq.collectStats(collector);
}
/** Returns a latency histogram for Put RPCs used to store data points. */
public Histogram getPutLatencyHistogram() {
return IncomingDataPoints.putlatency;
}
/** Returns a latency histogram for Scan RPCs used to fetch data points. */
public Histogram getScanLatencyHistogram() {
return TsdbQuery.scanlatency;
}
/**
* Collects the stats for a {@link UniqueId}.
* @param uid The instance from which to collect stats.
* @param collector The collector to use.
*/
private static void collectUidStats(final UniqueId uid,
final StatsCollector collector) {
collector.record("uid.cache-hit", uid.cacheHits(), "kind=" + uid.kind());
collector.record("uid.cache-miss", uid.cacheMisses(), "kind=" + uid.kind());
collector.record("uid.cache-size", uid.cacheSize(), "kind=" + uid.kind());
}
/**
* Returns a new {@link Query} instance suitable for this TSDB.
*/
public Query newQuery() {
return new TsdbQuery(this);
}
/**
* Returns a new {@link WritableDataPoints} instance suitable for this TSDB.
* <p>
* If you want to add a single data-point, consider using {@link #addPoint}
* instead.
*/
public WritableDataPoints newDataPoints() {
return new IncomingDataPoints(this);
}
/**
* Adds a single integer value data point in the TSDB.
* @param metric A non-empty string.
* @param timestamp The timestamp associated with the value.
* @param value The value of the data point.
* @param tags The tags on this series. This map must be non-empty.
* @return A deferred object that indicates the completion of the request.
* The {@link Object} has not special meaning and can be {@code null} (think
* of it as {@code Deferred<Void>}). But you probably want to attach at
* least an errback to this {@code Deferred} to handle failures.
* @throws IllegalArgumentException if the timestamp is less than or equal
* to the previous timestamp added or 0 for the first timestamp, or if the
* difference with the previous timestamp is too large.
* @throws IllegalArgumentException if the metric name is empty or contains
* illegal characters.
* @throws IllegalArgumentException if the tags list is empty or one of the
* elements contains illegal characters.
* @throws HBaseException (deferred) if there was a problem while persisting
* data.
*/
public Deferred<Object> addPoint(final String metric,
final long timestamp,
final long value,
final Map<String, String> tags) {
final short flags = 0x7; // An int stored on 8 bytes.
return addPointInternal(metric, timestamp, Bytes.fromLong(value),
tags, flags);
}
/**
* Adds a single floating-point value data point in the TSDB.
* @param metric A non-empty string.
* @param timestamp The timestamp associated with the value.
* @param value The value of the data point.
* @param tags The tags on this series. This map must be non-empty.
* @return A deferred object that indicates the completion of the request.
* The {@link Object} has not special meaning and can be {@code null} (think
* of it as {@code Deferred<Void>}). But you probably want to attach at
* least an errback to this {@code Deferred} to handle failures.
* @throws IllegalArgumentException if the timestamp is less than or equal
* to the previous timestamp added or 0 for the first timestamp, or if the
* difference with the previous timestamp is too large.
* @throws IllegalArgumentException if the metric name is empty or contains
* illegal characters.
* @throws IllegalArgumentException if the value is NaN or infinite.
* @throws IllegalArgumentException if the tags list is empty or one of the
* elements contains illegal characters.
* @throws HBaseException (deferred) if there was a problem while persisting
* data.
*/
public Deferred<Object> addPoint(final String metric,
final long timestamp,
final float value,
final Map<String, String> tags) {
if (Float.isNaN(value) || Float.isInfinite(value)) {
throw new IllegalArgumentException("value is NaN or Infinite: " + value
+ " for metric=" + metric
+ " timestamp=" + timestamp);
}
final short flags = Const.FLAG_FLOAT | 0x3; // A float stored on 4 bytes.
return addPointInternal(metric, timestamp,
Bytes.fromInt(Float.floatToRawIntBits(value)),
tags, flags);
}
private Deferred<Object> addPointInternal(final String metric,
final long timestamp,
final byte[] value,
final Map<String, String> tags,
final short flags) {
if ((timestamp & 0xFFFFFFFF00000000L) != 0) {
// => timestamp < 0 || timestamp > Integer.MAX_VALUE
throw new IllegalArgumentException((timestamp < 0 ? "negative " : "bad")
+ " timestamp=" + timestamp
+ " when trying to add value=" + Arrays.toString(value) + '/' + flags
+ " to metric=" + metric + ", tags=" + tags);
}
IncomingDataPoints.checkMetricAndTags(metric, tags);
final byte[] row = IncomingDataPoints.rowKeyTemplate(this, metric, tags);
final long base_time = (timestamp - (timestamp % Const.MAX_TIMESPAN));
Bytes.setInt(row, (int) base_time, metrics.width());
scheduleForCompaction(row, (int) base_time);
final short qualifier = (short) ((timestamp - base_time) << Const.FLAG_BITS
| flags);
final PutRequest point = new PutRequest(table, row, FAMILY,
Bytes.fromShort(qualifier), value);
// TODO(tsuna): Add a callback to time the latency of HBase and store the
// timing in a moving Histogram (once we have a class for this).
return client.put(point);
}
/**
* Forces a flush of any un-committed in memory data.
* <p>
* For instance, any data point not persisted will be sent to HBase.
* @return A {@link Deferred} that will be called once all the un-committed
* data has been successfully and durably stored. The value of the deferred
* object return is meaningless and unspecified, and can be {@code null}.
* @throws HBaseException (deferred) if there was a problem sending
* un-committed data to HBase. Please refer to the {@link HBaseException}
* hierarchy to handle the possible failures. Some of them are easily
* recoverable by retrying, some are not.
*/
public Deferred<Object> flush() throws HBaseException {
return client.flush();
}
/**
* Gracefully shuts down this instance.
* <p>
* This does the same thing as {@link #flush} and also releases all other
* resources.
* @return A {@link Deferred} that will be called once all the un-committed
* data has been successfully and durably stored, and all resources used by
* this instance have been released. The value of the deferred object
* return is meaningless and unspecified, and can be {@code null}.
* @throws HBaseException (deferred) if there was a problem sending
* un-committed data to HBase. Please refer to the {@link HBaseException}
* hierarchy to handle the possible failures. Some of them are easily
* recoverable by retrying, some are not.
*/
public Deferred<Object> shutdown() {
final class HClientShutdown implements Callback<Object, ArrayList<Object>> {
public Object call(final ArrayList<Object> args) {
return client.shutdown();
}
public String toString() {
return "shutdown HBase client";
}
}
final class ShutdownErrback implements Callback<Object, Exception> {
public Object call(final Exception e) {
final Logger LOG = LoggerFactory.getLogger(ShutdownErrback.class);
if (e instanceof DeferredGroupException) {
final DeferredGroupException ge = (DeferredGroupException) e;
for (final Object r : ge.results()) {
if (r instanceof Exception) {
LOG.error("Failed to flush the compaction queue", (Exception) r);
}
}
} else {
LOG.error("Failed to flush the compaction queue", e);
}
return client.shutdown();
}
public String toString() {
return "shutdown HBase client after error";
}
}
// First flush the compaction queue, then shutdown the HBase client.
return enable_compactions
? compactionq.flush().addCallbacks(new HClientShutdown(),
new ShutdownErrback())
: client.shutdown();
}
/**
* Given a prefix search, returns a few matching metric names.
* @param search A prefix to search.
*/
public List<String> suggestMetrics(final String search) {
return metrics.suggest(search);
}
/**
* Given a prefix search, returns a few matching tag names.
* @param search A prefix to search.
*/
public List<String> suggestTagNames(final String search) {
return tag_names.suggest(search);
}
/**
* Given a prefix search, returns a few matching tag values.
* @param search A prefix to search.
*/
public List<String> suggestTagValues(final String search) {
return tag_values.suggest(search);
}
/**
* Discards all in-memory caches.
* @since 1.1
*/
public void dropCaches() {
metrics.dropCaches();
tag_names.dropCaches();
tag_values.dropCaches();
}
// ------------------ //
// Compaction helpers //
// ------------------ //
final KeyValue compact(final ArrayList<KeyValue> row) {
return compactionq.compact(row);
}
/**
* Schedules the given row key for later re-compaction.
* Once this row key has become "old enough", we'll read back all the data
* points in that row, write them back to HBase in a more compact fashion,
* and delete the individual data points.
* @param row The row key to re-compact later. Will not be modified.
* @param base_time The 32-bit unsigned UNIX timestamp.
*/
final void scheduleForCompaction(final byte[] row, final int base_time) {
if (enable_compactions) {
compactionq.add(row);
}
}
// ------------------------ //
// HBase operations helpers //
// ------------------------ //
/** Gets the entire given row from the data table. */
final Deferred<ArrayList<KeyValue>> get(final byte[] key) {
return client.get(new GetRequest(table, key));
}
/** Puts the given value into the data table. */
final Deferred<Object> put(final byte[] key,
final byte[] qualifier,
final byte[] value) {
return client.put(new PutRequest(table, key, FAMILY, qualifier, value));
}
/** Deletes the given cells from the data table. */
final Deferred<Object> delete(final byte[] key, final byte[][] qualifiers) {
return client.delete(new DeleteRequest(table, key, FAMILY, qualifiers));
}
}