// This file is part of OpenTSDB.
// Copyright (C) 2010-2012 The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version. This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details. You should have received a copy
// of the GNU Lesser General Public License along with this program. If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.core;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.hbase.async.Bytes;
import org.hbase.async.Bytes.ByteMap;
import com.stumbleupon.async.Callback;
import com.stumbleupon.async.Deferred;
import net.opentsdb.meta.Annotation;
/**
* Groups multiple spans together and offers a dynamic "view" on them.
* <p>
* This is used for queries to the TSDB, where we might group multiple
* {@link Span}s that are for the same time series but different tags
* together. We need to "hide" data points that are outside of the
* time period of the query and do on-the-fly aggregation of the data
* points coming from the different Spans, using an {@link Aggregator}.
* Since not all the Spans will have their data points at exactly the
* same time, we also do on-the-fly linear interpolation. If needed,
* this view can also return the rate of change instead of the actual
* data points.
* <p>
* This is one of the rare (if not the only) implementations of
* {@link DataPoints} for which {@link #getTags} can potentially return
* an empty map.
* <p>
* The implementation can also dynamically downsample the data when a
* sampling interval and a downsampling function (in the form of an
* {@link Aggregator}) are given. This is done by using a special
* iterator when using the {@link Span.DownsamplingIterator}.
*/
final class SpanGroup implements DataPoints {
/** Annotations gathered from the spans handed to {@link #add}. */
private final ArrayList<Annotation> annotations;
/**
* Start time (UNIX timestamp). The constructor stores it in milliseconds:
* a value with no bit set in {@code Const.SECOND_MASK} is multiplied by 1000.
*/
private final long start_time;
/**
* End time (UNIX timestamp). The constructor stores it in milliseconds:
* a value with no bit set in {@code Const.SECOND_MASK} is multiplied by 1000.
*/
private final long end_time;
/**
* The tags of this group.
* This is the intersection set between the tags of all the Spans
* in this group.
* Stays {@code null} until the tags have been requested; {@link #add}
* refuses new spans once it is set.
* @see #computeTags
*/
private Map<String, String> tags;
/** UID form of {@link #tags} (tag key UID to tag value UID), filled by {@link #computeTags}. */
private ByteMap<byte[]> tag_uids;
/**
* The names of the tags that aren't shared by every single data point.
* This is the symmetric difference between the tags of all the Spans
* in this group.
* @see #computeTags
*/
private List<String> aggregated_tags;
/** UID form of {@link #aggregated_tags} (tag key UIDs only), filled by {@link #computeTags}. */
private Set<byte[]> aggregated_tag_uids;
/** Spans in this group. They must all be for the same metric. */
private final ArrayList<Span> spans = new ArrayList<Span>();
/** If true, use rate of change instead of actual values. */
private final boolean rate;
/** Specifies the various options for rate calculations */
private RateOptions rate_options;
/** Aggregator to use to aggregate data points from different Spans. */
private final Aggregator aggregator;
/** Downsampling specification to use, if any (can be {@code null}). */
private DownsamplingSpecification downsampler;
/** Start timestamp of the query for filtering */
private final long query_start;
/** End timestamp of the query for filtering */
private final long query_end;
/** Index of the query in the TSQuery class */
private final int query_index;
/** The TSDB to which we belong, used for resolution */
private final TSDB tsdb;
/**
* Ctor that uses a default set of rate options.
* Delegates to the overload taking a {@link RateOptions}, passing a freshly
* built instance (see the delegating call below for its arguments).
* @param tsdb The TSDB we belong to.
* @param start_time Any data point strictly before this timestamp will be
* ignored.
* @param end_time Any data point strictly after this timestamp will be
* ignored.
* @param spans A sequence of initial {@link Span}s to add to this group.
* Ignored if {@code null}. Additional spans can be added with {@link #add}.
* @param rate If {@code true}, the rate of the series will be used instead
* of the actual values.
* @param aggregator The aggregation function to use.
* @param interval Number of milliseconds wanted between each data point.
* @param downsampler Aggregation function to use to group data points
* within an interval.
*/
SpanGroup(final TSDB tsdb,
final long start_time, final long end_time,
final Iterable<Span> spans,
final boolean rate,
final Aggregator aggregator,
final long interval, final Aggregator downsampler) {
this(tsdb, start_time, end_time, spans, rate, new RateOptions(false,
Long.MAX_VALUE, RateOptions.DEFAULT_RESET_VALUE), aggregator, interval,
downsampler);
}
/**
* Ctor with explicit rate options.
* Delegates to the overload taking a query index and fill policy, passing
* {@code -1} as the query index and {@code FillPolicy.NONE}.
* @param tsdb The TSDB we belong to.
* @param start_time Any data point strictly before this timestamp will be
* ignored.
* @param end_time Any data point strictly after this timestamp will be
* ignored.
* @param spans A sequence of initial {@link Span}s to add to this group.
* Ignored if {@code null}. Additional spans can be added with {@link #add}.
* @param rate If {@code true}, the rate of the series will be used instead
* of the actual values.
* @param rate_options Specifies the optional additional rate calculation options.
* @param aggregator The aggregation function to use.
* @param interval Number of milliseconds wanted between each data point.
* @param downsampler Aggregation function to use to group data points
* within an interval.
* @since 2.0
*/
SpanGroup(final TSDB tsdb,
final long start_time, final long end_time,
final Iterable<Span> spans,
final boolean rate, final RateOptions rate_options,
final Aggregator aggregator,
final long interval, final Aggregator downsampler) {
this(tsdb, start_time, end_time, spans, rate, rate_options, aggregator,
interval, downsampler, -1, FillPolicy.NONE);
}
/**
* Ctor with explicit rate options, query index and fill policy.
* Wraps {@code interval}, {@code downsampler} and {@code fill_policy} into a
* {@link DownsamplingSpecification} (or passes {@code null} when
* {@code downsampler} is {@code null}) and delegates to the primary
* constructor with {@code 0} for both the query start and the query end.
* @param tsdb The TSDB we belong to.
* @param start_time Any data point strictly before this timestamp will be
* ignored.
* @param end_time Any data point strictly after this timestamp will be
* ignored.
* @param spans A sequence of initial {@link Span}s to add to this group.
* Ignored if {@code null}. Additional spans can be added with {@link #add}.
* @param rate If {@code true}, the rate of the series will be used instead
* of the actual values.
* @param rate_options Specifies the optional additional rate calculation options.
* @param aggregator The aggregation function to use.
* @param interval Number of milliseconds wanted between each data point.
* @param downsampler Aggregation function to use to group data points
* within an interval. If {@code null}, no downsampling specification is
* created.
* @param query_index index of the original query
* @param fill_policy Policy specifying whether to interpolate or to fill
* missing intervals with special values.
* @since 2.2
*/
SpanGroup(final TSDB tsdb,
final long start_time, final long end_time,
final Iterable<Span> spans,
final boolean rate, final RateOptions rate_options,
final Aggregator aggregator,
final long interval, final Aggregator downsampler, final int query_index,
final FillPolicy fill_policy) {
this(tsdb, start_time, end_time, spans, rate, rate_options, aggregator,
downsampler != null ?
new DownsamplingSpecification(interval, downsampler, fill_policy) :
null,
0, 0, query_index);
}
/**
 * Primary ctor; every other constructor ends up here.
 * <p>
 * {@code start_time} and {@code end_time} are stored in milliseconds: a value
 * with no bit set in {@code Const.SECOND_MASK} is multiplied by 1000 before
 * being stored.
 * @param tsdb The TSDB we belong to.
 * @param start_time Any data point strictly before this timestamp will be
 * ignored.
 * @param end_time Any data point strictly after this timestamp will be
 * ignored.
 * @param spans A sequence of initial {@link Span}s to add to this group.
 * Ignored if {@code null}. Additional spans can be added with {@link #add}.
 * @param rate If {@code true}, the rate of the series will be used instead
 * of the actual values.
 * @param rate_options Specifies the optional additional rate calculation options.
 * @param aggregator The aggregation function to use.
 * @param downsampler The specification to use for downsampling, may be null.
 * @param query_start Start of the actual query
 * @param query_end End of the actual query
 * @param query_index index of the original query
 * @since 2.3
 */
SpanGroup(final TSDB tsdb,
          final long start_time,
          final long end_time,
          final Iterable<Span> spans,
          final boolean rate,
          final RateOptions rate_options,
          final Aggregator aggregator,
          final DownsamplingSpecification downsampler,
          final long query_start,
          final long query_end,
          final int query_index) {
  // Plain configuration first; none of it is read by add().
  this.tsdb = tsdb;
  this.query_index = query_index;
  this.query_start = query_start;
  this.query_end = query_end;
  this.downsampler = downsampler;
  this.aggregator = aggregator;
  this.rate_options = rate_options;
  this.rate = rate;

  // State that add() relies on: the annotation list and the time bounds.
  annotations = new ArrayList<Annotation>();
  if ((start_time & Const.SECOND_MASK) == 0) {
    this.start_time = start_time * 1000;
  } else {
    this.start_time = start_time;
  }
  if ((end_time & Const.SECOND_MASK) == 0) {
    this.end_time = end_time * 1000;
  } else {
    this.end_time = end_time;
  }

  // Finally load the initial spans, if the caller supplied any.
  if (spans == null) {
    return;
  }
  for (final Span initial : spans) {
    add(initial);
  }
}
/**
 * Adds a span to this group, provided that it's in the right time range.
 * <b>Must not</b> be called once {@link #getTags} or
 * {@link #getAggregatedTags} has been called on this instance.
 * <p>
 * A span without data points only contributes those of its annotations whose
 * time range overlaps the group's time range. A span with data points is
 * kept, along with all of its annotations, when its first and last data
 * points overlap the group's time range.
 * @param span The span to add to this group. If none of the data points
 * fall within our time range, this method will silently ignore that span.
 * @throws AssertionError if the tags of this group were already computed.
 */
void add(final Span span) {
  if (tags != null) {
    throw new AssertionError("The set of tags has already been computed"
        + ", you can't add more Spans to " + this);
  }

  // start_time and end_time were converted to milliseconds by the
  // constructor. Converting them a second time would wrongly scale small
  // millisecond values (those with no bit set in Const.SECOND_MASK) by
  // another factor of 1000, so they are used as they are.
  final long start = start_time;
  final long end = end_time;

  if (span.size() == 0) {
    // No data points: only keep the annotations that overlap the time range.
    for (final Annotation annot : span.getAnnotations()) {
      final long annot_start = toMillis(annot.getStartTime());
      // An end time that is unset (presumably 0 -- confirm against
      // Annotation) or that precedes the start collapses to the start time,
      // i.e. the annotation is treated as a single instant.
      final long annot_end =
          Math.max(annot_start, toMillis(annot.getEndTime()));
      if (annot_end >= start && annot_start <= end) {
        annotations.add(annot);
      }
    }
    return;
  }

  final long first_dp = toMillis(span.timestamp(0));
  final long last_dp = toMillis(span.timestamp(span.size() - 1));
  if (first_dp <= end && last_dp >= start) {
    this.spans.add(span);
    annotations.addAll(span.getAnnotations());
  }
}

/** Converts a timestamp with no bit set in {@code Const.SECOND_MASK} to milliseconds. */
private static long toMillis(final long timestamp) {
  return (timestamp & Const.SECOND_MASK) == 0 ? timestamp * 1000 : timestamp;
}
/**
 * Splits the tag UIDs of all spans into shared tags and aggregated tags.
 * <p>
 * Only UIDs are handled here; nothing is resolved to a string. A tag key is
 * recorded with the first value seen for it. As soon as another span carries
 * a different value for that key, the key is moved to the aggregated set and
 * ignored from then on.
 * <p>
 * The first call initializes both UID collections (they may end up empty);
 * later calls return immediately.
 * NOTE(review): a tag key carried by only some of the spans is never moved
 * to the aggregated set by this loop -- confirm that is intended.
 */
private void computeTags() {
  final boolean already_computed =
      tag_uids != null && aggregated_tag_uids != null;
  if (already_computed) {
    return;
  }
  if (spans.isEmpty()) {
    tag_uids = new ByteMap<byte[]>();
    aggregated_tag_uids = new HashSet<byte[]>();
    return;
  }

  // tag key UID -> tag value UID for the pairs seen with a single value
  final ByteMap<byte[]> shared = new ByteMap<byte[]>();
  // only the keys matter here, every value is null
  final ByteMap<byte[]> aggregated = new ByteMap<byte[]>();

  for (final Span span : spans) {
    final ByteMap<byte[]> span_tags = span.getTagUids();
    for (final Map.Entry<byte[], byte[]> pair : span_tags.entrySet()) {
      final byte[] tagk = pair.getKey();
      if (aggregated.containsKey(tagk)) {
        // already known to differ between spans
        continue;
      }
      final byte[] known_value = shared.get(tagk);
      if (known_value == null) {
        shared.put(tagk, pair.getValue());
        continue;
      }
      if (Bytes.memcmp(known_value, pair.getValue()) != 0) {
        // values disagree: demote the key to the aggregated set
        aggregated.put(tagk, null);
        shared.remove(tagk);
      }
    }
  }

  aggregated_tag_uids = aggregated.keySet();
  tag_uids = shared;
}
/**
 * Blocking variant of {@link #metricNameAsync}.
 * @return The result of {@link #metricNameAsync} once it is available.
 * @throws RuntimeException if waiting on the deferred fails; a checked
 * exception is wrapped, an unchecked one is rethrown as is.
 */
public String metricName() {
  final Deferred<String> pending = metricNameAsync();
  try {
    return pending.joinUninterruptibly();
  } catch (Exception e) {
    if (e instanceof RuntimeException) {
      throw (RuntimeException) e;
    }
    throw new RuntimeException("Should never be here", e);
  }
}
/**
 * Asks the first span of the group for its metric name.
 * @return A deferred holding the empty string when the group has no span,
 * otherwise whatever the first span's {@code metricNameAsync()} returns.
 */
public Deferred<String> metricNameAsync() {
  if (spans.isEmpty()) {
    return Deferred.fromResult("");
  }
  final Span first = spans.get(0);
  return first.metricNameAsync();
}
/**
 * Asks the first span of the group for its metric UID.
 * @return An empty array when the group has no span, otherwise whatever the
 * first span's {@code metricUID()} returns.
 */
@Override
public byte[] metricUID() {
  if (spans.isEmpty()) {
    return new byte[] {};
  }
  final Span first = spans.get(0);
  return first.metricUID();
}
/**
 * Blocking variant of {@link #getTagsAsync}.
 * @return The result of {@link #getTagsAsync} once it is available.
 * @throws RuntimeException if waiting on the deferred fails; a checked
 * exception is wrapped, an unchecked one is rethrown as is.
 */
public Map<String, String> getTags() {
  final Deferred<Map<String, String>> pending = getTagsAsync();
  try {
    return pending.joinUninterruptibly();
  } catch (Exception e) {
    if (e instanceof RuntimeException) {
      throw (RuntimeException) e;
    }
    throw new RuntimeException("Should never be here", e);
  }
}
/**
 * Returns the tags of this group, resolving the tag UIDs on first use.
 * <p>
 * A previously stored map is returned directly. A group without spans gets
 * (and stores) an empty map. Otherwise the tag UIDs are computed if needed
 * and handed to {@code resolveTags}.
 * @return A deferred holding the tag name to tag value map.
 */
public Deferred<Map<String, String>> getTagsAsync() {
  if (tags == null) {
    if (!spans.isEmpty()) {
      if (tag_uids == null) {
        computeTags();
      }
      return resolveTags(tag_uids);
    }
    tags = new HashMap<String, String>(0);
  }
  return Deferred.fromResult(tags);
}
/**
 * Returns the tag UIDs of this group, computing them on first use.
 * @return The map stored in {@code tag_uids} (not a copy).
 */
@Override
public ByteMap<byte[]> getTagUids() {
  final boolean not_computed_yet = tag_uids == null;
  if (not_computed_yet) {
    computeTags();
  }
  return tag_uids;
}
/**
 * Blocking variant of {@link #getAggregatedTagsAsync}.
 * @return The result of {@link #getAggregatedTagsAsync} once it is available.
 * @throws RuntimeException if waiting on the deferred fails; a checked
 * exception is wrapped, an unchecked one is rethrown as is.
 */
public List<String> getAggregatedTags() {
  final Deferred<List<String>> pending = getAggregatedTagsAsync();
  try {
    return pending.joinUninterruptibly();
  } catch (Exception e) {
    if (e instanceof RuntimeException) {
      throw (RuntimeException) e;
    }
    throw new RuntimeException("Should never be here", e);
  }
}
/**
 * Returns the names of the aggregated tags, resolving their UIDs on first use.
 * <p>
 * A previously stored list is returned directly. A group without spans gets
 * (and stores) an empty list. Otherwise the aggregated tag UIDs are computed
 * if needed and handed to {@code resolveAggTags}.
 * @return A deferred holding the list of aggregated tag names.
 */
public Deferred<List<String>> getAggregatedTagsAsync() {
  if (aggregated_tags == null) {
    if (!spans.isEmpty()) {
      if (aggregated_tag_uids == null) {
        computeTags();
      }
      return resolveAggTags(aggregated_tag_uids);
    }
    aggregated_tags = new ArrayList<String>(0);
  }
  return Deferred.fromResult(aggregated_tags);
}
/**
 * Returns the UIDs of the aggregated tag keys, computing them on first use.
 * @return An immutable empty list when nothing was computed yet and the
 * group has no span, otherwise a new list copied from the computed set.
 */
@Override
public List<byte[]> getAggregatedTagUids() {
  if (aggregated_tag_uids == null) {
    if (spans.isEmpty()) {
      return Collections.emptyList();
    }
    computeTags();
  }
  final List<byte[]> copy = new ArrayList<byte[]>(aggregated_tag_uids);
  return copy;
}
/**
 * Collects the TSUIDs reported by every span of the group.
 * @return A new list with the TSUIDs of each span, in span order.
 */
public List<String> getTSUIDs() {
  final int span_count = spans.size();
  final List<String> collected = new ArrayList<String>(span_count);
  for (int idx = 0; idx < span_count; idx++) {
    collected.addAll(spans.get(idx).getTSUIDs());
  }
  return collected;
}
/**
 * Returns the annotations gathered while spans were added to this group.
 * The list handed back is the group's own list, not a copy.
 * @return Null if no annotation was gathered, the list otherwise.
 */
public List<Annotation> getAnnotations() {
  if (annotations.isEmpty()) {
    return null;
  }
  return annotations;
}
/**
 * Counts the data points of this group by walking a fresh iterator.
 * @return The number of data points produced by {@link #iterator}.
 */
public int size() {
  // TODO(tsuna): There is a way of doing this way more efficiently by
  // inspecting the Spans and counting only data points that fall in
  // our time range.
  int count = 0;
  for (final SeekableView view = iterator(); view.hasNext(); view.next()) {
    count++;
  }
  return count;
}
/**
 * Adds up the sizes of all the spans in this group.
 * @return The sum of {@code size()} over every span.
 */
public int aggregatedSize() {
  int total = 0;
  final Iterator<Span> cursor = spans.iterator();
  while (cursor.hasNext()) {
    total += cursor.next().size();
  }
  return total;
}
/**
 * Builds a new view over the data points of this group.
 * Every setting of the group (spans, time bounds, aggregator and its
 * interpolation method, downsampler, query bounds, rate settings) is handed
 * to {@code AggregationIterator.create}.
 * @return The view created by {@code AggregationIterator.create}.
 */
public SeekableView iterator() {
  return AggregationIterator.create(
      this.spans,
      this.start_time,
      this.end_time,
      this.aggregator,
      this.aggregator.interpolationMethod(),
      this.downsampler,
      this.query_start,
      this.query_end,
      this.rate,
      this.rate_options);
}
/**
 * Finds the {@code i}th data point of this group in {@code O(n)}.
 * Where {@code n} is the number of data points in this group.
 * A fresh iterator is walked from its beginning on every call.
 * @param i Zero-based index of the wanted data point.
 * @return The data point the iterator produced at that position.
 * @throws IndexOutOfBoundsException if {@code i} is negative or if the
 * iterator runs out before reaching position {@code i}.
 */
private DataPoint getDataPoint(final int i) {
  if (i < 0) {
    throw new IndexOutOfBoundsException("negative index: " + i);
  }
  final SeekableView view = iterator();
  DataPoint current = null;
  // Counts down to -1, which is reached exactly when i + 1 points were read.
  int remaining = i;
  while (view.hasNext() && remaining >= 0) {
    current = view.next();
    remaining--;
  }
  final boolean reached = remaining == -1 && current != null;
  if (reached) {
    return current;
  }
  throw new IndexOutOfBoundsException("index " + i
      + " too large (it's >= " + size() + ") for " + this);
}
/**
 * Returns the timestamp of the {@code i}th data point.
 * @throws IndexOutOfBoundsException if there is no such data point.
 */
public long timestamp(final int i) {
  final DataPoint dp = getDataPoint(i);
  return dp.timestamp();
}
/**
 * Tells whether the {@code i}th data point reports itself as an integer.
 * @throws IndexOutOfBoundsException if there is no such data point.
 */
public boolean isInteger(final int i) {
  final DataPoint dp = getDataPoint(i);
  return dp.isInteger();
}
/**
 * Returns the {@code doubleValue()} of the {@code i}th data point.
 * @throws IndexOutOfBoundsException if there is no such data point.
 */
public double doubleValue(final int i) {
  final DataPoint dp = getDataPoint(i);
  return dp.doubleValue();
}
/**
 * Returns the {@code longValue()} of the {@code i}th data point.
 * @throws IndexOutOfBoundsException if there is no such data point.
 */
public long longValue(final int i) {
  final DataPoint dp = getDataPoint(i);
  return dp.longValue();
}
/**
 * Describes this group: its shared attributes followed by its spans.
 */
@Override
public String toString() {
  final StringBuilder buf = new StringBuilder("SpanGroup(");
  buf.append(toStringSharedAttributes());
  buf.append(", spans=").append(spans);
  buf.append(')');
  return buf.toString();
}
/**
 * Renders the group-level settings as a comma separated list of
 * {@code name=value} pairs, for use inside {@link #toString}.
 * <p>
 * No parenthesis is emitted here: the caller opens and closes the
 * surrounding {@code SpanGroup(...)} itself. {@code tags} and
 * {@code aggregated_tags} print as {@code null} until they have been set.
 * @return The attribute list, without any enclosing delimiter.
 */
private String toStringSharedAttributes() {
  return "start_time=" + start_time
      + ", end_time=" + end_time
      + ", tags=" + tags
      + ", aggregated_tags=" + aggregated_tags
      + ", rate=" + rate
      + ", aggregator=" + aggregator
      + ", downsampler=" + downsampler
      + ", query_start=" + query_start
      + ", query_end=" + query_end;
}
/**
 * @return The query index this group was constructed with.
 */
public int getQueryIndex() {
  return this.query_index;
}
/**
 * Resolves the set of tag keys to their string names and stores them in
 * {@code aggregated_tags}.
 * <p>
 * If {@code aggregated_tags} is already set, it is returned as is and
 * nothing is resolved again.
 * @param tagks The set of unique tag key UIDs
 * @return a deferred to wait on for all of the tag keys to be resolved. Its
 * result is the {@code aggregated_tags} list.
 */
private Deferred<List<String>> resolveAggTags(final Set<byte[]> tagks) {
  if (aggregated_tags != null) {
    // Hand back the cached list rather than a null result, so that every
    // path of this method honors the declared return type.
    return Deferred.fromResult(aggregated_tags);
  }
  aggregated_tags = new ArrayList<String>(tagks.size());

  final List<Deferred<String>> lookups =
      new ArrayList<Deferred<String>>(tagks.size());
  for (final byte[] tagk : tagks) {
    lookups.add(tsdb.tag_names.getNameAsync(tagk));
  }

  /** Adds the names to the aggregated_tags list */
  final class ResolveCB implements Callback<List<String>, ArrayList<String>> {
    @Override
    public List<String> call(final ArrayList<String> resolved)
        throws Exception {
      aggregated_tags.addAll(resolved);
      return aggregated_tags;
    }
  }

  return Deferred.group(lookups).addCallback(new ResolveCB());
}
/**
 * Resolves the tags to their names, loading them into {@link #tags} after
 * initializing that map.
 * <p>
 * If {@link #tags} is already set, it is returned as is and nothing is
 * resolved again.
 * @param tag_uids The tag UIDs (tag key UID to tag value UID)
 * @return A deferred to wait on for resolution to complete. Its result is
 * the {@link #tags} map.
 */
private Deferred<Map<String, String>> resolveTags(final ByteMap<byte[]> tag_uids) {
  if (tags != null) {
    // Hand back the cached map rather than a null result, so that every
    // path of this method honors the declared return type.
    return Deferred.fromResult(tags);
  }
  tags = new HashMap<String, String>(tag_uids.size());

  final List<Deferred<Object>> deferreds =
      new ArrayList<Deferred<Object>>(tag_uids.size());

  /** Dumps the pairs into the map in the correct order */
  final class PairCB implements Callback<Object, ArrayList<String>> {
    @Override
    public Object call(final ArrayList<String> pair) throws Exception {
      // index 0 is the tag name, index 1 the tag value (see the loop below)
      tags.put(pair.get(0), pair.get(1));
      return null;
    }
  }

  /** Callback executed once all of the pairs are resolved and stored in the map */
  final class GroupCB implements Callback<Map<String, String>, ArrayList<Object>> {
    @Override
    public Map<String, String> call(final ArrayList<Object> group)
        throws Exception {
      return tags;
    }
  }

  for (final Map.Entry<byte[], byte[]> tag_pair : tag_uids.entrySet()) {
    // Resolve key and value together, keeping the key first.
    final List<Deferred<String>> resolve_pair =
        new ArrayList<Deferred<String>>(2);
    resolve_pair.add(tsdb.tag_names.getNameAsync(tag_pair.getKey()));
    resolve_pair.add(tsdb.tag_values.getNameAsync(tag_pair.getValue()));
    deferreds.add(Deferred.groupInOrder(resolve_pair).addCallback(new PairCB()));
  }
  return Deferred.group(deferreds).addCallback(new GroupCB());
}
}