// SpanGroup.java example
//
// Explorer
// opentsdb-master
// - src
// - test
// This file is part of OpenTSDB.
// Copyright (C) 2010-2012  The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version.  This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
// General Public License for more details.  You should have received a copy
// of the GNU Lesser General Public License along with this program.  If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.core;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.hbase.async.Bytes;
import org.hbase.async.Bytes.ByteMap;

import com.stumbleupon.async.Callback;
import com.stumbleupon.async.Deferred;

import net.opentsdb.meta.Annotation;

/**
 * Groups multiple spans together and offers a dynamic "view" on them.
 * <p>
 * This is used for queries to the TSDB, where we might group multiple
 * {@link Span}s that are for the same time series but different tags
 * together.  We need to "hide" data points that are outside of the
 * time period of the query and do on-the-fly aggregation of the data
 * points coming from the different Spans, using an {@link Aggregator}.
 * Since not all the Spans will have their data points at exactly the
 * same time, we also do on-the-fly linear interpolation.  If needed,
 * this view can also return the rate of change instead of the actual
 * data points.
 * <p>
 * This is one of the rare (if not the only) implementations of
 * {@link DataPoints} for which {@link #getTags} can potentially return
 * an empty map.
 * <p>
 * The implementation can also dynamically downsample the data when a
 * sampling interval and a downsampling function (in the form of an
 * {@link Aggregator}) are given.  This is done by using a special
 * iterator when using the {@link Span.DownsamplingIterator}.
 */
final class SpanGroup implements DataPoints {
  /**
   * Annotations collected from the spans handed to {@link #add}.
   * Exposed through {@link #getAnnotations}.
   */
  private final ArrayList<Annotation> annotations;

  /**
   * Start of the time range covered by this group.  The constructor
   * multiplies the value by 1000 when no bit of {@code Const.SECOND_MASK}
   * is set in it (second to millisecond normalization), otherwise it is
   * stored unchanged.
   */
  private final long start_time;

  /**
   * End of the time range covered by this group.  Normalized by the
   * constructor the same way as {@link #start_time}.
   */
  private final long end_time;

  /**
   * The resolved tags (name to value) of this group.
   * Stays {@code null} until {@link #getTagsAsync} is called; {@link #add}
   * refuses new spans once it is set.
   * @see #computeTags
   */
  private Map<String, String> tags;
  /** UID form (tag key UID to tag value UID) of the tags, set by {@link #computeTags}. */
  private ByteMap<byte[]> tag_uids;

  /**
   * The resolved names of the tags for which the spans of this group carry
   * differing values.  Stays {@code null} until
   * {@link #getAggregatedTagsAsync} is called.
   * @see #computeTags
   */
  private List<String> aggregated_tags;
  /** UID form of the aggregated tag keys, set by {@link #computeTags}. */
  private Set<byte[]> aggregated_tag_uids;

  /** Spans in this group.  They must all be for the same metric. */
  private final ArrayList<Span> spans = new ArrayList<Span>();

  /** If true, use rate of change instead of actual values. */
  private final boolean rate;
  
  /** Specifies the various options for rate calculations. */
  private RateOptions rate_options; 

  /** Aggregator to use to aggregate data points from different Spans. */
  private final Aggregator aggregator;

  /** Downsampling specification to use, if any (can be {@code null}). */
  private DownsamplingSpecification downsampler;
  
  /** Start timestamp of the query, handed to the iterator built by {@link #iterator}. */  
  private final long query_start;
  
  /** End timestamp of the query, handed to the iterator built by {@link #iterator}. */
  private final long query_end;  

  /** Index of the query in the TSQuery class. */
  private final int query_index;
  
  /** The TSDB to which we belong, used to resolve tag UIDs to names. */
  private final TSDB tsdb;
  
  /**
   * Creates a group without caller-supplied rate options.
   * <p>
   * Forwards to the constructor that takes explicit {@link RateOptions},
   * passing {@code new RateOptions(false, Long.MAX_VALUE,
   * RateOptions.DEFAULT_RESET_VALUE)}.
   * @param tsdb The TSDB we belong to.
   * @param start_time Any data point strictly before this timestamp will be
   * ignored.
   * @param end_time Any data point strictly after this timestamp will be
   * ignored.
   * @param spans A sequence of initial {@link Span}s to add to this group.
   * Ignored if {@code null}.  Additional spans can be added with {@link #add}.
   * @param rate If {@code true}, the rate of the series will be used instead
   * of the actual values.
   * @param aggregator The aggregation function to use.
   * @param interval Number of milliseconds wanted between each data point.
   * @param downsampler Aggregation function to use to group data points
   * within an interval.
   */
  SpanGroup(final TSDB tsdb,
            final long start_time,
            final long end_time,
            final Iterable<Span> spans,
            final boolean rate,
            final Aggregator aggregator,
            final long interval,
            final Aggregator downsampler) {
    this(tsdb,
         start_time,
         end_time,
         spans,
         rate,
         new RateOptions(false, Long.MAX_VALUE, RateOptions.DEFAULT_RESET_VALUE),
         aggregator,
         interval,
         downsampler);
  }

  /**
   * Creates a group with explicit rate options.
   * <p>
   * Forwards to the constructor that also takes a query index and a fill
   * policy, passing {@code -1} and {@code FillPolicy.NONE} respectively.
   * @param tsdb The TSDB we belong to.
   * @param start_time Any data point strictly before this timestamp will be
   * ignored.
   * @param end_time Any data point strictly after this timestamp will be
   * ignored.
   * @param spans A sequence of initial {@link Span}s to add to this group.
   * Ignored if {@code null}. Additional spans can be added with {@link #add}.
   * @param rate If {@code true}, the rate of the series will be used instead
   * of the actual values.
   * @param rate_options Specifies the optional additional rate calculation
   * options.
   * @param aggregator The aggregation function to use.
   * @param interval Number of milliseconds wanted between each data point.
   * @param downsampler Aggregation function to use to group data points
   * within an interval.
   * @since 2.0
   */
  SpanGroup(final TSDB tsdb,
            final long start_time,
            final long end_time,
            final Iterable<Span> spans,
            final boolean rate,
            final RateOptions rate_options,
            final Aggregator aggregator,
            final long interval,
            final Aggregator downsampler) {
    this(tsdb,
         start_time,
         end_time,
         spans,
         rate,
         rate_options,
         aggregator,
         interval,
         downsampler,
         -1,
         FillPolicy.NONE);
  }

  /**
   * Creates a group from an interval, a downsampling function and a fill
   * policy.
   * <p>
   * When {@code downsampler} is not {@code null} the three values are wrapped
   * in a new {@link DownsamplingSpecification}; otherwise no specification is
   * used.  Forwards to the main constructor with {@code 0} for both the query
   * start and the query end.
   * @param tsdb The TSDB we belong to.
   * @param start_time Any data point strictly before this timestamp will be
   * ignored.
   * @param end_time Any data point strictly after this timestamp will be
   * ignored.
   * @param spans A sequence of initial {@link Span}s to add to this group.
   * Ignored if {@code null}. Additional spans can be added with {@link #add}.
   * @param rate If {@code true}, the rate of the series will be used instead
   * of the actual values.
   * @param rate_options Specifies the optional additional rate calculation
   * options.
   * @param aggregator The aggregation function to use.
   * @param interval Number of milliseconds wanted between each data point.
   * @param downsampler Aggregation function to use to group data points
   * within an interval.
   * @param query_index index of the original query
   * @param fill_policy Policy specifying whether to interpolate or to fill
   * missing intervals with special values.
   * @since 2.2
   */
  SpanGroup(final TSDB tsdb,
            final long start_time,
            final long end_time,
            final Iterable<Span> spans,
            final boolean rate,
            final RateOptions rate_options,
            final Aggregator aggregator,
            final long interval,
            final Aggregator downsampler,
            final int query_index,
            final FillPolicy fill_policy) {
    this(tsdb,
         start_time,
         end_time,
         spans,
         rate,
         rate_options,
         aggregator,
         downsampler == null
             ? null
             : new DownsamplingSpecification(interval, downsampler, fill_policy),
         0,
         0,
         query_index);
  }
  
  /**
   * Main constructor; every other constructor ends up here.
   * <p>
   * Both range boundaries are stored in normalized form: a value with no bit
   * of {@code Const.SECOND_MASK} set is multiplied by 1000, any other value
   * is kept as is.  The initial spans, if any, go through {@link #add}.
   * @param tsdb The TSDB we belong to.
   * @param start_time Any data point strictly before this timestamp will be
   * ignored.
   * @param end_time Any data point strictly after this timestamp will be
   * ignored.
   * @param spans A sequence of initial {@link Span}s to add to this group.
   * Ignored if {@code null}. Additional spans can be added with {@link #add}.
   * @param rate If {@code true}, the rate of the series will be used instead
   * of the actual values.
   * @param rate_options Specifies the optional additional rate calculation
   * options.
   * @param aggregator The aggregation function to use.
   * @param downsampler The specification to use for downsampling, may be null.
   * @param query_start Start of the actual query
   * @param query_end End of the actual query
   * @param query_index index of the original query
   * @since 2.3
   */
  SpanGroup(final TSDB tsdb,
            final long start_time,
            final long end_time,
            final Iterable<Span> spans,
            final boolean rate,
            final RateOptions rate_options,
            final Aggregator aggregator,
            final DownsamplingSpecification downsampler,
            final long query_start,
            final long query_end,
            final int query_index) {
    // plain assignments first, none of them is read by add()
    this.tsdb = tsdb;
    this.rate = rate;
    this.rate_options = rate_options;
    this.aggregator = aggregator;
    this.downsampler = downsampler;
    this.query_start = query_start;
    this.query_end = query_end;
    this.query_index = query_index;

    // state that add() relies on
    this.annotations = new ArrayList<Annotation>();
    if ((start_time & Const.SECOND_MASK) == 0) {
      this.start_time = start_time * 1000;
    } else {
      this.start_time = start_time;
    }
    if ((end_time & Const.SECOND_MASK) == 0) {
      this.end_time = end_time * 1000;
    } else {
      this.end_time = end_time;
    }

    if (spans != null) {
      for (final Span initial : spans) {
        add(initial);
      }
    }
  }
  
  /**
   * Adds a span to this group, provided that it's in the right time range.
   * <b>Must not</b> be called once {@link #getTags} or
   * {@link #getAggregatedTags} has been called on this instance.
   * <p>
   * A span without data points is never stored; only those of its
   * annotations that overlap the time range of this group are kept.  A span
   * with data points is stored, together with all of its annotations, when
   * the interval between its first and last data point overlaps the time
   * range of this group.
   * @param span The span to add to this group.  If none of the data points
   * fall within our time range, this method will silently ignore that span.
   * @throws AssertionError if the tags of this group were already computed.
   */
  void add(final Span span) {
    if (tags != null) {
      throw new AssertionError("The set of tags has already been computed"
                               + ", you can't add more Spans to " + this);
    }

    // start_time and end_time were normalized once by the constructor.
    // Normalizing them again here would scale small values a second time and
    // disagree with the range handed to the iterator.

    if (span.size() == 0) {
      // copy annotations that are in the time range
      for (final Annotation annot : span.getAnnotations()) {
        final long annot_start = toMillis(annot.getStartTime());
        final long raw_end = annot.getEndTime();
        // NOTE(review): an annotation without an end time presumably reports
        // 0; it is then treated as a point in time at its start -- confirm.
        final long annot_end = raw_end == 0 ? annot_start : toMillis(raw_end);
        if (annot_end >= start_time && annot_start <= end_time) {
          annotations.add(annot);
        }
      }
      return;
    }

    // size() > 0 here, so both index 0 and index size() - 1 exist
    final long first_dp = toMillis(span.timestamp(0));
    final long last_dp = toMillis(span.timestamp(span.size() - 1));
    if (first_dp <= end_time && last_dp >= start_time) {
      this.spans.add(span);
      annotations.addAll(span.getAnnotations());
    }
  }

  /**
   * Applies the same normalization as the constructor: a timestamp with no
   * bit of {@code Const.SECOND_MASK} set is multiplied by 1000.
   */
  private static long toMillis(final long timestamp) {
    return (timestamp & Const.SECOND_MASK) == 0 ? timestamp * 1000 : timestamp;
  }

  /**
   * Splits the tag UIDs of all spans into shared pairs and aggregated keys.
   * <p>
   * A tag key ends up in {@link #tag_uids} as long as no two spans carry
   * different values for it; as soon as two values differ the key is moved
   * to {@link #aggregated_tag_uids} for good.  Only UIDs are handled here,
   * nothing is resolved to a string.
   * <p>
   * The work is done once: when both collections are already set the method
   * returns immediately.  With no spans, both are set to empty collections.
   */
  private void computeTags() {
    if (tag_uids != null && aggregated_tag_uids != null) {
      return;
    }
    if (spans.isEmpty()) {
      tag_uids = new ByteMap<byte[]>();
      aggregated_tag_uids = new HashSet<byte[]>();
      return;
    }

    // pairs seen so far with a single value
    final ByteMap<byte[]> shared = new ByteMap<byte[]>();
    // only the keys matter, the values are always null
    final ByteMap<byte[]> conflicting = new ByteMap<byte[]>();

    for (final Span span : spans) {
      final ByteMap<byte[]> span_uids = span.getTagUids();
      for (final Map.Entry<byte[], byte[]> pair : span_uids.entrySet()) {
        final byte[] tagk = pair.getKey();
        if (conflicting.containsKey(tagk)) {
          // already known to be an aggregated tag
          continue;
        }

        final byte[] known_value = shared.get(tagk);
        if (known_value == null) {
          shared.put(tagk, pair.getValue());
        } else if (Bytes.memcmp(known_value, pair.getValue()) != 0) {
          // two different values: bump to aggregated tags
          conflicting.put(tagk, null);
          shared.remove(tagk);
        }
      }
    }

    aggregated_tag_uids = conflicting.keySet();
    tag_uids = shared;
  }

  /**
   * Blocking variant of {@link #metricNameAsync}.
   * @return The result of the deferred returned by
   * {@link #metricNameAsync}.
   * @throws RuntimeException as thrown while waiting, or wrapping any other
   * exception raised while waiting.
   */
  public String metricName() {
    final String name;
    try {
      name = metricNameAsync().joinUninterruptibly();
    } catch (RuntimeException unchecked) {
      throw unchecked;
    } catch (Exception checked) {
      throw new RuntimeException("Should never be here", checked);
    }
    return name;
  }
  
  /**
   * Asks the first span of the group for its metric name.
   * @return A deferred holding the empty string when the group has no span,
   * otherwise whatever the first span's {@code metricNameAsync()} returns.
   */
  public Deferred<String> metricNameAsync() {
    if (spans.isEmpty()) {
      return Deferred.fromResult("");
    }
    return spans.get(0).metricNameAsync();
  }

  /**
   * Asks the first span of the group for its metric UID.
   * @return A new zero-length array when the group has no span, otherwise
   * whatever the first span's {@code metricUID()} returns.
   */
  @Override
  public byte[] metricUID() {
    if (spans.isEmpty()) {
      return new byte[0];
    }
    return spans.get(0).metricUID();
  }
  
  /**
   * Blocking variant of {@link #getTagsAsync}.
   * @return The result of the deferred returned by {@link #getTagsAsync}.
   * @throws RuntimeException as thrown while waiting, or wrapping any other
   * exception raised while waiting.
   */
  public Map<String, String> getTags() {
    final Map<String, String> resolved;
    try {
      resolved = getTagsAsync().joinUninterruptibly();
    } catch (RuntimeException unchecked) {
      throw unchecked;
    } catch (Exception checked) {
      throw new RuntimeException("Should never be here", checked);
    }
    return resolved;
  }
  
  /**
   * Returns the tags of this group, resolving them on the first call.
   * <p>
   * Once {@link #tags} is set it is returned as is.  A group without spans
   * gets an empty map.  Otherwise the tag UIDs are computed if needed and
   * handed to {@link #resolveTags}.
   * @return A deferred holding the tag map.
   */
  public Deferred<Map<String, String>> getTagsAsync() {
    if (tags == null) {
      if (!spans.isEmpty()) {
        if (tag_uids == null) {
          computeTags();
        }
        return resolveTags(tag_uids);
      }
      tags = new HashMap<String, String>(0);
    }
    return Deferred.fromResult(tags);
  }

  /**
   * Returns the tag UID pairs of this group, running {@link #computeTags}
   * first when they have not been computed yet.
   * @return The internal map of tag key UID to tag value UID.
   */
  @Override
  public ByteMap<byte[]> getTagUids() {
    final boolean computed = tag_uids != null;
    if (!computed) {
      computeTags();
    }
    return tag_uids;
  }
  
  /**
   * Blocking variant of {@link #getAggregatedTagsAsync}.
   * @return The result of the deferred returned by
   * {@link #getAggregatedTagsAsync}.
   * @throws RuntimeException as thrown while waiting, or wrapping any other
   * exception raised while waiting.
   */
  public List<String> getAggregatedTags() {
    final List<String> resolved;
    try {
      resolved = getAggregatedTagsAsync().joinUninterruptibly();
    } catch (RuntimeException unchecked) {
      throw unchecked;
    } catch (Exception checked) {
      throw new RuntimeException("Should never be here", checked);
    }
    return resolved;
  }
  
  /**
   * Returns the names of the aggregated tags, resolving them on the first
   * call.
   * <p>
   * Once {@link #aggregated_tags} is set it is returned as is.  A group
   * without spans gets an empty list.  Otherwise the aggregated tag UIDs are
   * computed if needed and handed to {@link #resolveAggTags}.
   * @return A deferred holding the list of aggregated tag names.
   */
  public Deferred<List<String>> getAggregatedTagsAsync() {
    if (aggregated_tags == null) {
      if (!spans.isEmpty()) {
        if (aggregated_tag_uids == null) {
          computeTags();
        }
        return resolveAggTags(aggregated_tag_uids);
      }
      aggregated_tags = new ArrayList<String>(0);
    }
    return Deferred.fromResult(aggregated_tags);
  }
  
  /**
   * Returns the UIDs of the aggregated tag keys.
   * <p>
   * When the UIDs were not computed yet and the group has no span, an
   * immutable empty list is returned and nothing is computed.
   * @return A new list holding a copy of the aggregated tag key UIDs, or
   * {@code Collections.emptyList()} in the case described above.
   */
  @Override
  public List<byte[]> getAggregatedTagUids() {
    if (aggregated_tag_uids == null) {
      if (spans.isEmpty()) {
        return Collections.emptyList();
      }
      computeTags();
    }
    return new ArrayList<byte[]>(aggregated_tag_uids);
  }

  /**
   * Concatenates the TSUIDs reported by every span of the group, in span
   * order.
   * @return A new list, empty when the group has no span.
   */
  public List<String> getTSUIDs() {
    final int span_count = spans.size();
    final List<String> collected = new ArrayList<String>(span_count);
    for (int idx = 0; idx < span_count; idx++) {
      collected.addAll(spans.get(idx).getTSUIDs());
    }
    return collected;
  }
  
  /**
   * Returns the annotations gathered while spans were added to this group.
   * @return Null if no annotation was gathered, otherwise the internal list
   * of annotations (not a copy).
   */
  public List<Annotation> getAnnotations() {
    if (annotations.isEmpty()) {
      return null;
    }
    return annotations;
  }

  /**
   * Counts the data points of this group by walking a fresh
   * {@link #iterator} to its end.
   * @return The number of data points the iterator produced.
   */
  public int size() {
    // TODO(tsuna): There is a way of doing this way more efficiently by
    // inspecting the Spans and counting only data points that fall in
    // our time range.
    int count = 0;
    for (final SeekableView view = iterator(); view.hasNext(); count++) {
      view.next();
    }
    return count;
  }

  /**
   * Adds up the sizes of all the spans in this group.
   * @return The sum of {@code size()} over every span, 0 without spans.
   */
  public int aggregatedSize() {
    int total = 0;
    final int span_count = spans.size();
    for (int idx = 0; idx < span_count; idx++) {
      total += spans.get(idx).size();
    }
    return total;
  }

  /**
   * Builds a new view over the data points of this group.
   * <p>
   * The view comes from {@code AggregationIterator.create}, which receives
   * the spans, the time range, the aggregator along with its interpolation
   * method, the downsampling specification, the query boundaries and the
   * rate settings of this group.
   * @return A new view on every call.
   */
  public SeekableView iterator() {
    final SeekableView view = AggregationIterator.create(
        spans,
        start_time,
        end_time,
        aggregator,
        aggregator.interpolationMethod(),
        downsampler,
        query_start,
        query_end,
        rate,
        rate_options);
    return view;
  }

  /**
   * Finds the {@code i}th data point of this group in {@code O(n)}.
   * Where {@code n} is the number of data points in this group.
   * <p>
   * A fresh {@link #iterator} is advanced {@code i + 1} times and the last
   * value it produced is returned.
   * @param i Zero-based index of the wanted data point.
   * @return The data point at that index.
   * @throws IndexOutOfBoundsException if {@code i} is negative or the
   * iterator runs out before reaching index {@code i}.
   */
  private DataPoint getDataPoint(final int i) {
    if (i < 0) {
      throw new IndexOutOfBoundsException("negative index: " + i);
    }
    final SeekableView view = iterator();
    DataPoint current = null;
    int remaining = i;
    // reaches -1 exactly when i + 1 points have been consumed
    for (; view.hasNext() && remaining >= 0; remaining--) {
      current = view.next();
    }
    if (remaining == -1 && current != null) {
      return current;
    }
    throw new IndexOutOfBoundsException("index " + i
        + " too large (it's >= " + size() + ") for " + this);
  }

  /**
   * Returns the timestamp of the {@code i}th data point, looked up through
   * {@link #getDataPoint}.
   */
  public long timestamp(final int i) {
    final DataPoint dp = getDataPoint(i);
    return dp.timestamp();
  }

  /**
   * Tells whether the {@code i}th data point, looked up through
   * {@link #getDataPoint}, reports itself as an integer.
   */
  public boolean isInteger(final int i) {
    final DataPoint dp = getDataPoint(i);
    return dp.isInteger();
  }

  /**
   * Returns the {@code doubleValue()} of the {@code i}th data point, looked
   * up through {@link #getDataPoint}.
   */
  public double doubleValue(final int i) {
    final DataPoint dp = getDataPoint(i);
    return dp.doubleValue();
  }

  /**
   * Returns the {@code longValue()} of the {@code i}th data point, looked
   * up through {@link #getDataPoint}.
   */
  public long longValue(final int i) {
    final DataPoint dp = getDataPoint(i);
    return dp.longValue();
  }

  /**
   * Renders the group as {@code SpanGroup(} followed by the output of
   * {@link #toStringSharedAttributes}, the list of spans and a closing
   * parenthesis.
   */
  @Override
  public String toString() {
    final StringBuilder buf = new StringBuilder("SpanGroup(");
    buf.append(toStringSharedAttributes());
    buf.append(", spans=").append(spans);
    buf.append(')');
    return buf.toString();
  }

  /**
   * Renders the scalar attributes of this group as a comma separated list of
   * {@code name=value} pairs.
   * <p>
   * No parenthesis is emitted here: {@link #toString} opens and closes the
   * surrounding {@code SpanGroup(...)} itself and appends more attributes
   * after this list.
   * @return The attribute list, without any enclosing delimiter.
   */
  private String toStringSharedAttributes() {
    return "start_time=" + start_time
      + ", end_time=" + end_time
      + ", tags=" + tags
      + ", aggregated_tags=" + aggregated_tags
      + ", rate=" + rate
      + ", aggregator=" + aggregator
      + ", downsampler=" + downsampler
      + ", query_start=" + query_start
      + ", query_end=" + query_end;
  }

  /** @return The query index this group was constructed with. */
  public int getQueryIndex() {
    return this.query_index;
  }

  /**
   * Resolves the set of tag keys to their string names and stores them in
   * {@link #aggregated_tags}.
   * <p>
   * If {@link #aggregated_tags} is already set, nothing is looked up and the
   * existing list is returned.
   * @param tagks The set of unique tag key UIDs
   * @return A deferred to wait on for all of the tag keys to be resolved.
   * Its result is the {@link #aggregated_tags} list.
   */
  private Deferred<List<String>> resolveAggTags(final Set<byte[]> tagks) {
    if (aggregated_tags != null) {
      // hand back the cached list instead of a deferred holding null
      return Deferred.fromResult(aggregated_tags);
    }
    aggregated_tags = new ArrayList<String>(tagks.size());

    final List<Deferred<String>> names =
        new ArrayList<Deferred<String>>(tagks.size());
    for (final byte[] tagk : tagks) {
      names.add(tsdb.tag_names.getNameAsync(tagk));
    }

    /** Adds the names to the aggregated_tags list */
    final class ResolveCB implements Callback<List<String>, ArrayList<String>> {
      @Override
      public List<String> call(final ArrayList<String> names) throws Exception {
        aggregated_tags.addAll(names);
        return aggregated_tags;
      }
    }

    return Deferred.group(names).addCallback(new ResolveCB());
  }
  
  /**
   * Resolves the tags to their names, loading them into {@link #tags} after
   * initializing that map.
   * <p>
   * If {@link #tags} is already set, nothing is looked up and the existing
   * map is returned.
   * @param tag_uids The tag UIDs (tag key UID to tag value UID)
   * @return A deferred to wait on for resolution to complete.  Its result is
   * the {@link #tags} map.
   */
  private Deferred<Map<String, String>> resolveTags(final ByteMap<byte[]> tag_uids) {
    if (tags != null) {
      // hand back the cached map instead of a deferred holding null
      return Deferred.fromResult(tags);
    }
    tags = new HashMap<String, String>(tag_uids.size());

    final List<Deferred<Object>> deferreds =
        new ArrayList<Deferred<Object>>(tag_uids.size());

    /**
     * Stores one resolved pair; index 0 is the tag name, index 1 the value.
     * NOTE(review): every pair writes to the same map from its own callback;
     * confirm these callbacks cannot run concurrently.
     */
    final class PairCB implements Callback<Object, ArrayList<String>> {
      @Override
      public Object call(final ArrayList<String> pair) throws Exception {
        tags.put(pair.get(0), pair.get(1));
        return null;
      }
    }

    /** Callback executed once all of the pairs are resolved and stored in the map */
    final class GroupCB implements Callback<Map<String, String>, ArrayList<Object>> {
      @Override
      public Map<String, String> call(final ArrayList<Object> group)
          throws Exception {
        return tags;
      }
    }

    for (final Map.Entry<byte[], byte[]> tag_pair : tag_uids.entrySet()) {
      // the name must stay first and the value second, PairCB relies on it
      final List<Deferred<String>> resolve_pair =
          new ArrayList<Deferred<String>>(2);
      resolve_pair.add(tsdb.tag_names.getNameAsync(tag_pair.getKey()));
      resolve_pair.add(tsdb.tag_values.getNameAsync(tag_pair.getValue()));
      deferreds.add(Deferred.groupInOrder(resolve_pair).addCallback(new PairCB()));
    }

    return Deferred.group(deferreds).addCallback(new GroupCB());
  }
}