IntersectionIterator.java example

Explorer
opentsdb-master
- src
- test
// This file is part of OpenTSDB.
// Copyright (C) 2015  The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version.  This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser
// General Public License for more details.  You should have received a copy
// of the GNU Lesser General Public License along with this program.  If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.query.expression;

import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;

import net.opentsdb.core.IllegalDataException;
import net.opentsdb.core.TSDB;
import net.opentsdb.utils.ByteSet;

import org.hbase.async.Bytes;
import org.hbase.async.Bytes.ByteMap;
import org.hbase.async.HBaseClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import sun.reflect.generics.reflectiveObjects.NotImplementedException;

/**
 * This class handles taking a set of queries and their results and iterates 
 * over each series in each set with time alignment after computing the 
 * intersection of all sets.
 * <p>
 * The iterator performs the following:
 * - calculates the intersection of all queries based on the tags or query tags
 *   and optionally the aggregated tags.
 * - any series that are not members of every set are kicked out (and logged).
 * - series are aligned across queries so that expressions can operate over them.
 * - series are also time aligned and maintain alignment during iteration.
 * <p>
 * The {@link #current_values} map will map the expression "variables" to the
 * proper iterator for each series' array. E.g.
 *   <"A", [1, 2, 3, 4]>
 *   <"B", [1, 2, 3, 4]>
 * <p>
 * To use it, simply fetch the result map, call {@link #hasNext()} and
 * {@link #next()} to iterate and, in a for loop, iterate {@link #getSeriesSize()}
 * times to get all of the current values.
 * For efficiency, call {@link #getResults()} once before iterating, then on 
 * each call to {@link #next()} you can just iterate over the same result map 
 * again as the values will be updated.
 * @since 2.3
 */
public class IntersectionIterator implements ITimeSyncedIterator, VariableIterator {
  private static final Logger LOG = LoggerFactory.getLogger(IntersectionIterator.class);
  
  /** The queries compiled and fetched from storage */
  private final Map<String, ITimeSyncedIterator> queries;
  
  /** A list of the current values for each series post intersection */
  private final Map<String, ExpressionDataPoint[]> current_values;

  /** A map of the sub query index to their names for intersection computation */
  private final String[] index_to_names;
  
  /** Whether or not to intersect on the query tagks instead of the result set
   * tagks */
  private final boolean intersect_on_query_tagks;
  
  /** Whether or not to include the aggregated tags in the result set */
  private final boolean include_agg_tags;
  
  /** The start/current timestamp for the iterator in ms */
  private long timestamp;
  
  /** Post intersection number of time series */
  private int series_size;
  
  /** The ID of this iterator */
  private final String id;
  
  /** The index of this iterator in a list of iterators */
  private int index;
  
  /**
   * Builds the lock-step expression iterator from a set of named query results.
   * Every sub iterator is registered under its map key, assigned an index in
   * iteration order, and then the intersection across all sets is computed.
   * If none of the result sets contains a series, the ctor completes without
   * computing an intersection; the results map stays empty and
   * {@link #hasNext()} is expected to return false.
   * @param id The ID to assign to this iterator
   * @param results The query results to store, keyed on the variable name
   * @param intersect_on_query_tagks Whether or not to include only the query 
   * specified tags during intersection
   * @param include_agg_tags Whether or not to include aggregated tags during
   * intersection
   * @throws IllegalDataException if more than one result set was supplied and,
   * after computing the intersection, no series would be left.
   */
  public IntersectionIterator(final String id, final Map<String, ITimeSyncedIterator> results, 
      final boolean intersect_on_query_tagks, final boolean include_agg_tags) {
    this.id = id;
    this.intersect_on_query_tagks = intersect_on_query_tagks;
    this.include_agg_tags = include_agg_tags;

    final int query_count = results.size();
    queries = new HashMap<String, ITimeSyncedIterator>(query_count);
    current_values = new HashMap<String, ExpressionDataPoint[]>(query_count);
    index_to_names = new String[query_count];
    timestamp = Long.MAX_VALUE;

    // register each sub iterator and track the widest result set seen
    int widest_set = 0;
    int position = 0;
    for (final Map.Entry<String, ITimeSyncedIterator> result : results.entrySet()) {
      final String name = result.getKey();
      final ITimeSyncedIterator sub = result.getValue();
      if (LOG.isDebugEnabled()) {
        LOG.debug("Adding iterator " + sub);
      }
      queries.put(name, sub);
      sub.setIndex(position);
      index_to_names[position] = name;
      widest_set = Math.max(widest_set, sub.values().length);
      position++;
    }

    if (widest_set < 1) {
      // not an error: leave everything empty so iteration simply never starts
      LOG.debug("No series in the result sets");
      return;
    }

    computeIntersection();

    // the starting timestamp is the earliest one reported by any sub iterator
    for (final ITimeSyncedIterator sub : queries.values()) {
      timestamp = Math.min(timestamp, sub.nextTimestamp());
    }
  }

  /**
   * A sort of copy constructor that populates the iterator from an existing 
   * iterator, copying all child iterators. Each child is duplicated via
   * {@link ITimeSyncedIterator#getCopy()}, the copy is assigned its index in
   * this iterator, and the intersection is then recomputed over the copies.
   * The source iterator and its children are not modified.
   * @param iterator The iterator to copy from.
   * @throws IllegalDataException if more than one child was copied and the
   * recomputed intersection is empty.
   */
  private IntersectionIterator(final IntersectionIterator iterator) {
    id = iterator.id;
    intersect_on_query_tagks = iterator.intersect_on_query_tagks;
    include_agg_tags = iterator.include_agg_tags;
    timestamp = Long.MAX_VALUE;

    // Size everything from the SOURCE iterator: our own queries map is still
    // empty at this point, so sizing from it would yield zero-length storage
    // and an out-of-bounds write into index_to_names below.
    final int query_count = iterator.queries.size();
    queries = new HashMap<String, ITimeSyncedIterator>(query_count);
    current_values = new HashMap<String, ExpressionDataPoint[]>(query_count);
    index_to_names = new String[query_count];
    
    int max_series = 0;
    int i = 0;
    for (final Entry<String, ITimeSyncedIterator> entry : iterator.queries.entrySet()) {
      // index the copy (not the source child) so that the copy's index lines
      // up with index_to_names and the source iterator is left untouched
      final ITimeSyncedIterator copy = entry.getValue().getCopy();
      copy.setIndex(i);
      queries.put(entry.getKey(), copy);
      index_to_names[i] = entry.getKey();
      final int series = copy.values().length;
      if (series > max_series) {
        max_series = series;
      }
      ++i;
    }
    
    if (max_series < 1) {
      // we don't want to throw an exception here, just set it up so that the
      // call to {@link #hasNext()} will be false.
      LOG.debug("No series in the result sets");
      return;
    }
    
    computeIntersection();
    
    // calculate the starting timestamp from the various iterators
    for (final ITimeSyncedIterator it : queries.values()) {
      final long ts = it.nextTimestamp();
      if (ts < timestamp) {
        timestamp = ts;
      }
    }
  }
  
  /**
   * @return a human readable description of this iterator including its ID,
   * intersection settings, index and the sub query iterators.
   */
  @Override
  public String toString() {
    final StringBuilder buf = new StringBuilder();
    buf.append("IntersectionIterator(id=")
       .append(id)
       .append(", useQueryTags=")
       .append(intersect_on_query_tagks)
       .append(", includeAggTags=")
       .append(include_agg_tags)
       .append(", index=")
       .append(index)
       .append(", queries=")
       .append(queries)
       // close the parenthesis opened by the prefix
       .append(")");
    return buf.toString();
  }
  
  /**
   * @return true if at least one of the sub query iterators reports that it 
   * has more data, false if none do (or if there are no sub queries).
   */
  @Override
  public boolean hasNext() {
    final Iterator<ITimeSyncedIterator> subs = queries.values().iterator();
    while (subs.hasNext()) {
      if (subs.next().hasNext()) {
        return true;
      }
    }
    return false;
  }
  
  /**
   * Advances every sub query iterator to the current timestamp and then 
   * records the next timestamp to iterate to.
   * @throws IllegalDataException if {@link #hasNext()} is false
   */
  @Override
  public void next() {
    if (hasNext()) {
      final long current = timestamp;
      for (final ITimeSyncedIterator sub : queries.values()) {
        sub.next(current);
      }
      timestamp = nextTimestamp();
      return;
    }
    throw new IllegalDataException("No more data");
  }
  
  /**
   * @return the live map of variable name to the post-intersection series 
   * array. The same map instance is returned on every call.
   */
  @Override 
  public Map<String, ExpressionDataPoint[]> getResults() {
    return this.current_values;
  }

  /**
   * @return the number of series left after the intersection was computed, 
   * i.e. the length of each array in the result map. Zero if no intersection
   * was computed.
   */
  @Override
  public int getSeriesSize() {
    return this.series_size;
  }
  
  /**
   * @return the smallest next timestamp reported by any of the sub query
   * iterators, or {@link Long#MAX_VALUE} if there are no sub queries.
   */
  @Override
  public long nextTimestamp() {
    long earliest = Long.MAX_VALUE;
    for (final ITimeSyncedIterator sub : queries.values()) {
      if (sub == null) {
        // tolerate a null entry in the map rather than blowing up
        continue;
      }
      earliest = Math.min(earliest, sub.nextTimestamp());
    }
    return earliest;
  }
  
  /**
   * Computes the intersection of the various sets of time series returned 
   * from the sub queries and stores the aligned result via 
   * {@link #setCurrentAndMeta(ByteMap)}.
   * <p>
   * The process is:
   * - Iterate over each query set
   * - For the first set, flatten each series' tag and (optionally) aggregated tag
   *   set into a single byte array for use as an ID.
   * - Populate a map with the IDs and references to the series iterator for the
   *   first query set.
   * - For each additional set, flatten the tags and if the tag set ID isn't in
   *   the intersection map, kick it out.
   * - For each key in the intersection map, if it doesn't appear in the current
   *   query set, kick it out.
   * - Once all sets are finished, align the resulting series iterators in the 
   *   {@link #current_values} map which is then prepped for expression processing.
   * <p>
   * When only a single query set is present, all of its series are kept and no
   * exception is thrown regardless of how many series there are.
   * <p>
   * NOTE(review): series are keyed purely on their flattened tag bytes, so two
   * series within the same set that flatten to the same ID would overwrite each
   * other in the maps below - confirm callers never supply such sets.
   * @throws IllegalDataException if more than one series was supplied and 
   * the resulting intersection failed to produce any series
   */
  private void computeIntersection() {
    // flattened tag ID => one slot per sub query (indexed by the sub query's
    // index) holding the matching series for that query
    final ByteMap<ExpressionDataPoint[]> ordered_intersection = 
        new ByteMap<ExpressionDataPoint[]>(); 
    final Iterator<ITimeSyncedIterator> it = queries.values().iterator();
    
    // assume we have at least one query in our set
    ITimeSyncedIterator sub = it.next();
    // sub query ID => (flattened tag ID => index of the series within that sub
    // query). Used later to find the series to null out in the other sets.
    Map<String, ByteMap<Integer>> flattened_tags = 
        new HashMap<String, ByteMap<Integer>>(queries.size()); 
    ByteMap<Integer> tags = new ByteMap<Integer>();
    flattened_tags.put(sub.getId(), tags);
    ExpressionDataPoint[] dps = sub.values();
    
    // seed the intersection with every series from the first set
    for (int i = 0; i < sub.size(); i++) {
      final byte[] tagks = flattenTags(intersect_on_query_tagks, include_agg_tags,
          dps[i].tags(), dps[i].aggregatedTags(), sub);
      tags.put(tagks, i);

      final ExpressionDataPoint[] idps = new ExpressionDataPoint[queries.size()];
      idps[sub.getIndex()] = dps[i];
      ordered_intersection.put(tagks, idps);
    }
    
    // only one set: nothing to intersect with, so keep everything as is
    if (!it.hasNext()) {
      setCurrentAndMeta(ordered_intersection);
      return;
    }
    
    while (it.hasNext()) {
      sub = it.next();
      tags = new ByteMap<Integer>();
      flattened_tags.put(sub.getId(), tags);
      dps = sub.values();
      
      // loop through the series in the sub iterator, compute the flattened tag
      // ids, then kick out any that are NOT in the existing intersection map.
      for (int i = 0; i < sub.size(); i++) {
        final byte[] tagks = flattenTags(intersect_on_query_tagks, include_agg_tags, 
            dps[i].tags(), dps[i].aggregatedTags(), sub);
        tags.put(tagks, i);

        final ExpressionDataPoint[] idps = ordered_intersection.get(tagks);
        if (idps == null) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Kicking out " + Bytes.pretty(tagks) + " from " + sub.getId());
          }
          // presumably disables this series in the sub iterator - see the
          // ITimeSyncedIterator implementation to confirm
          sub.nullIterator(i);
          continue;
        }
        // the series is in the intersection so far, record it in this sub 
        // query's slot
        idps[sub.getIndex()] = dps[i];
      }
      
      // gotta go backwards now to complete the intersection by kicking
      // any series that appear in other sets but not HERE
      final Iterator<Entry<byte[], ExpressionDataPoint[]>> reverse_it = 
          ordered_intersection.iterator();
      while (reverse_it.hasNext()) {
        Entry<byte[], ExpressionDataPoint[]> e = reverse_it.next();
        if (!tags.containsKey(e.getKey())) {
          if (LOG.isDebugEnabled()) {
            LOG.debug("Kicking out " + Bytes.pretty(e.getKey()) + 
                " from the main list since the query for " + sub.getId() + 
                " didn't have it");
          }
          
          // null the iterators for the other sets
          // NOTE(review): flattened_tags is keyed on sub.getId() while queries
          // is keyed on the name given to the ctor; this lookup assumes the two
          // are the same string - confirm, otherwise queries.get() returns null
          for (final Map.Entry<String, ByteMap<Integer>> entry : 
              flattened_tags.entrySet()) {
            if (entry.getKey().equals(sub.getId())) {
              continue;
            }
            final Integer index = entry.getValue().get(e.getKey());
            if (index != null) {
              queries.get(entry.getKey()).nullIterator(index);
            }
          }
          
          // drop the ID from the intersection through the iterator so that we
          // can keep walking the map safely
          reverse_it.remove();
        }
      }
    }
    
    // now set our properly condensed and ordered values
    if (ordered_intersection.size() < 1) {
      // TODO - is it best to toss an exception here or return an empty result?
      throw new IllegalDataException("No intersections found: " + this);
    }
    
    setCurrentAndMeta(ordered_intersection);
  }
  
  /**
   * Takes the resulting intersection and builds the {@link #current_values}
   * map, then records the number of series in {@link #series_size}.
   * Each query gets an array with one slot per intersected series, and the 
   * series are written in the iteration order of the intersection map so that
   * the same array position refers to the same tag set across every query.
   * @param ordered_intersection The intersection to build from. Each value
   * holds one series per query, positioned by the query's index.
   */
  private void setCurrentAndMeta(final ByteMap<ExpressionDataPoint[]> 
      ordered_intersection) {
    final int total_series = ordered_intersection.size();

    // allocate the output array for every query up front
    for (final String name : queries.keySet()) {
      current_values.put(name, new ExpressionDataPoint[total_series]);
    }

    // transpose: a row per tag set becomes a column in each query's array
    int column = 0;
    for (final ExpressionDataPoint[] row : ordered_intersection.values()) {
      for (int slot = 0; slot < row.length; slot++) {
        final String name = index_to_names[slot];
        current_values.get(name)[column] = row[slot];
      }
      column++;
    }

    series_size = total_series;
  }
  
  /**
   * Flattens the appropriate tags into a single byte array that serves as the
   * identifier of a series during intersection. The array consists of each 
   * selected tag key followed by its tag value, in the iteration order of the
   * tag map, optionally followed by each aggregated tag key.
   * @param use_query_tags Whether to restrict the identifier to the tag keys 
   * reported by the sub query ({@code true}) or to use every tag returned 
   * with the results ({@code false})
   * @param include_agg_tags Whether or not to include the aggregated tags in
   * the identifier
   * @param tags The map of tags from the result set
   * @param agg_tags The list of aggregated tags. Only read when 
   * {@code include_agg_tags} is true.
   * @param sub The sub query iterator. Only read when {@code use_query_tags}
   * is true.
   * @return A byte array with the flattened tag keys and values. Note that
   * if the tags set is empty, this returns an empty array (but not a null
   * array), regardless of the aggregated tags.
   */
  static byte[] flattenTags(final boolean use_query_tags, 
      final boolean include_agg_tags, final ByteMap<byte[]> tags, 
      final ByteSet agg_tags, final ITimeSyncedIterator sub) {
    if (tags.isEmpty()) {
      return HBaseClient.EMPTY_ARRAY;
    }

    // NOTE: We MAY need the agg tags but I'm not sure yet
    // Work out which tag keys are allowed. When the sub query has no tag keys
    // an empty set is used, which filters out every tag pair.
    final ByteSet from_query = use_query_tags ? sub.getQueryTagKs() : null;
    final ByteSet query_tagks = (from_query == null || from_query.isEmpty()) 
        ? new ByteSet() : from_query;

    // count the tag pairs that will make it into the identifier
    int tag_count = 0;
    if (!use_query_tags) {
      tag_count = tags.size();
    } else {
      for (final byte[] tagk : tags.keySet()) {
        if (query_tagks.contains(tagk)) {
          tag_count++;
        }
      }
    }

    final int tagk_width = TSDB.tagk_width();
    final int tagv_width = TSDB.tagv_width();

    int length = tag_count * (tagk_width + tagv_width);
    if (include_agg_tags) {
      length += agg_tags.size() * tagk_width;
    }

    final byte[] flattened = new byte[length];
    int offset = 0;
    for (final Map.Entry<byte[], byte[]> pair : tags.entrySet()) {
      if (!use_query_tags || query_tagks.contains(pair.getKey())) {
        System.arraycopy(pair.getKey(), 0, flattened, offset, tagk_width);
        offset += tagk_width;
        System.arraycopy(pair.getValue(), 0, flattened, offset, tagv_width);
        offset += tagv_width;
      }
    }

    if (include_agg_tags) {
      for (final byte[] agg_tagk : agg_tags) {
        System.arraycopy(agg_tagk, 0, flattened, offset, tagk_width);
        offset += tagk_width;
      }
    }
    return flattened;
  }

  /**
   * Not supported by this iterator; use {@link #next()} instead.
   * @param timestamp Ignored
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  @Override
  public ExpressionDataPoint[] next(long timestamp) {
    // use the standard JDK type rather than the internal sun.* exception
    throw new UnsupportedOperationException("next(long) is not implemented");
  }

  /**
   * Not supported by this iterator; see {@link #getSeriesSize()} for the 
   * number of series after intersection.
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  @Override
  public int size() {
    // use the standard JDK type rather than the internal sun.* exception
    throw new UnsupportedOperationException("size() is not implemented");
  }

  /**
   * Not supported by this iterator; see {@link #getResults()} for the current
   * values.
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  @Override
  public ExpressionDataPoint[] values() {
    // use the standard JDK type rather than the internal sun.* exception
    throw new UnsupportedOperationException("values() is not implemented");
  }

  /**
   * Not supported by this iterator.
   * @param index Ignored
   * @throws UnsupportedOperationException always
   */
  @Override
  public void nullIterator(int index) {
    // use the standard JDK type rather than the internal sun.* exception
    throw new UnsupportedOperationException("nullIterator(int) is not implemented");
  }

  /** @return the index of this iterator in a list of iterators */
  @Override
  public int getIndex() {
    return this.index;
  }

  /** @param index the index of this iterator in a list of iterators */
  @Override
  public void setIndex(int index) {
    // the parameter shadows the field, hence the explicit qualifier
    this.index = index;
  }

  /** @return the ID this iterator was constructed with */
  @Override
  public String getId() {
    return this.id;
  }

  /**
   * Not supported by this iterator.
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  @Override
  public ByteSet getQueryTagKs() {
    // use the standard JDK type rather than the internal sun.* exception
    throw new UnsupportedOperationException("getQueryTagKs() is not implemented");
  }

  /**
   * Not supported by this iterator.
   * @param policy Ignored
   * @throws UnsupportedOperationException always
   */
  @Override
  public void setFillPolicy(NumericFillPolicy policy) {
    // use the standard JDK type rather than the internal sun.* exception
    throw new UnsupportedOperationException(
        "setFillPolicy(NumericFillPolicy) is not implemented");
  }

  /**
   * Not supported by this iterator.
   * @return never returns normally
   * @throws UnsupportedOperationException always
   */
  @Override
  public NumericFillPolicy getFillPolicy() {
    // use the standard JDK type rather than the internal sun.* exception
    throw new UnsupportedOperationException("getFillPolicy() is not implemented");
  }

  /**
   * @return a new iterator built from this one via the private copy 
   * constructor.
   */
  @Override
  public ITimeSyncedIterator getCopy() {
    final IntersectionIterator duplicate = new IntersectionIterator(this);
    return duplicate;
  }

  /**
   * @param index The series index passed through to each sub query iterator
   * @return true if at least one sub query iterator reports more data for the
   * given index, false otherwise.
   */
  @Override
  public boolean hasNext(int index) {
    final Iterator<ITimeSyncedIterator> subs = queries.values().iterator();
    while (subs.hasNext()) {
      if (subs.next().hasNext(index)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Advances the series at the given index in every sub query iterator.
   * @param index The series index passed through to each sub query iterator
   * @throws IllegalDataException if {@link #hasNext()} is false
   */
  @Override
  public void next(int index) {
    // NOTE(review): the guard checks hasNext() for any series rather than 
    // hasNext(index) for this particular one - confirm that is intended
    if (hasNext()) {
      for (final ITimeSyncedIterator sub : queries.values()) {
        sub.next(index);
      }
      return;
    }
    throw new IllegalDataException("No more data");
  }

}