// This file is part of OpenTSDB.
// Copyright (C) 2015 The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version. This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details. You should have received a copy
// of the GNU Lesser General Public License along with this program. If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.query.expression;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import net.opentsdb.core.IllegalDataException;
import net.opentsdb.core.TSDB;
import net.opentsdb.utils.ByteSet;
import org.hbase.async.Bytes;
import org.hbase.async.Bytes.ByteMap;
import org.hbase.async.HBaseClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sun.reflect.generics.reflectiveObjects.NotImplementedException;
/**
* This class handles taking a set of queries and their results and iterates
* over each series in each set with time alignment after computing the
* intersection of all sets.
* <p>
* The iterator performs the following:
* - calculates the intersection of all queries based on the tags or query tags
* and optionally the aggregated tags.
* - any series that are not members of every set are kicked out (and logged).
* - series are aligned across queries so that expressions can operate over them.
* - series are also time aligned and maintain alignment during iteration.
* <p>
* The {@link #current_values} map will map the expression "variables" to the
* proper iterator for each series' array. E.g.
* {@code <"A", [1, 2, 3, 4]>}
* {@code <"B", [1, 2, 3, 4]>}
* <p>
* To use it, simply fetch the result map, call {@link #hasNext()} and
* {@link #next()} to iterate and, in a for loop, iterate {@link #getSeriesSize()}
* times to get all of the current values.
* For efficiency, call {@link #getResults()} once before iterating, then on
* each call to {@link #next()} you can just iterate over the same result map
* again as the values will be updated.
* @since 2.3
*/
public class IntersectionIterator implements ITimeSyncedIterator, VariableIterator {
/** Logger used for debug output while building and intersecting the sets */
private static final Logger LOG = LoggerFactory.getLogger(IntersectionIterator.class);
/** The queries compiled and fetched from storage, keyed by the name they
* were supplied under */
private final Map<String, ITimeSyncedIterator> queries;
/** The current values for each series post intersection, keyed by query name.
* Once the intersection has been computed every array holds
* {@link #series_size} entries; the map stays empty if it was never computed. */
private final Map<String, ExpressionDataPoint[]> current_values;
/** A map of the sub query index (as assigned through
* {@link ITimeSyncedIterator#setIndex(int)}) to their names for intersection
* computation */
private final String[] index_to_names;
/** Whether or not to intersect on the query tagks instead of the result set
* tagks */
private final boolean intersect_on_query_tagks;
/** Whether or not to include the aggregated tags when building the flattened
* series identifier used for the intersection */
private final boolean include_agg_tags;
/** The start/current timestamp for the iterator in ms. Initialized to
* {@link Long#MAX_VALUE} and then lowered to the smallest next timestamp
* reported by the sub iterators. */
private long timestamp;
/** Post intersection number of time series */
private int series_size;
/** The ID of this iterator */
private final String id;
/** The index of this iterator in a list of iterators, assigned through
* {@link #setIndex(int)} */
private int index;
/**
 * Builds the lock-step expression iterator from a set of named query results
 * and computes the intersection of their series.
 * <p>
 * If the results map is empty, or none of the supplied iterators holds any
 * series, the ctor still completes but the intersection is skipped and the
 * results map stays empty. With an empty results map {@link #hasNext()} always
 * returns false.
 * @param id The ID to assign to this iterator
 * @param results The query results to store, keyed by variable name
 * @param intersect_on_query_tagks Whether or not to include only the query
 * specified tags during intersection
 * @param include_agg_tags Whether or not to include aggregated tags during
 * intersection
 * @throws IllegalDataException if, after computing the intersection, no results
 * would be left.
 */
public IntersectionIterator(final String id, final Map<String, ITimeSyncedIterator> results,
    final boolean intersect_on_query_tagks, final boolean include_agg_tags) {
  this.id = id;
  this.intersect_on_query_tagks = intersect_on_query_tagks;
  this.include_agg_tags = include_agg_tags;
  timestamp = Long.MAX_VALUE;

  final int query_count = results.size();
  queries = new HashMap<String, ITimeSyncedIterator>(query_count);
  current_values = new HashMap<String, ExpressionDataPoint[]>(query_count);
  index_to_names = new String[query_count];

  // register every sub iterator, remembering which name sits at which index
  // and how many series the largest set contains
  int largest_set = 0;
  int position = 0;
  for (final Map.Entry<String, ITimeSyncedIterator> entry : results.entrySet()) {
    final String name = entry.getKey();
    final ITimeSyncedIterator sub = entry.getValue();
    if (LOG.isDebugEnabled()) {
      LOG.debug("Adding iterator " + sub);
    }
    queries.put(name, sub);
    sub.setIndex(position);
    index_to_names[position] = name;
    position++;
    largest_set = Math.max(largest_set, sub.values().length);
  }

  if (largest_set < 1) {
    // deliberately not an error: leave the iterator empty instead of throwing
    LOG.debug("No series in the result sets");
    return;
  }

  computeIntersection();

  // the iterator starts at the earliest timestamp any sub iterator offers
  for (final ITimeSyncedIterator sub : queries.values()) {
    timestamp = Math.min(timestamp, sub.nextTimestamp());
  }
}
/**
 * A sort of copy constructor that populates the iterator from an existing
 * iterator, copying all child iterators.
 * <p>
 * Every child iterator is duplicated through
 * {@link ITimeSyncedIterator#getCopy()}; the copies (never the source's own
 * children) are indexed and then intersected, so the source iterator is left
 * untouched.
 * @param iterator The iterator to copy from.
 * @throws IllegalDataException if, after computing the intersection, no results
 * would be left.
 */
private IntersectionIterator(final IntersectionIterator iterator) {
  id = iterator.id;
  intersect_on_query_tagks = iterator.intersect_on_query_tagks;
  include_agg_tags = iterator.include_agg_tags;
  timestamp = Long.MAX_VALUE;

  // Size everything from the source iterator: the local queries map is still
  // empty at this point, so using its size would yield a zero length array.
  final int query_count = iterator.queries.size();
  queries = new HashMap<String, ITimeSyncedIterator>(query_count);
  current_values = new HashMap<String, ExpressionDataPoint[]>(query_count);
  index_to_names = new String[query_count];

  int max_series = 0;
  int i = 0;
  for (final Entry<String, ITimeSyncedIterator> entry : iterator.queries.entrySet()) {
    final ITimeSyncedIterator copy = entry.getValue().getCopy();
    queries.put(entry.getKey(), copy);
    // index the copy, not the source's child, so that the source keeps the
    // indices that match its own index_to_names array
    copy.setIndex(i);
    index_to_names[i] = entry.getKey();
    final int series = copy.values().length;
    if (series > max_series) {
      max_series = series;
    }
    ++i;
  }

  if (max_series < 1) {
    // we don't want to throw an exception here, just leave the iterator empty
    LOG.debug("No series in the result sets");
    return;
  }

  computeIntersection();

  // calculate the starting timestamp from the various iterators
  for (final ITimeSyncedIterator it : queries.values()) {
    final long ts = it.nextTimestamp();
    if (ts < timestamp) {
      timestamp = ts;
    }
  }
}
/**
 * @return a description of this iterator listing its ID, the intersection
 * flags, its index and the sub query iterators
 */
@Override
public String toString() {
  final StringBuilder buf = new StringBuilder();
  buf.append("IntersectionIterator(id=")
     .append(id)
     .append(", useQueryTags=")
     .append(intersect_on_query_tagks)
     .append(", includeAggTags=")
     .append(include_agg_tags)
     .append(", index=")
     .append(index)
     .append(", queries=")
     .append(queries)
     // close the parenthesis opened by the prefix
     .append(")");
  return buf.toString();
}
/** @return true if at least one of the sub query iterators has more data */
@Override
public boolean hasNext() {
  final Iterator<ITimeSyncedIterator> subs = queries.values().iterator();
  boolean more = false;
  // stop at the first sub iterator that still has data
  while (!more && subs.hasNext()) {
    more = subs.next().hasNext();
  }
  return more;
}
/**
 * Advances every sub query iterator to the current timestamp and then
 * computes the timestamp to use for the following call.
 * @throws IllegalDataException if none of the sub iterators has more data
 */
@Override
public void next() {
  if (hasNext()) {
    final long current = timestamp;
    for (final ITimeSyncedIterator query : queries.values()) {
      query.next(current);
    }
    timestamp = nextTimestamp();
    return;
  }
  throw new IllegalDataException("No more data");
}
/**
 * @return the live map of current values keyed by query name. The same map
 * instance is returned on every call.
 */
@Override
public Map<String, ExpressionDataPoint[]> getResults() {
  return this.current_values;
}
/**
 * @return the number of series left after the intersection, i.e. the length
 * of each array in the result map
 */
@Override
public int getSeriesSize() {
  return this.series_size;
}
/**
 * @return the smallest next timestamp reported by any sub query iterator, or
 * {@link Long#MAX_VALUE} if there are no sub iterators to ask
 */
public long nextTimestamp() {
  long earliest = Long.MAX_VALUE;
  for (final ITimeSyncedIterator query : queries.values()) {
    if (query == null) {
      continue;
    }
    earliest = Math.min(earliest, query.nextTimestamp());
  }
  return earliest;
}
/**
* A super ugly messy way to compute the intersection of the various sets of
* time series returned from the sub queries.
* <p>
* The process is:
* - Iterate over each query set
* - For the first set, flatten each series' tag and (optionally) aggregated tag
* set into a single byte array for use as an ID.
* - Populate a map with the IDs and references to the series iterator for the
* first query set.
* - For each additional set, flatten the tags and if the tag set ID isn't in
* the intersection map, kick it out.
* - For each key in the intersection map, if it doesn't appear in the current
* query set, kick it out.
* - Once all sets are finished, align the resulting series iterators in the
* {@link #current_values} map which is then prepped for expression processing.
* <p>
* If only a single query set is present, all of its series are used as is and
* no exception is thrown.
* @throws IllegalDataException if more than one query set was supplied and
* the resulting intersection failed to produce any series
*/
private void computeIntersection() {
// flattened tag ID => one data point slot per sub query, indexed by the sub
// query's index
final ByteMap<ExpressionDataPoint[]> ordered_intersection =
new ByteMap<ExpressionDataPoint[]>();
final Iterator<ITimeSyncedIterator> it = queries.values().iterator();
// assume we have at least one query in our set
ITimeSyncedIterator sub = it.next();
// sub query ID => (flattened tag ID => position of the series in that sub
// query), used later to null out series in the sets already processed
Map<String, ByteMap<Integer>> flattened_tags =
new HashMap<String, ByteMap<Integer>>(queries.size());
ByteMap<Integer> tags = new ByteMap<Integer>();
flattened_tags.put(sub.getId(), tags);
ExpressionDataPoint[] dps = sub.values();
// seed the intersection with every series of the first set
for (int i = 0; i < sub.size(); i++) {
final byte[] tagks = flattenTags(intersect_on_query_tagks, include_agg_tags,
dps[i].tags(), dps[i].aggregatedTags(), sub);
tags.put(tagks, i);
final ExpressionDataPoint[] idps = new ExpressionDataPoint[queries.size()];
idps[sub.getIndex()] = dps[i];
ordered_intersection.put(tagks, idps);
}
// a single set intersects with itself; nothing to kick out
if (!it.hasNext()) {
setCurrentAndMeta(ordered_intersection);
return;
}
while (it.hasNext()) {
sub = it.next();
tags = new ByteMap<Integer>();
flattened_tags.put(sub.getId(), tags);
dps = sub.values();
// loop through the series in the sub iterator, compute the flattened tag
// ids, then kick out any that are NOT in the existing intersection map.
for (int i = 0; i < sub.size(); i++) {
final byte[] tagks = flattenTags(intersect_on_query_tagks, include_agg_tags,
dps[i].tags(), dps[i].aggregatedTags(), sub);
tags.put(tagks, i);
final ExpressionDataPoint[] idps = ordered_intersection.get(tagks);
if (idps == null) {
if (LOG.isDebugEnabled()) {
LOG.debug("Kicking out " + Bytes.pretty(tagks) + " from " + sub.getId());
}
sub.nullIterator(i);
continue;
}
idps[sub.getIndex()] = dps[i];
}
// gotta go backwards now to complete the intersection by kicking
// any series that appear in other sets but not HERE
final Iterator<Entry<byte[], ExpressionDataPoint[]>> reverse_it =
ordered_intersection.iterator();
while (reverse_it.hasNext()) {
Entry<byte[], ExpressionDataPoint[]> e = reverse_it.next();
if (!tags.containsKey(e.getKey())) {
if (LOG.isDebugEnabled()) {
LOG.debug("Kicking out " + Bytes.pretty(e.getKey()) +
" from the main list since the query for " + sub.getId() +
" didn't have it");
}
// null the iterators for the other sets
for (final Map.Entry<String, ByteMap<Integer>> entry :
flattened_tags.entrySet()) {
if (entry.getKey().equals(sub.getId())) {
continue;
}
final Integer index = entry.getValue().get(e.getKey());
if (index != null) {
// NOTE(review): flattened_tags is keyed by sub.getId() while queries is
// keyed by the name supplied to the constructor; this lookup assumes
// the two are the same string - confirm against callers
queries.get(entry.getKey()).nullIterator(index);
}
}
// drop the entry through the iterator so the traversal stays valid
reverse_it.remove();
}
}
}
// now set our properly condensed and ordered values
if (ordered_intersection.size() < 1) {
// TODO - is it best to toss an exception here or return an empty result?
throw new IllegalDataException("No intersections found: " + this);
}
setCurrentAndMeta(ordered_intersection);
}
/**
 * Takes the resulting intersection and builds the {@link #current_values}
 * map, then records the number of intersected series.
 * <p>
 * Each query name gets an array with one slot per intersected series. Slot
 * {@code n} of every array refers to the same series, following the iteration
 * order of the intersection map.
 * @param ordered_intersection The intersection to build from.
 */
private void setCurrentAndMeta(final ByteMap<ExpressionDataPoint[]>
    ordered_intersection) {
  final int total_series = ordered_intersection.size();
  for (final String name : queries.keySet()) {
    current_values.put(name, new ExpressionDataPoint[total_series]);
  }

  int series = 0;
  for (final ExpressionDataPoint[] row : ordered_intersection.values()) {
    // row[q] is the data point of the sub query registered at index q
    for (int q = 0; q < row.length; q++) {
      current_values.get(index_to_names[q])[series] = row[q];
    }
    series++;
  }
  series_size = total_series;
}
/**
 * Flattens the appropriate tags into a single byte array that serves as the
 * identifier of a series during intersection.
 * <p>
 * The array consists of the selected tag key/value pairs in the iteration
 * order of {@code tags}, optionally followed by the aggregated tag keys.
 * When {@code use_query_tags} is set, only pairs whose key is part of the sub
 * query's tag keys are selected; if the sub query has no tag keys, no pairs
 * are selected at all.
 * @param use_query_tags Whether or not to include tags returned with the
 * results or just use those group by'd in the query
 * @param include_agg_tags Whether or not to include the aggregated tags in
 * the identifier
 * @param tags The map of tags from the result set
 * @param agg_tags The list of aggregated tags
 * @param sub The sub query iterator
 * @return A byte array with the flattened tag keys and values. If the tags
 * map is empty an empty array is returned regardless of the aggregated tags
 * (never a null array)
 */
static byte[] flattenTags(final boolean use_query_tags,
    final boolean include_agg_tags, final ByteMap<byte[]> tags,
    final ByteSet agg_tags, final ITimeSyncedIterator sub) {
  if (tags.isEmpty()) {
    return HBaseClient.EMPTY_ARRAY;
  }

  final int tagk_width = TSDB.tagk_width();
  final int tagv_width = TSDB.tagv_width();

  // NOTE: We MAY need the agg tags but I'm not sure yet
  // work out which tag keys qualify and how many pairs will be written
  final ByteSet allowed_tagks;
  int pair_count = 0;
  if (!use_query_tags) {
    allowed_tagks = new ByteSet();
    pair_count = tags.size();
  } else {
    final ByteSet from_query = sub.getQueryTagKs();
    if (from_query == null || from_query.isEmpty()) {
      allowed_tagks = new ByteSet();
    } else {
      allowed_tagks = from_query;
      for (final byte[] tagk : tags.keySet()) {
        if (allowed_tagks.contains(tagk)) {
          pair_count++;
        }
      }
    }
  }

  final int agg_length = include_agg_tags ? agg_tags.size() * tagk_width : 0;
  final byte[] flattened =
      new byte[(pair_count * (tagk_width + tagv_width)) + agg_length];

  int offset = 0;
  for (final Map.Entry<byte[], byte[]> pair : tags.entrySet()) {
    if (!use_query_tags || allowed_tagks.contains(pair.getKey())) {
      System.arraycopy(pair.getKey(), 0, flattened, offset, tagk_width);
      offset += tagk_width;
      System.arraycopy(pair.getValue(), 0, flattened, offset, tagv_width);
      offset += tagv_width;
    }
  }

  if (include_agg_tags) {
    for (final byte[] agg_tagk : agg_tags) {
      System.arraycopy(agg_tagk, 0, flattened, offset, tagk_width);
      offset += tagk_width;
    }
  }
  return flattened;
}
/**
 * Not supported by this iterator; use {@link #next()} instead.
 * @param timestamp Ignored
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ExpressionDataPoint[] next(long timestamp) {
  // standard JDK exception instead of the internal sun.* one
  throw new UnsupportedOperationException("Not implemented");
}
/**
 * Not supported by this iterator; see {@link #getSeriesSize()} for the number
 * of intersected series.
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public int size() {
  // standard JDK exception instead of the internal sun.* one
  throw new UnsupportedOperationException("Not implemented");
}
/**
 * Not supported by this iterator; see {@link #getResults()} for the current
 * values.
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ExpressionDataPoint[] values() {
  // standard JDK exception instead of the internal sun.* one
  throw new UnsupportedOperationException("Not implemented");
}
/**
 * Not supported by this iterator.
 * @param index Ignored
 * @throws UnsupportedOperationException always
 */
@Override
public void nullIterator(int index) {
  // standard JDK exception instead of the internal sun.* one
  throw new UnsupportedOperationException("Not implemented");
}
/** @return the index previously assigned through {@link #setIndex(int)} */
@Override
public int getIndex() {
  return this.index;
}
/** @param index The index of this iterator in a list of iterators */
@Override
public void setIndex(int index) {
  this.index = index;
}
/** @return the ID given to this iterator at construction */
@Override
public String getId() {
  return this.id;
}
/**
 * Not supported by this iterator.
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public ByteSet getQueryTagKs() {
  // standard JDK exception instead of the internal sun.* one
  throw new UnsupportedOperationException("Not implemented");
}
/**
 * Not supported by this iterator.
 * @param policy Ignored
 * @throws UnsupportedOperationException always
 */
@Override
public void setFillPolicy(NumericFillPolicy policy) {
  // standard JDK exception instead of the internal sun.* one
  throw new UnsupportedOperationException("Not implemented");
}
/**
 * Not supported by this iterator.
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public NumericFillPolicy getFillPolicy() {
  // standard JDK exception instead of the internal sun.* one
  throw new UnsupportedOperationException("Not implemented");
}
/**
 * @return a new iterator built from this one through the private copy
 * constructor
 */
@Override
public ITimeSyncedIterator getCopy() {
  final IntersectionIterator duplicate = new IntersectionIterator(this);
  return duplicate;
}
/**
 * @param index The index passed through to each sub query iterator
 * @return true if at least one sub query iterator reports more data for the
 * given index
 */
@Override
public boolean hasNext(int index) {
  final Iterator<ITimeSyncedIterator> subs = queries.values().iterator();
  boolean more = false;
  // stop at the first sub iterator that still has data for the index
  while (!more && subs.hasNext()) {
    more = subs.next().hasNext(index);
  }
  return more;
}
/**
 * Advances every sub query iterator for the given index.
 * @param index The index passed through to each sub query iterator
 * @throws IllegalDataException if none of the sub iterators has more data
 */
@Override
public void next(int index) {
  // NOTE(review): the guard asks hasNext() rather than hasNext(index) -
  // confirm that checking across all series is intended here
  if (hasNext()) {
    for (final ITimeSyncedIterator query : queries.values()) {
      query.next(index);
    }
    return;
  }
  throw new IllegalDataException("No more data");
}
}