// This file is part of OpenTSDB.
// Copyright (C) 2015 The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version. This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details. You should have received a copy
// of the GNU Lesser General Public License along with this program. If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.query.expression;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Map.Entry;
import net.opentsdb.core.FillPolicy;
import net.opentsdb.core.IllegalDataException;
import net.opentsdb.core.TSDB;
import net.opentsdb.utils.ByteSet;
import org.hbase.async.HBaseClient;
import org.hbase.async.Bytes.ByteMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import sun.reflect.generics.reflectiveObjects.NotImplementedException;
/**
 * An iterator that computes the union of all series in the result sets. Every
 * series is matched with its corresponding series in the other sets using a
 * key built from the flattened tags. If one or more sets lack the matching
 * series, the empty slot is populated with a shared fill data point that is
 * reset from the configured {@link NumericFillPolicy} on every call to
 * {@link #next()}, so callers never see a {@code null} in the results.
 * @since 2.3
 */
public class UnionIterator implements ITimeSyncedIterator, VariableIterator {
  private static final Logger LOG = LoggerFactory.getLogger(UnionIterator.class);

  /** The queries compiled and fetched from storage */
  private final Map<String, ITimeSyncedIterator> queries;

  /** The current values for each sub query, one slot per series in the union */
  private final Map<String, ExpressionDataPoint[]> current_values;

  /** A map used for single series iteration. For each sub query the array is
   * indexed by the union series position and holds the index reported by that
   * sub query's data point, or -1 if the sub query lacks the series */
  private final Map<String, int[]> single_series_matrix;

  /** A map of the sub query index to their names for union computation */
  private final String[] index_to_names;

  /** Whether or not to join on the query tagks instead of the result set
   * tagks */
  private final boolean union_on_query_tagks;

  /** Whether or not to include the aggregated tags in the result set */
  private final boolean include_agg_tags;

  /** The start/current timestamp for the iterator in ms */
  private long timestamp;

  /** Number of time series in the computed union */
  private int series_size;

  /** The ID of this iterator */
  private final String id;

  /** The index of this iterator in a list of iterators */
  private int index;

  /** The fill policy to use when a series is missing from one of the sets.
   * Default is zero. */
  private NumericFillPolicy fill_policy;

  /** A data point used for filling missing time series */
  private final ExpressionDataPoint fill_dp;

  /**
   * Default ctor
   * @param id The variable ID for this iterator
   * @param results Upstream iterators
   * @param union_on_query_tagks Whether or not to flatten and join on only
   * the tags from the query or those returned in the results.
   * @param include_agg_tags Whether or not to include the flattened aggregated
   * tag keys in the join.
   */
  public UnionIterator(final String id, final Map<String, ITimeSyncedIterator> results,
      final boolean union_on_query_tagks, final boolean include_agg_tags) {
    this.id = id;
    this.union_on_query_tagks = union_on_query_tagks;
    this.include_agg_tags = include_agg_tags;
    queries = new HashMap<String, ITimeSyncedIterator>(results.size());
    current_values = new HashMap<String, ExpressionDataPoint[]>(results.size());
    single_series_matrix = new HashMap<String, int[]>(results.size());
    index_to_names = new String[results.size()];
    fill_policy = new NumericFillPolicy(FillPolicy.ZERO);
    fill_dp = new ExpressionDataPoint();

    loadQueries(results);

    if (LOG.isDebugEnabled()) {
      LOG.debug("Computed union: " + this);
    }
  }

  /**
   * Private copy constructor that copies references and sets up new collections
   * without copying results. The sub iterators themselves are shared with the
   * source iterator, not duplicated.
   * @param iterator The iterator to copy from.
   */
  private UnionIterator(final UnionIterator iterator) {
    // Size everything from the SOURCE iterator: the local query map is still
    // empty at this point, so its size would always be zero.
    final int num_queries = iterator.queries.size();
    id = iterator.id;
    union_on_query_tagks = iterator.union_on_query_tagks;
    include_agg_tags = iterator.include_agg_tags;
    queries = new HashMap<String, ITimeSyncedIterator>(num_queries);
    current_values = new HashMap<String, ExpressionDataPoint[]>(num_queries);
    single_series_matrix = new HashMap<String, int[]>(num_queries);
    index_to_names = new String[num_queries];
    fill_policy = iterator.fill_policy;
    // The copy needs its own fill data point; it must exist before the union
    // is computed because missing slots are populated with it.
    fill_dp = new ExpressionDataPoint();

    loadQueries(iterator.queries);
  }

  /**
   * Registers every sub query from the given map, assigns each its index,
   * computes the union and then positions {@link #timestamp} at the earliest
   * timestamp offered by any sub query ({@code Long.MAX_VALUE} if there are
   * none).
   * @param source The sub queries to register, keyed on their names.
   */
  private void loadQueries(final Map<String, ITimeSyncedIterator> source) {
    int i = 0;
    for (final Map.Entry<String, ITimeSyncedIterator> entry : source.entrySet()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("Adding iterator " + entry.getValue());
      }
      queries.put(entry.getKey(), entry.getValue());
      entry.getValue().setIndex(i);
      index_to_names[i] = entry.getKey();
      ++i;
    }

    computeUnion();

    // calculate the starting timestamp from the various iterators
    long earliest = Long.MAX_VALUE;
    for (final ITimeSyncedIterator it : queries.values()) {
      final long ts = it.nextTimestamp();
      if (ts < earliest) {
        earliest = ts;
      }
    }
    timestamp = earliest;
  }

  /**
   * Computes the union of all sets, matching on tags and optionally the
   * aggregated tags across each variable. If none of the sub queries has any
   * series, the result maps are left empty and the series size stays at zero.
   */
  private void computeUnion() {
    // key = flattened tags, array of queries.size()
    final ByteMap<ExpressionDataPoint[]> ordered_union =
        new ByteMap<ExpressionDataPoint[]>();

    for (final ITimeSyncedIterator sub : queries.values()) {
      final ExpressionDataPoint[] dps = sub.values();
      for (int i = 0; i < sub.size(); i++) {
        final byte[] key = flattenTags(union_on_query_tagks, include_agg_tags,
            dps[i], sub);
        ExpressionDataPoint[] udps = ordered_union.get(key);
        if (udps == null) {
          udps = new ExpressionDataPoint[queries.size()];
          ordered_union.put(key, udps);
        }
        // slot position is the index assigned to the sub query at load time
        udps[sub.getIndex()] = dps[i];
      }
    }

    if (ordered_union.size() < 1) {
      // if no data, just stop here
      return;
    }

    setCurrentAndMeta(ordered_union);
  }

  /**
   * Takes the resulting union and builds the {@link #current_values}
   * and {@link #single_series_matrix} maps, then records the series count.
   * Slots for which a sub query has no series receive the shared fill data
   * point in {@link #current_values} and -1 in {@link #single_series_matrix}.
   * @param ordered_union The union to build from.
   */
  private void setCurrentAndMeta(final ByteMap<ExpressionDataPoint[]>
      ordered_union) {
    final int num_series = ordered_union.size();

    for (final String name : queries.keySet()) {
      current_values.put(name, new ExpressionDataPoint[num_series]);
      // TODO - blech. Fill with a sentinel value to reflect "no data here!"
      final int[] m = new int[num_series];
      for (int i = 0; i < m.length; i++) {
        m[i] = -1;
      }
      single_series_matrix.put(name, m);
    }

    int i = 0;
    for (final Entry<byte[], ExpressionDataPoint[]> entry : ordered_union.entrySet()) {
      final ExpressionDataPoint[] idps = entry.getValue();
      for (int x = 0; x < idps.length; x++) {
        final String name = index_to_names[x];
        final ExpressionDataPoint dp = idps[x];
        if (dp != null) {
          current_values.get(name)[i] = dp;
          single_series_matrix.get(name)[i] = dp.getIndex();
        } else {
          // set fills on nulls; the matrix keeps its -1 sentinel
          current_values.get(name)[i] = fill_dp;
        }
      }
      ++i;
    }

    series_size = num_series;
  }

  /**
   * Creates a key based on the concatenation of the tag pairs then the agg
   * tag keys.
   * @param use_query_tags Whether or not to include tags returned with the
   * results or just use those group by'd in the query
   * @param include_agg_tags Whether or not to include the aggregated tags in
   * the identifier
   * @param dp The current expression data point
   * @param sub The sub query iterator
   * @return A byte array with the flattened tag keys and values. Note that
   * if the tags set is empty, this may return an empty array (but not a null
   * array)
   */
  static byte[] flattenTags(final boolean use_query_tags,
      final boolean include_agg_tags, final ExpressionDataPoint dp,
      final ITimeSyncedIterator sub) {
    if (dp.tags() == null || dp.tags().isEmpty()) {
      return HBaseClient.EMPTY_ARRAY;
    }
    final int tagk_width = TSDB.tagk_width();
    final int tagv_width = TSDB.tagv_width();

    final ByteSet query_tagks;
    // NOTE: We MAY need the agg tags but I'm not sure yet
    final int tag_size;
    if (use_query_tags) {
      // only the tag pairs whose key was named in the query take part
      int matched = 0;
      if (sub.getQueryTagKs() != null && !sub.getQueryTagKs().isEmpty()) {
        query_tagks = sub.getQueryTagKs();
        for (final Map.Entry<byte[], byte[]> pair : dp.tags().entrySet()) {
          if (query_tagks.contains(pair.getKey())) {
            matched++;
          }
        }
      } else {
        query_tagks = new ByteSet();
      }
      tag_size = matched;
    } else {
      query_tagks = new ByteSet();
      tag_size = dp.tags().size();
    }

    final int length = (tag_size * (tagk_width + tagv_width))
        + (include_agg_tags ? (dp.aggregatedTags().size() * tagk_width) : 0);
    final byte[] key = new byte[length];

    int idx = 0;
    for (final Entry<byte[], byte[]> pair : dp.tags().entrySet()) {
      if (use_query_tags && !query_tagks.contains(pair.getKey())) {
        continue;
      }
      System.arraycopy(pair.getKey(), 0, key, idx, tagk_width);
      idx += tagk_width;
      System.arraycopy(pair.getValue(), 0, key, idx, tagv_width);
      idx += tagv_width;
    }

    if (include_agg_tags) {
      for (final byte[] tagk : dp.aggregatedTags()) {
        System.arraycopy(tagk, 0, key, idx, tagk_width);
        idx += tagk_width;
      }
    }
    return key;
  }

  @Override
  public String toString() {
    final StringBuilder buf = new StringBuilder();
    buf.append("UnionIterator(id=")
       .append(id)
       .append(", useQueryTags=")
       .append(union_on_query_tagks)
       .append(", includeAggTags=")
       .append(include_agg_tags)
       .append(", index=")
       .append(index)
       .append(", queries=")
       .append(queries)
       .append(")");
    return buf.toString();
  }

  // Iterator implementations

  /** @return true if at least one of the sub queries has more data */
  @Override
  public boolean hasNext() {
    for (final ITimeSyncedIterator sub : queries.values()) {
      if (sub.hasNext()) {
        return true;
      }
    }
    return false;
  }

  /**
   * Not supported by the union iterator.
   * @throws UnsupportedOperationException always
   */
  @Override
  public ExpressionDataPoint[] next(long timestamp) {
    throw new UnsupportedOperationException("next(long) is not implemented");
  }

  /** @return the smallest next timestamp reported by the sub queries, or
   * {@code Long.MAX_VALUE} if there are no sub queries */
  @Override
  public long nextTimestamp() {
    long ts = Long.MAX_VALUE;
    for (final ITimeSyncedIterator sub : queries.values()) {
      if (sub != null) {
        final long t = sub.nextTimestamp();
        if (t < ts) {
          ts = t;
        }
      }
    }
    return ts;
  }

  /**
   * Not supported by the union iterator.
   * @throws UnsupportedOperationException always
   */
  @Override
  public int size() {
    throw new UnsupportedOperationException("size() is not implemented");
  }

  /**
   * Not supported by the union iterator.
   * @throws UnsupportedOperationException always
   */
  @Override
  public ExpressionDataPoint[] values() {
    throw new UnsupportedOperationException("values() is not implemented");
  }

  /**
   * Not supported by the union iterator.
   * @throws UnsupportedOperationException always
   */
  @Override
  public void nullIterator(int index) {
    throw new UnsupportedOperationException("nullIterator(int) is not implemented");
  }

  @Override
  public int getIndex() {
    return index;
  }

  @Override
  public void setIndex(int index) {
    this.index = index;
  }

  @Override
  public String getId() {
    return id;
  }

  /**
   * Not supported by the union iterator.
   * @throws UnsupportedOperationException always
   */
  @Override
  public ByteSet getQueryTagKs() {
    throw new UnsupportedOperationException("getQueryTagKs() is not implemented");
  }

  /** @param policy The policy whose value is written to the fill data point
   * on each subsequent call to {@link #next()} */
  @Override
  public void setFillPolicy(NumericFillPolicy policy) {
    this.fill_policy = policy;
  }

  @Override
  public NumericFillPolicy getFillPolicy() {
    return fill_policy;
  }

  /** @return a new iterator over the same sub query references with freshly
   * built result collections */
  @Override
  public ITimeSyncedIterator getCopy() {
    return new UnionIterator(this);
  }

  /**
   * Advances every sub query to the current timestamp, resets the fill data
   * point to that timestamp and the fill policy's value, then moves the
   * current timestamp to the next one available.
   * @throws IllegalDataException if none of the sub queries has more data
   */
  @Override
  public void next() {
    if (!hasNext()) {
      throw new IllegalDataException("No more data");
    }
    for (final ITimeSyncedIterator sub : queries.values()) {
      sub.next(timestamp);
    }
    // reset the fill data point
    fill_dp.reset(timestamp, fill_policy.getValue());
    timestamp = nextTimestamp();
  }

  /** @return the live map of sub query name to current values; the arrays
   * are the iterator's own, not copies */
  @Override
  public Map<String, ExpressionDataPoint[]> getResults() {
    return current_values;
  }

  /** @return the number of series in the union, zero if there was no data */
  @Override
  public int getSeriesSize() {
    return series_size;
  }

  /**
   * @param index The position of the series within the union
   * @return true if any sub query that contains the series has more data
   * for it
   */
  @Override
  public boolean hasNext(int index) {
    for (final Entry<String, int[]> entry : single_series_matrix.entrySet()) {
      final int idx = entry.getValue()[index];
      // negative means the sub query has no such series
      if (idx >= 0 && queries.get(entry.getKey()).hasNext(idx)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Advances the given series in every sub query that contains it.
   * @param index The position of the series within the union
   * @throws IllegalDataException if none of the sub queries has more data
   */
  @Override
  public void next(int index) {
    // NOTE(review): this guards on the whole iterator rather than on
    // hasNext(index); confirm with callers before tightening the check.
    if (!hasNext()) {
      throw new IllegalDataException("No more data");
    }
    for (final Entry<String, int[]> entry : single_series_matrix.entrySet()) {
      final int idx = entry.getValue()[index];
      if (idx >= 0) {
        queries.get(entry.getKey()).next(idx);
      }
    }
  }
}