// This file is part of OpenTSDB.
// Copyright (C) 2015 The OpenTSDB Authors.
//
// This program is free software: you can redistribute it and/or modify it
// under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 2.1 of the License, or (at your
// option) any later version. This program is distributed in the hope that it
// will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty
// of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser
// General Public License for more details. You should have received a copy
// of the GNU Lesser General Public License along with this program. If not,
// see <http://www.gnu.org/licenses/>.
package net.opentsdb.query.expression;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import net.opentsdb.core.FillPolicy;
import net.opentsdb.query.expression.VariableIterator.SetOperator;
import net.opentsdb.utils.ByteSet;
import org.apache.commons.jexl2.JexlContext;
import org.apache.commons.jexl2.JexlEngine;
import org.apache.commons.jexl2.MapContext;
import org.apache.commons.jexl2.Script;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.ImmutableSet;
/**
* An iterator that applies an expression to the results of multiple sub queries.
* To use this class:
* - Instantiate with a valid expression
* - Call {@link #getVariableNames()} and iterate over a set of TSSubQueries and
* their results. For each query that matches a variable name, call
* {@link #addResults(String, ITimeSyncedIterator)} with the result set.
* - Call {@link #compile()} to set up the meta data, fills and compute the
* intersection of the series.
* - Call {@link #values()} and store the reference. Results for each
* series will be written here as you iterate.
* - Call {@link #hasNext()} and {@link #next(long)} to iterate over results.
* - At each iteration, fetch the timestamp and value from the data points array.
* <p>
* Iteration is performed across all series supplied to the iterator, synchronizing
* on the timestamps and substituting fill values where appropriate.
* <p>
* WARNING: You MUST supply a result set and associated sub query to match each
* of the variable names in the expression. If you fail to do so, when you call
* {@link #compile()} you'll get an exception.
* <p>
* NOTE: Right now this class only supports intersection on the series so that
* each metric result must contain series with the same tags based on the flags
* provided in the ctor.
* NOTE: If a result set doesn't include a fill policy, we default to ZERO for
* "missing" data points.
*/
public class ExpressionIterator implements ITimeSyncedIterator {
/** Logger for this class. */
private static final Logger LOG = LoggerFactory.getLogger(ExpressionIterator.class);

/**
 * Engine shared by every instance for turning expression strings into scripts.
 * The JEXL docs are silent on overall thread safety and only flag SOME methods
 * as unsafe, so the assumption here is that one engine can be instantiated and
 * reused to keep creating scripts.
 */
public static final JexlEngine JEXL_ENGINE = new JexlEngine();

/** When true, intersect on the query tag keys instead of the result set's. */
private final boolean intersect_on_query_tagks;

/** When true, the aggregated tags are included in the result set. */
private final boolean include_agg_tags;

/** Source iterators keyed by their IDs. */
private final Map<String, ITimeSyncedIterator> results;

/** Compiled form of the expression. */
private final Script expression;

/** Context that values are dumped into for processing through the expression. */
private final JexlContext context = new MapContext();

/** Unique variable names pulled out of the expression. */
private final Set<String> names;

/** Iterator that joins the source series; assigned in compile(). */
// TODO - write an interface to allow other set operators, e.g. union, disjoint
private VariableIterator iterator;

/** Per-variable data points from the join iterator, passed to the expression. */
private Map<String, ExpressionDataPoint[]> iteration_results;

/** Output data points holding the results of processing the expression. */
private ExpressionDataPoint[] dps;

/** ID of this iterator. */
private final String id;

/** Index of this iterator in expressions. */
private int index;

/** Fill policy for this expression when data is missing. */
private NumericFillPolicy fill_policy;

/** Set operator used for joining sets. */
private SetOperator set_operator;
// NOTE - if the query is set to NONE for the aggregation and the query has
// no tagk filters then we shouldn't set the II's intersect_on_query_tagks
/**
 * Default ctor that compiles the expression for use with this iterator and
 * extracts its variable names. The fill policy starts out as
 * {@link FillPolicy#NOT_A_NUMBER}.
 * @param id The id of this iterator.
 * @param expression The expression to compile and use
 * @param set_operator The type of set operator to use
 * @param intersect_on_query_tagks Whether or not to include only the query
 * specified tags during intersection
 * @param include_agg_tags Whether or not to include aggregated tags during
 * intersection
 * @throws IllegalArgumentException if the expression is null or empty, the
 * set operator is null, or the expression doesn't contain any variables.
 * @throws JexlException if the expression isn't valid
 */
public ExpressionIterator(final String id, final String expression,
    final SetOperator set_operator,
    final boolean intersect_on_query_tagks, final boolean include_agg_tags) {
  if (expression == null || expression.isEmpty()) {
    // the guard covers both cases, so the message has to as well
    throw new IllegalArgumentException(
        "The expression cannot be null or empty");
  }
  if (set_operator == null) {
    throw new IllegalArgumentException("The set operator cannot be null");
  }
  this.id = id;
  this.intersect_on_query_tagks = intersect_on_query_tagks;
  this.include_agg_tags = include_agg_tags;
  results = new HashMap<String, ITimeSyncedIterator>();
  this.expression = JEXL_ENGINE.createScript(expression);
  names = new HashSet<String>();
  extractVariableNames();
  if (names.size() < 1) {
    throw new IllegalArgumentException(
        "The expression didn't appear to have any variables");
  }
  this.set_operator = set_operator;
  fill_policy = new NumericFillPolicy(FillPolicy.NOT_A_NUMBER);
}
/**
 * Copy constructor that sets up a dupe of the given iterator with fresh sub
 * iterator objects (obtained through {@code getCopy()}) for use in a nested
 * expression. The expression is recompiled from its string form and the
 * variable names are extracted again. The fill policy reference is carried
 * over (shared, not cloned) so that {@link #getFillPolicy()} on the copy never
 * returns null. Iteration state (the join iterator, data points) is NOT
 * copied, so {@link #compile()} must be called on the copy; the index starts
 * at its default of 0.
 * @param iterator The expression to copy from.
 * @throws IllegalArgumentException if the recompiled expression doesn't
 * contain any variables.
 */
private ExpressionIterator(final ExpressionIterator iterator) {
  id = iterator.id;
  // need to recompile, don't know if we'll run into threading issues
  expression = JEXL_ENGINE.createScript(iterator.expression.toString());
  intersect_on_query_tagks = iterator.intersect_on_query_tagks;
  include_agg_tags = iterator.include_agg_tags;
  set_operator = iterator.set_operator;
  // without this the copy has a null policy and an enclosing expression
  // blows up with an NPE as soon as it needs a fill value from the copy
  fill_policy = iterator.fill_policy;
  results = new HashMap<String, ITimeSyncedIterator>();
  for (final Entry<String, ITimeSyncedIterator> entry :
      iterator.results.entrySet()) {
    results.put(entry.getKey(), entry.getValue().getCopy());
  }
  names = new HashSet<String>();
  extractVariableNames();
  if (names.size() < 1) {
    throw new IllegalArgumentException(
        "The expression didn't appear to have any variables");
  }
}
/**
 * Builds a debugging description of this iterator covering the ID, the
 * expression (in double quotes), the configuration flags, the join iterator,
 * the output data points and the source iterators.
 * @return A string describing the current state of the iterator.
 */
@Override
public String toString() {
  // purely local, so the unsynchronized builder is enough
  final StringBuilder buf = new StringBuilder();
  buf.append("ExpressionIterator(id=")
     .append(id)
     .append(", expression=\"")
     .append(expression.toString())
     // close the quote right after the expression instead of after the index
     .append("\", setOperator=")
     .append(set_operator)
     .append(", fillPolicy=")
     .append(fill_policy)
     .append(", intersectOnQueryTagks=")
     .append(intersect_on_query_tagks)
     .append(", includeAggTags=")
     .append(include_agg_tags)
     .append(", index=")
     .append(index)
     .append(", VariableIterator=")
     .append(iterator)
     .append(", dps=")
     // print the elements rather than the array's identity; copes with null
     .append(java.util.Arrays.toString(dps))
     .append(", results=")
     .append(results)
     .append(")");
  return buf.toString();
}
/**
 * Registers the result iterator of a sub query under the given ID. An iterator
 * registered earlier under the same ID is replaced.
 * TODO - accept a proper object, not a map
 * @param id The ID of the source iterator.
 * @param iterator The source iterator.
 * @throws IllegalArgumentException if either the ID or the iterator is null
 */
public void addResults(final String id, final ITimeSyncedIterator iterator) {
  // the ID is validated first so that its message wins when both are missing
  if (id == null) {
    throw new IllegalArgumentException("Missing ID");
  }

  if (iterator == null) {
    throw new IllegalArgumentException("Iterator cannot be null");
  }

  this.results.put(id, iterator);
}
/**
 * Builds the iterator by joining all series in all sets with the configured
 * set operator and sets up the output data points. Nested
 * {@code ExpressionIterator} results are compiled first.
 * @throws IllegalArgumentException if there aren't any results, or we don't
 * have a result for each variable, or the set operator is null or not
 * supported.
 * @throws IllegalDataException if no series were left after computing the
 * intersection (presumably raised by the join iterator - it is not thrown
 * directly in this method).
 */
public void compile() {
  if (LOG.isDebugEnabled()) {
    LOG.debug("Compiling " + this);
  }
  if (results.isEmpty()) {
    throw new IllegalArgumentException("No results for any variables in "
        + "the expression: " + this);
  }
  if (results.size() < names.size()) {
    throw new IllegalArgumentException("Not enough query results ["
        + results.size() + " total results found] for the expression variables ["
        + names.size() + " expected] " + this);
  }
  // don't care if we have extra results, but we had darned well better make
  // sure we have a result set for each variable
  for (final String variable : names) {
    // NOTE(review): validation looks the variable up lower-cased while the
    // next() methods use the name as-is; a variable containing upper case
    // characters can pass here and still fail later - confirm the intended
    // case handling with the callers of addResults().
    final ITimeSyncedIterator it = results.get(variable.toLowerCase());
    if (it == null) {
      throw new IllegalArgumentException("Missing results for variable " + variable);
    }
    if (it instanceof ExpressionIterator) {
      ((ExpressionIterator) it).compile();
    }
    if (LOG.isDebugEnabled()) {
      LOG.debug("Matched variable " + variable + " to " + it);
    }
  }

  // a null operator would otherwise surface as a bare NPE from the switch
  if (set_operator == null) {
    throw new IllegalArgumentException("The set operator cannot be null");
  }
  // TODO implement other set functions
  switch (set_operator) {
  case INTERSECTION:
    iterator = new IntersectionIterator(id, results, intersect_on_query_tagks,
        include_agg_tags);
    break;
  case UNION:
    iterator = new UnionIterator(id, results, intersect_on_query_tagks,
        include_agg_tags);
    break;
  default:
    // fail loudly instead of continuing with a missing or stale iterator
    throw new IllegalArgumentException("Unsupported set operator: "
        + set_operator);
  }

  iteration_results = iterator.getResults();
  final int series_size = iterator.getSeriesSize();
  dps = new ExpressionDataPoint[series_size];
  for (int i = 0; i < series_size; i++) {
    final Iterator<ExpressionDataPoint[]> it =
        iteration_results.values().iterator();
    // seed the output point from the first variable's series (or start empty
    // if that variable has nothing at this position) ...
    final ExpressionDataPoint[] first = it.next();
    if (first == null || first[i] == null) {
      dps[i] = new ExpressionDataPoint();
    } else {
      dps[i] = new ExpressionDataPoint(first[i]);
    }
    // ... then fold in the series of every remaining variable
    while (it.hasNext()) {
      final ExpressionDataPoint[] series = it.next();
      if (series != null && series[i] != null) {
        dps[i].add(series[i]);
      }
    }
  }
  if (LOG.isDebugEnabled()) {
    LOG.debug("Finished compiling " + this);
  }
}
/**
 * Reports whether the underlying join iterator has more data.
 * {@link #compile()} must have been called beforehand.
 * @return True if there is more data to process, false if not
 */
@Override
public boolean hasNext() {
  final boolean more_data = this.iterator.hasNext();
  return more_data;
}
/**
 * Advances the join iterator and computes the expression value for every
 * series, storing each result under the supplied timestamp.
 * Make sure to call {@link #compile()} first.
 * And make sure to call {@link #hasNext()} before calling this.
 * <p>
 * A variable with no data point at a position, or whose value is NaN, is
 * bound to the value of the fill policy of its source iterator. Numeric
 * results of any type are converted to a double and booleans become 1 or 0.
 * @param timestamp The timestamp to stamp on every result of this iteration.
 * @return A link to the data points for this result set
 * @throws IllegalDataException if there wasn't any data left in any of the
 * series (presumably raised by the join iterator - it is not thrown directly
 * in this method).
 * @throws IllegalStateException if the expression evaluates to null or to
 * something that is neither a number nor a boolean.
 * @throws JexlException if something went pear shaped processing the expression
 */
public ExpressionDataPoint[] next(final long timestamp) {
  iterator.next();

  for (int i = 0; i < iterator.getSeriesSize(); i++) {
    // this here is why life sucks. there MUST be a better way to bind variables
    for (final String variable : names) {
      final ExpressionDataPoint dp = iteration_results.get(variable)[i];
      // a missing point is handled exactly like a NaN value: use the fill
      final double val = dp == null ? Double.NaN : dp.toDouble();
      if (Double.isNaN(val)) {
        context.set(variable, results.get(variable).getFillPolicy().getValue());
      } else {
        context.set(variable, val);
      }
    }

    final Object output = expression.execute(context);
    final double result;
    if (output instanceof Number) {
      // JEXL may hand back Integer, Long, etc., not just Double
      result = ((Number) output).doubleValue();
    } else if (output instanceof Boolean) {
      result = ((Boolean) output) ? 1 : 0;
    } else {
      // guard the null case so the real problem is reported, not an NPE
      throw new IllegalStateException("Expression returned a result of type: "
          + (output == null ? "null" : output.getClass().getName())
          + " for " + this);
    }
    dps[i].reset(timestamp, result);
  }
  return dps;
}
/**
 * Exposes the output data points. The same array is written to on every call
 * to {@link #next(long)}, so the reference can be fetched once and re-read
 * after each iteration.
 * @return the array of expression results
 */
@Override
public ExpressionDataPoint[] values() {
  return this.dps;
}
/**
 * Asks the JEXL engine for the variables referenced by the expression and
 * records every name it reports in {@link #names}.
 * @throws IllegalArgumentException if the expression is null
 */
private void extractVariableNames() {
  if (null == expression) {
    throw new IllegalArgumentException("The expression was null");
  }

  // the engine reports each variable as a list of strings; all of them are kept
  for (final List<String> variable_parts : JEXL_ENGINE.getVariables(expression)) {
    names.addAll(variable_parts);
  }
}
/**
 * Provides a snapshot of the variable IDs used in the expression. Names are
 * case sensitive.
 * @return an immutable copy of the variable names
 */
public Set<String> getVariableNames() {
  final Set<String> snapshot = ImmutableSet.copyOf(this.names);
  return snapshot;
}
/**
 * Replaces the set operator used to join the result sets. Only takes effect
 * on the next call to {@link #compile()}, which is where the join iterator is
 * built.
 * @param set_operator The type of set operator to use
 * @throws IllegalArgumentException if the set operator is null, matching the
 * check performed by the constructor
 */
public void setSetOperator(final SetOperator set_operator) {
  if (set_operator == null) {
    // reject it here rather than failing later inside compile()
    throw new IllegalArgumentException("The set operator cannot be null");
  }
  this.set_operator = set_operator;
}
/**
 * Delegates to the join iterator. {@link #compile()} must have been called
 * beforehand.
 * @return the next timestamp as reported by the join iterator
 */
@Override
public long nextTimestamp() {
  final long next_ts = this.iterator.nextTimestamp();
  return next_ts;
}
/**
 * {@link #compile()} must have been called beforehand as that is where the
 * output array is allocated.
 * @return the number of output data points, i.e. the length of the array
 * returned by {@link #values()}
 */
@Override
public int size() {
  return this.dps.length;
}
/**
 * Validates the given series index. Nulling out the series itself is not
 * implemented yet, so a valid index currently has no effect.
 * @param index The index of the series to null
 * @throws IllegalArgumentException if the index is negative or not less than
 * the number of output data points
 */
@Override
public void nullIterator(int index) {
  final boolean in_bounds = index >= 0 && index < dps.length;
  if (!in_bounds) {
    throw new IllegalArgumentException("Index out of bounds");
  }
  // TODO - do it
}
/** @return the index of this iterator in expressions */
@Override
public int getIndex() {
  return this.index;
}
/**
 * Stores the index of this iterator in expressions.
 * @param index The index to assign
 */
@Override
public void setIndex(int index) {
  this.index = index;
}
/** @return the ID this iterator was constructed with */
@Override
public String getId() {
  return this.id;
}
/**
 * This implementation does not track query tag keys.
 * @return always null
 */
@Override
public ByteSet getQueryTagKs() {
  final ByteSet no_tagks = null;
  return no_tagks;
}
/**
 * Replaces the fill policy of this expression. The value is stored as given,
 * without validation.
 * @param policy The fill policy to use when data is missing
 */
@Override
public void setFillPolicy(NumericFillPolicy policy) {
  this.fill_policy = policy;
}
/** @return the fill policy currently assigned to this expression */
@Override
public NumericFillPolicy getFillPolicy() {
  return this.fill_policy;
}
/**
 * Duplicates this iterator through the private copy constructor.
 * @return a new expression iterator built from this one
 */
@Override
public ITimeSyncedIterator getCopy() {
  return new ExpressionIterator(this);
}
/**
 * Delegates to the join iterator for a single series. {@link #compile()} must
 * have been called beforehand.
 * @param i The index of the series to check
 * @return whatever the join iterator reports for that index
 */
@Override
public boolean hasNext(final int i) {
  final boolean more_data = this.iterator.hasNext(i);
  return more_data;
}
/**
 * Advances a single series of the join iterator and computes the expression
 * value for it. The result is stored in the output data point at the same
 * index, stamped with the smallest timestamp found among the variables that
 * have a data point at that index.
 * Make sure to call {@link #compile()} first.
 * <p>
 * A variable with no data point, or whose value is NaN, is bound to the value
 * of the fill policy of its source iterator. Numeric results of any type are
 * converted to a double and booleans become 1 or 0.
 * @param i The index of the series to advance
 * @throws IllegalStateException if the expression evaluates to null or to
 * something that is neither a number nor a boolean.
 * @throws JexlException if something went wrong processing the expression
 */
@Override
public void next(final int i) {
  iterator.next(i);

  // NOTE(review): stays at Long.MAX_VALUE when no variable has a data point
  // at this index, and that is what gets written to the output - confirm
  // this is intended.
  long ts = Long.MAX_VALUE;
  // this here is why life sucks. there MUST be a better way to bind variables
  for (final String variable : names) {
    final ExpressionDataPoint dp = iteration_results.get(variable)[i];
    if (dp == null) {
      context.set(variable, results.get(variable).getFillPolicy().getValue());
      continue;
    }
    // only real data points take part in picking the timestamp
    if (dp.timestamp() < ts) {
      ts = dp.timestamp();
    }
    final double val = dp.toDouble();
    if (Double.isNaN(val)) {
      context.set(variable, results.get(variable).getFillPolicy().getValue());
    } else {
      context.set(variable, val);
    }
  }

  final Object output = expression.execute(context);
  final double result;
  if (output instanceof Number) {
    // JEXL may hand back Integer, Long, etc., not just Double
    result = ((Number) output).doubleValue();
  } else if (output instanceof Boolean) {
    result = ((Boolean) output) ? 1 : 0;
  } else {
    // guard the null case so the real problem is reported, not an NPE
    throw new IllegalStateException("Expression returned a result of type: "
        + (output == null ? "null" : output.getClass().getName())
        + " for " + this);
  }
  dps[i].reset(ts, result);
}
}