/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.query.op;
import com.addthis.basis.util.LessStrings;
import com.addthis.bundle.core.Bundle;
import com.addthis.bundle.core.BundleField;
import com.addthis.bundle.util.BundleColumnBinder;
import com.addthis.bundle.util.ValueUtil;
import com.addthis.bundle.value.Numeric;
import com.addthis.bundle.value.ValueObject;
import com.addthis.hydra.data.query.AbstractRowOp;
import io.netty.channel.ChannelProgressivePromise;
public class OpRoll extends AbstractRowOp {
/**
 * The rolling aggregations supported by {@link OpRoll}.
 * <ul>
 * <li>{@code MIN} - running minimum of each input column</li>
 * <li>{@code MAX} - running maximum of each input column</li>
 * <li>{@code AVG} - running sum of each input column, divided by the row counter when emitted</li>
 * <li>{@code SUM} - running sum of each input column</li>
 * <li>{@code DELTA} - difference between the current and the previous value of each input column</li>
 * </ul>
 */
public static enum OP {
MIN, MAX, AVG, SUM, DELTA
}
/**
 * <p>This query operation <span class="hydra-summary">calculates the minimum value</span>.
 * <p/>
 * <p>This operation keeps track of the currently observed minimum value in one
 * or more columns. The list of input columns is comma-separated.
 * For each input column an output column will be generated (unless the 's'
 * or 'S' prefix is used, see below).
 * <p>Optionally a list of comma-separated key columns
 * can be specified with a colon ":" following the list of input columns. If the
 * combined value of the key columns (their
 * <a href="http://en.wikipedia.org/wiki/Cartesian_product">cartesian product</a>)
 * differs between adjacent rows then the minimum values are reset.
 * <p>An optional prefix of 'i' or 'f' before the input columns
 * designates whether to process as ints or floats (defaults to
 * ints). Prefix with 's' to swap the value in place, i.e. overwrite the input
 * column instead of generating a new one. Prefix
 * with 'S' causes the operation to not emit any columns during processing and
 * emit an additional row at the end of processing with the final minimum value.</p>
 * <p/>
 * <p>Examples:</p>
 * <pre>
 * min=0 // generate a new column that emits the current min observed for column 0
 * min=0,3 // generate one column for the min of column 0 and one column for the min of column 3
 * min=0:1 // track the min of column 0. If adjacent rows in column 1 are different then reset the state
 * min=s0 // overwrite column 0 with the current minimum value
 * </pre>
 *
 * @user-reference
 * @hydra-name min
 */
public static class MinOpRoll extends OpRoll {
    /**
     * Creates a rolling-minimum operation.
     *
     * @param args         the op argument string (optional prefixes, input columns, optional ":" key columns)
     * @param queryPromise forwarded unchanged to the {@link OpRoll} constructor
     */
    public MinOpRoll(String args, ChannelProgressivePromise queryPromise) {
        super(args, OP.MIN, queryPromise);
    }
}
/**
 * <p>This query operation <span class="hydra-summary">calculates the maximum value</span>.
 * <p/>
 * <p>This operation keeps track of the currently observed maximum value in one
 * or more columns. The list of input columns is comma-separated.
 * For each input column an output column will be generated (unless the 's'
 * or 'S' prefix is used, see below).
 * <p>Optionally a list of comma-separated key columns
 * can be specified with a colon ":" following the list of input columns. If the
 * combined value of the key columns (their
 * <a href="http://en.wikipedia.org/wiki/Cartesian_product">cartesian product</a>)
 * differs between adjacent rows then the maximum values are reset.
 * <p>An optional prefix of 'i' or 'f' before the input columns
 * designates whether to process as ints or floats (defaults to
 * ints). Prefix with 's' to swap the value in place, i.e. overwrite the input
 * column instead of generating a new one. Prefix
 * with 'S' causes the operation to not emit any columns during processing and
 * emit an additional row at the end of processing with the final maximum value.</p>
 * <p/>
 * <p>Examples:</p>
 * <pre>
 * max=0 // generate a new column that emits the current max observed for column 0
 * max=0,3 // generate one column for the max of column 0 and one column for the max of column 3
 * max=0:1 // track the max of column 0. If adjacent rows in column 1 are different then reset the state
 * max=s0 // overwrite column 0 with the current maximum value
 * </pre>
 *
 * @user-reference
 * @hydra-name max
 */
public static class MaxOpRoll extends OpRoll {
    /**
     * Creates a rolling-maximum operation.
     *
     * @param args         the op argument string (optional prefixes, input columns, optional ":" key columns)
     * @param queryPromise forwarded unchanged to the {@link OpRoll} constructor
     */
    public MaxOpRoll(String args, ChannelProgressivePromise queryPromise) {
        super(args, OP.MAX, queryPromise);
    }
}
/**
 * <p>This query operation <span class="hydra-summary">calculates the average value</span>.
 * <p/>
 * <p>This operation keeps track of the currently observed average value in one
 * or more columns. The list of input columns is comma-separated.
 * For each input column an output column will be generated (unless the 's'
 * or 'S' prefix is used, see below).
 * <p>Optionally a list of comma-separated key columns
 * can be specified with a colon ":" following the list of input columns. If the
 * combined value of the key columns (their
 * <a href="http://en.wikipedia.org/wiki/Cartesian_product">cartesian product</a>)
 * differs between adjacent rows then the average values are reset.
 * <p>An optional prefix of 'i' or 'f' before the input columns
 * designates whether to process as ints or floats (defaults to
 * ints). Prefix with 's' to swap the value in place, i.e. overwrite the input
 * column instead of generating a new one. Prefix
 * with 'S' causes the operation to not emit any columns during processing and
 * emit an additional row at the end of processing with the final average value.</p>
 * <p/>
 * <p>Examples:</p>
 * <pre>
 * avg=0 // generate a new column that emits the current average observed for column 0
 * avg=0,3 // generate one column for the average of column 0 and one column for the average of column 3
 * avg=0:1 // track the average of column 0. If adjacent rows in column 1 are different then reset the state
 * avg=s0 // overwrite column 0 with the current average value
 * </pre>
 *
 * @user-reference
 * @hydra-name avg
 */
public static class AvgOpRoll extends OpRoll {
    /**
     * Creates a rolling-average operation.
     *
     * @param args         the op argument string (optional prefixes, input columns, optional ":" key columns)
     * @param queryPromise forwarded unchanged to the {@link OpRoll} constructor
     */
    public AvgOpRoll(String args, ChannelProgressivePromise queryPromise) {
        super(args, OP.AVG, queryPromise);
    }
}
/**
 * <p>This query operation <span class="hydra-summary">calculates the sum of values</span>.
 * <p/>
 * <p>This operation keeps track of the currently observed sum in one
 * or more columns. The list of input columns is comma-separated.
 * For each input column an output column will be generated (unless the 's'
 * or 'S' prefix is used, see below).
 * <p>Optionally a list of comma-separated key columns
 * can be specified with a colon ":" following the list of input columns. If the
 * combined value of the key columns (their
 * <a href="http://en.wikipedia.org/wiki/Cartesian_product">cartesian product</a>)
 * differs between adjacent rows then the sum values are reset.
 * <p>An optional prefix of 'i' or 'f' before the input columns
 * designates whether to process as ints or floats (defaults to
 * ints). Prefix with 's' to swap the value in place, i.e. overwrite the input
 * column instead of generating a new one. Prefix
 * with 'S' causes the operation to not emit any columns during processing and
 * emit an additional row at the end of processing with the final sum.</p>
 * <p/>
 * <p>Examples:</p>
 * <pre>
 * sum=0 // generate a new column that emits the current sum observed for column 0
 * sum=0,3 // generate one column for the sum of column 0 and one column for the sum of column 3
 * sum=0:1 // track the sum of column 0. If adjacent rows in column 1 are different then reset the state
 * sum=s0 // overwrite column 0 with the current sum
 * </pre>
 *
 * @user-reference
 * @hydra-name sum
 */
public static class SumOpRoll extends OpRoll {
    /**
     * Creates a rolling-sum operation.
     *
     * @param args         the op argument string (optional prefixes, input columns, optional ":" key columns)
     * @param queryPromise forwarded unchanged to the {@link OpRoll} constructor
     */
    public SumOpRoll(String args, ChannelProgressivePromise queryPromise) {
        super(args, OP.SUM, queryPromise);
    }
}
/**
 * <p>This query operation <span class="hydra-summary">calculates the delta of values</span>.
 * <p/>
 * <p>This operation keeps track of the delta from the previous value in one
 * or more columns. The list of input columns is comma-separated.
 * For each input column an output column will be generated (unless the 's'
 * or 'S' prefix is used, see below). The first row (and the first row after
 * a reset) has no previous value, so the value itself is emitted for it.
 * <p>Optionally a list of comma-separated key columns
 * can be specified with a colon ":" following the list of input columns. If the
 * combined value of the key columns (their
 * <a href="http://en.wikipedia.org/wiki/Cartesian_product">cartesian product</a>)
 * differs between adjacent rows then the delta values are reset.
 * <p>An optional prefix of 'i' or 'f' before the input columns
 * designates whether to process as ints or floats (defaults to
 * ints). Prefix with 's' to swap the value in place, i.e. overwrite the input
 * column instead of generating a new one. Prefix
 * with 'S' causes the operation to not emit any columns during processing and
 * emit an additional row at the end of processing with the final delta.</p>
 * <p/>
 * <p>Examples:</p>
 * <pre>
 * delta=0 // generate a new column that emits the current delta observed for column 0
 * delta=0,3 // generate one column for the delta of column 0 and one column for the delta of column 3
 * delta=0:1 // track the delta of column 0. If adjacent rows in column 1 are different then reset the state
 * delta=s0 // overwrite column 0 with the current delta
 * </pre>
 *
 * @user-reference
 * @hydra-name delta
 */
public static class DeltaOpRoll extends OpRoll {
    /**
     * Creates a rolling-delta operation.
     *
     * @param args         the op argument string (optional prefixes, input columns, optional ":" key columns)
     * @param queryPromise forwarded unchanged to the {@link OpRoll} constructor
     */
    public DeltaOpRoll(String args, ChannelProgressivePromise queryPromise) {
        super(args, OP.DELTA, queryPromise);
    }
}
/**
 * Argument string with the type/mode prefixes stripped, split on ":".
 * Element 0 is the comma-separated input column list; element 1, when
 * present, is the comma-separated key column list.
 */
private final String[] args;
/** The aggregation this instance performs. */
private final OP op;
/** When true values are parsed as base-10 longs, otherwise as doubles. */
private final boolean asInt;
/** Set by the 's' prefix: results are written back into the input columns. */
private final boolean inPlace;
/** Set by the 'S' prefix: rows are left untouched and one extra row is sent on completion. */
private final boolean summary;
/** Rolling value per input column; allocated on the first row. A null slot is re-seeded from the next row. */
private Numeric[] state;
/** Input columns, bound against the first row seen. */
private BundleField[] colIn;
/** Output columns: the input columns for 's'/'S' modes, otherwise "op_"-prefixed fields. */
private BundleField[] colOut;
/** Key columns, or null when no key list was given. */
private BundleField[] colKeys;
/** Row counter used as the divisor when emitting AVG values. */
private int rows;
/** Compound key of the most recently processed row (only maintained when key columns are configured). */
private String lastKey;
/** Previous input value per column, used by DELTA to compute the difference. */
private Numeric[] oldvals;
/** Bundle created from the first row in summary mode; receives the final values on completion. */
private Bundle lastRow;
/**
 * Parses the op argument string and records the aggregation to perform.
 * <p>
 * Prefixes are consumed from the front of {@code args} in this order: an
 * optional {@code i} (integer mode, the default), an optional {@code f}
 * (floating point mode), then an optional {@code s} (write results in place)
 * or {@code S} (emit a single summary row on completion). Whatever remains is
 * split on ":" into the input column list and the optional key column list.
 *
 * @param args         the raw argument string, e.g. {@code "0,3"}, {@code "fs0:1"}
 * @param op           the aggregation to apply
 * @param queryPromise forwarded unchanged to the superclass constructor
 */
public OpRoll(String args, OP op, ChannelProgressivePromise queryPromise) {
    super(queryPromise);
    this.op = op;

    String spec = args;
    if (spec.startsWith("i")) {
        // integer mode is the default; the marker is simply discarded
        spec = spec.substring(1);
    }

    final boolean floatMode = spec.startsWith("f");
    if (floatMode) {
        spec = spec.substring(1);
    }
    this.asInt = !floatMode;

    final boolean swapInPlace = spec.startsWith("s");
    final boolean summaryOnly = !swapInPlace && spec.startsWith("S");
    if (swapInPlace || summaryOnly) {
        spec = spec.substring(1);
    }
    this.inPlace = swapInPlace;
    this.summary = summaryOnly;

    this.args = LessStrings.splitArray(spec, ":");
}
/**
 * Converts a raw column value to the numeric representation selected by the
 * op prefix: a base-10 long in integer mode, a double otherwise.
 *
 * @param vo the value read from a row; callers in this class handle a null result
 * @return the converted value as returned by {@link ValueUtil}
 */
private Numeric toType(ValueObject vo) {
    if (!asInt) {
        return ValueUtil.asNumberOrParseDouble(vo);
    }
    return ValueUtil.asNumberOrParseLong(vo, 10);
}
/**
 * Folds one row into the rolling state and, unless running in summary mode,
 * writes the current rolled value of every input column onto the row.
 * <p>
 * Columns are bound lazily against the first row. When key columns are
 * configured and the compound key differs from the previous row's key, all
 * rolling state is cleared and the row counter used for AVG restarts, so
 * that each key group is aggregated independently.
 * <p>
 * A slot with no state is seeded with the row's value (or {@code ZERO} when
 * the value is missing); for DELTA this means the first row of a group emits
 * the value itself.
 *
 * @param row the incoming row; modified in place unless in summary mode
 * @return the same {@code row} instance
 */
@Override
public Bundle rowOp(Bundle row) {
    if (state == null) {
        bindRollColumns(row);
    }
    rows++;
    if (colKeys != null) {
        String key = createCompoundKey(colKeys, row);
        boolean sameKey = (lastKey == null) ? (key == null) : lastKey.equals(key);
        if (!sameKey) {
            lastKey = key;
            for (int j = 0; j < state.length; j++) {
                state[j] = null;
            }
            // This row is the first of its key group. Without restarting the
            // counter, AVG would divide the new group's sum by the total
            // number of rows seen so far instead of the group's row count.
            rows = 1;
        }
    }
    // NOTE(review): the returned binder is not used here; the call is kept in
    // case it has side effects in the superclass - confirm and remove if not.
    getSourceColumnBinder(row);
    for (int i = 0; i < colIn.length; i++) {
        Numeric current = toType(row.getValue(colIn[i]));
        if (state[i] == null) {
            // (re)seed the slot from this row; a missing value counts as zero
            if (current == null) {
                current = ZERO;
            }
            state[i] = current;
            oldvals[i] = current;
        } else {
            switch (op) {
                case DELTA:
                    // A missing value would otherwise be dereferenced below;
                    // treat it as zero, matching how the seed row is handled.
                    if (current == null) {
                        current = ZERO;
                    }
                    state[i] = current.diff(oldvals[i]);
                    oldvals[i] = current;
                    break;
                case MIN:
                    state[i] = toType(state[i].min(current));
                    break;
                case MAX:
                    state[i] = toType(state[i].max(current));
                    break;
                case SUM:
                    state[i] = state[i].sum(current);
                    break;
                case AVG:
                    // AVG keeps a running sum; the division happens on output
                    state[i] = toType(state[i].sum(current));
                    break;
            }
        }
        if (!summary) {
            if (op == OP.AVG) {
                row.setValue(colOut[i], state[i].avg(rows));
            } else {
                row.setValue(colOut[i], state[i]);
            }
        }
    }
    return row;
}

/** Binds input, key and output columns against the first row and allocates the per-column state arrays. */
private void bindRollColumns(Bundle row) {
    colIn = new BundleColumnBinder(row, LessStrings.splitArray(args[0], ",")).getFields();
    colKeys = args.length > 1 ? new BundleColumnBinder(row, LessStrings.splitArray(args[1], ",")).getFields() : null;
    state = new Numeric[colIn.length];
    oldvals = new Numeric[colIn.length];
    if (inPlace || summary) {
        // results go back into the input columns (or into the summary row)
        colOut = colIn;
        if (summary) {
            lastRow = row.createBundle();
        }
    } else {
        colOut = new BundleField[colIn.length];
        for (int i = 0; i < colOut.length; i++) {
            colOut[i] = row.getFormat().getField("op_".concat(colIn[i].getName()));
        }
    }
}
/**
 * Finishes the operation. In summary mode, and only if at least one row was
 * processed, the final rolled value of every column is written to the
 * summary row, which is sent downstream before completion is propagated to
 * the superclass.
 */
@Override
public void sendComplete() {
    if (summary && state != null) {
        final boolean averaging = (op == OP.AVG);
        int col = 0;
        for (Numeric rolled : state) {
            ValueObject finalValue;
            if (averaging) {
                // AVG state holds the running sum; divide on output
                finalValue = rolled.avg(rows);
            } else {
                finalValue = rolled;
            }
            lastRow.setValue(colOut[col], finalValue);
            col++;
        }
        getNext().send(lastRow);
    }
    super.sendComplete();
}
}