/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.data.query.op; import com.addthis.basis.util.LessStrings; import com.addthis.bundle.core.Bundle; import com.addthis.bundle.core.BundleField; import com.addthis.bundle.util.BundleColumnBinder; import com.addthis.bundle.util.ValueUtil; import com.addthis.bundle.value.Numeric; import com.addthis.bundle.value.ValueObject; import com.addthis.hydra.data.query.AbstractRowOp; import io.netty.channel.ChannelProgressivePromise; public class OpRoll extends AbstractRowOp { public static enum OP { MIN, MAX, AVG, SUM, DELTA } /** * <p>This query operation <span class="hydra-summary">calculates the minimum value</span>. * <p/> * <p>This operation keeps track of the currently observed minimum value in one * or more columns. The list of input columns are comma-separated. * For each input column an output column will be generated (unless the 's' * prefix is used see below). * <p>Optionally a list of comma-separated key columns * can be specified with a colon ":" following the list of input columns. If the * <a href="http://en.wikipedia.org/wiki/Cartesian_product">cartesian product</a> * of adjacent rows is different then the minimum values are reset. * <p>An optional prefix of 'i' or 'f' before the input columns * designates whether to process as ints or floats (defaults to * ints). Prefix with 's' to makes changes swap value in place. Prefix * with 'S' causes the operation to not emit any columns during processing and * emit an additional row at the end of processing with the final maximum value.</p> * <p/> * <p>Examples:</p> * <pre> * min=0 // generate a new column that emits the current min observed for column 0 * min=0,3 // generate one column for the min of column 0 and one column for the min of column 3 * min=0:1 // track the min of column 0. If adjacent rows in column 1 are different then reset the state * min=s0 // overwrite column 0 with the current minimum value * </pre> * * @user-reference * @hydra-name min */ public static class MinOpRoll extends OpRoll { public MinOpRoll(String args, ChannelProgressivePromise queryPromise) { super(args, OP.MIN, queryPromise); } } /** * <p>This query operation <span class="hydra-summary">calculates the maximum value</span>. * <p/> * <p>This operation keeps track of the currently observed maximum value in one * or more columns. The list of input columns are comma-separated. * For each input column an output column will be generated (unless the 's' * prefix is used see below). * <p>Optionally a list of comma-separated key columns * can be specified with a colon ":" following the list of input columns. If the * <a href="http://en.wikipedia.org/wiki/Cartesian_product">cartesian product</a> * of adjacent rows is different then the maximum values are reset. * <p>An optional prefix of 'i' or 'f' before the input columns * designates whether to process as ints or floats (defaults to * ints). Prefix with 's' to makes changes swap value in place. Prefix * with 'S' causes the operation to not emit any columns during processing and * emit an additional row at the end of processing with the final maximum value.</p> * <p/> * <p>Examples:</p> * <pre> * max=0 // generate a new column that emits the current max observed for column 0 * max=0,3 // generate one column for the max of column 0 and one column for the max of column 3 * max=0:1 // track the max of column 0. If adjacent rows in column 1 are different then reset the state * max=s0 // overwrite column 0 with the current maximum value * </pre> * * @user-reference * @hydra-name max */ public static class MaxOpRoll extends OpRoll { public MaxOpRoll(String args, ChannelProgressivePromise queryPromise) { super(args, OP.MAX, queryPromise); } } /** * <p>This query operation <span class="hydra-summary">calculates the average value</span>. * <p/> * <p>This operation keeps track of the currently observed average value in one * or more columns. The list of input columns are comma-separated. * For each input column an output column will be generated (unless the 's' * prefix is used see below). * <p>Optionally a list of comma-separated key columns * can be specified with a colon ":" following the list of input columns. If the * <a href="http://en.wikipedia.org/wiki/Cartesian_product">cartesian product</a> * of adjacent rows is different then the average values are reset. * <p>An optional prefix of 'i' or 'f' before the input columns * designates whether to process as ints or floats (defaults to * ints). Prefix with 's' to makes changes swap value in place. Prefix * with 'S' causes the operation to not emit any columns during processing and * emit an additional row at the end of processing with the final average value.</p> * <p/> * <p>Examples:</p> * <pre> * avg=0 // generate a new column that emits the current average observed for column 0 * avg=0,3 // generate one column for the average of column 0 and one column for the average of column 3 * avg=0:1 // track the average of column 0. If adjacent rows in column 1 are different then reset the state * avg=s0 // overwrite column 0 with the current average value * </pre> * * @user-reference * @hydra-name avg */ public static class AvgOpRoll extends OpRoll { public AvgOpRoll(String args, ChannelProgressivePromise queryPromise) { super(args, OP.AVG, queryPromise); } } /** * <p>This query operation <span class="hydra-summary">calculates the sum of values</span>. * <p/> * <p>This operation keeps track of the currently observed sum in one * or more columns. The list of input columns are comma-separated. * For each input column an output column will be generated (unless the 's' * prefix is used see below). * <p>Optionally a list of comma-separated key columns * can be specified with a colon ":" following the list of input columns. If the * <a href="http://en.wikipedia.org/wiki/Cartesian_product">cartesian product</a> * of adjacent rows is different then the sum values are reset. * <p>An optional prefix of 'i' or 'f' before the input columns * designates whether to process as ints or floats (defaults to * ints). Prefix with 's' to makes changes swap value in place. Prefix * with 'S' causes the operation to not emit any columns during processing and * emit an additional row at the end of processing with the final sum.</p> * <p/> * <p>Examples:</p> * <pre> * sum=0 // generate a new column that emits the current sum observed for column 0 * sum=0,3 // generate one column for the sum of column 0 and one column for the sum of column 3 * sum=0:1 // track the sum of column 0. If adjacent rows in column 1 are different then reset the state * sum=s0 // overwrite column 0 with the current sum * </pre> * * @user-reference * @hydra-name sum */ public static class SumOpRoll extends OpRoll { public SumOpRoll(String args, ChannelProgressivePromise queryPromise) { super(args, OP.SUM, queryPromise); } } /** * <p>This query operation <span class="hydra-summary">calculates the delta of values</span>. * <p/> * <p>This operation keeps track of the delta from the previous value in one * or more columns. The list of input columns are comma-separated. * For each input column an output column will be generated (unless the 's' * prefix is used see below). * <p>Optionally a list of comma-separated key columns * can be specified with a colon ":" following the list of input columns. If the * <a href="http://en.wikipedia.org/wiki/Cartesian_product">cartesian product</a> * of adjacent rows is different then the delta values are reset. * <p>An optional prefix of 'i' or 'f' before the input columns * designates whether to process as ints or floats (defaults to * ints). Prefix with 's' to makes changes swap value in place. Prefix * with 'S' causes the operation to not emit any columns during processing and * emit an additional row at the end of processing with the final delta.</p> * <p/> * <p>Examples:</p> * <pre> * delta=0 // generate a new column that emits the current delta observed for column 0 * delta=0,3 // generate one column for the delta of column 0 and one column for the delta of column 3 * delta=0:1 // track the delta of column 0. If adjacent rows in column 1 are different then reset the state * delta=s0 // overwrite column 0 with the current delta * </pre> * * @user-reference * @hydra-name delta */ public static class DeltaOpRoll extends OpRoll { public DeltaOpRoll(String args, ChannelProgressivePromise queryPromise) { super(args, OP.DELTA, queryPromise); } } private final String[] args; private final OP op; private final boolean asInt; private final boolean inPlace; private final boolean summary; private Numeric[] state; private BundleField[] colIn; private BundleField[] colOut; private BundleField[] colKeys; private int rows; private String lastKey; private Numeric[] oldvals; private Bundle lastRow; public OpRoll(String args, OP op, ChannelProgressivePromise queryPromise) { super(queryPromise); boolean asInt = true; this.op = op; if (args.startsWith("i")) { args = args.substring(1); } if (args.startsWith("f")) { asInt = false; args = args.substring(1); } if (args.startsWith("s") || args.startsWith("S")) { inPlace = args.startsWith("s"); summary = !inPlace; args = args.substring(1); } else { summary = false; inPlace = false; } this.args = LessStrings.splitArray(args, ":"); this.asInt = asInt; } private final Numeric toType(ValueObject vo) { if (asInt) { return ValueUtil.asNumberOrParseLong(vo, 10); } else { return ValueUtil.asNumberOrParseDouble(vo); } } @Override public Bundle rowOp(Bundle row) { if (state == null) { colIn = new BundleColumnBinder(row, LessStrings.splitArray(args[0], ",")).getFields(); colKeys = args.length > 1 ? new BundleColumnBinder(row, LessStrings.splitArray(args[1], ",")).getFields() : null; state = new Numeric[colIn.length]; oldvals = new Numeric[colIn.length]; if (inPlace || summary) { colOut = colIn; if (summary) { lastRow = row.createBundle(); } } else { colOut = new BundleField[colIn.length]; for (int i = 0; i < colOut.length; i++) { colOut[i] = row.getFormat().getField("op_".concat(colIn[i].getName())); } } } rows++; if (colKeys != null) { String key = createCompoundKey(colKeys, row); if ((lastKey == null && key != null) || (lastKey != null && !lastKey.equals(key))) { lastKey = key; for (int j = 0; j < state.length; j++) { state[j] = null; } } } BundleColumnBinder binder = this.getSourceColumnBinder(row); for (int i = 0; i < colIn.length; i++) { if (state[i] == null) { state[i] = toType(row.getValue(colIn[i])); if (state[i] == null) { state[i] = ZERO; } oldvals[i] = state[i]; } else { switch (op) { case DELTA: Numeric newval = toType(row.getValue(colIn[i])); state[i] = newval.diff(oldvals[i]); oldvals[i] = newval; break; case MIN: state[i] = toType(state[i].min(toType(row.getValue(colIn[i])))); break; case MAX: state[i] = toType(state[i].max(toType(row.getValue(colIn[i])))); break; case SUM: state[i] = state[i].sum(toType(row.getValue(colIn[i]))); break; case AVG: state[i] = toType(state[i].sum(toType(row.getValue(colIn[i])))); break; } } if (!summary) { if (op == OP.AVG) { row.setValue(colOut[i], state[i].avg(rows)); } else { row.setValue(colOut[i], state[i]); } } } return row; } @Override public void sendComplete() { if (summary && state != null) { for (int i = 0; i < state.length; i++) { if (op == OP.AVG) { lastRow.setValue(colOut[i], state[i].avg(rows)); } else { lastRow.setValue(colOut[i], state[i]); } } getNext().send(lastRow); } super.sendComplete(); } }