/* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.addthis.hydra.data.query.op; import java.util.ArrayDeque; import java.util.LinkedHashMap; import java.util.Map.Entry; import java.util.Queue; import java.util.SortedMap; import java.util.TreeMap; import com.addthis.basis.util.LessStrings; import com.addthis.bundle.core.Bundle; import com.addthis.bundle.core.BundleField; import com.addthis.bundle.core.list.ListBundle; import com.addthis.bundle.table.DataTable; import com.addthis.bundle.table.DataTableFactory; import com.addthis.bundle.util.IndexField; import com.addthis.bundle.util.ValueUtil; import com.addthis.bundle.value.Numeric; import com.addthis.bundle.value.ValueArray; import com.addthis.bundle.value.ValueBytes; import com.addthis.bundle.value.ValueCustom; import com.addthis.bundle.value.ValueDouble; import com.addthis.bundle.value.ValueFactory; import com.addthis.bundle.value.ValueLong; import com.addthis.bundle.value.ValueMap; import com.addthis.bundle.value.ValueObject; import com.addthis.bundle.value.ValueString; import com.addthis.bundle.value.ValueTranslationException; import com.addthis.hydra.data.query.AbstractQueryOp; import io.netty.channel.ChannelProgressivePromise; /** * <p>This query operation <span class="hydra-summary">pivots a column by row and column keys</span>. * <p/> * <p>The syntax is pivot=[rowkeys],[colkeys],[valkey],[cellop],[rowop],[colop],[sort]. [rowkeys] is a sequence of * one or more row numbers delimited by colon characters. [colkeys] is a sequence of one or more * column numbers * delimited by colon characters. [colval] is a column number. [colop] and * [rowop] are one of "max", "min", or * "sum". [sort] is either "a" for ascending * or "d" for descending. * <p/> * <p>This operation transforms the entire table into a new output table. The values of the * output table are derived from the values that are in the [colval] column. The rows of the new * table are determined by [rowkeys] and the columns of the new table determined by [colkeys]. * If two or more rows and/or columns are specified then the resulting rows or columns will * be the <a href="http://en.wikipedia.org/wiki/Cartesian_product">cartesian product</a> of * the specified values.</p> * <p/> * <p>In the output table the contents of row <i>i</i> and column <i>j</i> is the result * of the [colop] operation performed on all values from the input column that reside * in row <i>i</i> and column <i>j</i> of the original table. The output table will have * an additional column appended at the end. The contents of the additional column at row * <i>i</i> is the result of the [rowop] operation performed on all values from the input * column that reside in row <i>i</i>.</p> * <p/> * <p>Example:</p> * <pre> * 0 A 3 * 1 A 1 * 1 B 2 * 0 A 5 * * pivot=1,0,2,min,sum,d * * 0 1 * A 3 1 9 * B 0 2 2 * </pre> * * @user-reference * @hydra-name pivot */ public class OpPivot extends AbstractQueryOp { public static final ValueLong ZERO = ValueFactory.create(0); private static enum PivotOp { SUM, MIN, MAX, AVG, } private static enum SortOp { LABEL_ASC, LABEL_DES, SUM_ASC, SUM_DES } private BundleField labelCol; private BundleField sumCol; private PivotOp cellop = PivotOp.SUM; private PivotOp rowop; private PivotOp colop; private SortOp sortop; private DataTable output; private final LinkedHashMap<String, BundleField> outCellField = new LinkedHashMap<>(); private final SortedMap<String, Bundle> pivot = new TreeMap<>(); private final DataTableFactory tableFactory; private final IndexField[] rowFields; private final IndexField[] colFields; private final IndexField cellField; private final ChannelProgressivePromise queryPromise; public OpPivot(DataTableFactory tableFactory, String args, ChannelProgressivePromise queryPromise) { super(queryPromise); this.tableFactory = tableFactory; this.output = tableFactory.createTable(0); this.queryPromise = queryPromise; String[] parg = LessStrings.splitArray(args, ","); rowFields = parseIndexFields(parg[0]); colFields = parseIndexFields(parg[1]); cellField = new IndexField(Integer.parseInt(parg[2])); if (parg.length >= 4) { Queue<PivotOp> pop = new ArrayDeque<>(parg.length - 3); for (int i = 3; i < parg.length; i++) { switch (parg[i]) { case "sum": pop.add(PivotOp.SUM); break; case "min": pop.add(PivotOp.MIN); break; case "max": pop.add(PivotOp.MAX); break; case "avg": pop.add(PivotOp.AVG); break; case "a": case "sa": sortop = SortOp.SUM_ASC; break; case "d": case "sd": sortop = SortOp.SUM_DES; break; case "la": sortop = SortOp.LABEL_ASC; break; case "ld": sortop = SortOp.LABEL_DES; break; case "": case "null": case "-": pop.add(null); break; } } cellop = pop.poll(); rowop = pop.poll(); colop = pop.poll(); } } private IndexField[] parseIndexFields(String s) { String[] input = s.split(":"); IndexField[] fields = new IndexField[input.length]; for (int i = 0; i < input.length; i++) { fields[i] = new IndexField(Integer.parseInt(input[i])); } return fields; } private Numeric doOp(PivotOp op, Numeric accum, Numeric cell) { if (accum == null || cell == null) { //System.out.println("accum = " + accum + " cell = " + cell); return accum != null ? accum : cell; } if (op != null) { switch (op) { case SUM: return accum.sum(cell); case MIN: return accum.min(cell); case MAX: return accum.max(cell); case AVG: if (accum.getClass() != PivotAvg.class) { accum = new PivotAvg(accum); } return accum.sum(cell); } } return accum; } @Override public void send(Bundle row) { if (labelCol == null) { labelCol = output.getFormat().getField("__row__"); } /** generate column key or create if missing */ String colkey = ""; for (IndexField field : colFields) { colkey = colkey.concat(field.getValue(row).toString()); } BundleField pivotCell = outCellField.get(colkey); if (pivotCell == null) { pivotCell = output.getFormat().getField(colkey); outCellField.put(colkey, pivotCell); } /** generate row key and fetch row */ String rowkey = ""; for (IndexField field : rowFields) { rowkey = rowkey.concat(field.getValue(row).toString()); } Bundle pivotrow = pivot.get(rowkey); /** fill new row or append nulls to a short row */ if (pivotrow == null) { pivotrow = output.createBundle(); pivotrow.setValue(labelCol, ValueFactory.create(rowkey)); pivot.put(rowkey, pivotrow); } /** fetch column cell from pivot and matching column cell from row */ ValueObject inputValue = cellField.getValue(row); ValueObject pivotValue = pivotrow.getValue(pivotCell); if (pivotValue == null) { pivotValue = inputValue; } else { pivotValue = doOp(cellop, OpGather.num(pivotValue), OpGather.num(inputValue)); } pivotrow.setValue(pivotCell, pivotValue); } @Override public void sendComplete() { if (rowop != null) { sumCol = output.getFormat().getField("__sum__"); } // create and send pivot header ListBundle header = (ListBundle) output.createBundle(); for (Entry<String, BundleField> e : outCellField.entrySet()) { header.setValue(e.getValue(), ValueFactory.create(e.getKey())); } // emit pivot rows Bundle footer = output.createBundle(); for (Entry<String, Bundle> ent : pivot.entrySet()) { Bundle row = ent.getValue(); // do rowop and/or colop if present if (rowop != null || colop != null) { Numeric rowaccum = null; Numeric colaccum; for (BundleField col : row.getFormat()) { if (col == labelCol || col == sumCol) { continue; } ValueObject cell = row.getValue(col); if (cell == null) { row.setValue(col, ZERO); continue; } if (colop != null) { colaccum = OpGather.num(footer.getValue(col)); if (colaccum == null) { footer.setValue(col, cell); } else { footer.setValue(col, doOp(colop, colaccum, OpGather.num(cell))); } } if (rowop != null) { if (rowaccum == null) { rowaccum = OpGather.num(cell); } else { rowaccum = doOp(rowop, rowaccum, OpGather.num(cell)); } } } if (rowaccum != null) { if (rowop == PivotOp.AVG && rowaccum.getClass() == PivotAvg.class) { /* see the code -- zero is ignored */ rowaccum = rowaccum.avg(0); } row.setValue(sumCol, rowaccum); } } output.append(row); } if (colop != null) { if (colop == PivotOp.AVG) { for (BundleField col : footer.getFormat()) { if (col == labelCol || col == sumCol) { continue; } /* see the code -- zero is ignored */ footer.setValue(col, OpGather.num(footer.getValue(col)).avg(0)); } } } // sort table if (sortop != null) { String sortstr = null; switch (sortop) { case LABEL_ASC: sortstr = labelCol.getName() + ":s:a"; break; case LABEL_DES: sortstr = labelCol.getName() + ":s:d"; break; case SUM_ASC: sortstr = sumCol.getName() + ":n:a"; break; case SUM_DES: sortstr = sumCol.getName() + ":n:d"; break; } OpSort sort = new OpSort(tableFactory, sortstr, queryPromise); sort.sendTable(output); output = sort.getTable(); } // prepend header if (header.getFormat().getFieldCount() > 0) { output.insert(0, header); } // append footer if (colop != null) { output.append(footer); } getNext().sendTable(output); } /** * for doing averages */ private static class PivotAvg implements Numeric { private Numeric orig; private int ops; PivotAvg(ValueObject orig) { this.orig = ValueUtil.asNumberOrParseLong(orig, 10); this.ops = 1; } @Override public String toString() { return orig.toString(); } @Override public Numeric avg(int count) { return orig.avg(ops); } @Override public PivotAvg diff(Numeric val) { ops++; orig = orig.diff(val); return this; } @Override public PivotAvg max(Numeric val) { ops++; orig = orig.max(val); return this; } @Override public PivotAvg min(Numeric val) { ops++; orig = orig.min(val); return this; } @Override public PivotAvg sum(Numeric val) { ops++; orig = orig.sum(val); return this; } @Override public PivotAvg prod(Numeric val) { ops++; orig = orig.prod(val); return this; } @Override public PivotAvg divide(Numeric val) { ops++; orig = orig.divide(val); return this; } @Override public ValueDouble asDouble() { return orig.asDouble(); } @Override public ValueLong asLong() { return orig.asLong(); } @Override public ValueString asString() { return orig.asString(); } @Override public TYPE getObjectType() { return orig.getObjectType(); } @Override public Numeric asNative() { return orig; } @Override public ValueBytes asBytes() throws ValueTranslationException { throw new ValueTranslationException(); } @Override public ValueArray asArray() throws ValueTranslationException { throw new ValueTranslationException(); } @Override public ValueMap asMap() throws ValueTranslationException { throw new ValueTranslationException(); } @Override public PivotAvg asNumeric() throws ValueTranslationException { return this; } @Override public ValueCustom asCustom() throws ValueTranslationException { throw new ValueTranslationException(); } } }