// StreamingAggregate.java example
//
// Explorer
// myria-master
package edu.washington.escience.myria.operator.agg;

import java.io.IOException;
import java.util.List;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import edu.washington.escience.myria.DbException;
import edu.washington.escience.myria.column.Column;
import edu.washington.escience.myria.operator.Operator;
import edu.washington.escience.myria.storage.TupleBatch;

/**
 * This aggregate operator computes the aggregation in a streaming manner (requires input sorted on the grouping column(s)).
 * Intended to substitute for Aggregate when input is known to be sorted.
 *
 * @see Aggregate
 */
public class StreamingAggregate extends Aggregate {
  /** Version identifier used by Java serialization. */
  private static final long serialVersionUID = 1L;

  /**
   * Creates a streaming aggregate. All arguments are handed unchanged to the {@link Aggregate} constructor.
   *
   * @param child the operator supplying the input tuples; may be null.
   * @param gfields the columns that form the grouping key.
   * @param factories the factories that produce the {@link Aggregator}s applied to each group.
   */
  public StreamingAggregate(
      @Nullable final Operator child,
      @Nonnull final int[] gfields,
      @Nonnull final AggregatorFactory... factories) {
    super(child, gfields, factories);
  }

  /**
   * Consumes the batches the child currently has ready, folding each row into the state of its group, and returns a
   * result batch as soon as a filled one is available. Result rows hold the grouping field(s) followed by the
   * aggregate field(s).
   *
   * @return a filled result batch if one exists after an input batch has been processed; if the child has no batch
   *         ready and reports end-of-stream, whatever {@code resultBuffer.popAny()} yields after a final
   *         {@code generateResult()}; otherwise {@code null}.
   * @throws DbException if any error occurs.
   */
  @Override
  protected TupleBatch fetchNextReady() throws DbException {
    final Operator source = getChild();
    for (TupleBatch batch = source.nextReady(); batch != null; batch = source.nextReady()) {
      for (int r = 0; r < batch.numTuples(); r++) {
        int slot = groupStates.getIndex(batch, gfields, r);
        if (slot == -1) {
          slot = beginStreamedGroup(batch, r);
        }
        /* Aggregator state columns are laid out right after the grouping columns, one aggregator after another. */
        int col = gfields.length;
        for (Aggregator aggregator : internalAggs) {
          aggregator.addRow(batch, r, groupStates.getData(), slot, col);
          col += aggregator.getStateSize();
        }
      }
      /* Check for a full output batch before pulling more input. */
      if (resultBuffer.hasFilledTB()) {
        return resultBuffer.popFilled();
      }
    }
    if (!source.eos()) {
      /* No input ready right now, but the child has not finished. */
      return null;
    }
    generateResult();
    return resultBuffer.popAny();
  }

  /**
   * Handles a row whose grouping key was not found in {@code groupStates}: calls {@code generateResult()}, records the
   * new key, and initializes every aggregator's state for it.
   *
   * @param batch the input batch holding the row.
   * @param r the position of the row inside {@code batch}.
   * @return the index to use for the new group, computed as {@code groupStates.numTuples() - 1}.
   * @throws DbException if any error occurs.
   */
  private int beginStreamedGroup(final TupleBatch batch, final int r) throws DbException {
    /* Input is sorted on the grouping key, so an unseen key means the previous group is complete and its state can
     * be added to the result before the new group is started. */
    generateResult();
    groupStates.addTuple(batch, gfields, r, true);
    int col = gfields.length;
    for (Aggregator aggregator : internalAggs) {
      aggregator.initState(groupStates.getData(), col);
      col += aggregator.getStateSize();
    }
    return groupStates.numTuples() - 1;
  }

  /**
   * Wraps the given columns in a {@link TupleBatch} built with this operator's schema and passes it to
   * {@code resultBuffer.absorb}.
   *
   * @param columns the columns to place in the result buffer.
   */
  @Override
  protected void addToResult(List<Column<?>> columns) {
    final TupleBatch finished = new TupleBatch(getSchema(), columns);
    resultBuffer.absorb(finished, false);
  }
}