package edu.washington.escience.myria.operator;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import org.joda.time.DateTime;
import org.junit.Test;
import edu.washington.escience.myria.DbException;
import edu.washington.escience.myria.Schema;
import edu.washington.escience.myria.Type;
import edu.washington.escience.myria.operator.agg.Aggregate;
import edu.washington.escience.myria.operator.agg.PrimitiveAggregator.AggregationOp;
import edu.washington.escience.myria.operator.agg.PrimitiveAggregatorFactory;
import edu.washington.escience.myria.operator.agg.StreamingAggregate;
import edu.washington.escience.myria.storage.TupleBatch;
import edu.washington.escience.myria.storage.TupleBatchBuffer;
import edu.washington.escience.myria.storage.TupleUtils;
import edu.washington.escience.myria.util.TestEnvVars;
/**
* Test cases for {@link StreamingAggregate} class. Source tuples are generated in sorted order on group keys, if any.
* Some of the tests are taken from those for {@link SingleGroupByAggregate} and {@link Aggregate} since
* StreamingAggregate is expected to behave the same way they do if input is sorted.
*/
public class StreamingAggTest {
/**
* Construct a TupleBatchBuffer to be used as source of aggregate. Fixed schema and sorted on grouping columns.
*
* @param numTuples number of tuples to be added
* @return filled TupleBatchBuffer with each group key having (numTuples/10) tuples
*/
private TupleBatchBuffer fillInputTbb(final int numTuples) {
final Schema schema =
Schema.ofFields(
Type.INT_TYPE,
"Int",
Type.DOUBLE_TYPE,
"Double",
Type.FLOAT_TYPE,
"Float",
Type.LONG_TYPE,
"Long",
Type.DATETIME_TYPE,
"Datetime",
Type.STRING_TYPE,
"String",
Type.BOOLEAN_TYPE,
"Boolean",
Type.LONG_TYPE,
"value");
final TupleBatchBuffer source = new TupleBatchBuffer(schema);
for (int i = 0; i < numTuples; i++) {
int value = i / (numTuples / 10);
source.putInt(0, value);
source.putDouble(1, value);
source.putFloat(2, value);
source.putLong(3, value);
source.putDateTime(4, new DateTime(2010 + value, 1, 1, 0, 0));
source.putString(5, "" + value);
source.putBoolean(6, (i / (numTuples / 2) == 0));
source.putLong(7, 2L);
}
return source;
}
@Test
public void testSingleGroupKeySingleColumnCount() throws DbException {
final int numTuples = 50;
/* col0: Int, col1: Double, col2: Float, col3: Long, col4: Datetime, col5: String, col6: Boolean, (first 7 columns
* used for grouping) col7: Long (to agg over). */
TupleBatchBuffer source = fillInputTbb(numTuples);
// group by col0
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {0},
new PrimitiveAggregatorFactory(7, AggregationOp.COUNT));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(5, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col1
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {1},
new PrimitiveAggregatorFactory(7, AggregationOp.COUNT));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(5, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col2
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {2},
new PrimitiveAggregatorFactory(7, AggregationOp.COUNT));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(5, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col3
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {3},
new PrimitiveAggregatorFactory(7, AggregationOp.COUNT));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(5, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col4
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {4},
new PrimitiveAggregatorFactory(7, AggregationOp.COUNT));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(5, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col5
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {5},
new PrimitiveAggregatorFactory(7, AggregationOp.COUNT));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(5, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col6
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {6},
new PrimitiveAggregatorFactory(7, AggregationOp.COUNT));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(2, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(25, result.getLong(result.numColumns() - 1, i));
}
agg.close();
}
@Test
public void testSingleGroupKeySingleColumnSum() throws DbException {
final int numTuples = 50;
/* col0: Int, col1: Double, col2: Float, col3: Long, col4: Datetime, col5: String, col6: Boolean, (first 7 columns
* used for grouping) col7: Long (to agg over). */
TupleBatchBuffer source = fillInputTbb(numTuples);
// group by col0
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {0},
new PrimitiveAggregatorFactory(7, AggregationOp.SUM));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(10L, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col1
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {1},
new PrimitiveAggregatorFactory(7, AggregationOp.SUM));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(10L, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col2
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {2},
new PrimitiveAggregatorFactory(7, AggregationOp.SUM));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(10L, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col3
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {3},
new PrimitiveAggregatorFactory(7, AggregationOp.SUM));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(10L, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col4
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {4},
new PrimitiveAggregatorFactory(7, AggregationOp.SUM));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(10L, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col5
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {5},
new PrimitiveAggregatorFactory(7, AggregationOp.SUM));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(10L, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col6
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {6},
new PrimitiveAggregatorFactory(7, AggregationOp.SUM));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(2, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(50L, result.getLong(result.numColumns() - 1, i));
}
agg.close();
}
@Test
public void testSingleGroupKeySingleColumnAvg() throws DbException {
final int numTuples = 50;
/* col0: Int, col1: Double, col2: Float, col3: Long, col4: Datetime, col5: String, col6: Boolean, (first 7 columns
* used for grouping) col7: Long (to agg over). */
TupleBatchBuffer source = fillInputTbb(numTuples);
// group by col0
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {0},
new PrimitiveAggregatorFactory(7, AggregationOp.AVG));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(2L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col1
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {1},
new PrimitiveAggregatorFactory(7, AggregationOp.AVG));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(2L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col2
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {2},
new PrimitiveAggregatorFactory(7, AggregationOp.AVG));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(2L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col3
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {3},
new PrimitiveAggregatorFactory(7, AggregationOp.AVG));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(2L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col4
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {4},
new PrimitiveAggregatorFactory(7, AggregationOp.AVG));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(2L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col5
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {5},
new PrimitiveAggregatorFactory(7, AggregationOp.AVG));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(2L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col6
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {6},
new PrimitiveAggregatorFactory(7, AggregationOp.AVG));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(2, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(2L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
}
@Test
public void testSingleGroupKeySingleColumnStdev() throws DbException {
final int numTuples = 50;
/* col0: Int, col1: Double, col2: Float, col3: Long, col4: Datetime, col5: String, col6: Boolean, (first 7 columns
* used for grouping) col7: Long (to agg over). */
TupleBatchBuffer source = fillInputTbb(numTuples);
// group by col0
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {0},
new PrimitiveAggregatorFactory(7, AggregationOp.STDEV));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col1
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {1},
new PrimitiveAggregatorFactory(7, AggregationOp.STDEV));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col2
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {2},
new PrimitiveAggregatorFactory(7, AggregationOp.STDEV));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col3
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {3},
new PrimitiveAggregatorFactory(7, AggregationOp.STDEV));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col4
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {4},
new PrimitiveAggregatorFactory(7, AggregationOp.STDEV));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col5
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {5},
new PrimitiveAggregatorFactory(7, AggregationOp.STDEV));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(numTuples / (numTuples / 10), result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col6
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {6},
new PrimitiveAggregatorFactory(7, AggregationOp.STDEV));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(2, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0L, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
}
@Test
public void testSingleGroupKeySingleColumnMin() throws DbException {
final int numTuples = 50;
/* col0: Int, col1: Double, col2: Float, col3: Long, col4: Datetime, col5: String, col6: Boolean, (first 7 columns
* used for grouping) col7: Long (to agg over) constant value of 2L. */
TupleBatchBuffer source = fillInputTbb(numTuples);
// group by col7, agg over col0
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(0, AggregationOp.MIN));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0, result.getInt(result.numColumns() - 1, i));
}
agg.close();
// group by col7, agg over col1
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(1, AggregationOp.MIN));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col7, agg over col2
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(2, AggregationOp.MIN));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0, result.getFloat(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col7, agg over col3
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(3, AggregationOp.MIN));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0L, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col4, agg over col4
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(4, AggregationOp.MIN));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(new DateTime(2010, 1, 1, 0, 0), result.getDateTime(result.numColumns() - 1, i));
}
agg.close();
// group by col7, agg over col5
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(5, AggregationOp.MIN));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals("0", result.getString(result.numColumns() - 1, i));
}
agg.close();
// Note: Min not applicable to Boolean type
}
@Test
public void testSingleGroupKeySingleColumnMax() throws DbException {
final int numTuples = 50;
/* col0: Int, col1: Double, col2: Float, col3: Long, col4: Datetime, col5: String, col6: Boolean, (first 7 columns
* used for grouping) col7: Long (to agg over) constant value of 2L. */
TupleBatchBuffer source = fillInputTbb(numTuples);
// group by col7, agg over col0
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(0, AggregationOp.MAX));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(9, result.getInt(result.numColumns() - 1, i));
}
agg.close();
// group by col7, agg over col1
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(1, AggregationOp.MAX));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(9, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col7, agg over col2
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(2, AggregationOp.MAX));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(9, result.getFloat(result.numColumns() - 1, i), 0.0001);
}
agg.close();
// group by col7, agg over col3
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(3, AggregationOp.MAX));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(9L, result.getLong(result.numColumns() - 1, i));
}
agg.close();
// group by col4, agg over col4
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(4, AggregationOp.MAX));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(new DateTime(2019, 1, 1, 0, 0), result.getDateTime(result.numColumns() - 1, i));
}
agg.close();
// group by col7, agg over col5
agg =
new StreamingAggregate(
new BatchTupleSource(source),
new int[] {7},
new PrimitiveAggregatorFactory(5, AggregationOp.MAX));
agg.open(TestEnvVars.get());
result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals("9", result.getString(result.numColumns() - 1, i));
}
agg.close();
// Note: Max not applicable to type Boolean
}
@Test
public void testMultiGroupSingleColumnCount() throws DbException {
final int numTuples = 50;
final Schema schema =
Schema.ofFields(Type.LONG_TYPE, "g0", Type.LONG_TYPE, "g1", Type.LONG_TYPE, "value");
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
// g0 same for all tuples, g1 split to 5 groups, g2 gets i
for (long i = 0; i < numTuples; i++) {
tbb.putLong(0, 0L);
tbb.putLong(1, i / (numTuples / 5));
tbb.putLong(2, i);
}
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(tbb),
new int[] {0, 1},
new PrimitiveAggregatorFactory(2, AggregationOp.COUNT));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(5, result.numTuples());
assertEquals(3, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(10, result.getLong(result.numColumns() - 1, i));
}
agg.close();
}
@Test
public void testMultiGroupSingleColumnMin() throws DbException {
final int numTuples = 50;
final Schema schema =
Schema.ofFields(Type.LONG_TYPE, "g0", Type.LONG_TYPE, "g1", Type.LONG_TYPE, "value");
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
// g0 same for all tuples, g1 split to 5 groups, g2 gets i
for (long i = 0; i < numTuples; i++) {
tbb.putLong(0, 0L);
tbb.putLong(1, i / (numTuples / 5));
tbb.putLong(2, i);
}
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(tbb),
new int[] {0, 1},
new PrimitiveAggregatorFactory(2, AggregationOp.MIN));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(5, result.numTuples());
assertEquals(3, result.getSchema().numColumns());
assertEquals(0, result.getLong(result.numColumns() - 1, 0));
assertEquals(10, result.getLong(result.numColumns() - 1, 1));
assertEquals(20, result.getLong(result.numColumns() - 1, 2));
assertEquals(30, result.getLong(result.numColumns() - 1, 3));
assertEquals(40, result.getLong(result.numColumns() - 1, 4));
agg.close();
}
@Test
public void testMultiGroupSingleColumnMax() throws DbException {
final int numTuples = 50;
final Schema schema =
Schema.ofFields(Type.LONG_TYPE, "g0", Type.LONG_TYPE, "g1", Type.LONG_TYPE, "value");
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
// g0 same for all tuples, g1 split to 5 groups, g2 gets i
for (long i = 0; i < numTuples; i++) {
tbb.putLong(0, 0L);
tbb.putLong(1, i / (numTuples / 5));
tbb.putLong(2, i);
}
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(tbb),
new int[] {0, 1},
new PrimitiveAggregatorFactory(2, AggregationOp.MAX));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(5, result.numTuples());
assertEquals(3, result.getSchema().numColumns());
assertEquals(9, result.getLong(result.numColumns() - 1, 0));
assertEquals(19, result.getLong(result.numColumns() - 1, 1));
assertEquals(29, result.getLong(result.numColumns() - 1, 2));
assertEquals(39, result.getLong(result.numColumns() - 1, 3));
assertEquals(49, result.getLong(result.numColumns() - 1, 4));
agg.close();
}
@Test
public void testMultiGroupSingleColumnSum() throws DbException {
final int numTuples = 50;
final Schema schema =
Schema.ofFields(Type.LONG_TYPE, "g0", Type.LONG_TYPE, "g1", Type.LONG_TYPE, "value");
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
// g0 same for all tuples, g1 split to 5 groups, g2 gets 10
for (long i = 0; i < numTuples; i++) {
tbb.putLong(0, 0L);
tbb.putLong(1, i / (numTuples / 5));
tbb.putLong(2, 10L);
}
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(tbb),
new int[] {0, 1},
new PrimitiveAggregatorFactory(2, AggregationOp.SUM));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(5, result.numTuples());
assertEquals(3, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(100, result.getLong(result.numColumns() - 1, i));
}
agg.close();
}
@Test
public void testMultiGroupSingleColumnAvg() throws DbException {
final int numTuples = 50;
final Schema schema =
Schema.ofFields(Type.LONG_TYPE, "g0", Type.LONG_TYPE, "g1", Type.LONG_TYPE, "value");
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
// g0 same for all tuples, g1 split to 5 groups, g2 gets 10
for (long i = 0; i < numTuples; i++) {
tbb.putLong(0, 0L);
tbb.putLong(1, i / (numTuples / 5));
tbb.putLong(2, 10L);
}
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(tbb),
new int[] {0, 1},
new PrimitiveAggregatorFactory(2, AggregationOp.AVG));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(5, result.numTuples());
assertEquals(3, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(10, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
}
@Test
public void testMultiGroupSingleColumnStdev() throws DbException {
final int numTuples = 50;
final Schema schema =
Schema.ofFields(Type.LONG_TYPE, "g0", Type.LONG_TYPE, "g1", Type.LONG_TYPE, "value");
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
// g0 same for all tuples, g1 split to 5 groups, g2 gets 10
for (long i = 0; i < numTuples; i++) {
tbb.putLong(0, 0L);
tbb.putLong(1, i / (numTuples / 5));
tbb.putLong(2, 10L);
}
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(tbb),
new int[] {0, 1},
new PrimitiveAggregatorFactory(2, AggregationOp.STDEV));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(5, result.numTuples());
assertEquals(3, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
assertEquals(0, result.getDouble(result.numColumns() - 1, i), 0.0001);
}
agg.close();
}
@Test
public void testSingleGroupKeyMultiColumnAllAgg() throws DbException {
final int numTuples = 50;
final Schema schema = Schema.ofFields(Type.LONG_TYPE, "gkey", Type.LONG_TYPE, "value");
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
// gkey split to 5 groups, value gets 10
for (long i = 0; i < numTuples; i++) {
tbb.putLong(0, i / (numTuples / 5));
tbb.putLong(1, 10L);
}
// group by gkey; min on gkey, max on gkey, count on value, sum on value, avg on value, stdev on value
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(tbb),
new int[] {0},
new PrimitiveAggregatorFactory(
0, new AggregationOp[] {AggregationOp.MIN, AggregationOp.MAX}),
new PrimitiveAggregatorFactory(
1,
new AggregationOp[] {
AggregationOp.COUNT, AggregationOp.SUM, AggregationOp.AVG, AggregationOp.STDEV
}));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(5, result.numTuples());
assertEquals(7, result.getSchema().numColumns());
for (int i = 0; i < result.numTuples(); i++) {
// min
assertEquals(result.getLong(0, i), result.getLong(1, i));
// max
assertEquals(result.getLong(0, i), result.getLong(2, i));
// count
assertEquals(10, result.getLong(3, i));
// sum
assertEquals(100, result.getLong(4, i));
// avg
assertEquals(10, result.getDouble(5, i), 0.0001);
// stdev
assertEquals(0, result.getDouble(6, i), 0.0001);
}
agg.close();
}
@Test
public void testMultiGroupMultiColumn() throws DbException {
final int numTuples = 50;
final Schema schema =
Schema.ofFields(Type.LONG_TYPE, "g0", Type.LONG_TYPE, "g1", Type.LONG_TYPE, "value");
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
// {0, 2, i} on first half tuples, {0, 4, i} on the second half
int sumFirst = 0;
int sumSecond = 0;
for (long i = 0; i < numTuples; i++) {
tbb.putLong(0, 0L);
if (i / (numTuples / 2) == 0) {
tbb.putLong(1, 2L);
sumFirst += i;
} else {
tbb.putLong(1, 4L);
sumSecond += i;
}
tbb.putLong(2, i);
}
/* Generate expected values for mean and stdev */
double meanFirst = (double) sumFirst / (numTuples / 2);
double meanSecond = (double) sumSecond / (numTuples / 2);
double diffSquaredFirst = 0.0;
double diffSquaredSecond = 0.0;
for (int i = 0; i < numTuples; ++i) {
if (i / (numTuples / 2) == 0) {
double diff = i - meanFirst;
diffSquaredFirst += diff * diff;
} else {
double diff = i - meanSecond;
diffSquaredSecond += diff * diff;
}
}
double expectedFirstStdev = Math.sqrt(diffSquaredFirst / (numTuples / 2));
double expectedSecondStdev = Math.sqrt(diffSquaredSecond / (numTuples / 2));
// group by col0 and col1, then min max count sum avg stdev
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(tbb),
new int[] {0, 1},
new PrimitiveAggregatorFactory(
2,
new AggregationOp[] {
AggregationOp.MIN,
AggregationOp.MAX,
AggregationOp.COUNT,
AggregationOp.SUM,
AggregationOp.AVG,
AggregationOp.STDEV
}));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(2, result.numTuples());
assertEquals(8, result.getSchema().numColumns());
// min
assertEquals(0, result.getLong(2, 0));
assertEquals(25, result.getLong(2, 1));
// max
assertEquals(24, result.getLong(3, 0));
assertEquals(49, result.getLong(3, 1));
// count
assertEquals(numTuples / 2, result.getLong(4, 0));
assertEquals(numTuples / 2, result.getLong(4, 1));
// sum
assertEquals(sumFirst, result.getLong(5, 0));
assertEquals(sumSecond, result.getLong(5, 1));
// avg
assertEquals(meanFirst, result.getDouble(6, 0), 0.0001);
assertEquals(meanSecond, result.getDouble(6, 1), 0.0001);
// stdev
assertEquals(expectedFirstStdev, result.getDouble(7, 0), 0.0001);
assertEquals(expectedSecondStdev, result.getDouble(7, 1), 0.0001);
agg.close();
}
@Test
public void testSingleGroupAllAggLargeInput() throws DbException {
final Schema schema = Schema.ofFields(Type.LONG_TYPE, "gkey", Type.LONG_TYPE, "value");
final int numTuples = 2 * TupleUtils.getBatchSize(schema);
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
// {0, i}
int sum = 0;
for (long i = 0; i < numTuples; i++) {
tbb.putLong(0, 0L);
tbb.putLong(1, i);
sum += i;
}
/* Generate expected values for mean and stdev */
double mean = (double) sum / numTuples;
double diffSquared = 0.0;
for (int i = 0; i < numTuples; ++i) {
double diff = i - mean;
diffSquared += diff * diff;
}
double expectedStdev = Math.sqrt(diffSquared / numTuples);
// group by gkey, then min max count sum avg stdev
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(tbb),
new int[] {0},
new PrimitiveAggregatorFactory(
1,
new AggregationOp[] {
AggregationOp.MIN,
AggregationOp.MAX,
AggregationOp.COUNT,
AggregationOp.SUM,
AggregationOp.AVG,
AggregationOp.STDEV
}));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(1, result.numTuples());
assertEquals(7, result.getSchema().numColumns());
// min
assertEquals(0, result.getLong(1, 0));
// max
assertEquals(19999, result.getLong(2, 0));
// count
assertEquals(numTuples, result.getLong(3, 0));
// sum
assertEquals(sum, result.getLong(4, 0));
// avg
assertEquals(mean, result.getDouble(5, 0), 0.0001);
// stdev
assertEquals(expectedStdev, result.getDouble(6, 0), 0.0001);
agg.close();
}
@Test
public void testMultiGroupAllAggLargeInput() throws DbException {
final Schema schema =
Schema.ofFields(Type.LONG_TYPE, "g0", Type.LONG_TYPE, "g1", Type.LONG_TYPE, "value");
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
final int numTuples = 3 * TupleUtils.getBatchSize(schema);
// split into 4 groups, each group may spread across different batches
// {0, 0, i} in first group, {0, 1, i} in second, {0, 2, i} in third, {0, 3, i} in fourth
int sumFirst = 0;
int sumSecond = 0;
int sumThird = 0;
int sumFourth = 0;
for (long i = 0; i < numTuples; i++) {
tbb.putLong(0, 0L);
if (i / (numTuples / 4) == 0) {
tbb.putLong(1, 0L);
sumFirst += i;
} else if (i / (numTuples / 4) == 1) {
tbb.putLong(1, 1L);
sumSecond += i;
} else if (i / (numTuples / 4) == 2) {
tbb.putLong(1, 2L);
sumThird += i;
} else {
tbb.putLong(1, 3L);
sumFourth += i;
}
tbb.putLong(2, i);
}
/* Generate expected values for mean and stdev */
double meanFirst = (double) sumFirst / (numTuples / 4);
double meanSecond = (double) sumSecond / (numTuples / 4);
double meanThird = (double) sumThird / (numTuples / 4);
double meanFourth = (double) sumFourth / (numTuples / 4);
double diffSquaredFirst = 0.0;
double diffSquaredSecond = 0.0;
double diffSquaredThird = 0.0;
double diffSquaredFourth = 0.0;
for (int i = 0; i < numTuples; ++i) {
if (i / (numTuples / 4) == 0) {
double diff = i - meanFirst;
diffSquaredFirst += diff * diff;
} else if (i / (numTuples / 4) == 1) {
double diff = i - meanSecond;
diffSquaredSecond += diff * diff;
} else if (i / (numTuples / 4) == 2) {
double diff = i - meanThird;
diffSquaredThird += diff * diff;
} else {
double diff = i - meanFourth;
diffSquaredFourth += diff * diff;
}
}
double expectedFirstStdev = Math.sqrt(diffSquaredFirst / (numTuples / 4));
double expectedSecondStdev = Math.sqrt(diffSquaredSecond / (numTuples / 4));
double expectedThirdStdev = Math.sqrt(diffSquaredThird / (numTuples / 4));
double expectedFourthStdev = Math.sqrt(diffSquaredFourth / (numTuples / 4));
// group by col0 and col1, then min max count sum avg stdev
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(tbb),
new int[] {0, 1},
new PrimitiveAggregatorFactory(
2,
new AggregationOp[] {
AggregationOp.MIN,
AggregationOp.MAX,
AggregationOp.COUNT,
AggregationOp.SUM,
AggregationOp.AVG,
AggregationOp.STDEV
}));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(4, result.numTuples());
assertEquals(8, result.getSchema().numColumns());
// min
assertEquals(0, result.getLong(2, 0));
assertEquals((numTuples / 4), result.getLong(2, 1));
assertEquals(2 * (numTuples / 4), result.getLong(2, 2));
assertEquals(3 * (numTuples / 4), result.getLong(2, 3));
// max
assertEquals((numTuples / 4) - 1, result.getLong(3, 0));
assertEquals(2 * (numTuples / 4) - 1, result.getLong(3, 1));
assertEquals(3 * (numTuples / 4) - 1, result.getLong(3, 2));
assertEquals(numTuples - 1, result.getLong(3, 3));
// count
assertEquals(numTuples / 4, result.getLong(4, 0));
assertEquals(numTuples / 4, result.getLong(4, 1));
assertEquals(numTuples / 4, result.getLong(4, 2));
assertEquals(numTuples - 3 * (numTuples / 4), result.getLong(4, 3));
// sum
assertEquals(sumFirst, result.getLong(5, 0));
assertEquals(sumSecond, result.getLong(5, 1));
assertEquals(sumThird, result.getLong(5, 2));
assertEquals(sumFourth, result.getLong(5, 3));
// avg
assertEquals(meanFirst, result.getDouble(6, 0), 0.0001);
assertEquals(meanSecond, result.getDouble(6, 1), 0.0001);
assertEquals(meanThird, result.getDouble(6, 2), 0.0001);
assertEquals(meanFourth, result.getDouble(6, 3), 0.0001);
// stdev
assertEquals(expectedFirstStdev, result.getDouble(7, 0), 0.0001);
assertEquals(expectedSecondStdev, result.getDouble(7, 1), 0.0001);
assertEquals(expectedThirdStdev, result.getDouble(7, 2), 0.0001);
assertEquals(expectedFourthStdev, result.getDouble(7, 3), 0.0001);
agg.close();
}
@Test
public void testMultiBatchResult() throws DbException {
final Schema schema = Schema.ofFields(Type.LONG_TYPE, "gkey", Type.LONG_TYPE, "value");
final TupleBatchBuffer tbb = new TupleBatchBuffer(schema);
final int numTuples = 3 * TupleUtils.getBatchSize(schema) + 3;
final int batchSize = TupleUtils.getBatchSize(schema);
// gkey: 0, 1, 2, ..., numTuples-1; value: 1, 1, 1, ...
for (long i = 0; i < numTuples; i++) {
tbb.putLong(0, i);
tbb.putLong(1, 1L);
}
// group by col0, count
StreamingAggregate agg =
new StreamingAggregate(
new BatchTupleSource(tbb),
new int[] {0},
new PrimitiveAggregatorFactory(1, AggregationOp.COUNT));
agg.open(TestEnvVars.get());
TupleBatch result = agg.nextReady();
assertNotNull(result);
assertEquals(batchSize, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
// aggregator should return filled tuple batch, even if it hasn't finished processing all input
assertFalse(agg.getChild().eos());
// get second tuple batch
result = agg.nextReady();
assertEquals(batchSize, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
assertFalse(agg.getChild().eos());
// get third tuple batch
result = agg.nextReady();
assertEquals(batchSize, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
assertFalse(agg.getChild().eos());
// get last, non-filled tuple batch
result = agg.nextReady();
assertEquals(3, result.numTuples());
assertEquals(2, result.getSchema().numColumns());
// child reaches eos()
assertTrue(agg.getChild().eos());
// exhaust aggregator
result = agg.nextReady();
assertNull(result);
assertTrue(agg.eos());
agg.close();
}
}