/* This file is part of VoltDB.
* Copyright (C) 2008-2017 VoltDB Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
package org.voltdb.planner;
import static org.voltdb.types.ExpressionType.AGGREGATE_APPROX_COUNT_DISTINCT;
import static org.voltdb.types.ExpressionType.AGGREGATE_AVG;
import static org.voltdb.types.ExpressionType.AGGREGATE_COUNT;
import static org.voltdb.types.ExpressionType.AGGREGATE_HYPERLOGLOGS_TO_CARD;
import static org.voltdb.types.ExpressionType.AGGREGATE_MAX;
import static org.voltdb.types.ExpressionType.AGGREGATE_MIN;
import static org.voltdb.types.ExpressionType.AGGREGATE_SUM;
import static org.voltdb.types.ExpressionType.AGGREGATE_VALS_TO_HYPERLOGLOG;
import java.util.List;
import org.voltdb.plannodes.AbstractPlanNode;
import org.voltdb.plannodes.AggregatePlanNode;
import org.voltdb.types.ExpressionType;
/**
* Mostly here we're concerned that an APPROX_COUNT_DISTINCT aggregate function is
* handled correctly in both single- and multi-partition contexts. In a single partition context,
* we expect APPROX_COUNT_DISTINCT to be appear in the plan. This is the simplest case.
*
* For multi-part plans, there are two possibilities:
* - APPROX_COUNT_DISTINCT is accompanied by other aggregates that cannot be pushed down
* (e.g., count(distinct col)), in this case, we must ship all the rows to the coordinator,
* so we expect to just evaluate APPROX_COUNT_DISTINCT on the coordinator.
* - APPROX_COUNT_DISTINCT appears as the only aggregate on the select list, or all the other
* aggregates can be pushed down. In this case, we "split" the aggregate function to two:
* - ROWS_TO_HYPERLOGLOG, which produces a hyperloglog for each partition
* - One coordinator, HYPERLOGLOGS_TO_CARD which produces the estimate (as a double)
* @author cwolff
*
*/
public class TestPlansApproxCountDistinct extends PlannerTestCase {
private static final int COORDINATOR_FRAG = 0;
private static final int PARTITION_FRAG = 1;
@Override
protected void setUp() throws Exception {
setupSchema(getClass().getResource("testplans-count-ddl.sql"),
"testcount", false);
}
@Override
protected void tearDown() throws Exception {
super.tearDown();
}
private void assertAggPlanNodeContainsFunctions(AggregatePlanNode node, ExpressionType[] expectedAggFns) {
List<ExpressionType> actualAggFns = node.getAggregateTypes();
assertEquals("Wrong number of aggregate functions in plan", expectedAggFns.length, actualAggFns.size());
int i = 0;
for (ExpressionType expectedAggFn : expectedAggFns) {
assertEquals("Found unexpected agg function", expectedAggFn, actualAggFns.get(i));
++i;
}
}
private void assertFragContainsAggWithFunctions(AbstractPlanNode frag, ExpressionType... expectedAggFns) {
List<AbstractPlanNode> aggNodes = findAllAggPlanNodes(frag);
assertFalse("No aggregation node in fragment!", 0 == aggNodes.size());
assertEquals("More than one aggregation node in fragment!", 1, aggNodes.size());
AggregatePlanNode aggNode = (AggregatePlanNode)aggNodes.get(0);
assertAggPlanNodeContainsFunctions(aggNode, expectedAggFns);
}
private void assertFragContainsTwoAggsWithFunctions(AbstractPlanNode frag,
ExpressionType[] expectedAggFnsFirst,
ExpressionType[] expectedAggFnsSecond) {
List<AbstractPlanNode> aggNodes = findAllAggPlanNodes(frag);
assertEquals("Wrong number of aggregation nodes in fragment!", 2, aggNodes.size());
assertAggPlanNodeContainsFunctions((AggregatePlanNode)aggNodes.get(0), expectedAggFnsFirst);
assertAggPlanNodeContainsFunctions((AggregatePlanNode)aggNodes.get(1), expectedAggFnsSecond);
}
private void assertFragContainsNoAggPlanNodes(AbstractPlanNode node) {
List<AbstractPlanNode> aggNodes = findAllAggPlanNodes(node);
assertEquals("Found an aggregation node in fragment, but didn't expect to!", 0, aggNodes.size());
}
public void testSinglePartitionTableAgg() throws Exception {
List<AbstractPlanNode> pn = compileToFragments("SELECT approx_count_distinct(age) from T1");
assertEquals(1, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG), AGGREGATE_APPROX_COUNT_DISTINCT);
pn = compileToFragments("select approx_count_distinct(age), sum(points) from t1");
assertEquals(1, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_APPROX_COUNT_DISTINCT,
AGGREGATE_SUM);
pn = compileToFragments("select approx_count_distinct(age), sum(distinct points) from t1");
assertEquals(1, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_APPROX_COUNT_DISTINCT,
AGGREGATE_SUM);
}
public void testSinglePartitionWithGroupBy() throws Exception {
List<AbstractPlanNode> pn = compileToFragments(
"SELECT id, approx_count_distinct(age) "
+ "from T1 "
+ "group by id");
assertEquals(1, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG), AGGREGATE_APPROX_COUNT_DISTINCT);
pn = compileToFragments(
"select age, approx_count_distinct(points), max(username) "
+ "from t2 "
+ "group by age");
assertEquals(1, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_APPROX_COUNT_DISTINCT,
AGGREGATE_MAX);
pn = compileToFragments(
"select username, approx_count_distinct(age), avg(distinct points) "
+ "from t2 "
+ "group by username");
assertEquals(1, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_APPROX_COUNT_DISTINCT,
AGGREGATE_AVG);
}
public void testMultiPartitionTableAgg() throws Exception {
List<AbstractPlanNode> pn = compileToFragments("SELECT approx_count_distinct(num) from P1");
assertEquals(2, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG), AGGREGATE_HYPERLOGLOGS_TO_CARD);
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG), AGGREGATE_VALS_TO_HYPERLOGLOG);
// Two push-down-able aggs.
pn = compileToFragments("SELECT approx_count_distinct(num), count(ratio) from P1");
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_HYPERLOGLOGS_TO_CARD,
AGGREGATE_SUM);
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_VALS_TO_HYPERLOGLOG,
AGGREGATE_COUNT);
// Three push-down-able aggs.
pn = compileToFragments("SELECT approx_count_distinct(num), min(desc), max(ratio) from P1");
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_HYPERLOGLOGS_TO_CARD,
AGGREGATE_MIN, AGGREGATE_MAX);
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_VALS_TO_HYPERLOGLOG,
AGGREGATE_MIN, AGGREGATE_MAX);
// With an agg that can be pushed down, but only because its argument is a partition key.
pn = compileToFragments("SELECT approx_count_distinct(num), count(distinct id) from P1");
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_HYPERLOGLOGS_TO_CARD,
AGGREGATE_SUM);
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_VALS_TO_HYPERLOGLOG,
AGGREGATE_COUNT);
// With an agg that can be pushed down, but only because its argument is a partition key.
// Also, with approx count distinct with partition key as argument.
pn = compileToFragments("SELECT approx_count_distinct(id), count(distinct id) from P1");
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_HYPERLOGLOGS_TO_CARD,
AGGREGATE_SUM);
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_VALS_TO_HYPERLOGLOG,
AGGREGATE_COUNT);
// With an agg that cannot be pushed down,
pn = compileToFragments("SELECT sum(distinct ratio), approx_count_distinct(num) from P1");
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_SUM,
AGGREGATE_APPROX_COUNT_DISTINCT);
assertFragContainsNoAggPlanNodes(pn.get(PARTITION_FRAG));
}
public void testMultiPartitionWithGroupBy() throws Exception {
List<AbstractPlanNode> pn = compileToFragments(
"SELECT desc as modid, approx_count_distinct(num) "
+ "from P1 "
+ "group by desc");
assertEquals(2, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG), AGGREGATE_HYPERLOGLOGS_TO_CARD);
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG), AGGREGATE_VALS_TO_HYPERLOGLOG);
// Two push-down-able aggs.
pn = compileToFragments("SELECT desc, approx_count_distinct(num), count(ratio) "
+ "from P1 "
+ "group by desc");
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_HYPERLOGLOGS_TO_CARD,
AGGREGATE_SUM);
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_VALS_TO_HYPERLOGLOG,
AGGREGATE_COUNT);
// A case similar to above.
pn = compileToFragments(
"SELECT desc, approx_count_distinct(num), max(ratio) "
+ "from P1 "
+ "group by desc");
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_HYPERLOGLOGS_TO_CARD,
AGGREGATE_MAX);
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_VALS_TO_HYPERLOGLOG,
AGGREGATE_MAX);
// With an agg that can be pushed down, but only because its argument is a partition key.
pn = compileToFragments(
"SELECT ratio, approx_count_distinct(num), count(distinct id) "
+ "from P1 "
+ "group by ratio");
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_HYPERLOGLOGS_TO_CARD,
AGGREGATE_SUM);
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_VALS_TO_HYPERLOGLOG,
AGGREGATE_COUNT);
// With an agg that can be pushed down, but only because its argument is a partition key.
// Also, with approx count distinct with partition key as argument.
pn = compileToFragments(
"SELECT desc, approx_count_distinct(id), count(distinct id) "
+ "from P1 "
+ "group by desc");
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_HYPERLOGLOGS_TO_CARD,
AGGREGATE_SUM);
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_VALS_TO_HYPERLOGLOG,
AGGREGATE_COUNT);
// With partition key as group by key.
// In this case, all aggregation can be done on partitions,
// coordinator just concatenates result
pn = compileToFragments(
"SELECT id, sum(distinct ratio), approx_count_distinct(num) "
+ "from P1 "
+ "group by id");
assertFragContainsNoAggPlanNodes(pn.get(COORDINATOR_FRAG));
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_SUM,
AGGREGATE_APPROX_COUNT_DISTINCT);
}
public void testWithSubqueries() throws Exception {
// Single-partition statement with a subquery (table agg)
List<AbstractPlanNode> pn = compileToFragments(
"select * "
+ "from "
+ " T1, "
+ " (select approx_count_distinct(age) from t1) as subq");
assertEquals(1, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_APPROX_COUNT_DISTINCT);
// Single-partition statement with a subquery (with group by)
pn = compileToFragments(
"select * "
+ "from "
+ " (select username, approx_count_distinct(age), avg(distinct points) "
+ " from t2 "
+ " group by username) as subq"
+ " inner join t2 "
+ " on t2.username = subq.username;");
assertEquals(1, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_APPROX_COUNT_DISTINCT,
AGGREGATE_AVG);
// multi-partition table agg
pn = compileToFragments(
"select * "
+ "from "
+ "t1, "
+ "(SELECT sum(distinct ratio), approx_count_distinct(num) from P1) as subq");
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_SUM,
AGGREGATE_APPROX_COUNT_DISTINCT);
assertFragContainsNoAggPlanNodes(pn.get(PARTITION_FRAG));
// single-part plan on partitioned tables, with GB in subquery
pn = compileToFragments(
"select * "
+ "from p1 "
+ "inner join "
+ "(SELECT id, sum(distinct ratio), approx_count_distinct(num) "
+ "from P1 "
+ "where id = 10 "
+ "group by id) as subq "
+ "on subq.id = p1.id");
assertEquals(1, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_SUM,
AGGREGATE_APPROX_COUNT_DISTINCT);
// multi-part plan on partitioned tables, with GB in subquery
pn = compileToFragments(
"select * "
+ "from t1 "
+ "inner join "
+ "(SELECT id, approx_count_distinct(num) "
+ "from P1 "
+ "group by id) as subq "
+ "on subq.id = t1.id");
for (AbstractPlanNode n : pn) {
System.out.println(n.toExplainPlanString());
}
assertEquals(2, pn.size());
assertFragContainsNoAggPlanNodes(pn.get(COORDINATOR_FRAG));
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_APPROX_COUNT_DISTINCT);
}
public void testSubqueriesWithMultipleAggs() throws Exception {
List<AbstractPlanNode> pn;
// In this query, one agg plan node is distributed across fragments (p1),
// but the other is not (t1).
pn = compileToFragments("select approx_count_distinct(num) "
+ "from (select approx_count_distinct(points) from t1) as repl_subquery,"
+ " p1");
assertEquals(2, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_HYPERLOGLOGS_TO_CARD);
assertFragContainsTwoAggsWithFunctions(pn.get(PARTITION_FRAG),
new ExpressionType[] {AGGREGATE_APPROX_COUNT_DISTINCT},
new ExpressionType[] {AGGREGATE_VALS_TO_HYPERLOGLOG});
// Like above but with some more aggregate functions
// (which breaks the push-down-ability of distributed agg)
pn = compileToFragments("select approx_count_distinct(num), sum(distinct num) "
+ "from (select approx_count_distinct(points) from t1) as repl_subquery,"
+ " p1");
assertEquals(2, pn.size());
assertFragContainsAggWithFunctions(pn.get(COORDINATOR_FRAG),
AGGREGATE_APPROX_COUNT_DISTINCT,
AGGREGATE_SUM);
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_APPROX_COUNT_DISTINCT);
// As above but partitioned and replicated tables are swapped.
pn = compileToFragments("select approx_count_distinct(points) "
+ "from (select approx_count_distinct(num) from p1) as repl_subquery,"
+ " t1");
assertEquals(2, pn.size());
assertFragContainsTwoAggsWithFunctions(pn.get(COORDINATOR_FRAG),
new ExpressionType[] {AGGREGATE_HYPERLOGLOGS_TO_CARD},
new ExpressionType[] {AGGREGATE_APPROX_COUNT_DISTINCT});
assertFragContainsAggWithFunctions(pn.get(PARTITION_FRAG),
AGGREGATE_VALS_TO_HYPERLOGLOG);
// Like above but with some more aggregate functions
// (which breaks the push-down-ability of distributed agg)
pn = compileToFragments("select approx_count_distinct(points) "
+ "from (select approx_count_distinct(num), sum(distinct num) from p1) as repl_subquery,"
+ " t1");
assertEquals(2, pn.size());
assertFragContainsTwoAggsWithFunctions(pn.get(COORDINATOR_FRAG),
new ExpressionType[] {AGGREGATE_APPROX_COUNT_DISTINCT, AGGREGATE_SUM},
new ExpressionType[] {AGGREGATE_APPROX_COUNT_DISTINCT});
assertFragContainsNoAggPlanNodes(pn.get(PARTITION_FRAG));
}
}