/* This file is part of VoltDB. * Copyright (C) 2008-2017 VoltDB Inc. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ package org.voltdb.planner; import java.util.List; import org.voltdb.plannodes.AbstractPlanNode; import org.voltdb.plannodes.AggregatePlanNode; import org.voltdb.plannodes.HashAggregatePlanNode; import org.voltdb.plannodes.LimitPlanNode; import org.voltdb.plannodes.MergeReceivePlanNode; import org.voltdb.plannodes.OrderByPlanNode; import org.voltdb.plannodes.ProjectionPlanNode; import org.voltdb.plannodes.ReceivePlanNode; import org.voltdb.plannodes.SendPlanNode; import org.voltdb.types.PlanNodeType; public class TestPlansDistinct extends PlannerTestCase { @Override protected void setUp() throws Exception { setupSchema(TestPlansDistinct.class.getResource("testplans-groupby-ddl.sql"), "testplansgroupby", false); } @Override protected void tearDown() throws Exception { super.tearDown(); } public void testColumnsWithoutGroupby() { String sql1, sql2; // single column DISTINCT // A3 is partition key sql1 = "SELECT distinct A3 from T3"; sql2 = "SELECT A3 from T3 group by A3"; checkQueriesPlansAreTheSame(sql1, sql2); // B3 is not partition key sql1 = "SELECT distinct B3 from T3"; sql2 = "SELECT B3 from T3 group by B3"; checkQueriesPlansAreTheSame(sql1, sql2); // // Multiple columns DISTINCT // sql1 = "SELECT distinct A3, B3 from T3"; sql2 = "SELECT A3, B3 from T3 group by A3, B3"; checkQueriesPlansAreTheSame(sql1, sql2); sql1 = "SELECT distinct A3, B3 from T3"; sql2 = "SELECT A3, B3 from T3 group by A3, B3"; checkQueriesPlansAreTheSame(sql1, sql2); sql2 = "SELECT A3, B3 from T3 group by B3, A3"; checkQueriesPlansAreTheSame(sql1, sql2); sql2 = "SELECT B3, A3 from T3 group by B3, A3"; checkQueriesPlansAreTheSame(sql1, sql2); sql2 = "SELECT B3, A3 from T3 group by A3, B3"; checkQueriesPlansAreTheSame(sql1, sql2); // LIMIT/OFFSET sql1 = "SELECT distinct A3, B3 from T3 LIMIT 10"; sql2 = "SELECT B3, A3 from T3 group by A3, B3 LIMIT 10"; checkQueriesPlansAreTheSame(sql1, sql2); // Distinct * sql1 = "SELECT distinct * from T3"; sql2 = "SELECT pkey, A3, B3, C3, D3 from T3 group by pkey, A3, B3, C3, D3"; checkQueriesPlansAreTheSame(sql1, sql2); // Distinct on table aggregation // single table aggregate select sql1 = "SELECT distinct SUM(A3) from T3"; sql2 = "SELECT SUM(A3) from T3"; checkQueriesPlansAreTheSame(sql1, sql2); // multiple table aggregate select sql1 = "SELECT distinct count(*), SUM(A3) from T3"; sql2 = "SELECT count(*), SUM(A3) from T3"; checkQueriesPlansAreTheSame(sql1, sql2); // table aggregate with HAVING sql1 = "select Distinct min(A3), max(A3) from T3 having min(A3) > 0"; sql2 = "select min(A3), max(A3) from T3 having min(A3) > 0"; checkQueriesPlansAreTheSame(sql1, sql2); } public void testExpressionsWithoutGroupby() { String sql1, sql2; // distinct with expression sql1 = "SELECT distinct A3, floor(B3) from T3"; sql2 = "SELECT A3, floor(B3) from T3 group by A3, floor(B3)"; checkQueriesPlansAreTheSame(sql1, sql2); // distinct with expression on partition column sql1 = "SELECT distinct floor(A3), floor(B3) from T3"; sql2 = "SELECT floor(A3), floor(B3) from T3 group by floor(A3), floor(B3)"; checkQueriesPlansAreTheSame(sql1, sql2); // expression with parameters sql1 = "SELECT distinct A3, floor(B3+1) from T3"; sql2 = "SELECT A3, floor(B3+1) from T3 group by A3, floor(B3+1)"; checkQueriesPlansAreTheSame(sql1, sql2); } public void testMatViewsWithoutGroupby() { String sql1, sql2; // View: V_P1_NO_FIX_NEEDED sql1 = "SELECT DISTINCT V_A1, V_SUM_C1 FROM V_P1_NO_FIX_NEEDED"; sql2 = "SELECT V_A1, V_SUM_C1 FROM V_P1_NO_FIX_NEEDED GROUP BY V_A1, V_SUM_C1"; checkQueriesPlansAreTheSame(sql1, sql2); sql1 = "SELECT DISTINCT ABS(V_A1), V_SUM_C1 FROM V_P1_NO_FIX_NEEDED"; sql2 = "SELECT ABS(V_A1), V_SUM_C1 FROM V_P1_NO_FIX_NEEDED GROUP BY ABS(V_A1), V_SUM_C1"; checkQueriesPlansAreTheSame(sql1, sql2); sql1 = "SELECT distinct A3, floor(B3+1) from T3"; sql2 = "SELECT A3, floor(B3+1) from T3 group by A3, floor(B3+1)"; checkQueriesPlansAreTheSame(sql1, sql2); // Partition view tables without partition key String[] tbs = {"V_P1", "V_P1_ABS"}; for (String tb: tbs) { // distinct single group by column sql1 = "SELECT distinct V_A1 FROM " + tb; sql2 = "SELECT V_A1 FROM " + tb + " GROUP BY V_A1"; checkQueriesPlansAreTheSame(sql1, sql2); // distinct single aggregated column sql1 = "SELECT distinct V_SUM_C1 FROM " + tb; sql2 = "SELECT V_SUM_C1 FROM " + tb + " GROUP BY V_SUM_C1"; checkQueriesPlansAreTheSame(sql1, sql2); // adding order by sql1 = "SELECT distinct V_SUM_C1 FROM " + tb + " ORDER BY V_SUM_C1"; sql2 = "SELECT V_SUM_C1 FROM " + tb + " GROUP BY V_SUM_C1 ORDER BY V_SUM_C1"; checkQueriesPlansAreTheSame(sql1, sql2); // multiple columns sql1 = "SELECT distinct V_A1, V_SUM_C1 FROM " + tb; sql2 = "SELECT V_A1, V_SUM_C1 FROM " + tb + " GROUP BY V_A1, V_SUM_C1"; checkQueriesPlansAreTheSame(sql1, sql2); // multiple aggregated columns in the view sql1 = "SELECT distinct V_CNT, V_SUM_C1 FROM " + tb; sql2 = "SELECT V_CNT, V_SUM_C1 FROM " + tb + " GROUP BY V_CNT, V_SUM_C1"; checkQueriesPlansAreTheSame(sql1, sql2); // expressions sql1 = "SELECT distinct V_A1, V_SUM_C1 / 10 FROM " + tb; sql2 = "SELECT V_A1, V_SUM_C1 / 10 FROM " + tb + " GROUP BY V_A1, V_SUM_C1 / 10"; checkQueriesPlansAreTheSame(sql1, sql2); } } public void testNegative() { String sql; // Having sql = "SELECT distinct A3, B3 from T3 HAVING COUNT(*) > 3"; failToCompile(sql, "expression not in aggregate or GROUP BY columns: PUBLIC.T3.A3"); sql = "SELECT distinct A3, B3, C3 from T3 group by A3, B3"; failToCompile(sql, "expression not in aggregate or GROUP BY columns: PUBLIC.T3.C3"); sql = "SELECT distinct A3 from T3 group by A3, B3, C3"; compileToFragments(sql); // make sure the DISTINCT with GROUP BY is still working // invalid ORDER BY expression String errorMsg = "invalid ORDER BY expression"; // (1) without GROUP BY sql = "SELECT distinct B3 from T3 order by A3"; failToCompile(sql, errorMsg); sql = "SELECT distinct A3, B3 from T3 order by C3"; failToCompile(sql, errorMsg); // (2) with GROUP BY sql = "SELECT distinct A3 from T3 group by A3, B3, C3 ORDER BY B3"; failToCompile(sql, errorMsg); // // (3) with GROUP BY Primary key, which is the very edge case // // P1 primary key (PKEY) sql = "select PKEY, max(B1) FROM P1 group by PKEY order by C1"; failToCompile(sql, errorMsg); sql = "select max(B1) FROM P1 group by PKEY order by C1"; failToCompile(sql, errorMsg); // When including C1 in the display columns, it will be OK sql = "select DISTINCT C1, max(B1) FROM P1 group by PKEY order by C1"; compileToFragments(sql); sql = "select DISTINCT C1, max(B1) FROM P1 group by PKEY order by ABS(C1)"; compileToFragments(sql); // T3 primary key (PKEY, A3) sql = "select DISTINCT PKEY, max(B3) FROM T3 group by PKEY, A3 order by C3"; failToCompile(sql, errorMsg); sql = "select distinct max(B3) FROM T3 group by PKEY, A3 order by C3"; failToCompile(sql, errorMsg); sql = "select distinct C3, max(B3) FROM T3 group by PKEY, A3 order by C3"; compileToFragments(sql); // test ORDER BY GROUP BY key without in display list sql = "select distinct max(B3) FROM T3 group by PKEY, A3 order by A3"; failToCompile(sql, errorMsg); } public void testColumnsWithGroupby() { String sql1, sql2; // Group by with multiple columns distinct // PKEY, A3 is the primary key or contains the unique key. // A3 is the Partition key sql1 = "SELECT distinct B3, C3 from T3 group by PKEY, A3"; sql2 = "SELECT B3, C3 from T3 group by PKEY, A3"; checkDistinctWithGroupbyPlans(sql1, sql2); // single column distinct sql1 = "SELECT distinct SUM(C3) from T3 group by A3, B3"; sql2 = "SELECT SUM(C3) from T3 group by A3, B3"; checkDistinctWithGroupbyPlans(sql1, sql2); sql1 = "SELECT distinct SUM(C3) from T3 group by D3, B3"; sql2 = "SELECT SUM(C3) from T3 group by D3, B3"; checkDistinctWithGroupbyPlans(sql1, sql2); // multiple columns distinct sql1 = "SELECT distinct B3, SUM(C3), COUNT(*) from T3 group by A3, B3"; sql2 = "SELECT B3, SUM(C3), COUNT(*) from T3 group by A3, B3"; checkDistinctWithGroupbyPlans(sql1, sql2); sql1 = "SELECT distinct B3, SUM(C3), COUNT(*) from T3 group by D3, B3"; sql2 = "SELECT B3, SUM(C3), COUNT(*) from T3 group by D3, B3"; checkDistinctWithGroupbyPlans(sql1, sql2); // variance on select list and group by list sql1 = "SELECT distinct A3, sum(C3) from T3 group by A3, B3"; sql2 = "SELECT A3, sum(C3) from T3 group by A3, B3"; checkDistinctWithGroupbyPlans(sql1, sql2); sql1 = "SELECT distinct D3, sum(C3) from T3 group by D3, B3"; sql2 = "SELECT D3, sum(C3) from T3 group by D3, B3"; checkDistinctWithGroupbyPlans(sql1, sql2); // group by 3 columns sql1 = "SELECT distinct A3, B3 from T3 group by A3, B3, C3"; sql2 = "SELECT A3, B3 from T3 group by A3, B3, C3"; checkDistinctWithGroupbyPlans(sql1, sql2); // order by sql1 = "SELECT distinct A3, B3 from T3 group by A3, B3, C3 ORDER BY A3, B3"; sql2 = "SELECT A3, B3 from T3 group by A3, B3, C3 ORDER BY A3, B3"; checkDistinctWithGroupbyPlans(sql1, sql2); // order by normal case sql1 = "SELECT distinct D3, B3 from T3 group by D3, B3, C3 ORDER BY D3, B3"; sql2 = "SELECT D3, B3 from T3 group by D3, B3, C3 ORDER BY D3, B3"; checkDistinctWithGroupbyPlans(sql1, sql2); // Having sql1 = "SELECT distinct B3, SUM(C3), COUNT(*) from T3 group by A3, B3 Having SUM(C3) > 3"; sql2 = "SELECT B3, SUM(C3), COUNT(*) from T3 group by A3, B3 Having SUM(C3) > 3"; checkDistinctWithGroupbyPlans(sql1, sql2); // LIMIT/OFFSET is tricky, // a lot of PUSH DOWN can happen: ORDER BY/LIMIT, DISTINCT // A3 is the Partition column for table T3 // LIMIT can be pushed down with order by plan node for this case sql1 = "SELECT distinct A3, COUNT(*) from T3 group by A3, B3 ORDER BY A3 LIMIT 3"; sql2 = "SELECT A3, COUNT(*) from T3 group by A3, B3 ORDER BY A3 LIMIT 3"; checkDistinctWithGroupbyPlans(sql1, sql2, true); sql1 = "SELECT distinct B3, COUNT(*) from T3 group by A3, B3 ORDER BY B3 LIMIT 3"; sql2 = "SELECT B3, COUNT(*) from T3 group by A3, B3 ORDER BY B3 LIMIT 3"; checkDistinctWithGroupbyPlans(sql1, sql2, true); } public void testExpressionsWithGroupby() { String sql1, sql2; // distinct on expression sql1 = "SELECT distinct sum(C3)/count(C3) from T3 group by A3, B3"; sql2 = "SELECT sum(C3)/count(C3) from T3 group by A3, B3"; checkDistinctWithGroupbyPlans(sql1, sql2); } protected void checkDistinctWithGroupbyPlans(String distinctSQL, String groupbySQL) { checkDistinctWithGroupbyPlans(distinctSQL, groupbySQL, false); } /** * * @param distinctSQL Group by query with distinct * @param groupbySQL Group by query without distinct */ protected void checkDistinctWithGroupbyPlans(String distinctSQL, String groupbySQL, boolean limitPushdown) { List<AbstractPlanNode> pns1 = compileToFragments(distinctSQL); List<AbstractPlanNode> pns2 = compileToFragments(groupbySQL); //printExplainPlan(pns1); //printExplainPlan(pns2); assertTrue(pns1.get(0) instanceof SendPlanNode); assertTrue(pns2.get(0) instanceof SendPlanNode); AbstractPlanNode apn1, apn2; apn1 = pns1.get(0).getChild(0); apn2 = pns2.get(0).getChild(0); boolean hasTopProjection1 = false; if (apn1 instanceof ProjectionPlanNode) { apn1 = apn1.getChild(0); hasTopProjection1 = true; } boolean hasTopProjection2 = false; if (apn2 instanceof ProjectionPlanNode) { apn2 = apn2.getChild(0); hasTopProjection2 = true; } // DISTINCT plan node is rewrote with GROUP BY and adds above the original GROUP BY node // there may be another projection node in between for complex aggregation case boolean hasOrderby = false, hasLimit = false; boolean groupByMergeReceive = false; // infer the ORDERBY/LIMIT information from the base line query if (apn2 instanceof OrderByPlanNode) { hasOrderby = true; if (apn2.getInlinePlanNode(PlanNodeType.LIMIT) != null) { hasLimit = true; } apn2 = apn2.getChild(0); } else if (apn2 instanceof LimitPlanNode) { hasLimit = true; apn2 = apn2.getChild(0); } else if (apn2 instanceof MergeReceivePlanNode) { assertTrue(apn2.getInlinePlanNode(PlanNodeType.ORDERBY) != null); hasOrderby = true; hasLimit = apn2.getInlinePlanNode(PlanNodeType.LIMIT) != null; groupByMergeReceive = true; } // check the DISTINCT query plan boolean distinctMergeReceive = false; if (hasOrderby) { if (apn1 instanceof OrderByPlanNode) { assertTrue(apn1 instanceof OrderByPlanNode); if (hasLimit) { // check inline limit assertNotNull(apn1.getInlinePlanNode(PlanNodeType.LIMIT)); } apn1 = apn1.getChild(0); } else if (apn1 instanceof MergeReceivePlanNode) { distinctMergeReceive = true; assertNotNull(apn1.getInlinePlanNode(PlanNodeType.ORDERBY)); assertEquals(0, apn1.getChildCount()); } else { fail("The distinctSQL top node is not OrderBy or MergeReceive."); } } else if (hasLimit) { assertTrue(apn1 instanceof LimitPlanNode); apn1 = apn1.getChild(0); } // Check DISTINCT group by plan node if (distinctMergeReceive) { AbstractPlanNode aggr = AggregatePlanNode.getInlineAggregationNode(apn1); assertTrue(aggr instanceof AggregatePlanNode); assertEquals(0, ((AggregatePlanNode)aggr).getAggregateTypesSize()); assertEquals(pns1.get(0).getOutputSchema().getColumns().size(), ((AggregatePlanNode)aggr).getGroupByExpressionsSize()); if (hasLimit) { // check inline limit assertNotNull(aggr.getInlinePlanNode(PlanNodeType.LIMIT)); } } else { assertTrue(apn1 instanceof HashAggregatePlanNode); assertEquals(0, ((HashAggregatePlanNode)apn1).getAggregateTypesSize()); assertEquals(pns1.get(0).getOutputSchema().getColumns().size(), ((HashAggregatePlanNode)apn1).getGroupByExpressionsSize()); apn1 = apn1.getChild(0); } // check projection node for complex aggregation case if (apn1 instanceof ProjectionPlanNode) { apn1 = apn1.getChild(0); assertFalse(hasTopProjection1); } if (apn2 instanceof ProjectionPlanNode) { apn2 = apn2.getChild(0); assertFalse(hasTopProjection2); } // check the rest plan nodes. if (distinctMergeReceive == false && groupByMergeReceive == false) { assertEquals(apn1.toExplainPlanString(), apn2.toExplainPlanString()); } else if (distinctMergeReceive == true && groupByMergeReceive == true) { // In case of applied MergeReceive optimization the apn1 and apn2 nodes // should not have any children assertEquals(0, apn1.getChildCount()); assertEquals(0, apn2.getChildCount()); } // Distributed DISTINCT GROUP BY if (pns1.size() > 1) { if (! limitPushdown) { assertEquals(pns1.get(1).toExplainPlanString(), pns2.get(1).toExplainPlanString()); return; } assertTrue(pns1.get(1) instanceof SendPlanNode); assertTrue(pns2.get(1) instanceof SendPlanNode); apn1 = pns1.get(1).getChild(0); apn2 = pns2.get(1).getChild(0); // ignore the ORDER BY/LIMIT pushdown plan node // because DISTINCT case can not be pushed down assertTrue(apn2 instanceof OrderByPlanNode); assertNotNull(apn2.getInlinePlanNode(PlanNodeType.LIMIT)); apn2 = apn2.getChild(0); // If the MergeReceive optimization was applied, the explain plan could legitimately // differ (for example, index for grouping purpose vs index for sort order), or different // winners may produce completely different paths. if (distinctMergeReceive == false && groupByMergeReceive == false) { assertEquals(apn1.toExplainPlanString(), apn2.toExplainPlanString()); } } } public void testMatViewsWithGroupby() { String sql1, sql2; // Partition view tables without partition key String[] tbs = {"V_P1", "V_P1_ABS"}; for (String tb: tbs) { // Because of the GROUP BY is contained in the display columns list // DISTINCT can be dropped sql1 = "SELECT distinct V_SUM_C1 FROM " + tb + " GROUP by V_SUM_C1 LIMIT 5"; sql2 = "SELECT V_SUM_C1 FROM " + tb + " GROUP by V_SUM_C1 LIMIT 5"; checkQueriesPlansAreTheSame(sql1, sql2); sql1 = "SELECT distinct V_SUM_C1 FROM " + tb + " GROUP by V_SUM_C1 ORDER BY 1 LIMIT 5"; sql2 = "SELECT V_SUM_C1 FROM " + tb + " GROUP by V_SUM_C1 ORDER BY 1 LIMIT 5"; checkQueriesPlansAreTheSame(sql1, sql2); // count(*) may be too special sql1 = "SELECT distinct count(*) FROM " + tb + " GROUP by V_SUM_C1 LIMIT 5"; sql2 = "SELECT count(*) FROM " + tb + " GROUP by V_SUM_C1 LIMIT 5"; checkDistinctWithGroupbyPlans(sql1, sql2); sql1 = "SELECT distinct sum(V_SUM_D1) FROM " + tb + " GROUP by V_SUM_C1 LIMIT 5"; sql2 = "SELECT sum(V_SUM_D1) FROM " + tb + " GROUP by V_SUM_C1 LIMIT 5"; checkDistinctWithGroupbyPlans(sql1, sql2); // TODO: add more tests } } }