/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.server.execution.lng; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Map; import java.util.Set; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; import java.util.concurrent.Future; import java.util.stream.Collectors; import org.diqube.data.column.ColumnType; import org.diqube.execution.ExecutablePlan; import org.diqube.executionenv.cache.ColumnShardCacheRegistry; import org.diqube.executionenv.cache.DefaultColumnShardCache; import org.diqube.executionenv.cache.DefaultColumnShardCacheTestUtil; import org.diqube.loader.LoadException; import org.diqube.plan.exception.ValidationException; import org.diqube.server.execution.AbstractCacheDoubleDiqlExecutionTest; import org.diqube.server.execution.CacheDoubleTestUtil.IgnoreInCacheDoubleTestUtil; import org.diqube.util.DoubleUtil; import org.diqube.util.Pair; import org.testng.Assert; import org.testng.annotations.Test; import com.google.common.collect.Iterables; /** * Test the column aggregation and repeated projection features on LONG columns. * * The column aggregation aggregates the values of multiple columns of the same row to one new value. This is used if an * aggregation is executed on repeated fields for example. In contrast to that, aggregation that is executed on a GROUP * BY does aggregate values of the same column but multiple rows. * * @author Bastian Gloeckle */ public class LongColumnAggregationAndRepeatedProjectionDiqlExecutionTest extends AbstractCacheDoubleDiqlExecutionTest<Long> { public LongColumnAggregationAndRepeatedProjectionDiqlExecutionTest() { super(ColumnType.LONG, new LongTestDataProvider()); } @Test public void simpleColAggregation() throws LoadException, InterruptedException, ExecutionException { initializeFromJson("[ { \"a\": 1, \"b\": [ { \"c\": 0 }, { \"c\": 10 } ] } ]"); ExecutablePlan plan = buildExecutablePlan("select a, round(avg(b[*].c)) from " + TABLE); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("round") .addParameterColumnName(functionBasedColumnNameBuilderFactory.create().withFunctionName("avg") .addParameterColumnName("b[*].c").build()) .build(); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(resAggColName), "Expected that there's results for the aggregation func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 1, "Expected to receive a specific amout of rows"); Assert.assertEquals(resultValues.get(resAggColName).size(), 1, "Expected to receive a specific amout of rows"); Assert.assertEquals((long) resultValues.get("a").values().iterator().next(), 1L, "Expected correct result value"); Assert.assertEquals((long) resultValues.get(resAggColName).values().iterator().next(), 5L, "Expected correct result value"); } finally { executor.shutdownNow(); } } @Test public void twoRowColAggregation() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, \"b\": [ { \"c\": 0 }, { \"c\": 10 } ] }," + // "{ \"a\": 2, \"b\": [ { \"c\": 0 }, { \"c\": 100 } ] }" + // " ]"); ExecutablePlan plan = buildExecutablePlan("select a, round(avg(b[*].c)) from " + TABLE); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("round") .addParameterColumnName(functionBasedColumnNameBuilderFactory.create().withFunctionName("avg") .addParameterColumnName("b[*].c").build()) .build(); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(resAggColName), "Expected that there's results for the aggregation func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 2, "Expected to receive a specific amout of rows"); Assert.assertEquals(resultValues.get(resAggColName).size(), 2, "Expected to receive a specific amout of rows"); Set<Pair<Long, Long>> expected = new HashSet<>(); expected.add(new Pair<>(1L, 5L)); expected.add(new Pair<>(2L, 50L)); Set<Pair<Long, Long>> actual = new HashSet<>(); for (long rowId : resultValues.get("a").keySet()) actual.add(new Pair<>(resultValues.get("a").get(rowId), resultValues.get(resAggColName).get(rowId))); Assert.assertEquals(actual, expected, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test public void twoRowTwoLevelColAggregation() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, \"b\": [ { \"c\": [ { \"d\": 20 }, { \"d\": 30 } ] }, { \"c\": [ { \"d\": 100 }, { \"d\": 25 }, { \"d\": 75 } ] } ] }," + "{ \"a\": 2, \"b\": [ { \"c\": [ { \"d\": 400 }, { \"d\": 50 }, { \"d\": 150 } ] }, { \"c\": [ { \"d\": 1050 }, { \"d\": 850 } ] } ] }" + // " ]"); ExecutablePlan plan = buildExecutablePlan("select a, round(avg(b[*].c[*].d)) from " + TABLE); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("round") .addParameterColumnName(functionBasedColumnNameBuilderFactory.create().withFunctionName("avg") .addParameterColumnName("b[*].c[*].d").build()) .build(); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(resAggColName), "Expected that there's results for the aggregation func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 2, "Expected to receive a specific amout of rows"); Assert.assertEquals(resultValues.get(resAggColName).size(), 2, "Expected to receive a specific amout of rows"); Set<Pair<Long, Long>> expected = new HashSet<>(); expected.add(new Pair<>(1L, 50L)); expected.add(new Pair<>(2L, 500L)); Set<Pair<Long, Long>> actual = new HashSet<>(); for (long rowId : resultValues.get("a").keySet()) actual.add(new Pair<>(resultValues.get("a").get(rowId), resultValues.get(resAggColName).get(rowId))); Assert.assertEquals(actual, expected, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test public void twoRowTwoLevelColAggregationWithoutObjectsInArray() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, \"b\": [ { \"c\": [ 20, 30 ] }, { \"c\": [ 100, 25, 75 ] } ] }," + "{ \"a\": 2, \"b\": [ { \"c\": [ 400, 50, 150 ] }, { \"c\": [ 1050, 850 ] } ] }" + // " ]"); ExecutablePlan plan = buildExecutablePlan("select a, round(avg(b[*].c[*])) from " + TABLE); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("round") .addParameterColumnName(functionBasedColumnNameBuilderFactory.create().withFunctionName("avg") .addParameterColumnName("b[*].c[*]").build()) .build(); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(resAggColName), "Expected that there's results for the aggregation func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 2, "Expected to receive a specific amout of rows"); Assert.assertEquals(resultValues.get(resAggColName).size(), 2, "Expected to receive a specific amout of rows"); Set<Pair<Long, Long>> expected = new HashSet<>(); expected.add(new Pair<>(1L, 50L)); expected.add(new Pair<>(2L, 500L)); Set<Pair<Long, Long>> actual = new HashSet<>(); for (long rowId : resultValues.get("a").keySet()) actual.add(new Pair<>(resultValues.get("a").get(rowId), resultValues.get(resAggColName).get(rowId))); Assert.assertEquals(actual, expected, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test public void twoRowTwoLevelColAggregationAfterProjection() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"e\" : 10, \"c\": [ { \"d\": 20 }, { \"d\": 30 } ] }, " + // "{ \"e\": 5, \"c\": [ { \"d\": 100 }, { \"d\": 25 }, { \"d\": 75 } ] } ] },"// // 2nd top level: + "{ \"a\": 2, " + // "\"b\": [ { \"e\" : 1, \"c\": [ { \"d\": 400 }, { \"d\": 50 }, { \"d\": 150 } ] }, " + // "{ \"e\" : 1, \"c\": [ { \"d\": 1050 }, { \"d\": 850 } ] } ] }" // + // " ]"); // expected result: // 1st top level: avg((20 + 10), (30 + 10), (100 + 5), (25 + 5), (75 + 5)) == 285 / 5 == 57 // 2nd top level: avg((400 + 1), (50 + 1), (150 + 1), (1050 + 1), (850 + 1)) == 2505 / 5 == 501 ExecutablePlan plan = buildExecutablePlan("select a, round(avg(add(b[*].c[*].d, b[*].e))) from " + TABLE); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("round") .addParameterColumnName(functionBasedColumnNameBuilderFactory.create().withFunctionName("avg") .addParameterColumnName(functionBasedColumnNameBuilderFactory.create().withFunctionName("add") .addParameterColumnName("b[*].c[*].d").addParameterColumnName("b[*].e").build() + repeatedColNameGen.allEntriesManifestedSubstr()) .build()) .build(); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(resAggColName), "Expected that there's results for the aggregation func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 2, "Expected to receive a specific amout of rows"); Assert.assertEquals(resultValues.get(resAggColName).size(), 2, "Expected to receive a specific amout of rows"); Set<Pair<Long, Long>> expected = new HashSet<>(); expected.add(new Pair<>(1L, 57L)); expected.add(new Pair<>(2L, 501L)); Set<Pair<Long, Long>> actual = new HashSet<>(); for (long rowId : resultValues.get("a").keySet()) actual.add(new Pair<>(resultValues.get("a").get(rowId), resultValues.get(resAggColName).get(rowId))); Assert.assertEquals(actual, expected, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test public void twoRowTwoLevelColAggregationAfterProjection2() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"c\": [ { \"d\": 20 }, { \"d\": 30 } ] }, " + // "{ \"c\": [ { \"d\": 100 }, { \"d\": 25 }, { \"d\": 75 } ] } ] },"// // 2nd top level: + "{ \"a\": 2, " + // "\"b\": [ { \"c\": [ { \"d\": 400 }, { \"d\": 50 }, { \"d\": 150 } ] }, " + // "{ \"c\": [ { \"d\": 1050 }, { \"d\": 850 } ] } ] }" // + // " ]"); ExecutablePlan plan = buildExecutablePlan("select a, round(avg(add(b[*].c[*].d, 1))) from " + TABLE); // expected result: // 1st top level: avg((20 + 1), (30 + 1), (100 + 1), (25 + 1), (75 + 1)) == 255 / 5 == 51 // 2nd top level: avg((400 + 1), (50 + 1), (150 + 1), (1050 + 1), (850 + 1)) == 2505 / 5 == 501 ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("round") .addParameterColumnName(functionBasedColumnNameBuilderFactory.create().withFunctionName("avg") .addParameterColumnName(functionBasedColumnNameBuilderFactory.create().withFunctionName("add") .addParameterColumnName("b[*].c[*].d").addParameterLiteralLong(1).build() + repeatedColNameGen.allEntriesManifestedSubstr()) .build()) .build(); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(resAggColName), "Expected that there's results for the aggregation func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 2, "Expected to receive a specific amout of rows"); Assert.assertEquals(resultValues.get(resAggColName).size(), 2, "Expected to receive a specific amout of rows"); Set<Pair<Long, Long>> expected = new HashSet<>(); expected.add(new Pair<>(1L, 51L)); expected.add(new Pair<>(2L, 501L)); Set<Pair<Long, Long>> actual = new HashSet<>(); for (long rowId : resultValues.get("a").keySet()) actual.add(new Pair<>(resultValues.get("a").get(rowId), resultValues.get(resAggColName).get(rowId))); Assert.assertEquals(actual, expected, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test(expectedExceptions = ValidationException.class) public void selectProjected() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"c\": 20 }, { \"c\": 30 } ] } ]"); buildExecutablePlan("select a, add(b[*].c[*].d, 1) from " + TABLE); } @Test(expectedExceptions = ValidationException.class) public void whereProjected() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"c\": 20 }, { \"c\": 30 } ] } ]"); buildExecutablePlan("select a from " + TABLE + " where add(b[*].c[*].d, 1) = 5"); } @Test(expectedExceptions = ValidationException.class) public void groupProjected() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"c\": 20 }, { \"c\": 30 } ] } ]"); buildExecutablePlan("select a from " + TABLE + " group by add(b[*].c[*].d, 1)"); } @Test(expectedExceptions = ValidationException.class) public void havingProjected() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"c\": 20 }, { \"c\": 30 } ] } ]"); buildExecutablePlan("select a from " + TABLE + " group by a having add(b[*].c[*].d, 1) = 5"); } @Test(expectedExceptions = ValidationException.class) public void orderProjected() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"c\": 20 }, { \"c\": 30 } ] } ]"); buildExecutablePlan("select a from " + TABLE + " order by add(b[*].c[*].d, 1)"); } @Test public void orderProjectedAggregated() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"c\": 20 }, { \"c\": 30 } ] } ]"); ExecutablePlan plan = buildExecutablePlan("select a from " + TABLE + " order by avg(add(b[*].c, 1))"); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertEquals(resultValues.keySet().size(), 1, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 1, "Expected to receive a specific amout of rows"); Assert.assertEquals((long) Iterables.getOnlyElement(resultValues.get("a").values()), 1L, "Expected correct result value"); } finally { executor.shutdownNow(); } } @Test public void whereProjectedAggregated() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"c\": 20 }, { \"c\": 30 } ] } ]"); ExecutablePlan plan = buildExecutablePlan("select a from " + TABLE + " where avg(add(b[*].c, 1)) = 26."); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertEquals(resultValues.keySet().size(), 1, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 1, "Expected to receive a specific amout of rows"); Assert.assertEquals((long) Iterables.getOnlyElement(resultValues.get("a").values()), 1L, "Expected correct result value"); } finally { executor.shutdownNow(); } } @Test public void groupProjectedAggregated() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"c\": 20 }, { \"c\": 30 } ] } ]"); ExecutablePlan plan = buildExecutablePlan("select a from " + TABLE + " group by avg(add(b[*].c, 1))"); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertEquals(resultValues.keySet().size(), 1, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 1, "Expected to receive a specific amout of rows"); Assert.assertEquals((long) Iterables.getOnlyElement(resultValues.get("a").values()), 1L, "Expected correct result value"); } finally { executor.shutdownNow(); } } @Test public void selectProjectedAggregated() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"c\": 20 }, { \"c\": 30 } ] } ]"); ExecutablePlan plan = buildExecutablePlan("select a, round(avg(add(b[*].c, 1))) from " + TABLE); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("round") .addParameterColumnName(functionBasedColumnNameBuilderFactory.create().withFunctionName("avg") .addParameterColumnName(functionBasedColumnNameBuilderFactory.create().withFunctionName("add") .addParameterColumnName("b[*].c").addParameterLiteralLong(1).build() + repeatedColNameGen.allEntriesManifestedSubstr()) .build()) .build(); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(resAggColName), "Expected that there's results for the aggregation func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 1, "Expected to receive a specific amout of rows"); Assert.assertEquals(resultValues.get(resAggColName).size(), 1, "Expected to receive a specific amout of rows"); Set<Pair<Long, Long>> expected = new HashSet<>(); expected.add(new Pair<>(1L, 26L)); Set<Pair<Long, Long>> actual = new HashSet<>(); for (long rowId : resultValues.get("a").keySet()) actual.add(new Pair<>(resultValues.get("a").get(rowId), resultValues.get(resAggColName).get(rowId))); Assert.assertEquals(actual, expected, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test(expectedExceptions = ValidationException.class) public void havingProjectedAggregated() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, " + // "\"b\": [ { \"c\": 20 }, { \"c\": 30 } ] } ]"); buildExecutablePlan("select a from " + TABLE + " group by a having avg(add(b[*].c[*].d, 1)) = 5"); } @Test public void rowAndColAggregationAndProjection() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, \"b\": [ { \"c\": [ 20, 30 ] }, { \"c\": [ 100, 25, 75 ] } ] }," + "{ \"a\": 2, \"b\": [ { \"c\": [ 400, 50, 150 ] }, { \"c\": [ 1050, 850 ] } ] }," // + "{ \"a\": 2, \"b\": [ { \"c\": [ 10, 0, 3 ] }, { \"c\": [ 7, 20 ] } ] }" + // " ]"); ExecutablePlan plan = buildExecutablePlan("select a, round(avg(avg(add(b[*].c[*], 1)))) from " + TABLE + " group by a"); // Explanation: from inner functions to outer: // add(b[*].c[*], 1) is REPEATED_PROJECT // avg(..) is AGGREGATION_COL (outputs one value per row) // avg(..) is AGGREGATION_ROW (aggregates the two rows with a==2) // round(..) is a PROJECT ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("round") // .addParameterColumnName(// functionBasedColumnNameBuilderFactory.create().withFunctionName("avg") // .addParameterColumnName( // functionBasedColumnNameBuilderFactory.create().withFunctionName("avg") // .addParameterColumnName( // functionBasedColumnNameBuilderFactory.create().withFunctionName("add") // .addParameterColumnName("b[*].c[*]") // .addParameterLiteralLong(1).build() + repeatedColNameGen.allEntriesManifestedSubstr()) .build()) .build()) // .build(); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(resAggColName), "Expected that there's results for the aggregation func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 2, "Expected to receive a specific amout of rows"); Assert.assertEquals(resultValues.get(resAggColName).size(), 2, "Expected to receive a specific amout of rows"); Set<Pair<Long, Long>> expected = new HashSet<>(); expected.add(new Pair<>(1L, 51L)); expected.add(new Pair<>(2L, (501L + 9L) / 2)); Set<Pair<Long, Long>> actual = new HashSet<>(); for (long rowId : resultValues.get("a").keySet()) actual.add(new Pair<>(resultValues.get("a").get(rowId), resultValues.get(resAggColName).get(rowId))); Assert.assertEquals(actual, expected, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test public void rowAndColAggregationAndProjectionWithHaving() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, \"b\": [ { \"c\": [ 20, 30 ] }, { \"c\": [ 100, 25, 75 ] } ] }," + "{ \"a\": 2, \"b\": [ { \"c\": [ 400, 50, 150 ] }, { \"c\": [ 1050, 850 ] } ] }," // + "{ \"a\": 2, \"b\": [ { \"c\": [ 10, 0, 3 ] }, { \"c\": [ 7, 20 ] } ] }" + // " ]"); ExecutablePlan plan = buildExecutablePlan("select a, round(avg(avg(add(b[*].c[*], 1)))) from " + TABLE + " group by a having avg(avg(add(b[*].c[*], 1))) > 100."); // Explanation: from inner functions to outer: // add(b[*].c[*], 1) is REPEATED_PROJECT // avg(..) is AGGREGATION_COL (outputs one value per row) // avg(..) is AGGREGATION_ROW (aggregates the two rows with a==2) // round(..) is a PROJECT ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("round") // .addParameterColumnName(// functionBasedColumnNameBuilderFactory.create().withFunctionName("avg") // .addParameterColumnName( // functionBasedColumnNameBuilderFactory.create().withFunctionName("avg") // .addParameterColumnName( // functionBasedColumnNameBuilderFactory.create().withFunctionName("add") // .addParameterColumnName("b[*].c[*]") // .addParameterLiteralLong(1).build() + repeatedColNameGen.allEntriesManifestedSubstr()) .build()) .build()) // .build(); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(resAggColName), "Expected that there's results for the aggregation func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultHavingRowIds.length, 1, "Expected to have specific number of results that passed the HAVING."); Set<Pair<Long, Long>> expected = new HashSet<>(); expected.add(new Pair<>(2L, (501L + 9L) / 2)); Set<Pair<Long, Long>> actual = new HashSet<>(); for (long rowId : resultHavingRowIds) actual.add(new Pair<>(resultValues.get("a").get(rowId), resultValues.get(resAggColName).get(rowId))); Assert.assertEquals(actual, expected, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test public void colAggregationWithFunctionParam() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, \"b\": [ { \"c\": [ 20, 30 ] }, { \"c\": [ 100, 25, 75 ] } ] }," + "{ \"a\": 2, \"b\": [ { \"c\": [ 400, 50, 150 ] }, { \"c\": [ 1050, 850 ] } ] }," // + "{ \"a\": 2, \"b\": [ { \"c\": [ 10, 0, 3 ] }, { \"c\": [ 7, 20 ] } ] }" + // " ]"); ExecutablePlan plan = buildExecutablePlan("select a from " + TABLE + " where any(7, b[*].c[*]) = 1 group by a"); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertEquals(resultValues.get("a").size(), 1, "Expected to have specific number of results."); Assert.assertEquals((long) resultValues.get("a").values().iterator().next(), 2L, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test public void colAggregationWithFunctionParam2() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, \"b\": [ { \"c\": [ 20, 30 ] }, { \"c\": [ 100, 25, 75 ] } ] }," + "{ \"a\": 2, \"b\": [ { \"c\": [ 400, 50, 150 ] }, { \"c\": [ 1050, 850 ] } ] }," // + "{ \"a\": 2, \"b\": [ { \"c\": [ 10, 0, 3 ] }, { \"c\": [ 7, 20 ] } ] }" + // " ]"); ExecutablePlan plan = buildExecutablePlan("select a from " + TABLE + " where any(75, b[*].c[*]) = 1"); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertEquals(resultValues.get("a").size(), 1, "Expected to have specific number of results."); Assert.assertEquals((long) resultValues.get("a").values().iterator().next(), 1L, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test public void sumTest() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, \"b\": [ { \"c\": [ 20, 30 ] }, { \"c\": [ 100, 25, 75 ] } ] }," + "{ \"a\": 2, \"b\": [ { \"c\": [ 400, 50, 150 ] }, { \"c\": [ 1050, 850 ] } ] }" // + " ]"); ExecutablePlan plan = buildExecutablePlan("select a, sum(b[*].c[*]) from " + TABLE); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("sum") // .addParameterColumnName("b[*].c[*]").build(); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(resAggColName), "Expected that there's results for the aggregation func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 2, "Expected correct number of res rows"); Set<Pair<Long, Long>> expected = new HashSet<>(); expected.add(new Pair<>(1L, 20L + 30L + 100L + 25L + 75L)); expected.add(new Pair<>(2L, 400L + 50L + 150L + 1050L + 850L)); Set<Pair<Long, Long>> actual = new HashSet<>(); for (long rowId : resultValues.get("a").keySet()) actual.add(new Pair<>(resultValues.get("a").get(rowId), resultValues.get(resAggColName).get(rowId))); Assert.assertEquals(actual, expected, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test public void repeatedCheckDefaultValueFallback() throws LoadException, InterruptedException, ExecutionException { // second row does not have any value for b[1].c, but ColumnAggregationStep nevertheless tries to resolve the value // -> it needs to be a default value. The step will later then inspect the actual length of the array of a row. initializeFromJson( // "[ { \"a\": 1, \"b\": [ { \"c\": 20 }, { \"c\": 30 } ] },"// + "{ \"a\": 1, \"b\": [ { \"c\": 40 } ] } " // + " ]"); ExecutablePlan plan = buildExecutablePlan("select a, sum(sum(b[*].c)) from " + TABLE + " group by a"); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("sum") // .addParameterColumnName( // functionBasedColumnNameBuilderFactory.create().withFunctionName("sum") // .addParameterColumnName("b[*].c").build()) .build(); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(resAggColName), "Expected that there's results for the aggregation func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 1, "Expected to receive a specific amout of rows"); Assert.assertEquals(resultValues.get(resAggColName).size(), 1, "Expected to receive a specific amout of rows"); Set<Pair<Long, Long>> expected = new HashSet<>(); expected.add(new Pair<>(1L, 90L)); Set<Pair<Long, Long>> actual = new HashSet<>(); for (long rowId : resultValues.get("a").keySet()) actual.add(new Pair<>(resultValues.get("a").get(rowId), resultValues.get(resAggColName).get(rowId))); Assert.assertEquals(actual, expected, "Expected correct result values"); } finally { executor.shutdownNow(); } } @Test @IgnoreInCacheDoubleTestUtil public void colAggregationCachedProjectionNotExecuted() throws LoadException, InterruptedException, ExecutionException { initializeFromJson( // "[ { \"a\": 1, \"b\": 5 },"// + "{ \"a\": 2, \"b\": 6 } " // + " ]"); ExecutablePlan plan = buildExecutablePlan("select a, sub(add(b, 1), 1) from " + TABLE); ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); } finally { executor.shutdownNow(); } ColumnShardCacheRegistry cacheReg = dataContext.getBean(ColumnShardCacheRegistry.class); DefaultColumnShardCache cache = (DefaultColumnShardCache) cacheReg.getColumnShardCache(TABLE); Collection<String> cachedShards = cache.getAll(0L).stream().map(shard -> shard.getName()).collect(Collectors.toList()); String innerColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("add") .addParameterColumnName("b").addParameterLiteralLong(1).build(); String outerColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("sub") .addParameterColumnName(innerColName).addParameterLiteralLong(1).build(); Assert.assertEquals(new HashSet<>(cachedShards), new HashSet<>(Arrays.asList(innerColName, outerColName)), "Expected to have correct columns in cache after executing query for the first time"); // remove the inner column from the cache DefaultColumnShardCacheTestUtil.removeFromCache(cache, 0L, innerColName); // now execute a second time executor = executors.newTestExecutor(plan.preferredExecutorServiceSize()); try { Future<?> future = plan.executeAsynchronously(executor); future.get(); Assert.assertTrue(columnValueConsumerIsDone, "Source should have reported 'done'"); Assert.assertTrue(future.isDone(), "Future should report done"); Assert.assertFalse(future.isCancelled(), "Future should not report cancelled"); Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col"); Assert.assertTrue(resultValues.containsKey(outerColName), "Expected that there's results for the output func"); Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols"); Assert.assertEquals(resultValues.get("a").size(), 2, "Expected to receive a specific amout of rows"); Assert.assertEquals(resultValues.get(outerColName).size(), 2, "Expected to receive a specific amout of rows"); Map<Long, Long> expected = new HashMap<>(); expected.put(1L, 5L); expected.put(2L, 6L); for (long rowId : resultValues.get("a").keySet()) { Long valueColA = resultValues.get("a").get(rowId); Long valueFn = resultValues.get(outerColName).get(rowId); Assert.assertTrue(DoubleUtil.equals(valueFn, expected.get(valueColA)), "Expected correct result for colA value '" + valueColA + "'. Expected: " + expected.get(valueColA) + " but was: " + valueFn); } // we expect to NOT have something in the cache for the innerColumn, as we (1) removed it and (2) the second // execution should not have executed that step. Assert.assertNull(cache.get(0L, innerColName), "Expected to have NOT have executed the calculation of the unneeded column"); } finally { executor.shutdownNow(); } } }