/**
 * diqube: Distributed Query Base.
 *
 * Copyright (C) 2015 Bastian Gloeckle
 *
 * This file is part of diqube.
 *
 * diqube is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.diqube.server.execution;

import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import org.diqube.data.column.ColumnType;
import org.diqube.execution.ExecutablePlan;
import org.diqube.execution.consumers.ColumnVersionBuiltConsumer;
import org.diqube.executionenv.ExecutionEnvironment;
import org.diqube.executionenv.VersionedExecutionEnvironment;
import org.diqube.function.AggregationFunction.ValueProvider;
import org.diqube.function.IntermediaryResult;
import org.diqube.function.aggregate.CountFunction;
import org.diqube.queries.QueryRegistry.QueryResultHandler;
import org.diqube.queries.QueryUuid;
import org.testng.Assert;
import org.testng.annotations.Test;

/**
 * This tests the parallel functionality of the query master logic as well as possible.
 *
 * <p>
 * The QueryMaster heavily uses {@link ColumnVersionBuiltConsumer}s which in turn use
 * {@link VersionedExecutionEnvironment}. This means that the steps executed on the master usually work on intermediary
 * versions of the columns: The first step that creates a column might create just part of that column, because it does
 * not yet have all source information (which is provided by the remotes). These intermediate (or temporary) columns are
 * then stored in a {@link VersionedExecutionEnvironment} and that one is used instead of the default
 * {@link ExecutionEnvironment} by consequent steps to resolve any column values. This test should test this
 * functionality as well as possible - naturally this is a bit hard to test, because all of it is multi-threaded,
 * therefore this uses {@link RemoteEmulation} object to emulate the arrival of results from the remotes in a specific
 * way.
 *
 * @author Bastian Gloeckle
 */
public abstract class QueryMasterParallelDiqlExecutionTest<T> extends AbstractRemoteEmulatingDiqlExecutionTest<T> {

  public QueryMasterParallelDiqlExecutionTest(ColumnType colType, TestDataProvider<T> dp) {
    super(colType, dp);
  }

  @Test
  public void limitDoesNotCutOffOnIntermediaryResults() throws InterruptedException, ExecutionException {
    // GIVEN
    initializeSampleTableShards(2);

    ExecutablePlan executablePlan = buildExecutablePlan( //
        "Select " + COL_A + ", count() from " + TABLE + //
            " group by " + COL_A + //
            " order by count() desc LIMIT 1");
    ExecutorService executor = executors.newTestExecutor(executablePlan.preferredExecutorServiceSize());
    try {
      // WHEN
      // start execution.
      Future<Void> future = executablePlan.executeAsynchronously(executor);

      QueryResultHandler resultHandler = waitForResultHandler();

      String countCol = functionBasedColumnNameBuilderFactory.create().withFunctionName("count").build();

      Object groupByValue1 = dp.v(5);
      Object groupByValue2 = dp.v(10);
      Object groupByValue3 = dp.v(15);

      // let first shard returns some values
      Map<Long, Object> values = new HashMap<>();
      values.put(0L, groupByValue1);
      values.put(1L, groupByValue2);
      values.put(2L, groupByValue3);
      resultHandler.newColumnValues(COL_A, values);
      resultHandler.newIntermediaryAggregationResult(0L, countCol, intermediary(countCol, 0),
          intermediary(countCol, 3));
      resultHandler.newIntermediaryAggregationResult(1L, countCol, intermediary(countCol, 0),
          intermediary(countCol, 1));
      resultHandler.newIntermediaryAggregationResult(2L, countCol, intermediary(countCol, 0),
          intermediary(countCol, 1));

      // now:
      // Group 0: value 3
      // Group 1: value 1 (might be cut off because of limit)
      // Group 2: value 1 (might be cut off because of limit)

      // THEN
      List<Long> expectedRun1 = Arrays.asList(0L);
      waitUntilOrFail(newOrderedRowIdsNotify, //
          () -> "Not correct ordering value. Was: " + resultOrderRowIds + " Expected: " + expectedRun1.toString(), //
          () -> resultOrderRowIds.equals(expectedRun1));

      Long expectedValueRun1 = 3L;
      waitUntilOrFail(newValuesNotify, //
          () -> "Not correct value. Was: " + resultValues.get(countCol).get(0L) + " Expected: " + expectedValueRun1, //
          () -> resultValues.get(countCol) != null && expectedValueRun1.equals(resultValues.get(countCol).get(0L)));

      // WHEN second shard found some values
      values.clear();
      values.put(3L, groupByValue1);
      values.put(4L, groupByValue2);
      values.put(5L, groupByValue3);
      resultHandler.newColumnValues(COL_A, values);
      resultHandler.newIntermediaryAggregationResult(3L, countCol, intermediary(countCol, 0),
          intermediary(countCol, 1));
      resultHandler.newIntermediaryAggregationResult(4L, countCol, intermediary(countCol, 0),
          intermediary(countCol, 2));
      resultHandler.newIntermediaryAggregationResult(5L, countCol, intermediary(countCol, 0),
          intermediary(countCol, 1));

      // now:
      // Group 3 matched to group 0, new value: 4
      // Group 4 matched to group 1, new value: 3 (might be cut off because of limit)
      // Group 5 matched to group 2, new value: 2 (might be cut off because of limit)
      // be aware: If the execution somehow forgot the previous values, the top group would now be 4 (which would be
      // wrong!)

      // THEN
      List<Long> expectedRun2 = Arrays.asList(0L);
      waitUntilOrFail(newOrderedRowIdsNotify, //
          () -> "Not correct ordering value. Was: " + resultOrderRowIds + " Expected: " + expectedRun2.toString(), //
          () -> resultOrderRowIds.equals(expectedRun2));

      Long expectedValueRun2 = 4L;
      waitUntilOrFail(newValuesNotify, //
          () -> "Not correct value. Was: " + resultValues.get(countCol).get(0L) + " Expected: " + expectedValueRun2, //
          () -> resultValues.get(countCol) != null && expectedValueRun2.equals(resultValues.get(countCol).get(0L)));

      // first shard found some more values.
      resultHandler.newIntermediaryAggregationResult(0L, countCol, intermediary(countCol, 3),
          intermediary(countCol, 4));
      resultHandler.newIntermediaryAggregationResult(1L, countCol, intermediary(countCol, 1),
          intermediary(countCol, 2));
      resultHandler.newIntermediaryAggregationResult(2L, countCol, intermediary(countCol, 1),
          intermediary(countCol, 5));

      // now: (these values are only correct, if orderStep did NOT cut off group 1L in last execution, otherwise 1L is
      // the only group left!)
      // Group 0, new value: 5 (might be cut off because of limit)
      // Group 1, new value: 4 (might be cut off because of limit)
      // Group 2, new value: 6
      // be aware: If the OrderStep did cut-off the previous results of group2 internally, group2 would now not be the
      // top group!

      // THEN
      List<Long> expectedOrderingRun3 = Arrays.asList(2L);
      waitUntilOrFail(newOrderedRowIdsNotify, //
          () -> "Not correct ordering value. Was: " + resultOrderRowIds + " Expected: "
              + expectedOrderingRun3.toString(), //
          () -> resultOrderRowIds.equals(expectedOrderingRun3));

      Long expectedValueRun3 = 6L;
      waitUntilOrFail(newValuesNotify, //
          () -> "Not correct value. Was: " + resultValues.get(countCol).get(2L) + " Expected: " + expectedValueRun3, //
          () -> resultValues.get(countCol) != null && expectedValueRun3.equals(resultValues.get(countCol).get(2L)));

      resultHandler.oneRemoteDone();
      resultHandler.oneRemoteDone();

      future.get(); // wait until fully done.

      // THEN after full completion of pipeline.
      Assert.assertEquals(resultOrderRowIds, Arrays.asList(2L),
          "Expected final ordering result to be correct");
    } finally {
      executor.shutdownNow();
    }
  }

  @Test
  public void intermediaryResultsDecreaseResult() throws InterruptedException, ExecutionException {
    // GIVEN
    initializeSampleTableShards(2);

    ExecutablePlan executablePlan = buildExecutablePlan( //
        "Select " + COL_A + ", count() from " + TABLE + //
            " group by " + COL_A + //
            " order by count() desc LIMIT 1");
    ExecutorService executor = executors.newTestExecutor(executablePlan.preferredExecutorServiceSize());
    try {
      // WHEN
      // start execution.
      Future<Void> future = executablePlan.executeAsynchronously(executor);

      QueryResultHandler resultHandler = waitForResultHandler();

      String countCol = functionBasedColumnNameBuilderFactory.create().withFunctionName("count").build();

      Object groupByValue1 = dp.v(5);
      Object groupByValue2 = dp.v(10);

      // let first shard returns some values
      Map<Long, Object> values = new HashMap<>();
      values.put(0L, groupByValue1);
      values.put(1L, groupByValue2);
      resultHandler.newColumnValues(COL_A, values);
      resultHandler.newIntermediaryAggregationResult(0L, countCol, intermediary(countCol, 0),
          intermediary(countCol, 5));
      resultHandler.newIntermediaryAggregationResult(1L, countCol, intermediary(countCol, 0),
          intermediary(countCol, 1));

      // now:
      // Group 0: value 5
      // Group 1: value 1 (might be cut off because of limit)

      // THEN
      List<Long> expectedRun1 = Arrays.asList(0L);
      waitUntilOrFail(newOrderedRowIdsNotify, //
          () -> "Not correct ordering value. Was: " + resultOrderRowIds + " Expected: " + expectedRun1.toString(), //
          () -> resultOrderRowIds.equals(expectedRun1));

      Long expectedValueRun1 = 5L;
      waitUntilOrFail(newValuesNotify, //
          () -> "Not correct value. Was: " + resultValues.get(countCol).get(0L) + " Expected: " + expectedValueRun1, //
          () -> resultValues.get(countCol) != null && expectedValueRun1.equals(resultValues.get(countCol).get(0L)));

      // WHEN second shard found some values
      values.clear();
      values.put(3L, groupByValue1);
      values.put(4L, groupByValue2);
      resultHandler.newColumnValues(COL_A, values);
      resultHandler.newIntermediaryAggregationResult(3L, countCol, intermediary(countCol, 0),
          intermediary(countCol, 1));
      resultHandler.newIntermediaryAggregationResult(4L, countCol, intermediary(countCol, 0),
          intermediary(countCol, 2));

      // now:
      // Group 3 matched to group 0, new value: 6
      // Group 4 matched to group 1, new value: 3 (might be cut off because of limit)

      // THEN
      List<Long> expectedRun2 = Arrays.asList(0L);
      waitUntilOrFail(newOrderedRowIdsNotify, //
          () -> "Not correct ordering value. Was: " + resultOrderRowIds + " Expected: " + expectedRun2.toString(), //
          () -> resultOrderRowIds.equals(expectedRun2));

      Long expectedValueRun2 = 6L;
      waitUntilOrFail(newValuesNotify, //
          () -> "Not correct value. Was: " + resultValues.get(countCol).get(0L) + " Expected: " + expectedValueRun2, //
          () -> resultValues.get(countCol) != null && expectedValueRun2.equals(resultValues.get(countCol).get(0L)));

      // first shard found some more values.
      resultHandler.newIntermediaryAggregationResult(0L, countCol, intermediary(countCol, 5),
          intermediary(countCol, 2)); // LOWER!
      resultHandler.newIntermediaryAggregationResult(1L, countCol, intermediary(countCol, 1),
          intermediary(countCol, 2));

      // now: (these values are only correct, if orderStep did NOT cut off group 1L in last execution, otherwise 1L is
      // the only group left!)
      // Group 0, new value: 3 (might be cut off because of limit)
      // Group 1, new value: 4
      // be aware: If the OrderStep had cut off the previous result of group 1 internally, group 1 could not become the
      // top group now!

      // THEN
      List<Long> expectedOrderingRun3 = Arrays.asList(1L);
      waitUntilOrFail(newOrderedRowIdsNotify, //
          () -> "Not correct ordering value. Was: " + resultOrderRowIds + " Expected: "
              + expectedOrderingRun3.toString(), //
          () -> resultOrderRowIds.equals(expectedOrderingRun3));

      Long expectedValueRun3 = 4L;
      waitUntilOrFail(newValuesNotify, //
          () -> "Not correct value. Was: " + resultValues.get(countCol).get(1L) + " Expected: " + expectedValueRun3, //
          () -> resultValues.get(countCol) != null && expectedValueRun3.equals(resultValues.get(countCol).get(1L)));

      resultHandler.oneRemoteDone();
      resultHandler.oneRemoteDone();

      future.get(); // wait until fully done.

      // THEN after full completion of pipeline.
      Assert.assertEquals(resultOrderRowIds, Arrays.asList(1L),
          "Expected final ordering result to be correct");
    } finally {
      executor.shutdownNow();
    }
  }

  /**
   * Poll the query registry until the asynchronously started execution has registered its
   * {@link QueryResultHandler}, then return that handler.
   *
   * <p>
   * The handler only appears once the executable plan has actually started executing on the executor, so this has to
   * wait; {@link Thread#yield()} keeps the poll loop from monopolizing a CPU while the plan is still starting up.
   *
   * @return The first {@link QueryResultHandler} registered for the current query. Never <code>null</code>.
   */
  private QueryResultHandler waitForResultHandler() {
    QueryResultHandler resultHandler = null;
    while (resultHandler == null) {
      if (queryRegistry.getQueryResultHandlers(QueryUuid.getCurrentQueryUuid()).size() > 0)
        resultHandler = queryRegistry.getQueryResultHandlers(QueryUuid.getCurrentQueryUuid()).iterator().next();
      else
        Thread.yield(); // handler not registered yet; give the executor threads a chance to run.
    }
    return resultHandler;
  }

  /**
   * Build an {@link IntermediaryResult} of a count() aggregation representing the given count value.
   *
   * @param outputColName
   *          Name of the output column the aggregation writes to.
   * @param count
   *          The count the returned intermediary result should represent.
   * @return A freshly populated {@link IntermediaryResult}.
   */
  private IntermediaryResult intermediary(String outputColName, int count) {
    CountFunction fn = new CountFunction();
    // Feed the CountFunction a final set of "count" dummy values; only the size matters for count().
    fn.addValues(new ValueProvider<Object>() {
      @Override
      public Object[] getValues() {
        return new Object[count];
      }

      @Override
      public long size() {
        return count;
      }

      @Override
      public boolean isFinalSetOfValues() {
        return true;
      }
    });
    IntermediaryResult res = new IntermediaryResult(outputColName, null);
    fn.populateIntermediary(res);
    return res;
  }
}