/**
 * diqube: Distributed Query Base.
 *
 * Copyright (C) 2015 Bastian Gloeckle
 *
 * This file is part of diqube.
 *
 * diqube is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */
package org.diqube.server.execution.lng;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import org.diqube.data.column.ColumnShard;
import org.diqube.data.column.ColumnType;
import org.diqube.execution.ExecutablePlan;
import org.diqube.execution.steps.RepeatedProjectStep;
import org.diqube.executionenv.cache.ColumnShardCache;
import org.diqube.executionenv.cache.ColumnShardCacheRegistry;
import org.diqube.executionenv.cache.DefaultColumnShardCache;
import org.diqube.executionenv.cache.DefaultColumnShardCacheTestUtil;
import org.diqube.executionenv.cache.WritableColumnShardCache;
import org.diqube.loader.LoadException;
import org.diqube.server.execution.AbstractDiqlExecutionTest;
import org.diqube.util.Pair;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import com.google.common.collect.Sets;

/**
 * Tests the logic of {@link RepeatedProjectStep} to create only those output columns that are not yet available = that
 * are not available in the cache.
 *
 * <p>
 * As this is a slightly special handling in that step, we have a separate test which tests all combinations of column
 * surviving a cache-clean of an example table/query situation.
 *
 * @author Bastian Gloeckle
 */
public class LongRepeatedProjectionCacheExecutionTest extends AbstractDiqlExecutionTest<Long> {

  private static final String TABLE_JSON = //
      "[ { \"a\": 1, \"b\": [ { \"c\": 20 }, { \"c\": 30 } ] },"//
          + "{ \"a\": 1, \"b\": [ { \"c\": 40 } ] } " //
          + " ]";

  // Names of the output columns of the repeated project "add(b[*].c, 1)" — resolved in setUp() since they depend on
  // beans initialized by the super class.
  private String projectedColName1;
  private String projectedColName2;
  private String projectedColNameLength;

  public LongRepeatedProjectionCacheExecutionTest() {
    super(ColumnType.LONG, new LongTestDataProvider());
  }

  @BeforeMethod
  @Override
  public void setUp() {
    super.setUp();
    // Column name of the projection result at repetition index 0, i.e. "add(b[*].c, 1)[0]".
    projectedColName1 = repeatedColNameGen.repeatedAtIndex(functionBasedColumnNameBuilderFactory.create()
        .withFunctionName("add").addParameterColumnName("b[*].c").addParameterLiteralLong(1L).build(), 0);
    // Column name of the projection result at repetition index 1, i.e. "add(b[*].c, 1)[1]".
    projectedColName2 = repeatedColNameGen.repeatedAtIndex(functionBasedColumnNameBuilderFactory.create()
        .withFunctionName("add").addParameterColumnName("b[*].c").addParameterLiteralLong(1L).build(), 1);
    // Column name of the "length" column of the repeated projection result.
    projectedColNameLength = repeatedColNameGen.repeatedLength(functionBasedColumnNameBuilderFactory.create()
        .withFunctionName("add").addParameterColumnName("b[*].c").addParameterLiteralLong(1L).build());
  }

  @Test
  public void repeatedPartlyCacheInvalidation1() throws LoadException, InterruptedException, ExecutionException {
    genericRepeatedPartlyCacheInvalidation(new HashSet<>(Arrays.asList(projectedColName1)));
  }

  @Test
  public void repeatedPartlyCacheInvalidation2() throws LoadException, InterruptedException, ExecutionException {
    genericRepeatedPartlyCacheInvalidation(new HashSet<>(Arrays.asList(projectedColName1, projectedColName2)));
  }

  @Test
  public void repeatedPartlyCacheInvalidation3() throws LoadException, InterruptedException, ExecutionException {
    genericRepeatedPartlyCacheInvalidation(
        new HashSet<>(Arrays.asList(projectedColName1, projectedColName2, projectedColNameLength)));
  }

  @Test
  public void repeatedPartlyCacheInvalidation4() throws LoadException, InterruptedException, ExecutionException {
    genericRepeatedPartlyCacheInvalidation(new HashSet<>(Arrays.asList(projectedColName2, projectedColNameLength)));
  }

  @Test
  public void repeatedPartlyCacheInvalidation5() throws LoadException, InterruptedException, ExecutionException {
    genericRepeatedPartlyCacheInvalidation(new HashSet<>(Arrays.asList(projectedColName1, projectedColNameLength)));
  }

  @Test
  public void repeatedPartlyCacheInvalidation6() throws LoadException, InterruptedException, ExecutionException {
    genericRepeatedPartlyCacheInvalidation(new HashSet<>(Arrays.asList(projectedColName2)));
  }

  @Test
  public void repeatedPartlyCacheInvalidation7() throws LoadException, InterruptedException, ExecutionException {
    genericRepeatedPartlyCacheInvalidation(new HashSet<>(Arrays.asList(projectedColNameLength)));
  }

  /**
   * Implementation of a test.
   *
   * <p>
   * This executes the following:
   *
   * <ul>
   * <li>Initialize table where we can execute repeated projects
   * <li>Execute a repeated project
   * <li>Validate correct result
   * <li>Clear {@link ColumnShardCache}, but leave specific columns in there
   * <li>Execute repeated project again
   * <li>Validate correct result again
   * </ul>
   *
   * This validates that the {@link RepeatedProjectStep} handles situations correctly, where only a few of the repeated
   * output columns are available in the cache.
   *
   * <p>
   * The interesting columns (= the output cols of the repeated project step) are:
   *
   * <ul>
   * <li>{@link #projectedColName1}
   * <li>{@link #projectedColName2}
   * <li>{@link #projectedColNameLength}
   * </ul>
   *
   * @param columnNamesOfColumnsToKeepInCache
   *          A combination of the "interesting columns" that should survive in the cache when cleaning it.
   */
  private void genericRepeatedPartlyCacheInvalidation(Set<String> columnNamesOfColumnsToKeepInCache)
      throws LoadException, InterruptedException, ExecutionException {
    initializeFromJson(TABLE_JSON);

    // make some data structures readily usable.
    ColumnShardCacheRegistry cacheRegistry = dataContext.getBean(ColumnShardCacheRegistry.class);
    WritableColumnShardCache cache = cacheRegistry.getOrCreateColumnShardCache(TABLE);
    Set<String> allInterestingColumnNames =
        new HashSet<>(Arrays.asList(projectedColName1, projectedColName2, projectedColNameLength));

    // as we execute the query multiple times, the following supplier will create and execute a new query and return the
    // Future on the plan.
    List<ExecutorService> executorsCreated = new ArrayList<>();
    Supplier<Future<?>> createAndExecutePlan = () -> {
      ExecutablePlan plan = buildExecutablePlan("select a, sum(sum(add(b[*].c, 1))) from " + TABLE + " group by a");
      ExecutorService executor = executors.newTestExecutor(plan.preferredExecutorServiceSize());
      executorsCreated.add(executor);
      return plan.executeAsynchronously(executor);
    };

    // asserts a correct result after executing the query.
    Supplier<Void> assertCorrectResult = () -> {
      String resAggColName = functionBasedColumnNameBuilderFactory.create().withFunctionName("sum")
          .addParameterColumnName(functionBasedColumnNameBuilderFactory.create().withFunctionName("sum") //
              .addParameterColumnName( //
                  functionBasedColumnNameBuilderFactory.create().withFunctionName("add") //
                      .addParameterColumnName("b[*].c").addParameterLiteralLong(1L).build()
                      + repeatedColNameGen.allEntriesIdentifyingSubstr())
              .build())
          .build();

      Assert.assertTrue(resultValues.containsKey("a"), "Expected to have a result for col");
      Assert.assertTrue(resultValues.containsKey(resAggColName),
          "Expected that there's results for the aggregation func");
      Assert.assertEquals(resultValues.keySet().size(), 2, "Expected to have results for correct number of cols");

      Assert.assertEquals(resultValues.get("a").size(), 1, "Expected to receive a specific amount of rows");
      Assert.assertEquals(resultValues.get(resAggColName).size(), 1, "Expected to receive a specific amount of rows");

      // Both rows have a == 1, so there is a single group. Per-row inner sums of add(b[*].c, 1) are
      // (20+1)+(30+1) = 52 and (40+1) = 41; the outer sum aggregates these to 93.
      Set<Pair<Long, Long>> expected = new HashSet<>();
      expected.add(new Pair<>(1L, 93L));

      Set<Pair<Long, Long>> actual = new HashSet<>();
      for (long rowId : resultValues.get("a").keySet())
        actual.add(new Pair<>(resultValues.get("a").get(rowId), resultValues.get(resAggColName).get(rowId)));

      Assert.assertEquals(actual, expected, "Expected correct result values");
      return null;
    };

    // Ok, let's start testing!
    try {
      createAndExecutePlan.get().get(); // execute and wait until done
      assertCorrectResult.get();

      // assert that the result columns of the repeated project were put in the cache, otherwise the rest of the
      // test does not make sense.
      Assert.assertNotNull(cache.get(0L, projectedColName1), "Expected [0] to be inside cache.");
      Assert.assertNotNull(cache.get(0L, projectedColName2), "Expected [1] to be inside cache.");
      Assert.assertNotNull(cache.get(0L, projectedColNameLength), "Expected [length] to be inside cache.");

      // Now we take care of keeping only those columns in the cache that were requested to be kept in it!
      Set<String> columnShardNamesToRemoveFromCache =
          cache.getAll(0L).stream().map(ColumnShard::getName).collect(Collectors.toSet());
      columnShardNamesToRemoveFromCache.removeAll(columnNamesOfColumnsToKeepInCache);

      // remember the surviving shard instances so we can later verify they were re-used, not re-created.
      Set<ColumnShard> cachedColumnShards =
          cache.getAll(0L).stream().filter(colShard -> columnNamesOfColumnsToKeepInCache.contains(colShard.getName()))
              .collect(Collectors.toSet());

      // remove not-wanted cols from cache.
      for (String shardName : columnShardNamesToRemoveFromCache)
        DefaultColumnShardCacheTestUtil.removeFromCache((DefaultColumnShardCache) cache, 0L, shardName);

      // -> expected: the other column shards that were cached are now evicted from the cache.
      for (String colNameNotLongerInCache : Sets.difference(allInterestingColumnNames,
          columnNamesOfColumnsToKeepInCache))
        Assert.assertNull(cache.get(0L, colNameNotLongerInCache),
            "Expected " + colNameNotLongerInCache + " to NOT be inside cache.");
      for (String colNameInCache : columnNamesOfColumnsToKeepInCache)
        Assert.assertNotNull(cache.get(0L, colNameInCache), "Expected " + colNameInCache + " to be in cache.");

      // Ok, now only those columns that were requested are in the cache (and the mocked one, but we ignore that).

      createAndExecutePlan.get().get(); // execute and wait

      // assert result is good.
      assertCorrectResult.get();

      Set<ColumnShard> afterCachedColShards =
          columnNamesOfColumnsToKeepInCache.stream().map(colName -> cache.get(0L, colName))
              .collect(Collectors.toSet());

      // the following assert basically compares using ==
      Assert.assertEquals(afterCachedColShards, cachedColumnShards,
          "Expected that cached col shards were not re-created.");
    } finally {
      executorsCreated.forEach(ExecutorService::shutdownNow);
    }
  }
}