/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.plan; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Set; import java.util.UUID; import java.util.function.Function; import org.diqube.context.Profiles; import org.diqube.data.column.ColumnPage; import org.diqube.data.column.ColumnType; import org.diqube.data.column.StandardColumnShard; import org.diqube.data.table.TableFactory; import org.diqube.data.table.TableShard; import org.diqube.data.types.lng.dict.LongDictionary; import org.diqube.execution.consumers.AbstractThreadedColumnValueConsumer; import org.diqube.execution.consumers.AbstractThreadedGroupConsumer; import org.diqube.execution.consumers.AbstractThreadedGroupDeltaConsumer; import org.diqube.execution.consumers.ColumnDictIdConsumer; import org.diqube.execution.consumers.RowIdConsumer; import org.diqube.execution.steps.AbstractThreadedExecutablePlanStep; import org.diqube.execution.steps.GroupStep; import org.diqube.execution.steps.ResolveColumnDictIdsStep; import org.diqube.execution.steps.ResolveValuesStep; import org.diqube.execution.steps.RowIdSinkStep; import org.diqube.executionenv.ExecutionEnvironment; import org.diqube.executionenv.ExecutionEnvironmentFactory; import org.diqube.loader.LoaderColumnInfo; import org.diqube.loader.columnshard.ColumnShardBuilderFactory; import org.diqube.loader.columnshard.ColumnShardBuilderManager; import org.diqube.queries.QueryRegistry; import org.diqube.queries.QueryUuid; import org.diqube.util.Pair; import org.mockito.Mockito; import org.springframework.context.annotation.AnnotationConfigApplicationContext; import org.testng.Assert; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; /** * Tests {@link GroupStep}. * * @author Bastian Gloeckle */ public class GroupStepTest { private static final String COL_A = "colA"; private static final String COL_B = "colB"; private static final String COL_C = "colC"; private AnnotationConfigApplicationContext dataContext; private ColumnShardBuilderManager columnShardBuilderManager; private TableFactory tableFactory; private ExecutionEnvironmentFactory executionEnvironmentFactory; private Map<String, StandardColumnShard> columns; private Map<String, Map<Long, Object>> resultValues; private Map<Long, List<Long>> resultFullGroups; private List<Map<Long, List<Long>>> resultDeltaGroups; @BeforeMethod public void setUp() { dataContext = new AnnotationConfigApplicationContext(); dataContext.getEnvironment().setActiveProfiles(Profiles.UNIT_TEST); dataContext.scan("org.diqube"); dataContext.refresh(); ColumnShardBuilderFactory columnBuilderFactory = dataContext.getBean(ColumnShardBuilderFactory.class); LoaderColumnInfo colInfo = new LoaderColumnInfo(ColumnType.LONG); columnShardBuilderManager = columnBuilderFactory.createColumnShardBuilderManager(colInfo, 0L); tableFactory = dataContext.getBean(TableFactory.class); executionEnvironmentFactory = dataContext.getBean(ExecutionEnvironmentFactory.class); resultValues = new HashMap<>(); resultDeltaGroups = new ArrayList<>(); resultFullGroups = new HashMap<>(); columns = new HashMap<>(); QueryUuid.setCurrentQueryUuidAndExecutionUuid(UUID.randomUUID(), UUID.randomUUID()); } @AfterMethod public void cleanup() { QueryUuid.clearCurrent(); } @Test public void testOneColumn() throws Exception { // GIVEN // One-col grouping Long[] colAValues = new Long[] { 1L, 10L, Long.MIN_VALUE, -599L, 1L, 10L }; columnShardBuilderManager.addValues(COL_A, colAValues, 0L); Long[] colBValues = new Long[] { -100L, -200L, Long.MAX_VALUE, -300L, 50L, 60L }; columnShardBuilderManager.addValues(COL_B, colBValues, 0L); TableShard table = buildTable(columnShardBuilderManager); ExecutionEnvironment env = executionEnvironmentFactory.createQueryRemoteExecutionEnvironment(table); // RowIdEquals and ResolveValue steps List<AbstractThreadedExecutablePlanStep> steps = createExecutableSteps(env, new String[] { COL_A }, new String[] { COL_A }); // WHEN // executing the steps for (AbstractThreadedExecutablePlanStep step : steps) { step.run(); } // THEN Assert.assertTrue(resultValues.containsKey(COL_A), "Result for col a expected"); // group by as grouped the 1L and 10L values in COL_A Set<Object> expectedValues = new HashSet<>(); expectedValues.add(1L); expectedValues.add(10L); expectedValues.add(Long.MIN_VALUE); expectedValues.add(-599L); Assert.assertEquals(new HashSet<Object>(resultValues.get(COL_A).values()), expectedValues, "Correct result for col a expected"); Assert.assertEquals(resultFullGroups.size(), 4, "Expected 4 groups"); Map<Long, Set<Long>> expectedValueToRowIds = new HashMap<>(); // group with value 1L has rowIds 0 and 4 expectedValueToRowIds.put(1L, new HashSet<Long>(Arrays.asList(new Long[] { 0L, 4L }))); // group with value 10L has rowIds 1 and 5 expectedValueToRowIds.put(10L, new HashSet<Long>(Arrays.asList(new Long[] { 1L, 5L }))); // group with value Long.MIN_VALUE has rowId 2 expectedValueToRowIds.put(Long.MIN_VALUE, new HashSet<Long>(Arrays.asList(new Long[] { 2L }))); // group with value -599L has rowId 3 expectedValueToRowIds.put(-599L, new HashSet<Long>(Arrays.asList(new Long[] { 3L }))); Map<Long, Set<Long>> resultValueToRowIdMap = new HashMap<>(); resultFullGroups.entrySet().stream().map(new Function<Entry<Long, List<Long>>, Pair<Long, Set<Long>>>() { @Override public Pair<Long, Set<Long>> apply(Entry<Long, List<Long>> t) { long groupId = t.getKey(); long value = resolveValueForRowId(columns.get(COL_A), groupId); return new Pair<Long, Set<Long>>(value, new HashSet<Long>(t.getValue())); } }).forEach(pair -> resultValueToRowIdMap.put(pair.getLeft(), pair.getRight())); Assert.assertEquals(resultValueToRowIdMap, expectedValueToRowIds, "Expected correct grouping of row IDs"); assertDeltaGroupsEqualsFullGroups(); } @SuppressWarnings({ "rawtypes", "unchecked" }) @Test public void testTwoColumns() throws Exception { // GIVEN // Two-col grouping Long[] colAValues = new Long[] { 1L, 10L, Long.MIN_VALUE, -599L, 1L, 1L }; columnShardBuilderManager.addValues(COL_A, colAValues, 0L); Long[] colBValues = new Long[] { -100L, -200L, Long.MAX_VALUE, -300L, -100L, 60L }; columnShardBuilderManager.addValues(COL_B, colBValues, 0L); Long[] colCValues = new Long[] { 0L, 0L, 0L, 0L, 0L, 0L }; columnShardBuilderManager.addValues(COL_C, colCValues, 0L); TableShard table = buildTable(columnShardBuilderManager); ExecutionEnvironment env = executionEnvironmentFactory.createQueryRemoteExecutionEnvironment(table); // RowIdEquals and ResolveValue steps List<AbstractThreadedExecutablePlanStep> steps = createExecutableSteps(env, new String[] { COL_A, COL_B }, new String[] { COL_A, COL_B }); // WHEN // executing the steps for (AbstractThreadedExecutablePlanStep step : steps) { step.run(); } // THEN Assert.assertTrue(resultValues.containsKey(COL_A), "Result for col a expected"); // group by as grouped the 1L and 10L values in COL_A Set<Object> expectedValues = new HashSet<>(); expectedValues.add(1L); expectedValues.add(10L); expectedValues.add(Long.MIN_VALUE); expectedValues.add(-599L); Assert.assertEquals(new HashSet<Object>(resultValues.get(COL_A).values()), expectedValues, "Expected corected (hashed) values for col a"); Collection<Long> longValues = (Collection) resultValues.get(COL_A).values(); long countOfValue1 = longValues.stream().mapToLong(Long::longValue).filter(l -> l == 1L).count(); Assert.assertEquals(countOfValue1, 2, "Expected value 1L to be returned twice in col A because it is part of two groups"); Assert.assertEquals(longValues.size(), expectedValues.size() + 1, "Expected only value 1L to be available twice in result values of col a"); Assert.assertTrue(resultValues.containsKey(COL_B), "Result for col b expected"); // group by as grouped the 1L and 10L values in COL_A expectedValues = new HashSet<>(); expectedValues.add(-100L); expectedValues.add(-200L); expectedValues.add(Long.MAX_VALUE); expectedValues.add(-300L); expectedValues.add(60L); Assert.assertEquals(new HashSet<Object>(resultValues.get(COL_B).values()), expectedValues, "Expected corected values for col b"); Assert.assertEquals(resultValues.get(COL_B).values().size(), expectedValues.size(), "Expected no value to be available twice in result values of col b"); Assert.assertEquals(resultFullGroups.size(), 5, "Expected 5 groups"); Map<Pair<Long, Long>, Set<Long>> expectedValueToRowIds = new HashMap<>(); // group with value A:1L B:-100L has rowIds 0 and 4 expectedValueToRowIds.put(new Pair<>(1L, -100L), new HashSet<Long>(Arrays.asList(new Long[] { 0L, 4L }))); // group with value A:1L B:60L has rowId 5 expectedValueToRowIds.put(new Pair<>(1L, 60L), new HashSet<Long>(Arrays.asList(new Long[] { 5L }))); // group with value A:10L B:-200L has rowId 1 expectedValueToRowIds.put(new Pair<>(10L, -200L), new HashSet<Long>(Arrays.asList(new Long[] { 1L }))); // group with value A:MIN B:MAX has rowId 2 expectedValueToRowIds.put(new Pair<>(Long.MIN_VALUE, Long.MAX_VALUE), new HashSet<Long>(Arrays.asList(new Long[] { 2L }))); // group with value A:-599L B:-300L has rowId 3 expectedValueToRowIds.put(new Pair<>(-599L, -300L), new HashSet<Long>(Arrays.asList(new Long[] { 3L }))); Map<Pair<Long, Long>, Set<Long>> resultValueToRowIdMap = new HashMap<>(); resultFullGroups.entrySet().stream() .map(new Function<Entry<Long, List<Long>>, Pair<Pair<Long, Long>, Set<Long>>>() { @Override public Pair<Pair<Long, Long>, Set<Long>> apply(Entry<Long, List<Long>> t) { long groupId = t.getKey(); long valueA = resolveValueForRowId(columns.get(COL_A), groupId); long valueB = resolveValueForRowId(columns.get(COL_B), groupId); return new Pair<Pair<Long, Long>, Set<Long>>(new Pair<Long, Long>(valueA, valueB), new HashSet<Long>(t.getValue())); } }).forEach(pair -> resultValueToRowIdMap.put(pair.getLeft(), pair.getRight())); Assert.assertEquals(resultValueToRowIdMap, expectedValueToRowIds, "Expected correct grouping of row IDs"); assertDeltaGroupsEqualsFullGroups(); } private void assertDeltaGroupsEqualsFullGroups() { Map<Long, Set<Long>> deltaGroupsJoined = new HashMap<>(); for (Map<Long, List<Long>> delta : resultDeltaGroups) { for (Entry<Long, List<Long>> deltaEntry : delta.entrySet()) { if (!deltaGroupsJoined.containsKey(deltaEntry.getKey())) deltaGroupsJoined.put(deltaEntry.getKey(), new HashSet<>()); deltaGroupsJoined.get(deltaEntry.getKey()).addAll(deltaEntry.getValue()); } } Map<Long, Set<Long>> fullGroupsWithSet = new HashMap<>(); for (Entry<Long, List<Long>> fullGroupEntry : resultFullGroups.entrySet()) fullGroupsWithSet.put(fullGroupEntry.getKey(), new HashSet<>(fullGroupEntry.getValue())); Assert.assertEquals(deltaGroupsJoined, fullGroupsWithSet, "Delta changes should end up being the same as fullGroup results when merged"); } private long resolveValueForRowId(StandardColumnShard column, long rowId) { ColumnPage page = column.getPages().floorEntry(rowId).getValue(); long columPageValueId = page.getValues().get((int) (rowId - page.getFirstRowId())); long columnValueId = page.getColumnPageDict().decompressValue(columPageValueId); long value = ((LongDictionary<?>) column.getColumnShardDictionary()).decompressValue(columnValueId); return value; } private TableShard buildTable(ColumnShardBuilderManager columnShardBuilderManager) { for (String colName : columnShardBuilderManager.getAllColumnsWithValues()) { StandardColumnShard newColumn = columnShardBuilderManager.buildAndFree(colName); columns.put(colName, newColumn); } return tableFactory.createDefaultTableShard("table", columns.values()); } private List<AbstractThreadedExecutablePlanStep> createExecutableSteps(ExecutionEnvironment env, String[] resolveCols, String[] groupByCols) { List<AbstractThreadedExecutablePlanStep> res = new ArrayList<>(); int stepId = 0; RowIdSinkStep rowIdSinkStep = new RowIdSinkStep(stepId++, Mockito.mock(QueryRegistry.class, Mockito.RETURNS_DEEP_STUBS), env); res.add(rowIdSinkStep); GroupStep groupStep = new GroupStep(stepId++, Mockito.mock(QueryRegistry.class, Mockito.RETURNS_DEEP_STUBS), env, Arrays.asList(groupByCols)); groupStep.wireOneInputConsumerToOutputOf(RowIdConsumer.class, rowIdSinkStep); groupStep.addOutputConsumer(new AbstractThreadedGroupConsumer(null) { @Override protected void allSourcesAreDone() { } @Override protected void doConsumeGroups(Map<Long, List<Long>> fullGroups) { resultFullGroups = fullGroups; } }); groupStep.addOutputConsumer(new AbstractThreadedGroupDeltaConsumer(null) { @Override protected void allSourcesAreDone() { } @Override protected void doConsumeGroupDeltas(Map<Long, List<Long>> lastChangedGroups) { resultDeltaGroups.add(new HashMap<Long, List<Long>>(lastChangedGroups)); } }); res.add(groupStep); ResolveValuesStep resolveValuesStep = new ResolveValuesStep(stepId++, Mockito.mock(QueryRegistry.class, Mockito.RETURNS_DEEP_STUBS)); for (String resolveCol : resolveCols) { ResolveColumnDictIdsStep resolveDictIdStep = new ResolveColumnDictIdsStep(stepId++, Mockito.mock(QueryRegistry.class, Mockito.RETURNS_DEEP_STUBS), env, resolveCol); resolveDictIdStep.wireOneInputConsumerToOutputOf(RowIdConsumer.class, groupStep); resolveValuesStep.wireOneInputConsumerToOutputOf(ColumnDictIdConsumer.class, resolveDictIdStep); res.add(resolveDictIdStep); } resolveValuesStep.addOutputConsumer(new AbstractThreadedColumnValueConsumer(null) { @Override protected void allSourcesAreDone() { } @Override protected void doConsume(String colName, Map<Long, Object> values) { if (!resultValues.containsKey(colName)) resultValues.put(colName, new HashMap<Long, Object>()); resultValues.get(colName).putAll(values); } }); res.add(resolveValuesStep); return res; } }