/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.flatten; import java.nio.charset.Charset; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; import java.util.SortedMap; import java.util.SortedSet; import java.util.TreeMap; import java.util.TreeSet; import java.util.UUID; import java.util.stream.LongStream; import org.diqube.context.Profiles; import org.diqube.data.column.ColumnShard; import org.diqube.data.column.ColumnType; import org.diqube.data.flatten.FlattenedTable; import org.diqube.data.table.Table; import org.diqube.data.table.TableFactory; import org.diqube.data.table.TableShard; import org.diqube.data.types.lng.LongColumnShard; import org.diqube.executionenv.querystats.QueryableLongColumnShardFacade; import org.diqube.loader.JsonLoader; import org.diqube.loader.LoadException; import org.diqube.loader.LoaderColumnInfo; import org.diqube.util.BigByteBuffer; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.springframework.context.annotation.AnnotationConfigApplicationContext; import org.testng.Assert; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; /** * Tests {@link Flattener}. * * @author Bastian Gloeckle */ public class FlattenerTest { private static final Logger logger = LoggerFactory.getLogger(FlattenerTest.class); private static final String TABLE = "table"; private static final Comparator<SortedMap<String, Long>> MAP_COMPARATOR = new Comparator<SortedMap<String, Long>>() { @Override public int compare(SortedMap<String, Long> o1, SortedMap<String, Long> o2) { Iterator<Entry<String, Long>> i1 = o1.entrySet().iterator(); Iterator<Entry<String, Long>> i2 = o2.entrySet().iterator(); while (i1.hasNext()) { if (!i2.hasNext()) return 1; // i2 is shorter Entry<String, Long> e1 = i1.next(); Entry<String, Long> e2 = i2.next(); if (e1.getKey().compareTo(e2.getKey()) != 0) return e1.getKey().compareTo(e2.getKey()); if (e1.getValue().compareTo(e2.getValue()) != 0) return e1.getValue().compareTo(e2.getValue()); } if (i2.hasNext()) return -1; // i1 is shorter return 0; } }; private AnnotationConfigApplicationContext dataContext; private JsonLoader loader; private TableFactory tableFactory; private Flattener flattener; @BeforeMethod public void before() { dataContext = new AnnotationConfigApplicationContext(); dataContext.getEnvironment().setActiveProfiles(Profiles.UNIT_TEST); dataContext.scan("org.diqube"); dataContext.refresh(); loader = dataContext.getBean(JsonLoader.class); tableFactory = dataContext.getBean(TableFactory.class); flattener = dataContext.getBean(Flattener.class); } @AfterMethod public void after() { dataContext.close(); } @Test public void simpleTest() throws LoadException { String json = "[ { " // + "\"a\": [ "// /* */ + "{ \"b\": 1, \"d\":[99, 100] }, "// /* */ + "{ \"b\": 2, \"d\":[] }"// + "]" + // ",\"c\" : [ 9, 10 ] }," // // + "{ " // + "\"a\": [ "// /* */ + "{ \"b\": 3, \"d\":[300,301,302] }, "// /* */ + "{ \"b\": 4, \"d\":[303,304,305] }, "// /* */ + "{ \"b\": 5, \"d\":[306,307,308] } "// + "]" + ",\"c\" : [ 0 ]}" + " ]"; // GIVEN Table t = loadFromJson(json); // WHEN FlattenedTable flattenedTable = flattener.flattenTable(t, null, "a[*]", UUID.randomUUID()); // THEN Assert.assertEquals(flattenedTable.getShards().size(), 1, "Expected correct table shard count"); TableShard tableShard = flattenedTable.getShards().iterator().next(); Assert.assertEquals(tableShard.getLowestRowId(), 0L, "Expected correct lowest row ID"); Assert.assertEquals(tableShard.getNumberOfRowsInShard(), 5, "Expected correct number of rows."); Assert.assertEquals(tableShard.getColumns().keySet(), new HashSet<>(Arrays.asList("a.b", "a.d[0]", "a.d[1]", "a.d[2]", "a.d[length]", "c[0]", "c[1]", "c[length]")), "Expected correct columns."); SortedSet<SortedMap<String, Long>> expectedRows = new TreeSet<>(MAP_COMPARATOR); SortedMap<String, Long> row = new TreeMap<>(); row.put("a.b", 1L); row.put("a.d[0]", 99L); row.put("a.d[1]", 100L); row.put("a.d[2]", LoaderColumnInfo.DEFAULT_LONG); row.put("a.d[length]", 2L); row.put("c[0]", 9L); row.put("c[1]", 10L); row.put("c[length]", 2L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b", 2L); row.put("a.d[0]", LoaderColumnInfo.DEFAULT_LONG); row.put("a.d[1]", LoaderColumnInfo.DEFAULT_LONG); row.put("a.d[2]", LoaderColumnInfo.DEFAULT_LONG); row.put("a.d[length]", 0L); row.put("c[0]", 9L); row.put("c[1]", 10L); row.put("c[length]", 2L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b", 3L); row.put("a.d[0]", 300L); row.put("a.d[1]", 301L); row.put("a.d[2]", 302L); row.put("a.d[length]", 3L); row.put("c[0]", 0L); row.put("c[1]", LoaderColumnInfo.DEFAULT_LONG); row.put("c[length]", 1L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b", 4L); row.put("a.d[0]", 303L); row.put("a.d[1]", 304L); row.put("a.d[2]", 305L); row.put("a.d[length]", 3L); row.put("c[0]", 0L); row.put("c[1]", LoaderColumnInfo.DEFAULT_LONG); row.put("c[length]", 1L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b", 5L); row.put("a.d[0]", 306L); row.put("a.d[1]", 307L); row.put("a.d[2]", 308L); row.put("a.d[length]", 3L); row.put("c[0]", 0L); row.put("c[1]", LoaderColumnInfo.DEFAULT_LONG); row.put("c[length]", 1L); expectedRows.add(row); Assert.assertEquals(getAllRows(tableShard), expectedRows, "Expected to have correct rows."); } @Test public void repeatedMiddleMissingTest() throws LoadException { // GIVEN Table t = loadFromJson("[ { " // + "\"a\": [ "// /* */ + "{ \"b\": [ " // /* */ + "{ \"c\": 1 }," // /* */ + "{ \"d\": 1 }," // /* */ + "{ \"c\": 3 }" // /* */ + "] }, "// /* */ + "{ \"b\": [ " // /* */ + "{ \"c\": 2 }," // /* */ + "{ \"d\": 2 }," // /* */ + "{ \"c\": 4 }" // /* */ + "] } "// + "]" + // "}," // + "{ " // + "\"a\": [ "// /* */ + "{ \"b\": [ " // /* */ + "{ \"c\": 5 }," // /* */ + "{ \"d\": 5 }," // /* */ + "{ \"c\": 6 }" // /* */ + "] }, "// /* */ + "{ \"b\": [ " // /* */ + "{ \"c\": 7 }," // /* */ + "{ \"d\": 7 }," // /* */ + "{ \"c\": 8 }" // /* */ + "] } "// + "]" + // "}" + " ]"); // WHEN FlattenedTable flattenedTable = flattener.flattenTable(t, null, "a[*].b[*]", UUID.randomUUID()); // THEN Assert.assertEquals(flattenedTable.getShards().size(), 1, "Expected correct table shard count"); TableShard tableShard = flattenedTable.getShards().iterator().next(); Assert.assertEquals(tableShard.getLowestRowId(), 0L, "Expected correct lowest row ID"); Assert.assertEquals(tableShard.getNumberOfRowsInShard(), 12, "Expected correct number of rows."); Assert.assertEquals(tableShard.getColumns().keySet(), new HashSet<>(Arrays.asList("a.b.c", "a.b.d")), "Expected correct columns."); SortedSet<SortedMap<String, Long>> expectedRows = new TreeSet<>(MAP_COMPARATOR); SortedMap<String, Long> row = new TreeMap<>(); row.put("a.b.c", 1L); row.put("a.b.d", LoaderColumnInfo.DEFAULT_LONG); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b.c", LoaderColumnInfo.DEFAULT_LONG); row.put("a.b.d", 1L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b.c", 3L); row.put("a.b.d", LoaderColumnInfo.DEFAULT_LONG); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b.c", 2L); row.put("a.b.d", LoaderColumnInfo.DEFAULT_LONG); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b.c", LoaderColumnInfo.DEFAULT_LONG); row.put("a.b.d", 2L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b.c", 4L); row.put("a.b.d", LoaderColumnInfo.DEFAULT_LONG); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b.c", 5L); row.put("a.b.d", LoaderColumnInfo.DEFAULT_LONG); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b.c", LoaderColumnInfo.DEFAULT_LONG); row.put("a.b.d", 5L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b.c", 6L); row.put("a.b.d", LoaderColumnInfo.DEFAULT_LONG); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b.c", 7L); row.put("a.b.d", LoaderColumnInfo.DEFAULT_LONG); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b.c", LoaderColumnInfo.DEFAULT_LONG); row.put("a.b.d", 7L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b.c", 8L); row.put("a.b.d", LoaderColumnInfo.DEFAULT_LONG); expectedRows.add(row); Assert.assertEquals(getAllRows(tableShard), expectedRows, "Expected to have correct rows."); } @Test public void simpleTestHighFirstRowId() throws LoadException { String json = "[ { " // + "\"a\": [ "// /* */ + "{ \"b\": 1, \"d\":[99, 100] }, "// /* */ + "{ \"b\": 2, \"d\":[] }"// + "]" + // ",\"c\" : [ 9, 10 ] }," // // + "{ " // + "\"a\": [ "// /* */ + "{ \"b\": 3, \"d\":[300,301,302] }, "// /* */ + "{ \"b\": 4, \"d\":[303,304,305] }, "// /* */ + "{ \"b\": 5, \"d\":[306,307,308] } "// + "]" + ",\"c\" : [ 0 ]}" + " ]"; // GIVEN Table t = loadFromJson(100, json); // WHEN FlattenedTable flattenedTable = flattener.flattenTable(t, null, "a[*]", UUID.randomUUID()); // THEN Assert.assertEquals(flattenedTable.getShards().size(), 1, "Expected correct table shard count"); TableShard tableShard = flattenedTable.getShards().iterator().next(); Assert.assertEquals(tableShard.getLowestRowId(), 100L, "Expected correct lowest row ID"); /* same as source table */ Assert.assertEquals(tableShard.getNumberOfRowsInShard(), 5, "Expected correct number of rows."); Assert.assertEquals(tableShard.getColumns().keySet(), new HashSet<>(Arrays.asList("a.b", "a.d[0]", "a.d[1]", "a.d[2]", "a.d[length]", "c[0]", "c[1]", "c[length]")), "Expected correct columns."); SortedSet<SortedMap<String, Long>> expectedRows = new TreeSet<>(MAP_COMPARATOR); SortedMap<String, Long> row = new TreeMap<>(); row.put("a.b", 1L); row.put("a.d[0]", 99L); row.put("a.d[1]", 100L); row.put("a.d[2]", LoaderColumnInfo.DEFAULT_LONG); row.put("a.d[length]", 2L); row.put("c[0]", 9L); row.put("c[1]", 10L); row.put("c[length]", 2L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b", 2L); row.put("a.d[0]", LoaderColumnInfo.DEFAULT_LONG); row.put("a.d[1]", LoaderColumnInfo.DEFAULT_LONG); row.put("a.d[2]", LoaderColumnInfo.DEFAULT_LONG); row.put("a.d[length]", 0L); row.put("c[0]", 9L); row.put("c[1]", 10L); row.put("c[length]", 2L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b", 3L); row.put("a.d[0]", 300L); row.put("a.d[1]", 301L); row.put("a.d[2]", 302L); row.put("a.d[length]", 3L); row.put("c[0]", 0L); row.put("c[1]", LoaderColumnInfo.DEFAULT_LONG); row.put("c[length]", 1L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b", 4L); row.put("a.d[0]", 303L); row.put("a.d[1]", 304L); row.put("a.d[2]", 305L); row.put("a.d[length]", 3L); row.put("c[0]", 0L); row.put("c[1]", LoaderColumnInfo.DEFAULT_LONG); row.put("c[length]", 1L); expectedRows.add(row); row = new TreeMap<>(); row.put("a.b", 5L); row.put("a.d[0]", 306L); row.put("a.d[1]", 307L); row.put("a.d[2]", 308L); row.put("a.d[length]", 3L); row.put("c[0]", 0L); row.put("c[1]", LoaderColumnInfo.DEFAULT_LONG); row.put("c[length]", 1L); expectedRows.add(row); Assert.assertEquals(getAllRows(tableShard), expectedRows, "Expected to have correct rows."); } @Test public void simpleTestForceHalfRemoval() throws LoadException { // Test which contains a specific repetition index in approx half of the rows -> should trigger a specific ColPage // to be built. // a[0] is contained in every row, a[1] only in 5 of 9. String json = "[ { " // + "\"a\": [ "// /* */ + "{ \"b\": 1 }, "// /* */ + "{ \"b\": 2 }"// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 3 }, "// /* */ + "{ \"b\": 4 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 5 }, "// /* */ + "{ \"b\": 6 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 7 }, "// /* */ + "{ \"b\": 8 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 9 }, "// /* */ + "{ \"b\": 10 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 11 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 12 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 13 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 14 } "// + "]}" // // + " ]"; // GIVEN Table t = loadFromJson(100, json); // WHEN FlattenedTable flattenedTable = flattener.flattenTable(t, null, "a[*]", UUID.randomUUID()); // THEN Assert.assertEquals(flattenedTable.getShards().size(), 1, "Expected correct table shard count"); TableShard tableShard = flattenedTable.getShards().iterator().next(); Assert.assertEquals(tableShard.getLowestRowId(), 100L, "Expected correct lowest row ID"); /* same as source table */ Assert.assertEquals(tableShard.getNumberOfRowsInShard(), 14, "Expected correct number of rows."); Assert.assertEquals(tableShard.getColumns().keySet(), new HashSet<>(Arrays.asList("a.b")), "Expected correct columns."); SortedSet<SortedMap<String, Long>> expectedRows = new TreeSet<>(MAP_COMPARATOR); SortedMap<String, Long> row; for (long l = 1L; l <= 14; l++) { row = new TreeMap<>(); row.put("a.b", l); expectedRows.add(row); } Assert.assertEquals(getAllRows(tableShard), expectedRows, "Expected to have correct rows."); } @Test public void simpleTestForceLittleRemoval() throws LoadException { // Test which contains a specific repetition index in most of the rows -> should trigger a specific ColPage // to be built. // a[0] is contained in every row, a[1] only in 1 of 9. String json = "[ { " // + "\"a\": [ "// /* */ + "{ \"b\": 1 }, "// /* */ + "{ \"b\": 2 }"// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 3 }, "// /* */ + "{ \"b\": 4 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 5 }, "// /* */ + "{ \"b\": 6 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 7 }, "// /* */ + "{ \"b\": 8 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 9 }, "// /* */ + "{ \"b\": 10 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 11 }, "// /* */ + "{ \"b\": 12 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 13 }, "// /* */ + "{ \"b\": 14 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 15 }, "// /* */ + "{ \"b\": 16 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 17 } "// + "]}" // // + " ]"; // GIVEN Table t = loadFromJson(100, json); // WHEN FlattenedTable flattenedTable = flattener.flattenTable(t, null, "a[*]", UUID.randomUUID()); // THEN Assert.assertEquals(flattenedTable.getShards().size(), 1, "Expected correct table shard count"); TableShard tableShard = flattenedTable.getShards().iterator().next(); Assert.assertEquals(tableShard.getLowestRowId(), 100L, "Expected correct lowest row ID"); /* same as source table */ Assert.assertEquals(tableShard.getNumberOfRowsInShard(), 17, "Expected correct number of rows."); Assert.assertEquals(tableShard.getColumns().keySet(), new HashSet<>(Arrays.asList("a.b")), "Expected correct columns."); SortedSet<SortedMap<String, Long>> expectedRows = new TreeSet<>(MAP_COMPARATOR); SortedMap<String, Long> row; for (long l = 1L; l <= 17; l++) { row = new TreeMap<>(); row.put("a.b", l); expectedRows.add(row); } Assert.assertEquals(getAllRows(tableShard), expectedRows, "Expected to have correct rows."); } @Test public void simpleTestForceMostRemoval() throws LoadException { // Test which contains a specific repetition index in only a small number of the rows -> should trigger a specific // ColPage to be built. // a[0] is contained in every row, a[1] only in 1 of 9. String json = "[ { " // + "\"a\": [ "// /* */ + "{ \"b\": 1 }, "// /* */ + "{ \"b\": 2 }"// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 3 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 4 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 5 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 6 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 7 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 8 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 9 } "// + "]}," // + "{ \"a\": [ "// /* */ + "{ \"b\": 10 } "// + "]}" // // + " ]"; // GIVEN Table t = loadFromJson(100, json); // WHEN FlattenedTable flattenedTable = flattener.flattenTable(t, null, "a[*]", UUID.randomUUID()); // THEN Assert.assertEquals(flattenedTable.getShards().size(), 1, "Expected correct table shard count"); TableShard tableShard = flattenedTable.getShards().iterator().next(); Assert.assertEquals(tableShard.getLowestRowId(), 100L, "Expected correct lowest row ID"); /* same as source table */ Assert.assertEquals(tableShard.getNumberOfRowsInShard(), 10, "Expected correct number of rows."); Assert.assertEquals(tableShard.getColumns().keySet(), new HashSet<>(Arrays.asList("a.b")), "Expected correct columns."); SortedSet<SortedMap<String, Long>> expectedRows = new TreeSet<>(MAP_COMPARATOR); SortedMap<String, Long> row; for (long l = 1L; l <= 10; l++) { row = new TreeMap<>(); row.put("a.b", l); expectedRows.add(row); } Assert.assertEquals(getAllRows(tableShard), expectedRows, "Expected to have correct rows."); } private Table loadFromJson(String json) throws LoadException { return loadFromJson(0, json); } private Table loadFromJson(long firstRowId, String json) throws LoadException { BigByteBuffer jsonBuffer = new BigByteBuffer(json.getBytes(Charset.forName("UTF-8"))); TableShard shard = loader.load(firstRowId, jsonBuffer, TABLE, new LoaderColumnInfo(ColumnType.LONG)).iterator().next(); return tableFactory.createDefaultTable(TABLE, Arrays.asList(shard)); } private SortedSet<SortedMap<String, Long>> getAllRows(TableShard tableShard) { Map<Long, SortedMap<String, Long>> rows = new HashMap<>(); LongStream.range(tableShard.getLowestRowId(), tableShard.getLowestRowId() + tableShard.getNumberOfRowsInShard()) .forEach(rowId -> rows.put(rowId, new TreeMap<>())); for (String colName : tableShard.getColumns().keySet()) { ColumnShard col = tableShard.getColumns().get(colName); QueryableLongColumnShardFacade queryableCol = new QueryableLongColumnShardFacade((LongColumnShard) col); Map<Long, Long> valueIdsByRow = queryableCol.resolveColumnValueIdsForRows(rows.keySet()); for (Entry<Long, Long> e : valueIdsByRow.entrySet()) { rows.get(e.getKey()).put(colName, queryableCol.getColumnShardDictionary().decompressValue(e.getValue())); } } SortedSet<SortedMap<String, Long>> res = new TreeSet<>(MAP_COMPARATOR); res.addAll(rows.values()); return res; } }