/** * diqube: Distributed Query Base. * * Copyright (C) 2015 Bastian Gloeckle * * This file is part of diqube. * * diqube is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package org.diqube.data.types.str; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.Map; import java.util.Map.Entry; import java.util.NavigableMap; import java.util.TreeMap; import org.diqube.data.column.ColumnPage; import org.diqube.data.column.ColumnPageFactory; import org.diqube.data.column.ColumnShard; import org.diqube.data.column.StandardColumnShard; import org.diqube.data.serialize.DataDeserializer; import org.diqube.data.serialize.DataSerialization; import org.diqube.data.serialize.DataSerializationManager; import org.diqube.data.serialize.DataSerializer; import org.diqube.data.serialize.DataSerializer.ObjectDoneConsumer; import org.diqube.data.serialize.DeserializationException; import org.diqube.data.serialize.SerializationException; import org.diqube.data.table.DefaultTableShard; import org.diqube.data.table.TableFactory; import org.diqube.data.table.TableShard; import org.diqube.data.types.lng.array.BitEfficientLongArray; import org.diqube.data.types.lng.dict.ArrayCompressedLongDictionary; import org.diqube.data.types.str.dict.ConstantStringDictionary; import org.diqube.data.types.str.dict.ParentNode; import org.diqube.data.types.str.dict.StringDictionary; import org.diqube.data.types.str.dict.TrieStringDictionary; import org.diqube.util.Pair; import org.springframework.context.annotation.AnnotationConfigApplicationContext; import org.testng.Assert; import org.testng.annotations.AfterMethod; import org.testng.annotations.BeforeMethod; import org.testng.annotations.Test; /** * Test which serializes and deserializes string columns. * * @author Bastian Gloeckle */ public class StringColumnSerializationTest { private static final String TABLE = "Test"; private static final String COL = "col"; private static final ObjectDoneConsumer NOOP = (a) -> { }; private AnnotationConfigApplicationContext dataContext; private DataSerializationManager serializationManager; private StringColumnShardFactory stringColumnShardFactory; private ColumnPageFactory columnPageFactory; private TableFactory tableFactory; @BeforeMethod public void before() { dataContext = new AnnotationConfigApplicationContext(); dataContext.scan("org.diqube"); dataContext.refresh(); serializationManager = dataContext.getBean(DataSerializationManager.class); stringColumnShardFactory = dataContext.getBean(StringColumnShardFactory.class); columnPageFactory = dataContext.getBean(ColumnPageFactory.class); tableFactory = dataContext.getBean(TableFactory.class); } @AfterMethod public void after() { dataContext.close(); } @Test public void testSimple() throws SerializationException, DeserializationException { // GIVEN @SuppressWarnings("unchecked") Pair<TableShard, Integer> p = createTableShard(1, createTrieDict( // TrieTestUtil.parent( // new Pair<>("abc", TrieTestUtil.terminal(0L)), // new Pair<>("xyz", TrieTestUtil.terminal(1L))), "abc", "xyz", 1L)); // WHEN serialize & deserialze ByteArrayOutputStream outStream = new ByteArrayOutputStream(); DataSerializer serializer = serializationManager.createSerializer(); Map<Long, String> valuesBefore = getAllValues(p.getLeft(), p.getRight()); serializer.serialize(p.getLeft(), outStream, NOOP); DataDeserializer deserializer = serializationManager.createDeserializer(); TableShard deserialized = (TableShard) ((DataSerialization<?>) deserializer.deserialize(DefaultTableShard.class, new ByteArrayInputStream(outStream.toByteArray()))); // THEN Map<Long, String> valuesAfter = getAllValues(deserialized, p.getRight()); Assert.assertEquals(valuesAfter, valuesBefore, "Expected column to contain the same values after deserializing"); } @Test public void testTwoLevel() throws SerializationException, DeserializationException { // GIVEN @SuppressWarnings("unchecked") Pair<TableShard, Integer> p = createTableShard(1, createTrieDict( // TrieTestUtil.parent( // new Pair<>("a", TrieTestUtil.parent( // new Pair<>("bc", TrieTestUtil.terminal(0L)), // new Pair<>("cd", TrieTestUtil.terminal(1L)))), // new Pair<>("xyz", TrieTestUtil.terminal(2L))), "abc", "xyz", 1L)); // WHEN serialize & deserialze ByteArrayOutputStream outStream = new ByteArrayOutputStream(); DataSerializer serializer = serializationManager.createSerializer(); Map<Long, String> valuesBefore = getAllValues(p.getLeft(), p.getRight()); serializer.serialize(p.getLeft(), outStream, NOOP); DataDeserializer deserializer = serializationManager.createDeserializer(); TableShard deserialized = (TableShard) ((DataSerialization<?>) deserializer.deserialize(DefaultTableShard.class, new ByteArrayInputStream(outStream.toByteArray()))); // THEN Map<Long, String> valuesAfter = getAllValues(deserialized, p.getRight()); Assert.assertEquals(valuesAfter, valuesBefore, "Expected column to contain the same values after deserializing"); } @Test public void testConstantDict() throws SerializationException, DeserializationException { // GIVEN Pair<TableShard, Integer> p = createTableShard(2, new ConstantStringDictionary("hello")); // WHEN serialize & deserialze ByteArrayOutputStream outStream = new ByteArrayOutputStream(); DataSerializer serializer = serializationManager.createSerializer(); Map<Long, String> valuesBefore = getAllValues(p.getLeft(), p.getRight()); serializer.serialize(p.getLeft(), outStream, NOOP); DataDeserializer deserializer = serializationManager.createDeserializer(); TableShard deserialized = (TableShard) ((DataSerialization<?>) deserializer.deserialize(DefaultTableShard.class, new ByteArrayInputStream(outStream.toByteArray()))); // THEN Map<Long, String> valuesAfter = getAllValues(deserialized, p.getRight()); Assert.assertEquals(valuesAfter, valuesBefore, "Expected column to contain the same values after deserializing"); } private Map<Long, String> getAllValues(TableShard tableShard, long numberOfRowIds) { StandardColumnShard shard = tableShard.getColumns().get(COL); Map<Long, String> res = new HashMap<>(); for (long rowId = 0; rowId < numberOfRowIds; rowId++) { Entry<Long, ColumnPage> pageEntry = shard.getPages().floorEntry(rowId); long colPageId = pageEntry.getValue().getValues().get((int) (rowId - pageEntry.getKey())); long colShardId = pageEntry.getValue().getColumnPageDict().decompressValue(colPageId); String value = (String) shard.getColumnShardDictionary().decompressValue(colShardId); res.put(rowId, value); } return res; } /** * Create a testable {@link TableShard}. * * @param numberOfColPages * Number of {@link ColumnPage}s in the {@link ColumnShard} that is created. * @param columnDict * the Column shard dict to use. * @return pair of new {@link TableShard} and the number of rows that have values in the tableShard. */ private Pair<TableShard, Integer> createTableShard(int numberOfColPages, StringDictionary<?> columnDict) { NavigableMap<Long, ColumnPage> colPages = new TreeMap<>(); int numberOfRowsPerShard; if (columnDict.getMaxId() != null) { numberOfRowsPerShard = (columnDict.getMaxId().intValue() + 1) / numberOfColPages; long nextId = 0L; for (int i = 0; i < numberOfColPages; i++) { long[] pageDictArrayPlain = new long[numberOfRowsPerShard]; long[] pageValueArrayPlain = new long[numberOfRowsPerShard]; for (int j = 0; j < numberOfRowsPerShard; j++) { pageValueArrayPlain[j] = numberOfRowsPerShard - 1 - j; pageDictArrayPlain[j] = nextId++; } BitEfficientLongArray pageDictArray = new BitEfficientLongArray(pageDictArrayPlain, true); BitEfficientLongArray pageValueArray = new BitEfficientLongArray(pageValueArrayPlain, false); ColumnPage page = columnPageFactory.createDefaultColumnPage(new ArrayCompressedLongDictionary(pageDictArray), pageValueArray, i * numberOfRowsPerShard, COL + "#" + (nextId - numberOfRowsPerShard)); colPages.put(nextId - numberOfRowsPerShard, page); } } else numberOfRowsPerShard = 0; StringStandardColumnShard col = stringColumnShardFactory.createStandardStringColumnShard(COL, colPages, columnDict); TableShard ts = tableFactory.createDefaultTableShard(TABLE, new ArrayList<>(Arrays.asList(col))); return new Pair<>(ts, numberOfRowsPerShard * colPages.size()); } private TrieStringDictionary createTrieDict(ParentNode parent, String firstValue, String lastValue, long lastId) { return new TrieStringDictionary(parent, firstValue, lastValue, lastId); } }