/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.raptor.storage.organization;

import com.facebook.presto.PagesIndexPageSorter;
import com.facebook.presto.SequencePageBuilder;
import com.facebook.presto.operator.PagesIndex;
import com.facebook.presto.raptor.metadata.ColumnInfo;
import com.facebook.presto.raptor.metadata.ShardInfo;
import com.facebook.presto.raptor.storage.OrcStorageManager;
import com.facebook.presto.raptor.storage.ReaderAttributes;
import com.facebook.presto.raptor.storage.StorageManager;
import com.facebook.presto.raptor.storage.StoragePageSink;
import com.facebook.presto.spi.ConnectorPageSource;
import com.facebook.presto.spi.Page;
import com.facebook.presto.spi.PageBuilder;
import com.facebook.presto.spi.block.Block;
import com.facebook.presto.spi.block.SortOrder;
import com.facebook.presto.spi.predicate.TupleDomain;
import com.facebook.presto.spi.type.Type;
import com.facebook.presto.testing.MaterializedResult;
import com.facebook.presto.testing.MaterializedRow;
import com.google.common.collect.ImmutableList;
import io.airlift.units.DataSize;
import org.skife.jdbi.v2.DBI;
import org.skife.jdbi.v2.Handle;
import org.skife.jdbi.v2.IDBI;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.OptionalInt;
import java.util.Set;
import java.util.UUID;

import static com.facebook.presto.raptor.storage.TestOrcStorageManager.createOrcStorageManager;
import static com.facebook.presto.spi.block.SortOrder.ASC_NULLS_FIRST;
import static com.facebook.presto.spi.type.BigintType.BIGINT;
import static com.facebook.presto.spi.type.DateType.DATE;
import static com.facebook.presto.spi.type.DoubleType.DOUBLE;
import static com.facebook.presto.spi.type.TimestampType.TIMESTAMP;
import static com.facebook.presto.spi.type.VarcharType.createVarcharType;
import static com.facebook.presto.testing.MaterializedResult.materializeSourceDataStream;
import static com.facebook.presto.testing.TestingConnectorSession.SESSION;
import static com.facebook.presto.testing.assertions.Assert.assertEquals;
import static com.facebook.presto.tests.QueryAssertions.assertEqualsIgnoreOrder;
import static com.google.common.io.Files.createTempDir;
import static io.airlift.concurrent.MoreFutures.getFutureValue;
import static io.airlift.testing.FileUtils.deleteRecursively;
import static io.airlift.units.DataSize.Unit.MEGABYTE;
import static java.util.Collections.nCopies;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toSet;

/**
 * Tests for {@link ShardCompactor}: writes several input shards through an
 * {@link OrcStorageManager}, compacts them (unsorted and sorted), and checks
 * that the output shards contain the same rows and that the number of output
 * shards matches what {@link #MAX_SHARD_ROWS} implies.
 */
@Test(singleThreaded = true)
public class TestShardCompactor
{
    /** Row limit passed to the storage manager; also used to predict the output shard count. */
    private static final int MAX_SHARD_ROWS = 1000;
    private static final PagesIndexPageSorter PAGE_SORTER = new PagesIndexPageSorter(new PagesIndex.TestingFactory());
    private static final ReaderAttributes READER_ATTRIBUTES = new ReaderAttributes(new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE), new DataSize(1, MEGABYTE));

    private OrcStorageManager storageManager;
    private ShardCompactor compactor;
    private File temporary;
    private Handle dummyHandle;

    /**
     * Creates a fresh temporary directory, a uniquely named in-memory H2 database,
     * and the storage manager / compactor under test.
     */
    @BeforeMethod
    public void setup()
            throws Exception
    {
        temporary = createTempDir();
        // nanoTime suffix gives every test method its own database name
        IDBI dbi = new DBI("jdbc:h2:mem:test" + System.nanoTime());
        // NOTE(review): presumably held open so the in-memory database outlives
        // individual connections for the duration of the test -- confirm
        dummyHandle = dbi.open();
        storageManager = createOrcStorageManager(dbi, temporary, MAX_SHARD_ROWS);
        compactor = new ShardCompactor(storageManager, READER_ATTRIBUTES);
    }

    /**
     * Closes the database handle and removes the temporary directory.
     * The directory is removed even if closing the handle fails, and both
     * steps tolerate a {@link #setup()} that failed before assigning the fields.
     */
    @AfterMethod(alwaysRun = true)
    public void tearDown()
            throws Exception
    {
        try {
            if (dummyHandle != null) {
                dummyHandle.close();
            }
        }
        finally {
            if (temporary != null) {
                deleteRecursively(temporary);
            }
        }
    }

    /**
     * Compacts three unsorted shards and verifies the output shard count and that
     * the output rows equal the input rows, ignoring order.
     */
    @Test
    public void testShardCompactor()
            throws Exception
    {
        List<Long> columnIds = ImmutableList.of(3L, 7L, 2L, 1L, 5L);
        List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20), DOUBLE, DATE, TIMESTAMP);

        List<ShardInfo> inputShards = createShards(storageManager, columnIds, columnTypes, 3);
        assertEquals(inputShards.size(), 3);

        long totalRows = inputShards.stream()
                .mapToLong(ShardInfo::getRowCount)
                .sum();
        long expectedOutputShards = computeExpectedOutputShards(totalRows);

        Set<UUID> inputUuids = inputShards.stream()
                .map(ShardInfo::getShardUuid)
                .collect(toSet());

        long transactionId = 1;
        List<ShardInfo> outputShards = compactor.compact(transactionId, OptionalInt.empty(), inputUuids, getColumnInfo(columnIds, columnTypes));
        assertEquals(outputShards.size(), expectedOutputShards);

        Set<UUID> outputUuids = outputShards.stream()
                .map(ShardInfo::getShardUuid)
                .collect(toSet());
        assertShardEqualsIgnoreOrder(inputUuids, outputUuids, columnIds, columnTypes);
    }

    /**
     * Compacts two individually sorted shards with {@code compactSorted} and verifies
     * the output shard count and that reading the output shards in order yields the
     * globally sorted input rows.
     */
    @Test
    public void testShardCompactorSorted()
            throws Exception
    {
        List<Type> columnTypes = ImmutableList.of(BIGINT, createVarcharType(20), DATE, TIMESTAMP, DOUBLE);
        List<Long> columnIds = ImmutableList.of(3L, 7L, 2L, 1L, 5L);
        List<Long> sortColumnIds = ImmutableList.of(1L, 2L, 3L, 5L, 7L);
        List<SortOrder> sortOrders = nCopies(sortColumnIds.size(), ASC_NULLS_FIRST);
        // translate sort column ids into channel positions within columnIds
        List<Integer> sortIndexes = sortColumnIds.stream()
                .map(columnIds::indexOf)
                .collect(toList());

        List<ShardInfo> inputShards = createSortedShards(storageManager, columnIds, columnTypes, sortIndexes, sortOrders, 2);
        assertEquals(inputShards.size(), 2);

        long totalRows = inputShards.stream()
                .mapToLong(ShardInfo::getRowCount)
                .sum();
        long expectedOutputShards = computeExpectedOutputShards(totalRows);

        Set<UUID> inputUuids = inputShards.stream()
                .map(ShardInfo::getShardUuid)
                .collect(toSet());

        long transactionId = 1;
        List<ShardInfo> outputShards = compactor.compactSorted(transactionId, OptionalInt.empty(), inputUuids, getColumnInfo(columnIds, columnTypes), sortColumnIds, sortOrders);
        // a List (not a Set) so that the output shard order is preserved for the sorted comparison
        List<UUID> outputUuids = outputShards.stream()
                .map(ShardInfo::getShardUuid)
                .collect(toList());
        assertEquals(outputShards.size(), expectedOutputShards);

        assertShardEqualsSorted(inputUuids, outputUuids, columnIds, columnTypes, sortIndexes, sortOrders);
    }

    /**
     * Returns {@code ceil(totalRows / MAX_SHARD_ROWS)}: the number of shards needed
     * to hold {@code totalRows} rows when each shard holds at most
     * {@link #MAX_SHARD_ROWS} rows.
     */
    private static long computeExpectedOutputShards(long totalRows)
    {
        return ((totalRows % MAX_SHARD_ROWS) != 0) ? ((totalRows / MAX_SHARD_ROWS) + 1) : (totalRows / MAX_SHARD_ROWS);
    }

    /**
     * Asserts that the rows read from the output shards equal the rows read from the
     * input shards, without regard to row order.
     */
    private void assertShardEqualsIgnoreOrder(Set<UUID> inputUuids, Set<UUID> outputUuids, List<Long> columnIds, List<Type> columnTypes)
            throws IOException
    {
        MaterializedResult inputRows = getMaterializedRows(ImmutableList.copyOf(inputUuids), columnIds, columnTypes);
        MaterializedResult outputRows = getMaterializedRows(ImmutableList.copyOf(outputUuids), columnIds, columnTypes);

        assertEqualsIgnoreOrder(outputRows, inputRows);
    }

    /**
     * Asserts that the sort columns of the rows read from the output shards (in the
     * given shard order) equal the sort columns of the input rows after sorting them
     * with {@link #PAGE_SORTER}.
     */
    private void assertShardEqualsSorted(Set<UUID> inputUuids, List<UUID> outputUuids, List<Long> columnIds, List<Type> columnTypes, List<Integer> sortIndexes, List<SortOrder> sortOrders)
            throws IOException
    {
        List<Page> inputPages = getPages(inputUuids, columnIds, columnTypes);
        List<Type> sortTypes = sortIndexes.stream()
                .map(columnTypes::get)
                .collect(toList());

        MaterializedResult inputRowsSorted = sortAndMaterialize(inputPages, columnTypes, sortIndexes, sortOrders, sortTypes);
        MaterializedResult outputRows = extractColumns(getMaterializedRows(outputUuids, columnIds, columnTypes), sortIndexes, sortTypes);

        assertEquals(outputRows, inputRowsSorted);
    }

    /**
     * Projects every row of {@code materializedRows} onto the fields at
     * {@code indexes} (in that order) and returns the result typed with {@code types}.
     */
    private static MaterializedResult extractColumns(MaterializedResult materializedRows, List<Integer> indexes, List<Type> types)
    {
        ImmutableList.Builder<MaterializedRow> rows = ImmutableList.builder();
        for (MaterializedRow row : materializedRows) {
            Object[] values = new Object[indexes.size()];
            for (int i = 0; i < indexes.size(); i++) {
                values[i] = row.getField(indexes.get(i));
            }
            rows.add(new MaterializedRow(MaterializedResult.DEFAULT_PRECISION, values));
        }
        return new MaterializedResult(rows.build(), types);
    }

    /**
     * Sorts all positions of {@code pages} by {@code sortIndexes}/{@code sortOrders},
     * then materializes only the sort columns, ordered as in {@code sortIndexes}.
     *
     * @param columnTypes types of all channels in {@code pages}
     * @param sortTypes types of the sort channels, in {@code sortIndexes} order
     */
    private static MaterializedResult sortAndMaterialize(List<Page> pages, List<Type> columnTypes, List<Integer> sortIndexes, List<SortOrder> sortOrders, List<Type> sortTypes)
    {
        long[] orderedAddresses = PAGE_SORTER.sort(columnTypes, pages, sortIndexes, sortOrders, 10_000);

        // copy every row, in sorted order, into a single page
        PageBuilder pageBuilder = new PageBuilder(columnTypes);
        for (long orderedAddress : orderedAddresses) {
            int pageIndex = PAGE_SORTER.decodePageIndex(orderedAddress);
            int positionIndex = PAGE_SORTER.decodePositionIndex(orderedAddress);

            Page page = pages.get(pageIndex);
            pageBuilder.declarePosition();
            for (int i = 0; i < columnTypes.size(); i++) {
                columnTypes.get(i).appendTo(page.getBlock(i), positionIndex, pageBuilder.getBlockBuilder(i));
            }
        }

        // extract the sortIndexes and reorder the blocks by sort indexes (useful for debugging)
        Block[] blocks = pageBuilder.build().getBlocks();
        // one output block per sort channel, so the page lines up with sortTypes and
        // holds no null blocks when only a subset of the columns is sorted on
        Block[] outputBlocks = new Block[sortIndexes.size()];
        for (int i = 0; i < sortIndexes.size(); i++) {
            outputBlocks[i] = blocks[sortIndexes.get(i)];
        }

        MaterializedResult.Builder resultBuilder = MaterializedResult.resultBuilder(SESSION, sortTypes);
        resultBuilder.page(new Page(outputBlocks));
        return resultBuilder.build();
    }

    /**
     * Reads all pages of the given shards, forcing each page to be loaded before
     * its page source is closed.
     */
    private List<Page> getPages(Set<UUID> uuids, List<Long> columnIds, List<Type> columnTypes)
            throws IOException
    {
        ImmutableList.Builder<Page> pages = ImmutableList.builder();
        for (UUID uuid : uuids) {
            try (ConnectorPageSource pageSource = getPageSource(columnIds, columnTypes, uuid)) {
                while (!pageSource.isFinished()) {
                    Page outputPage = pageSource.getNextPage();
                    // a null page is treated as the end of this shard's data
                    if (outputPage == null) {
                        break;
                    }
                    outputPage.assureLoaded();
                    pages.add(outputPage);
                }
            }
        }
        return pages.build();
    }

    /**
     * Materializes the rows of the given shards, concatenated in the iteration
     * order of {@code uuids}.
     */
    private MaterializedResult getMaterializedRows(List<UUID> uuids, List<Long> columnIds, List<Type> columnTypes)
            throws IOException
    {
        MaterializedResult.Builder rows = MaterializedResult.resultBuilder(SESSION, columnTypes);
        for (UUID uuid : uuids) {
            try (ConnectorPageSource pageSource = getPageSource(columnIds, columnTypes, uuid)) {
                MaterializedResult result = materializeSourceDataStream(SESSION, pageSource, columnTypes);
                rows.rows(result.getMaterializedRows());
            }
        }
        return rows.build();
    }

    /**
     * Opens a page source over one shard with no bucket number and no predicate
     * ({@code TupleDomain.all()}).
     */
    private ConnectorPageSource getPageSource(List<Long> columnIds, List<Type> columnTypes, UUID uuid)
    {
        return storageManager.getPageSource(uuid, OptionalInt.empty(), columnIds, columnTypes, TupleDomain.all(), READER_ATTRIBUTES);
    }

    /**
     * Writes {@code shardCount} batches of sorted pages through a single sink,
     * flushing after each batch, and returns the committed shards.
     */
    private static List<ShardInfo> createSortedShards(StorageManager storageManager, List<Long> columnIds, List<Type> columnTypes, List<Integer> sortChannels, List<SortOrder> sortOrders, int shardCount)
    {
        StoragePageSink sink = createStoragePageSink(storageManager, columnIds, columnTypes);
        for (int shardNum = 0; shardNum < shardCount; shardNum++) {
            createSortedShard(columnTypes, sortChannels, sortOrders, sink);
        }
        return getFutureValue(sink.commit());
    }

    /**
     * Generates one batch of pages, appends its positions to {@code sink} in sorted
     * order, and flushes the sink.
     */
    private static void createSortedShard(List<Type> columnTypes, List<Integer> sortChannels, List<SortOrder> sortOrders, StoragePageSink sink)
    {
        List<Page> pages = createPages(columnTypes);

        // Sort pages
        long[] orderedAddresses = PAGE_SORTER.sort(columnTypes, pages, sortChannels, sortOrders, 10_000);
        int[] orderedPageIndex = new int[orderedAddresses.length];
        int[] orderedPositionIndex = new int[orderedAddresses.length];

        for (int i = 0; i < orderedAddresses.length; i++) {
            orderedPageIndex[i] = PAGE_SORTER.decodePageIndex(orderedAddresses[i]);
            orderedPositionIndex[i] = PAGE_SORTER.decodePositionIndex(orderedAddresses[i]);
        }

        // Append sorted pages
        sink.appendPages(pages, orderedPageIndex, orderedPositionIndex);
        sink.flush();
    }

    /**
     * Writes {@code shardCount} batches of unsorted pages through a single sink,
     * flushing after each batch, and returns the committed shards.
     */
    private static List<ShardInfo> createShards(StorageManager storageManager, List<Long> columnIds, List<Type> columnTypes, int shardCount)
    {
        StoragePageSink sink = createStoragePageSink(storageManager, columnIds, columnTypes);
        for (int i = 0; i < shardCount; i++) {
            sink.appendPages(createPages(columnTypes));
            sink.flush();
        }
        return getFutureValue(sink.commit());
    }

    /**
     * Creates a page sink for transaction id 1 with no bucket number.
     */
    private static StoragePageSink createStoragePageSink(StorageManager manager, List<Long> columnIds, List<Type> columnTypes)
    {
        long transactionId = 1;
        return manager.createStoragePageSink(transactionId, OptionalInt.empty(), columnIds, columnTypes, false);
    }

    /**
     * Creates 10 sequence pages of 10 rows each, alternating between two sets of
     * per-column initial values.
     */
    private static List<Page> createPages(List<Type> columnTypes)
    {
        // Creates 10 pages with 10 rows each
        int rowCount = 10;
        int pageCount = 10;

        // some random values to start off the blocks
        int[][] initialValues = {{17, 15, 16, 18, 14}, {59, 55, 54, 53, 58}};

        ImmutableList.Builder<Page> pages = ImmutableList.builder();
        for (int i = 0; i < pageCount; i++) {
            pages.add(SequencePageBuilder.createSequencePage(columnTypes, rowCount, initialValues[i % 2]));
        }
        return pages.build();
    }

    /**
     * Pairs each column id with the type at the same position.
     */
    private static List<ColumnInfo> getColumnInfo(List<Long> columnIds, List<Type> columnTypes)
    {
        ImmutableList.Builder<ColumnInfo> columnInfos = ImmutableList.builder();
        for (int i = 0; i < columnIds.size(); i++) {
            columnInfos.add(new ColumnInfo(columnIds.get(i), columnTypes.get(i)));
        }
        return columnInfos.build();
    }
}