/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.segment.index.creator;

import com.linkedin.pinot.common.config.ColumnPartitionConfig;
import com.linkedin.pinot.common.config.SegmentPartitionConfig;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.request.BrokerRequest;
import com.linkedin.pinot.common.request.FilterOperator;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.request.FilterQueryTree;
import com.linkedin.pinot.common.utils.request.RequestUtils;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.readers.RecordReader;
import com.linkedin.pinot.core.data.readers.TestRecordReader;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.query.pruner.PartitionSegmentPruner;
import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import com.linkedin.pinot.core.segment.index.ColumnMetadata;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.core.segment.index.loader.Loaders;
import com.linkedin.pinot.pql.parsers.Pql2Compiler;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.math.IntRange;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;


/**
 * Unit test for segment partitioning:
 * <ul>
 *   <li> Test to cover segment generation and metadata. </li>
 *   <li> Test to cover segment pruning during query execution. </li>
 * </ul>
 */
public class SegmentPartitionTest {
  private static final String SEGMENT_DIR_NAME =
      System.getProperty("java.io.tmpdir") + File.separator + "partitionTest";
  private static final String TABLE_NAME = "partitionTable";
  private static final String SEGMENT_NAME = "partition";
  private static final String SEGMENT_PATH = SEGMENT_DIR_NAME + File.separator + SEGMENT_NAME;
  private static final int NUM_ROWS = 1001;
  private static final String PARTITIONED_COLUMN_NAME = "partitionedColumn";
  private static final String NON_PARTITIONED_COLUMN_NAME = "nonPartitionedColumn";
  private static final int NUM_PARTITIONS = 20; // For modulo function
  private static final int PARTITION_DIVISOR = 5; // Allowed partition values
  private static final int MAX_PARTITION_VALUE = (PARTITION_DIVISOR - 1);
  private static final String EXPECTED_PARTITION_VALUE_STRING = "[0 " + MAX_PARTITION_VALUE + "]";
  // Mixed case: the tests compare function names case-insensitively.
  private static final String EXPECTED_PARTITION_FUNCTION = "MoDuLo";
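
  // With the modulo partition function and NUM_PARTITIONS = 20, a value v belongs to partition (v % 20).
  // Rows are generated so that (v % 20) always lies in [0, PARTITION_DIVISOR - 1] = [0, 4], which is why
  // the expected partition range string is "[0 4]". For example, v = 742 = 37 * 20 + 2 maps to partition 2.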

  private IndexSegment _segment;

  @BeforeClass
  public void init()
      throws Exception {
    buildSegment();
  }

  /**
   * Clean up after test
   */
  @AfterClass
  public void cleanup() {
    FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME));
  }

  /**
   * Unit test:
   * <ul>
   *   <li> Partitioning metadata is written out correctly for a column whose values all comply with the
   *        partitioning scheme. </li>
   *   <li> Partitioning metadata is not written out for a column for which no partitioning config was
   *        specified. </li>
   * </ul>
   * @throws Exception
   */
  @Test
  public void testMetadata()
      throws Exception {
    SegmentMetadataImpl metadata = (SegmentMetadataImpl) _segment.getSegmentMetadata();
    ColumnMetadata columnMetadata = metadata.getColumnMetadataFor(PARTITIONED_COLUMN_NAME);

    Assert.assertEquals(columnMetadata.getPartitionFunction().toString().toLowerCase(),
        EXPECTED_PARTITION_FUNCTION.toLowerCase());

    List<IntRange> partitionValues = columnMetadata.getPartitionRanges();
    Assert.assertEquals(partitionValues.size(), 1);

    List<IntRange> expectedPartitionValues = ColumnPartitionConfig.rangesFromString(
        EXPECTED_PARTITION_VALUE_STRING.split(ColumnPartitionConfig.PARTITION_VALUE_DELIMITER));

    IntRange actualValue = partitionValues.get(0);
    IntRange expectedPartitionValue = expectedPartitionValues.get(0);
    Assert.assertEquals(actualValue.getMinimumInteger(), expectedPartitionValue.getMinimumInteger());
    Assert.assertEquals(actualValue.getMaximumInteger(), expectedPartitionValue.getMaximumInteger());

    columnMetadata = metadata.getColumnMetadataFor(NON_PARTITIONED_COLUMN_NAME);
    Assert.assertNull(columnMetadata.getPartitionFunction());
    Assert.assertNull(columnMetadata.getPartitionRanges());
  }

  /**
   * Unit test for {@link PartitionSegmentPruner}.
   * <ul>
   *   <li> Generates queries with equality predicates on the partitioned column with random values. </li>
   *   <li> Ensures that column values whose partition falls within the partition range ([0 4]) do not prune
   *        the segment, whereas other values do. </li>
   *   <li> Ensures that predicates on non-partitioned columns do not prune the segment. </li>
   * </ul>
   */
  @Test
  public void testPruner() {
    Pql2Compiler compiler = new Pql2Compiler();
    PartitionSegmentPruner pruner = new PartitionSegmentPruner();
    Random random = new Random(System.nanoTime());

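    // For the partitioned column, the segment is pruned exactly when the predicate value's partition
    // (value % NUM_PARTITIONS) falls outside the partition range [0 4] recorded in the column metadata.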
    for (int i = 0; i < 1000; i++) {
      int columnValue = Math.abs(random.nextInt());

      // Test for partitioned column.
      String query = buildQuery(TABLE_NAME, PARTITIONED_COLUMN_NAME, columnValue);
      BrokerRequest brokerRequest = compiler.compileToBrokerRequest(query);
      FilterQueryTree filterQueryTree = RequestUtils.generateFilterQueryTree(brokerRequest);
      Assert.assertEquals(pruner.prune(_segment, filterQueryTree),
          (columnValue % NUM_PARTITIONS > MAX_PARTITION_VALUE), "Failed for column value: " + columnValue);

      // Test for non-partitioned column.
      query = buildQuery(TABLE_NAME, NON_PARTITIONED_COLUMN_NAME, columnValue);
      brokerRequest = compiler.compileToBrokerRequest(query);
      filterQueryTree = RequestUtils.generateFilterQueryTree(brokerRequest);
      Assert.assertFalse(pruner.prune(_segment, filterQueryTree));

      // Test for AND query: Segment can be pruned out if the partitioned column has a value outside of the range.
      int partitionColumnValue = Math.abs(random.nextInt());
      int nonPartitionColumnValue = random.nextInt();
      query = buildAndQuery(TABLE_NAME, PARTITIONED_COLUMN_NAME, partitionColumnValue, NON_PARTITIONED_COLUMN_NAME,
          nonPartitionColumnValue, FilterOperator.AND);
      brokerRequest = compiler.compileToBrokerRequest(query);
      filterQueryTree = RequestUtils.generateFilterQueryTree(brokerRequest);
      Assert.assertEquals(pruner.prune(_segment, filterQueryTree),
          (partitionColumnValue % NUM_PARTITIONS) > MAX_PARTITION_VALUE);

      // Test for OR query: Segment should never be pruned, as there is an OR with the non-partitioned column.
      query = buildAndQuery(TABLE_NAME, PARTITIONED_COLUMN_NAME, partitionColumnValue, NON_PARTITIONED_COLUMN_NAME,
          nonPartitionColumnValue, FilterOperator.OR);
      brokerRequest = compiler.compileToBrokerRequest(query);
      filterQueryTree = RequestUtils.generateFilterQueryTree(brokerRequest);
      Assert.assertFalse(pruner.prune(_segment, filterQueryTree));
    }
  }

  /**
   * Unit test for the utility that converts String ranges into IntRanges and back.
   * <ul>
   *   <li> Generates a list of String ranges. </li>
   *   <li> Ensures that conversion to IntRanges is as expected. </li>
   *   <li> Ensures that the IntRanges, when converted back to String ranges, are as expected. </li>
   * </ul>
   */
  @Test
  public void testStringRangeConversions() {
    Random random = new Random();

    for (int i = 0; i < 1000; i++) {
      int numRanges = 1 + random.nextInt(1000);
      String[] ranges = new String[numRanges];
      List<IntRange> expected = new ArrayList<>(numRanges);

      StringBuilder builder = new StringBuilder();
      for (int j = 0; j < numRanges; j++) {
        int start = random.nextInt();
        int end = random.nextInt();

        // Generate random ranges such that start <= end.
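        // XOR swap: exchanges start and end in place, without a temporary variable.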
        if (start > end) {
          start ^= end;
          end = start ^ end;
          start = start ^ end;
        }

        ranges[j] = buildRangeString(start, end);
        expected.add(new IntRange(start, end));

        builder.append(ranges[j]);
        if (j < numRanges - 1) {
          builder.append(ColumnPartitionConfig.PARTITION_VALUE_DELIMITER);
        }
      }

      String expectedString = builder.toString();
      List<IntRange> actual = ColumnPartitionConfig.rangesFromString(ranges);
      Assert.assertEquals(actual, expected);

      String actualString = ColumnPartitionConfig.rangesToString(actual);
      Assert.assertEquals(actualString, expectedString);
    }
  }

  /**
   * Unit test for {@link SegmentPartitionConfig} that tests the following:
   * <ul>
   *   <li> Conversion from/to JSON string. </li>
   *   <li> Function names, values and ranges are as expected. </li>
   * </ul>
   * @throws IOException
   */
  @Test
  public void testSegmentPartitionConfig()
      throws IOException {
    Map<String, ColumnPartitionConfig> expectedMap = new HashMap<>();

    for (int i = 0; i < 10; i++) {
      String partitionColumn = "column_" + i;
      String partitionFunction = "function_" + i;
      expectedMap.put(partitionColumn, new ColumnPartitionConfig(partitionFunction, i + 1));
    }

    SegmentPartitionConfig expectedConfig = new SegmentPartitionConfig(expectedMap);
    SegmentPartitionConfig actualConfig = SegmentPartitionConfig.fromJsonString(expectedConfig.toJsonString());

    for (Map.Entry<String, ColumnPartitionConfig> entry : actualConfig.getColumnPartitionMap().entrySet()) {
      String partitionColumn = entry.getKey();
      ColumnPartitionConfig expectedColumnConfig = expectedMap.get(partitionColumn);
      Assert.assertNotNull(expectedColumnConfig);

      ColumnPartitionConfig actualColumnConfig = entry.getValue();
      Assert.assertEquals(actualColumnConfig.getFunctionName(), expectedColumnConfig.getFunctionName());
    }

    // Test that adding new fields does not break JSON de-serialization.
    String jsonStringWithNewField =
        "{\"columnPartitionMap\":{\"column_0\":{\"functionName\":\"function\",\"numPartitions\":10,\"newField\":\"newValue\"}}}";
    String jsonStringWithoutNewField =
        "{\"columnPartitionMap\":{\"column_0\":{\"functionName\":\"function\",\"numPartitions\":10}}}";
    Assert.assertEquals(SegmentPartitionConfig.fromJsonString(jsonStringWithNewField).toJsonString(),
        jsonStringWithoutNewField);
  }

  private String buildQuery(String tableName, String columnName, int predicateValue) {
    return "select count(*) from " + tableName + " where " + columnName + " = " + predicateValue;
  }

  private String buildAndQuery(String tableName, String partitionColumn, int partitionedColumnValue,
      String nonPartitionColumn, int nonPartitionedColumnValue, FilterOperator operator) {
    return "select count(*) from " + tableName + " where " + partitionColumn + " = " + partitionedColumnValue + " "
        + operator + " " + nonPartitionColumn + " = " + nonPartitionedColumnValue;
  }

  private String buildRangeString(int start, int end) {
    return "[" + start + " " + end + "]";
  }

  /**
   * Helper method to build a segment for testing:
   * <ul>
   *   <li> The first column is partitioned correctly, as per the specification in the segment generation
   *        config. </li>
   *   <li> The second column does not have any partitioning config in the segment generation config. </li>
   * </ul>
   * @throws Exception
   */
  private void buildSegment()
      throws Exception {
    Schema schema = new Schema();
    schema.addField(new DimensionFieldSpec(PARTITIONED_COLUMN_NAME, FieldSpec.DataType.INT, true));
    schema.addField(new DimensionFieldSpec(NON_PARTITIONED_COLUMN_NAME, FieldSpec.DataType.INT, true));

    Random random = new Random();
    Map<String, ColumnPartitionConfig> partitionFunctionMap = new HashMap<>();
    partitionFunctionMap.put(PARTITIONED_COLUMN_NAME,
        new ColumnPartitionConfig(EXPECTED_PARTITION_FUNCTION, NUM_PARTITIONS));
    SegmentPartitionConfig segmentPartitionConfig = new SegmentPartitionConfig(partitionFunctionMap);

    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setOutDir(SEGMENT_DIR_NAME);
    config.setSegmentName(SEGMENT_NAME);
    config.setTableName(TABLE_NAME);
    config.setSegmentPartitionConfig(segmentPartitionConfig);

    final List<GenericRow> rows = new ArrayList<>();
    for (int row = 0; row < NUM_ROWS; row++) {
      HashMap<String, Object> map = new HashMap<>();
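      // Generate values that comply with the partitioning scheme: the first term is a multiple of
      // NUM_PARTITIONS (20), and the second adds a remainder in [0, PARTITION_DIVISOR - 1], so the
      // value's partition (value % 20) always lies in the expected range [0 4].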
      int validPartitionedValue = random.nextInt(100) * 20 + random.nextInt(PARTITION_DIVISOR);
      map.put(PARTITIONED_COLUMN_NAME, validPartitionedValue);
      map.put(NON_PARTITIONED_COLUMN_NAME, validPartitionedValue);

      GenericRow genericRow = new GenericRow();
      genericRow.init(map);
      rows.add(genericRow);
    }

    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    RecordReader reader = new TestRecordReader(rows, schema);
    driver.init(config, reader);
    driver.build();

    _segment = Loaders.IndexSegment.load(new File(SEGMENT_PATH), ReadMode.mmap);
  }
}