/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.segment.index.creator;

import com.linkedin.pinot.common.config.ColumnPartitionConfig;
import com.linkedin.pinot.common.config.SegmentPartitionConfig;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.request.BrokerRequest;
import com.linkedin.pinot.common.request.FilterOperator;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.request.FilterQueryTree;
import com.linkedin.pinot.common.utils.request.RequestUtils;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.readers.RecordReader;
import com.linkedin.pinot.core.data.readers.TestRecordReader;
import com.linkedin.pinot.core.indexsegment.IndexSegment;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.query.pruner.PartitionSegmentPruner;
import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import com.linkedin.pinot.core.segment.index.ColumnMetadata;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.core.segment.index.loader.Loaders;
import com.linkedin.pinot.pql.parsers.Pql2Compiler;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.math.IntRange;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;


/**
 * Unit test for segment partitioning:
 * <ul>
 *   <li> Test to cover segment generation and metadata. </li>
 *   <li> Test to cover segment pruning during query execution. </li>
 * </ul>
 */
public class SegmentPartitionTest {
  private static final String SEGMENT_DIR_NAME =
      System.getProperty("java.io.tmpdir") + File.separator + "partitionTest";
  private static final String TABLE_NAME = "partitionTable";
  private static final String SEGMENT_NAME = "partition";
  private static final String SEGMENT_PATH = SEGMENT_DIR_NAME + File.separator + SEGMENT_NAME;
  private static final int NUM_ROWS = 1001;
  private static final String PARTITIONED_COLUMN_NAME = "partitionedColumn";
  private static final String NON_PARTITIONED_COLUMN_NAME = "nonPartitionedColumn";
  private static final int NUM_PARTITIONS = 20; // For modulo function
  private static final int PARTITION_DIVISOR = 5; // Allowed partition values
  private static final int MAX_PARTITION_VALUE = (PARTITION_DIVISOR - 1);
  private static final String EXPECTED_PARTITION_VALUE_STRING = "[0 " + MAX_PARTITION_VALUE + "]";
  // Mixed case: the tests compare function names case-insensitively.
  private static final String EXPECTED_PARTITION_FUNCTION = "MoDuLo";
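
  // With the modulo partition function and NUM_PARTITIONS = 20, a value v belongs to partition (v % 20).
  // Rows are generated so that (v % 20) always lies in [0, PARTITION_DIVISOR - 1] = [0, 4], which is why
  // the expected partition range string is "[0 4]". For example, v = 742 = 37 * 20 + 2 maps to partition 2.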

  private IndexSegment _segment;

  @BeforeClass
  public void init()
      throws Exception {
    buildSegment();
  }

  /**
   * Clean up after test
   */
  @AfterClass
  public void cleanup() {
    FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME));
  }

  /**
   * Unit test:
   * <ul>
   *   <li> Partitioning metadata is written out correctly for a column whose values all comply with the
   *        partitioning scheme. </li>
   *   <li> Partitioning metadata is not written out for a column for which no partitioning config was
   *        specified. </li>
   * </ul>
   * @throws Exception
   */
  @Test
  public void testMetadata()
      throws Exception {
    SegmentMetadataImpl metadata = (SegmentMetadataImpl) _segment.getSegmentMetadata();
    ColumnMetadata columnMetadata = metadata.getColumnMetadataFor(PARTITIONED_COLUMN_NAME);

    Assert.assertEquals(columnMetadata.getPartitionFunction().toString().toLowerCase(),
        EXPECTED_PARTITION_FUNCTION.toLowerCase());

    List<IntRange> partitionValues = columnMetadata.getPartitionRanges();
    Assert.assertEquals(partitionValues.size(), 1);

    List<IntRange> expectedPartitionValues = ColumnPartitionConfig.rangesFromString(
        EXPECTED_PARTITION_VALUE_STRING.split(ColumnPartitionConfig.PARTITION_VALUE_DELIMITER));

    IntRange actualValue = partitionValues.get(0);
    IntRange expectedPartitionValue = expectedPartitionValues.get(0);
    Assert.assertEquals(actualValue.getMinimumInteger(), expectedPartitionValue.getMinimumInteger());
    Assert.assertEquals(actualValue.getMaximumInteger(), expectedPartitionValue.getMaximumInteger());

    columnMetadata = metadata.getColumnMetadataFor(NON_PARTITIONED_COLUMN_NAME);
    Assert.assertNull(columnMetadata.getPartitionFunction());
    Assert.assertNull(columnMetadata.getPartitionRanges());
  }

  /**
   * Unit test for {@link PartitionSegmentPruner}.
   * <ul>
   *   <li> Generates queries with equality predicates on the partitioned column with random values. </li>
   *   <li> Ensures that column values whose partition falls within the partition range ([0 4]) do not prune
   *        the segment, whereas other values do. </li>
   *   <li> Ensures that predicates on non-partitioned columns do not prune the segment. </li>
   * </ul>
   */
  @Test
  public void testPruner() {
    Pql2Compiler compiler = new Pql2Compiler();
    PartitionSegmentPruner pruner = new PartitionSegmentPruner();
    Random random = new Random(System.nanoTime());

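    // For the partitioned column, the segment is pruned exactly when the predicate value's partition
    // (value % NUM_PARTITIONS) falls outside the partition range [0 4] recorded in the column metadata.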
    for (int i = 0; i < 1000; i++) {
      int columnValue = Math.abs(random.nextInt());

      // Test for partitioned column.
      String query = buildQuery(TABLE_NAME, PARTITIONED_COLUMN_NAME, columnValue);
      BrokerRequest brokerRequest = compiler.compileToBrokerRequest(query);
      FilterQueryTree filterQueryTree = RequestUtils.generateFilterQueryTree(brokerRequest);
      Assert.assertEquals(pruner.prune(_segment, filterQueryTree),
          (columnValue % NUM_PARTITIONS > MAX_PARTITION_VALUE), "Failed for column value: " + columnValue);

      // Test for non-partitioned column.
      query = buildQuery(TABLE_NAME, NON_PARTITIONED_COLUMN_NAME, columnValue);
      brokerRequest = compiler.compileToBrokerRequest(query);
      filterQueryTree = RequestUtils.generateFilterQueryTree(brokerRequest);
      Assert.assertFalse(pruner.prune(_segment, filterQueryTree));

      // Test for AND query: Segment can be pruned out if the partitioned column has a value outside of the range.
      int partitionColumnValue = Math.abs(random.nextInt());
      int nonPartitionColumnValue = random.nextInt();
      query = buildAndQuery(TABLE_NAME, PARTITIONED_COLUMN_NAME, partitionColumnValue, NON_PARTITIONED_COLUMN_NAME,
          nonPartitionColumnValue, FilterOperator.AND);
      brokerRequest = compiler.compileToBrokerRequest(query);
      filterQueryTree = RequestUtils.generateFilterQueryTree(brokerRequest);
      Assert.assertEquals(pruner.prune(_segment, filterQueryTree),
          (partitionColumnValue % NUM_PARTITIONS) > MAX_PARTITION_VALUE);

      // Test for OR query: Segment should never be pruned, as there is an OR with the non-partitioned column.
      query = buildAndQuery(TABLE_NAME, PARTITIONED_COLUMN_NAME, partitionColumnValue, NON_PARTITIONED_COLUMN_NAME,
          nonPartitionColumnValue, FilterOperator.OR);
      brokerRequest = compiler.compileToBrokerRequest(query);
      filterQueryTree = RequestUtils.generateFilterQueryTree(brokerRequest);
      Assert.assertFalse(pruner.prune(_segment, filterQueryTree));
    }
  }

  /**
   * Unit test for the utility that converts String ranges into IntRanges and back.
   * <ul>
   *   <li> Generates a list of String ranges. </li>
   *   <li> Ensures that conversion to IntRanges is as expected. </li>
   *   <li> Ensures that the IntRanges, when converted back to String ranges, are as expected. </li>
   * </ul>
   */
  @Test
  public void testStringRangeConversions() {
    Random random = new Random();

    for (int i = 0; i < 1000; i++) {
      int numRanges = 1 + random.nextInt(1000);
      String[] ranges = new String[numRanges];
      List<IntRange> expected = new ArrayList<>(numRanges);

      StringBuilder builder = new StringBuilder();
      for (int j = 0; j < numRanges; j++) {
        int start = random.nextInt();
        int end = random.nextInt();

        // Generate random ranges such that start <= end.
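        // XOR swap: exchanges start and end in place, without a temporary variable.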
        if (start > end) {
          start ^= end;
          end = start ^ end;
          start = start ^ end;
        }

        ranges[j] = buildRangeString(start, end);
        expected.add(new IntRange(start, end));

        builder.append(ranges[j]);
        if (j < numRanges - 1) {
          builder.append(ColumnPartitionConfig.PARTITION_VALUE_DELIMITER);
        }
      }

      String expectedString = builder.toString();
      List<IntRange> actual = ColumnPartitionConfig.rangesFromString(ranges);
      Assert.assertEquals(actual, expected);

      String actualString = ColumnPartitionConfig.rangesToString(actual);
      Assert.assertEquals(actualString, expectedString);
    }
  }

  /**
   * Unit test for {@link SegmentPartitionConfig} that tests the following:
   * <ul>
   *   <li> Conversion from/to JSON string. </li>
   *   <li> Function names, values and ranges are as expected. </li>
   * </ul>
   * @throws IOException
   */
  @Test
  public void testSegmentPartitionConfig()
      throws IOException {
    Map<String, ColumnPartitionConfig> expectedMap = new HashMap<>();

    for (int i = 0; i < 10; i++) {
      String partitionColumn = "column_" + i;
      String partitionFunction = "function_" + i;
      expectedMap.put(partitionColumn, new ColumnPartitionConfig(partitionFunction, i + 1));
    }

    SegmentPartitionConfig expectedConfig = new SegmentPartitionConfig(expectedMap);
    SegmentPartitionConfig actualConfig = SegmentPartitionConfig.fromJsonString(expectedConfig.toJsonString());

    for (Map.Entry<String, ColumnPartitionConfig> entry : actualConfig.getColumnPartitionMap().entrySet()) {
      String partitionColumn = entry.getKey();
      ColumnPartitionConfig expectedColumnConfig = expectedMap.get(partitionColumn);
      Assert.assertNotNull(expectedColumnConfig);

      ColumnPartitionConfig actualColumnConfig = entry.getValue();
      Assert.assertEquals(actualColumnConfig.getFunctionName(), expectedColumnConfig.getFunctionName());
    }

    // Test that adding new fields does not break JSON de-serialization.
    String jsonStringWithNewField =
        "{\"columnPartitionMap\":{\"column_0\":{\"functionName\":\"function\",\"numPartitions\":10,\"newField\":\"newValue\"}}}";
    String jsonStringWithoutNewField =
        "{\"columnPartitionMap\":{\"column_0\":{\"functionName\":\"function\",\"numPartitions\":10}}}";
    Assert.assertEquals(SegmentPartitionConfig.fromJsonString(jsonStringWithNewField).toJsonString(),
        jsonStringWithoutNewField);
  }

  private String buildQuery(String tableName, String columnName, int predicateValue) {
    return "select count(*) from " + tableName + " where " + columnName + " = " + predicateValue;
  }

  private String buildAndQuery(String tableName, String partitionColumn, int partitionedColumnValue,
      String nonPartitionColumn, int nonPartitionedColumnValue, FilterOperator operator) {
    return "select count(*) from " + tableName + " where " + partitionColumn + " = " + partitionedColumnValue + " "
        + operator + " " + nonPartitionColumn + " = " + nonPartitionedColumnValue;
  }

  private String buildRangeString(int start, int end) {
    return "[" + start + " " + end + "]";
  }

  /**
   * Helper method to build a segment for testing:
   * <ul>
   *   <li> The first column is partitioned correctly, as per the specification in the segment generation
   *        config. </li>
   *   <li> The second column does not have any partitioning config in the segment generation config. </li>
   * </ul>
   * @throws Exception
   */
  private void buildSegment()
      throws Exception {
    Schema schema = new Schema();
    schema.addField(new DimensionFieldSpec(PARTITIONED_COLUMN_NAME, FieldSpec.DataType.INT, true));
    schema.addField(new DimensionFieldSpec(NON_PARTITIONED_COLUMN_NAME, FieldSpec.DataType.INT, true));

    Random random = new Random();
    Map<String, ColumnPartitionConfig> partitionFunctionMap = new HashMap<>();
    partitionFunctionMap.put(PARTITIONED_COLUMN_NAME,
        new ColumnPartitionConfig(EXPECTED_PARTITION_FUNCTION, NUM_PARTITIONS));
    SegmentPartitionConfig segmentPartitionConfig = new SegmentPartitionConfig(partitionFunctionMap);

    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setOutDir(SEGMENT_DIR_NAME);
    config.setSegmentName(SEGMENT_NAME);
    config.setTableName(TABLE_NAME);
    config.setSegmentPartitionConfig(segmentPartitionConfig);

    final List<GenericRow> rows = new ArrayList<>();
    for (int row = 0; row < NUM_ROWS; row++) {
      HashMap<String, Object> map = new HashMap<>();
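      // Generate values that comply with the partitioning scheme: the first term is a multiple of
      // NUM_PARTITIONS (20), and the second adds a remainder in [0, PARTITION_DIVISOR - 1], so the
      // value's partition (value % 20) always lies in the expected range [0 4].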
      int validPartitionedValue = random.nextInt(100) * 20 + random.nextInt(PARTITION_DIVISOR);
      map.put(PARTITIONED_COLUMN_NAME, validPartitionedValue);
      map.put(NON_PARTITIONED_COLUMN_NAME, validPartitionedValue);

      GenericRow genericRow = new GenericRow();
      genericRow.init(map);
      rows.add(genericRow);
    }

    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    RecordReader reader = new TestRecordReader(rows, schema);
    driver.init(config, reader);
    driver.build();

    _segment = Loaders.IndexSegment.load(new File(SEGMENT_PATH), ReadMode.mmap);
  }
}