RawIndexCreatorTest.java example

Explorer
pinot-master
/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.segment.index.creator;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Random;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomStringUtils;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.StringUtil;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.readers.RecordReader;
import com.linkedin.pinot.core.data.readers.TestRecordReader;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.io.compression.ChunkCompressorFactory;
import com.linkedin.pinot.core.io.compression.ChunkDecompressor;
import com.linkedin.pinot.core.io.reader.impl.ChunkReaderContext;
import com.linkedin.pinot.core.io.reader.impl.v1.FixedByteChunkSingleValueReader;
import com.linkedin.pinot.core.io.reader.impl.v1.VarByteChunkSingleValueReader;
import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import com.linkedin.pinot.core.segment.memory.PinotDataBuffer;
import com.linkedin.pinot.core.segment.store.ColumnIndexType;
import com.linkedin.pinot.core.segment.store.SegmentDirectory;


/**
 * Class for testing Raw index creators.
 */
public class RawIndexCreatorTest {

  private static final int NUM_ROWS = 10009;
  private static final int MAX_STRING_LENGTH = 101;
  private static final int MAX_STRING_LENGTH_IN_BYTES = MAX_STRING_LENGTH / 4; // margin for UTF-8 chars.

  private static final String SEGMENT_DIR_NAME = System.getProperty("java.io.tmpdir") + File.separator + "fwdIndexTest";
  private static final String SEGMENT_NAME = "testSegment";

  private static final String INT_COLUMN = "intColumn";
  private static final String LONG_COLUMN = "longColumn";
  private static final String FLOAT_COLUMN = "floatColumn";
  private static final String DOUBLE_COLUMN = "doubleColumn";
  private static final String STRING_COLUMN = "stringColumn";

  Random _random;
  private RecordReader _recordReader;
  SegmentDirectory _segmentDirectory;
  private SegmentDirectory.Reader _segmentReader;

  /**
   * Setup to build a segment with raw indexes (no-dictionary) of various data types.
   *
   * @throws Exception
   */
  @BeforeClass
  public void setup()
      throws Exception {

    Schema schema = new Schema();
    schema.addField(new DimensionFieldSpec(INT_COLUMN, FieldSpec.DataType.INT, true));
    schema.addField(new DimensionFieldSpec(LONG_COLUMN, FieldSpec.DataType.LONG, true));
    schema.addField(new DimensionFieldSpec(FLOAT_COLUMN, FieldSpec.DataType.FLOAT, true));
    schema.addField(new DimensionFieldSpec(DOUBLE_COLUMN, FieldSpec.DataType.DOUBLE, true));
    schema.addField(new DimensionFieldSpec(STRING_COLUMN, FieldSpec.DataType.STRING, true));

    _random = new Random(System.nanoTime());
    _recordReader = buildIndex(schema);
  }

  /**
   * Clean up after test
   */
  @AfterClass
  public void cleanup() {
    FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME));
  }

  /**
   * Test for int raw index creator.
   * Compares values read from the raw index against expected value.
   * @throws Exception
   */
  @Test
  public void testIntRawIndexCreator()
      throws Exception {
    testFixedLengthRawIndexCreator(INT_COLUMN, FieldSpec.DataType.INT);
  }

  /**
   * Test for long raw index creator.
   * Compares values read from the raw index against expected value.
   * @throws Exception
   */
  @Test
  public void testLongRawIndexCreator()
      throws Exception {
    testFixedLengthRawIndexCreator(LONG_COLUMN, FieldSpec.DataType.LONG);
  }

  /**
   * Test for float raw index creator.
   * Compares values read from the raw index against expected value.
   * @throws Exception
   */
  @Test
  public void testFloatRawIndexCreator()
      throws Exception {
    testFixedLengthRawIndexCreator(FLOAT_COLUMN, FieldSpec.DataType.FLOAT);
  }

  /**
   * Test for double raw index creator.
   * Compares values read from the raw index against expected value.
   * @throws Exception
   */
  @Test
  public void testDoubleRawIndexCreator()
      throws Exception {
    testFixedLengthRawIndexCreator(DOUBLE_COLUMN, FieldSpec.DataType.DOUBLE);
  }

  /**
   * Test for string raw index creator.
   * Compares values read from the raw index against expected value.
   * @throws Exception
   */
  @Test
  public void testStringRawIndexCreator()
      throws Exception {
    PinotDataBuffer indexBuffer = getIndexBufferForColumn(STRING_COLUMN);

    ChunkDecompressor uncompressor = ChunkCompressorFactory.getDecompressor("snappy");
    VarByteChunkSingleValueReader rawIndexReader = new VarByteChunkSingleValueReader(indexBuffer, uncompressor);

    _recordReader.rewind();
    ChunkReaderContext context = rawIndexReader.createContext();
    for (int row = 0; row < NUM_ROWS; row++) {
      GenericRow expectedRow = _recordReader.next();
      Object expected = expectedRow.getValue(STRING_COLUMN);
      Object actual = rawIndexReader.getString(row, context);
      Assert.assertEquals(actual, expected);
    }
  }

  /**
   * Helper method to perform actual tests for a given column.
   *
   * @param column Column for which to perform the test
   * @param dataType Data type of the column
   * @throws Exception
   */
  private void testFixedLengthRawIndexCreator(String column, FieldSpec.DataType dataType)
      throws Exception {
    PinotDataBuffer indexBuffer = getIndexBufferForColumn(column);

    FixedByteChunkSingleValueReader rawIndexReader = new FixedByteChunkSingleValueReader(indexBuffer,
        ChunkCompressorFactory.getDecompressor("snappy"));

    _recordReader.rewind();
    for (int row = 0; row < NUM_ROWS; row++) {
      GenericRow expectedRow = _recordReader.next();
      Object expected = expectedRow.getValue(column);

      Object actual;
      actual = readValueFromIndex(rawIndexReader, dataType, row);
      Assert.assertEquals(actual, expected);
    }
  }

  /**
   * Helper method that returns index file name for a given column name.
   *
   * @param column Column name for which to get the index file name
   * @return Name of index file for the given column name
   */
  private PinotDataBuffer getIndexBufferForColumn(String column)
      throws IOException {
    return _segmentReader.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
  }

  /**
   * Helper method to build a segment containing a single valued string column with RAW (no-dictionary) index.
   *
   * @return Array of string values for the rows in the generated index.
   * @throws Exception
   */
  private RecordReader buildIndex(Schema schema)
      throws Exception {
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setRawIndexCreationColumns(schema.getDimensionNames());

    config.setOutDir(SEGMENT_DIR_NAME);
    config.setSegmentName(SEGMENT_NAME);

    final List<GenericRow> rows = new ArrayList<>();
    for (int row = 0; row < NUM_ROWS; row++) {
      HashMap<String, Object> map = new HashMap<>();

      for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
        Object value;

        value = getRandomValue(_random, fieldSpec.getDataType());
        map.put(fieldSpec.getName(), value);
      }

      GenericRow genericRow = new GenericRow();
      genericRow.init(map);
      rows.add(genericRow);
    }

    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    RecordReader reader = new TestRecordReader(rows, schema);
    driver.init(config, reader);
    driver.build();
    _segmentDirectory = SegmentDirectory.createFromLocalFS(driver.getOutputDirectory(), ReadMode.mmap);
    _segmentReader = _segmentDirectory.createReader();
    reader.rewind();
    return reader;
  }

  /**
   * Helper method that generates a random value for a given data type
   *
   * @param dataType Data type for which to generate the random value
   * @return Random value for the data type.
   */
  public static Object getRandomValue(Random random, FieldSpec.DataType dataType) {
    Object value;
    switch (dataType) {
      case INT:
        value = random.nextInt();
        break;

      case LONG:
        value = random.nextLong();
        break;

      case FLOAT:
        value = random.nextFloat();
        break;

      case DOUBLE:
        value = random.nextDouble();
        break;

      case STRING:
        value = StringUtil.trimTrailingNulls(RandomStringUtils.random(random.nextInt(MAX_STRING_LENGTH_IN_BYTES)));
        break;

      default:
        throw new IllegalArgumentException("Illegal data type for random value generator: " + dataType);
    }
    return value;
  }

  /**
   * Helper method to reader value for the given row.
   *
   * @param rawIndexReader Index reader
   * @param dataType Data type of value to be read
   * @param row Row to read
   * @return Value read from index
   */
  private Object readValueFromIndex(FixedByteChunkSingleValueReader rawIndexReader, FieldSpec.DataType dataType,
      int row) {
    Object actual;
    ChunkReaderContext context = rawIndexReader.createContext();
    switch (dataType) {
      case INT:
        actual = rawIndexReader.getInt(row, context);
        break;

      case LONG:
        actual = rawIndexReader.getLong(row, context);
        break;

      case FLOAT:
        actual = rawIndexReader.getFloat(row, context);
        break;

      case DOUBLE:
        actual = rawIndexReader.getDouble(row, context);
        break;

      default:
        throw new IllegalArgumentException("Illegal data type for fixed width raw index reader: " + dataType);
    }
    return actual;
  }
}