/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.segment.index.creator;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Random;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang3.RandomStringUtils;
import org.testng.Assert;
import org.testng.annotations.AfterClass;
import org.testng.annotations.BeforeClass;
import org.testng.annotations.Test;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.common.utils.StringUtil;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.data.readers.RecordReader;
import com.linkedin.pinot.core.data.readers.TestRecordReader;
import com.linkedin.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
import com.linkedin.pinot.core.io.compression.ChunkCompressorFactory;
import com.linkedin.pinot.core.io.compression.ChunkDecompressor;
import com.linkedin.pinot.core.io.reader.impl.ChunkReaderContext;
import com.linkedin.pinot.core.io.reader.impl.v1.FixedByteChunkSingleValueReader;
import com.linkedin.pinot.core.io.reader.impl.v1.VarByteChunkSingleValueReader;
import com.linkedin.pinot.core.segment.creator.impl.SegmentIndexCreationDriverImpl;
import com.linkedin.pinot.core.segment.memory.PinotDataBuffer;
import com.linkedin.pinot.core.segment.store.ColumnIndexType;
import com.linkedin.pinot.core.segment.store.SegmentDirectory;
/**
* Class for testing Raw index creators.
*/
public class RawIndexCreatorTest {
  private static final int NUM_ROWS = 10009;
  private static final int MAX_STRING_LENGTH = 101;

  // Exclusive upper bound on the number of characters in a generated random string.
  // Presumably divided by 4 so the UTF-8 encoding of a generated string stays within MAX_STRING_LENGTH bytes
  // (the original comment read "margin for UTF-8 chars") - TODO confirm against the raw index creator's limit.
  private static final int MAX_RANDOM_STRING_CHARS = MAX_STRING_LENGTH / 4;

  private static final String SEGMENT_DIR_NAME = System.getProperty("java.io.tmpdir") + File.separator + "fwdIndexTest";
  private static final String SEGMENT_NAME = "testSegment";

  private static final String INT_COLUMN = "intColumn";
  private static final String LONG_COLUMN = "longColumn";
  private static final String FLOAT_COLUMN = "floatColumn";
  private static final String DOUBLE_COLUMN = "doubleColumn";
  private static final String STRING_COLUMN = "stringColumn";

  // Seed of _random, kept so that a failing run can be reproduced from the assertion message.
  private long _seed;
  Random _random;
  private RecordReader _recordReader;
  SegmentDirectory _segmentDirectory;
  private SegmentDirectory.Reader _segmentReader;

  /**
   * Builds a segment in which every column (int, long, float, double and string) is declared as a
   * single-value dimension and is requested as a raw (no-dictionary) index.
   *
   * @throws Exception If the segment cannot be built or opened
   */
  @BeforeClass
  public void setup()
      throws Exception {
    // Start from a clean slate in case an earlier run was aborted before cleanup() could execute.
    FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME));

    Schema schema = new Schema();
    schema.addField(new DimensionFieldSpec(INT_COLUMN, FieldSpec.DataType.INT, true));
    schema.addField(new DimensionFieldSpec(LONG_COLUMN, FieldSpec.DataType.LONG, true));
    schema.addField(new DimensionFieldSpec(FLOAT_COLUMN, FieldSpec.DataType.FLOAT, true));
    schema.addField(new DimensionFieldSpec(DOUBLE_COLUMN, FieldSpec.DataType.DOUBLE, true));
    schema.addField(new DimensionFieldSpec(STRING_COLUMN, FieldSpec.DataType.STRING, true));

    _seed = System.nanoTime();
    _random = new Random(_seed);
    _recordReader = buildIndex(schema);
  }

  /**
   * Deletes the segment output directory after all tests have run.
   */
  @AfterClass
  public void cleanup() {
    // NOTE(review): _segmentReader and _segmentDirectory are never released here. If they expose close(),
    // they should be closed before the directory is deleted - confirm against the SegmentDirectory API.
    FileUtils.deleteQuietly(new File(SEGMENT_DIR_NAME));
  }

  /**
   * Test for int raw index creator.
   * Compares values read from the raw index against the values that were indexed.
   *
   * @throws Exception If the index cannot be read
   */
  @Test
  public void testIntRawIndexCreator()
      throws Exception {
    testFixedLengthRawIndexCreator(INT_COLUMN, FieldSpec.DataType.INT);
  }

  /**
   * Test for long raw index creator.
   * Compares values read from the raw index against the values that were indexed.
   *
   * @throws Exception If the index cannot be read
   */
  @Test
  public void testLongRawIndexCreator()
      throws Exception {
    testFixedLengthRawIndexCreator(LONG_COLUMN, FieldSpec.DataType.LONG);
  }

  /**
   * Test for float raw index creator.
   * Compares values read from the raw index against the values that were indexed.
   *
   * @throws Exception If the index cannot be read
   */
  @Test
  public void testFloatRawIndexCreator()
      throws Exception {
    testFixedLengthRawIndexCreator(FLOAT_COLUMN, FieldSpec.DataType.FLOAT);
  }

  /**
   * Test for double raw index creator.
   * Compares values read from the raw index against the values that were indexed.
   *
   * @throws Exception If the index cannot be read
   */
  @Test
  public void testDoubleRawIndexCreator()
      throws Exception {
    testFixedLengthRawIndexCreator(DOUBLE_COLUMN, FieldSpec.DataType.DOUBLE);
  }

  /**
   * Test for string raw index creator.
   * Compares values read from the raw index against the values that were indexed.
   *
   * @throws Exception If the index cannot be read
   */
  @Test
  public void testStringRawIndexCreator()
      throws Exception {
    PinotDataBuffer indexBuffer = getIndexBufferForColumn(STRING_COLUMN);
    ChunkDecompressor uncompressor = ChunkCompressorFactory.getDecompressor("snappy");
    VarByteChunkSingleValueReader rawIndexReader = new VarByteChunkSingleValueReader(indexBuffer, uncompressor);

    _recordReader.rewind();
    ChunkReaderContext context = rawIndexReader.createContext();
    for (int row = 0; row < NUM_ROWS; row++) {
      GenericRow expectedRow = _recordReader.next();
      Object expected = expectedRow.getValue(STRING_COLUMN);
      Object actual = rawIndexReader.getString(row, context);
      Assert.assertEquals(actual, expected, mismatchMessage(STRING_COLUMN, row));
    }
  }

  /**
   * Helper method to perform the actual test for a fixed-width column: walks the record reader from the
   * start and compares each row's value against the value read from the raw index at the same row.
   *
   * @param column Column for which to perform the test
   * @param dataType Data type of the column
   * @throws Exception If the index cannot be read
   */
  private void testFixedLengthRawIndexCreator(String column, FieldSpec.DataType dataType)
      throws Exception {
    PinotDataBuffer indexBuffer = getIndexBufferForColumn(column);
    FixedByteChunkSingleValueReader rawIndexReader =
        new FixedByteChunkSingleValueReader(indexBuffer, ChunkCompressorFactory.getDecompressor("snappy"));

    _recordReader.rewind();
    // One context for the whole scan (as in the string test) instead of allocating a new one per row.
    ChunkReaderContext context = rawIndexReader.createContext();
    for (int row = 0; row < NUM_ROWS; row++) {
      GenericRow expectedRow = _recordReader.next();
      Object expected = expectedRow.getValue(column);
      Object actual = readValueFromIndex(rawIndexReader, context, dataType, row);
      Assert.assertEquals(actual, expected, mismatchMessage(column, row));
    }
  }

  /**
   * Builds the message attached to a failed value comparison.
   *
   * @param column Column being verified
   * @param row Row being verified
   * @return Message naming the column, the row and the random seed used to generate the data
   */
  private String mismatchMessage(String column, int row) {
    return "Value mismatch for column: " + column + ", row: " + row + ", random seed: " + _seed;
  }

  /**
   * Helper method that returns the forward index buffer of the given column from the segment built in setup.
   *
   * @param column Column name for which to get the forward index buffer
   * @return Forward index data buffer for the given column
   * @throws IOException If the segment reader fails to provide the buffer
   */
  private PinotDataBuffer getIndexBufferForColumn(String column)
      throws IOException {
    return _segmentReader.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
  }

  /**
   * Helper method to build a segment of {@link #NUM_ROWS} random rows in which all dimension columns of the
   * schema use a RAW (no-dictionary) index. As a side effect, opens the generated segment and stores the
   * segment directory and its reader in the corresponding fields.
   *
   * @param schema Schema of the segment to build
   * @return Record reader over the rows that were indexed, rewound to the first row
   * @throws Exception If the segment cannot be built or opened
   */
  private RecordReader buildIndex(Schema schema)
      throws Exception {
    SegmentGeneratorConfig config = new SegmentGeneratorConfig(schema);
    config.setRawIndexCreationColumns(schema.getDimensionNames());
    config.setOutDir(SEGMENT_DIR_NAME);
    config.setSegmentName(SEGMENT_NAME);

    final List<GenericRow> rows = new ArrayList<>(NUM_ROWS);
    for (int row = 0; row < NUM_ROWS; row++) {
      HashMap<String, Object> map = new HashMap<>();
      for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
        map.put(fieldSpec.getName(), getRandomValue(_random, fieldSpec.getDataType()));
      }

      GenericRow genericRow = new GenericRow();
      genericRow.init(map);
      rows.add(genericRow);
    }

    SegmentIndexCreationDriverImpl driver = new SegmentIndexCreationDriverImpl();
    RecordReader reader = new TestRecordReader(rows, schema);
    driver.init(config, reader);
    driver.build();

    _segmentDirectory = SegmentDirectory.createFromLocalFS(driver.getOutputDirectory(), ReadMode.mmap);
    _segmentReader = _segmentDirectory.createReader();

    // The driver has consumed the reader; hand it back positioned at the first row.
    reader.rewind();
    return reader;
  }

  /**
   * Helper method that generates a random value for a given data type.
   *
   * @param random Source of randomness
   * @param dataType Data type for which to generate the random value
   * @return Random value for the data type; for STRING, a random string of fewer than
   *         {@code MAX_STRING_LENGTH / 4} requested characters with trailing nulls trimmed
   * @throws IllegalArgumentException If the data type is not one of INT, LONG, FLOAT, DOUBLE or STRING
   */
  public static Object getRandomValue(Random random, FieldSpec.DataType dataType) {
    switch (dataType) {
      case INT:
        return random.nextInt();
      case LONG:
        return random.nextLong();
      case FLOAT:
        return random.nextFloat();
      case DOUBLE:
        return random.nextDouble();
      case STRING:
        return StringUtil.trimTrailingNulls(RandomStringUtils.random(random.nextInt(MAX_RANDOM_STRING_CHARS)));
      default:
        throw new IllegalArgumentException("Illegal data type for random value generator: " + dataType);
    }
  }

  /**
   * Helper method to read the value for the given row from a fixed-width raw index.
   *
   * @param rawIndexReader Index reader
   * @param context Reader context created from {@code rawIndexReader}, reused across rows
   * @param dataType Data type of value to be read
   * @param row Row to read
   * @return Value read from index
   * @throws IllegalArgumentException If the data type is not one of INT, LONG, FLOAT or DOUBLE
   */
  private Object readValueFromIndex(FixedByteChunkSingleValueReader rawIndexReader, ChunkReaderContext context,
      FieldSpec.DataType dataType, int row) {
    switch (dataType) {
      case INT:
        return rawIndexReader.getInt(row, context);
      case LONG:
        return rawIndexReader.getLong(row, context);
      case FLOAT:
        return rawIndexReader.getFloat(row, context);
      case DOUBLE:
        return rawIndexReader.getDouble(row, context);
      default:
        throw new IllegalArgumentException("Illegal data type for fixed width raw index reader: " + dataType);
    }
  }
}