/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.data.readers;
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import org.apache.commons.configuration.ConfigurationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.linkedin.pinot.common.data.DimensionFieldSpec;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.data.FieldSpec.DataType;
import com.linkedin.pinot.common.data.FieldSpec.FieldType;
import com.linkedin.pinot.common.data.MetricFieldSpec;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.data.TimeFieldSpec;
import com.linkedin.pinot.common.data.TimeGranularitySpec;
import com.linkedin.pinot.common.segment.ReadMode;
import com.linkedin.pinot.core.data.GenericRow;
import com.linkedin.pinot.core.io.reader.SingleColumnMultiValueReader;
import com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader;
import com.linkedin.pinot.core.io.reader.impl.FixedByteSingleValueMultiColReader;
import com.linkedin.pinot.core.io.reader.impl.SortedForwardIndexReader;
import com.linkedin.pinot.core.io.reader.impl.v1.FixedBitMultiValueReader;
import com.linkedin.pinot.core.io.reader.impl.v1.FixedBitSingleValueReader;
import com.linkedin.pinot.core.segment.index.ColumnMetadata;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.core.segment.index.readers.Dictionary;
import com.linkedin.pinot.core.segment.index.readers.DoubleDictionary;
import com.linkedin.pinot.core.segment.index.readers.FloatDictionary;
import com.linkedin.pinot.core.segment.index.readers.ImmutableDictionaryReader;
import com.linkedin.pinot.core.segment.index.readers.IntDictionary;
import com.linkedin.pinot.core.segment.index.readers.LongDictionary;
import com.linkedin.pinot.core.segment.index.readers.StringDictionary;
import com.linkedin.pinot.core.segment.memory.PinotDataBuffer;
import com.linkedin.pinot.core.segment.store.ColumnIndexType;
import com.linkedin.pinot.core.segment.store.SegmentDirectory;
import com.linkedin.pinot.core.segment.store.SegmentDirectory.Reader;
/**
* Record reader to read pinot segment and generate GenericRows
*/
public class PinotSegmentRecordReader extends BaseRecordReader {
private static final Logger LOGGER = LoggerFactory.getLogger(PinotSegmentRecordReader.class);
private SegmentMetadataImpl segmentMetadata;
private int totalDocs;
private Set<String> columns;
private Map<String, SingleColumnSingleValueReader> singleValueReaderMap;
private Map<String, SingleColumnMultiValueReader> multiValueReaderMap;
private Map<String, SortedForwardIndexReader> singleValueSortedReaderMap;
private Map<String, Dictionary> pinotDictionaryBufferMap;
private Map<String, DataType> columnDataTypeMap;
private Map<String, int[]> multiValueArrayMap;
private Map<String, Boolean> isSingleValueMap;
private Map<String, Boolean> isSortedMap;
private int docNumber;
public PinotSegmentRecordReader(File segmentIndexDir) throws IOException, ConfigurationException {
segmentMetadata = new SegmentMetadataImpl(segmentIndexDir);
SegmentDirectory segmentDirectory = SegmentDirectory.createFromLocalFS(segmentIndexDir, segmentMetadata, ReadMode.heap);
totalDocs = segmentMetadata.getTotalDocs();
columns = segmentMetadata.getAllColumns();
Reader reader = segmentDirectory.createReader();
singleValueReaderMap = new HashMap<>();
multiValueReaderMap = new HashMap<>();
singleValueSortedReaderMap = new HashMap<>();
pinotDictionaryBufferMap = new HashMap<>();
columnDataTypeMap = new HashMap<>();
multiValueArrayMap = new HashMap<>();
isSingleValueMap = new HashMap<>();
isSortedMap = new HashMap<>();
for (String column : columns) {
ColumnMetadata columnMetadataFor = segmentMetadata.getColumnMetadataFor(column);
isSingleValueMap.put(column, columnMetadataFor.isSingleValue());
isSortedMap.put(column, columnMetadataFor.isSorted());
if (columnMetadataFor.isSingleValue() && !columnMetadataFor.isSorted()) {
PinotDataBuffer fwdIndexBuffer = reader.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
SingleColumnSingleValueReader fwdIndexReader =
new FixedBitSingleValueReader(fwdIndexBuffer, columnMetadataFor.getTotalDocs(),
columnMetadataFor.getBitsPerElement(), columnMetadataFor.hasNulls());
singleValueReaderMap.put(column, fwdIndexReader);
} else if (columnMetadataFor.isSingleValue() && columnMetadataFor.isSorted()) {
PinotDataBuffer dataBuffer = reader.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
FixedByteSingleValueMultiColReader indexReader = new FixedByteSingleValueMultiColReader(
dataBuffer, columnMetadataFor.getCardinality(), new int[] {
4, 4
});
SortedForwardIndexReader fwdIndexReader = new SortedForwardIndexReader(indexReader, totalDocs);
singleValueSortedReaderMap.put(column, fwdIndexReader);
} else {
PinotDataBuffer fwdIndexBuffer = reader.getIndexFor(column, ColumnIndexType.FORWARD_INDEX);
SingleColumnMultiValueReader fwdIndexReader =
new FixedBitMultiValueReader(fwdIndexBuffer, segmentMetadata.getTotalDocs(),
columnMetadataFor.getTotalNumberOfEntries(), columnMetadataFor.getBitsPerElement(), false);
multiValueReaderMap.put(column, fwdIndexReader);
}
DataType dataType = columnMetadataFor.getDataType();
PinotDataBuffer dictionaryBuffer = reader.getIndexFor(column, ColumnIndexType.DICTIONARY);
switch (dataType) {
case BOOLEAN:
pinotDictionaryBufferMap.put(column, new StringDictionary(dictionaryBuffer, columnMetadataFor));
break;
case DOUBLE:
pinotDictionaryBufferMap.put(column, new DoubleDictionary(dictionaryBuffer, columnMetadataFor));
break;
case FLOAT:
pinotDictionaryBufferMap.put(column, new FloatDictionary(dictionaryBuffer, columnMetadataFor));
break;
case INT:
pinotDictionaryBufferMap.put(column, new IntDictionary(dictionaryBuffer, columnMetadataFor));
break;
case LONG:
pinotDictionaryBufferMap.put(column, new LongDictionary(dictionaryBuffer, columnMetadataFor));
break;
case STRING:
pinotDictionaryBufferMap.put(column, new StringDictionary(dictionaryBuffer, columnMetadataFor));
break;
case INT_ARRAY:
case BYTE:
case BYTE_ARRAY:
case CHAR:
case CHAR_ARRAY:
case DOUBLE_ARRAY:
case FLOAT_ARRAY:
case LONG_ARRAY:
case OBJECT:
case SHORT:
case SHORT_ARRAY:
case STRING_ARRAY:
default:
LOGGER.error("Unsupported data type {}", dataType);
break;
}
if (!isSingleValueMap.get(column)) {
int[] intArray = new int[columnMetadataFor.getMaxNumberOfMultiValues()];
multiValueArrayMap.put(column, intArray);
}
columnDataTypeMap.put(column, dataType);
}
}
@Override
public void init() throws Exception {
docNumber = 0;
}
@Override
public void rewind() throws Exception {
init();
}
@Override
public boolean hasNext() {
return docNumber < totalDocs;
}
@Override
public Schema getSchema() {
Schema schema = new Schema();
schema.setSchemaName(segmentMetadata.getName());
for (String column : columns) {
ColumnMetadata columnMetadata = segmentMetadata.getColumnMetadataFor(column);
String columnName = columnMetadata.getColumnName();
DataType dataType = columnMetadata.getDataType();
FieldType fieldType = columnMetadata.getFieldType();
FieldSpec fieldSpec = null;
switch (fieldType) {
case DIMENSION:
boolean isSingleValue = columnMetadata.isSingleValue();
fieldSpec = new DimensionFieldSpec(columnName, dataType, isSingleValue);
break;
case METRIC:
fieldSpec = new MetricFieldSpec(columnName, dataType);
break;
case TIME:
TimeUnit timeType = columnMetadata.getTimeUnit();
TimeGranularitySpec incomingGranularitySpec = new TimeGranularitySpec(dataType, timeType, columnName);
fieldSpec = new TimeFieldSpec(incomingGranularitySpec);
break;
default:
break;
}
schema.addField(fieldSpec);
}
return schema;
}
@Override
public GenericRow next() {
return next(new GenericRow());
}
@Override
public GenericRow next(GenericRow row) {
for (String column : columns) {
Dictionary dictionary = pinotDictionaryBufferMap.get(column);
if (isSingleValueMap.get(column)) {
// Single-value column.
if (!isSortedMap.get(column)) {
row.putField(column, dictionary.get(singleValueReaderMap.get(column).getInt(docNumber)));
} else {
row.putField(column, dictionary.get(singleValueSortedReaderMap.get(column).getInt(docNumber)));
}
} else {
// Multi-value column.
int[] dictionaryIdArray = multiValueArrayMap.get(column);
int numValues = multiValueReaderMap.get(column).getIntArray(docNumber, dictionaryIdArray);
Object[] objectArray = new Object[numValues];
for (int i = 0; i < numValues; i++) {
objectArray[i] = dictionary.get(dictionaryIdArray[i]);
}
row.putField(column, objectArray);
}
}
docNumber++;
return row;
}
@Override
public void close() throws Exception {
for (Entry<String, Dictionary> entry : pinotDictionaryBufferMap.entrySet()) {
ImmutableDictionaryReader dictionary = (ImmutableDictionaryReader) entry.getValue();
if (dictionary != null) {
dictionary.close();
}
}
for (Entry<String, SingleColumnSingleValueReader> entry : singleValueReaderMap.entrySet()) {
SingleColumnSingleValueReader reader = entry.getValue();
if (reader != null) {
reader.close();
}
}
for (Entry<String, SortedForwardIndexReader> entry : singleValueSortedReaderMap.entrySet()) {
SortedForwardIndexReader reader = entry.getValue();
if (reader != null) {
reader.close();
}
}
for (Entry<String, SingleColumnMultiValueReader> entry : multiValueReaderMap.entrySet()) {
SingleColumnMultiValueReader reader = entry.getValue();
if (reader != null) {
reader.close();
}
}
segmentMetadata.close();
}
}