/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.realtime.converter.stats;
import com.linkedin.pinot.common.config.ColumnPartitionConfig;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.core.common.Block;
import com.linkedin.pinot.core.common.BlockMultiValIterator;
import com.linkedin.pinot.core.data.partition.PartitionFunction;
import com.linkedin.pinot.core.data.partition.PartitionFunctionFactory;
import com.linkedin.pinot.core.io.reader.SingleColumnSingleValueReader;
import com.linkedin.pinot.core.operator.blocks.RealtimeSingleValueBlock;
import com.linkedin.pinot.core.realtime.impl.datasource.RealtimeColumnDataSource;
import com.linkedin.pinot.core.realtime.impl.dictionary.BaseOnHeapMutableDictionary;
import com.linkedin.pinot.core.segment.creator.ColumnStatistics;
import java.util.Arrays;
import java.util.List;
import org.apache.commons.lang.math.IntRange;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Column statistics for a column coming from an in-memory realtime segment.
*/
public class RealtimeColumnStatistics implements ColumnStatistics {
  private static final Logger LOGGER = LoggerFactory.getLogger(RealtimeColumnStatistics.class);

  private final RealtimeColumnDataSource _dataSource;
  // Optional doc id permutation; when null, documents are visited in natural order (0, 1, 2, ...)
  private final int[] _sortedDocIdIterationOrder;
  private final BaseOnHeapMutableDictionary _dictionaryReader;
  private final Block _block;

  private PartitionFunction partitionFunction;
  private int numPartitions;
  // Start > end means "no partition recorded yet"; see getPartitionRanges()
  private int partitionRangeStart = Integer.MAX_VALUE;
  private int partitionRangeEnd = Integer.MIN_VALUE;

  /**
   * Builds the statistics view over the given data source.
   *
   * @param dataSource Data source supplying the dictionary and the block for the column
   * @param sortedDocIdIterationOrder Order in which doc ids are visited by {@link #isSorted()}, or null to visit
   *                                  them in natural order
   * @param columnPartitionConfig Partition config for the column, or null if the column is not partitioned
   */
  public RealtimeColumnStatistics(RealtimeColumnDataSource dataSource, int[] sortedDocIdIterationOrder,
      ColumnPartitionConfig columnPartitionConfig) {
    _dataSource = dataSource;
    _sortedDocIdIterationOrder = sortedDocIdIterationOrder;
    _dictionaryReader = dataSource.getDictionary();
    _block = dataSource.getNextBlock();

    // Without a partition config, the partition related fields keep their defaults
    if (columnPartitionConfig == null) {
      return;
    }

    String functionName = columnPartitionConfig.getFunctionName();
    numPartitions = columnPartitionConfig.getNumPartitions();
    if (functionName != null) {
      partitionFunction = PartitionFunctionFactory.getPartitionFunction(functionName, numPartitions);
    }
    if (partitionFunction != null) {
      updatePartition();
    }
  }

  /**
   * @return Minimum value, as reported by the dictionary
   */
  @Override
  public Object getMinValue() {
    return _dictionaryReader.getMinVal();
  }

  /**
   * @return Maximum value, as reported by the dictionary
   */
  @Override
  public Object getMaxValue() {
    return _dictionaryReader.getMaxVal();
  }

  /**
   * @return Sorted values, as reported by the dictionary
   */
  @Override
  public Object getUniqueValuesSet() {
    return _dictionaryReader.getSortedValues();
  }

  /**
   * @return Number of entries in the dictionary
   */
  @Override
  public int getCardinality() {
    return _dictionaryReader.length();
  }

  /**
   * Computes the length (in characters) of the longest string in the dictionary.
   *
   * @return Longest string length for STRING columns, 0 for any other data type
   */
  @Override
  public int getLengthOfLargestElement() {
    // Only string columns have a meaningful element length
    if (_dataSource.getDataSourceMetadata().getDataType() != FieldSpec.DataType.STRING) {
      return 0;
    }

    int longest = 0;
    final int numValues = _dictionaryReader.length();
    for (int dictId = 0; dictId < numValues; dictId++) {
      longest = Math.max(_dictionaryReader.getStringValue(dictId).length(), longest);
    }
    return longest;
  }

  /**
   * Checks whether the column values are in non-decreasing order when the documents are visited in the configured
   * iteration order.
   *
   * @return false for multi-value columns or when a value is smaller than its predecessor, true otherwise
   */
  @Override
  @SuppressWarnings({"unchecked", "rawtypes"})
  public boolean isSorted() {
    // A multi-value column is never reported as sorted
    if (!_block.getMetadata().isSingleValue()) {
      return false;
    }

    // At most one document, or at most one distinct value: nothing can be out of order
    final int numDocs = _block.getMetadata().getLength();
    if (numDocs <= 1 || getCardinality() <= 1) {
      return true;
    }

    // Walk all documents, comparing each value against the one before it
    SingleColumnSingleValueReader reader = ((RealtimeSingleValueBlock) _block).getReader();
    Comparable previous = (Comparable) _dictionaryReader.get(reader.getInt(docIdAt(0)));
    for (int position = 1; position < numDocs; position++) {
      Comparable current = (Comparable) _dictionaryReader.get(reader.getInt(docIdAt(position)));
      if (previous.compareTo(current) > 0) {
        return false;
      }
      previous = current;
    }
    return true;
  }

  /**
   * Maps an iteration position to a doc id.
   *
   * @param position Zero-based position in the iteration
   * @return The doc id at that position in the iteration order, or the position itself when no order was given
   */
  private int docIdAt(int position) {
    if (_sortedDocIdIterationOrder == null) {
      return position;
    }
    return _sortedDocIdIterationOrder[position];
  }

  /**
   * Counts the multi-value entries of the column by iterating over the whole block.
   *
   * @return Sum of the entry counts of all documents for multi-value columns, 0 for single-value columns
   */
  @Override
  public int getTotalNumberOfEntries() {
    if (_block.getMetadata().isSingleValue()) {
      return 0;
    }

    // Scratch buffer handed to the iterator; only the returned per-document count is used here
    int[] dictIdBuffer = new int[getMaxNumberOfMultiValues()];
    BlockMultiValIterator multiValIterator = (BlockMultiValIterator) _block.getBlockValueSet().iterator();

    int totalEntries = 0;
    while (multiValIterator.hasNext()) {
      totalEntries += multiValIterator.nextIntVal(dictIdBuffer);
    }
    return totalEntries;
  }

  /**
   * @return Maximum number of multi-values, as reported by the block metadata
   */
  @Override
  public int getMaxNumberOfMultiValues() {
    return _block.getMetadata().getMaxNumberOfMultiValues();
  }

  /**
   * @return Always false
   */
  @Override
  public boolean hasNull() {
    return false;
  }

  /**
   * @return Partition function for the column, or null if none was configured
   */
  @Override
  public PartitionFunction getPartitionFunction() {
    return partitionFunction;
  }

  /**
   * @return Number of partitions taken from the partition config, or 0 if no config was given
   */
  @Override
  public int getNumPartitions() {
    return numPartitions;
  }

  /**
   * @return Single-element list holding the [start, end] range of the observed partitions, or null if no partition
   *         has been recorded
   */
  @Override
  public List<IntRange> getPartitionRanges() {
    // The initial sentinel values (start > end) indicate that no partition was ever recorded
    if (partitionRangeStart > partitionRangeEnd) {
      return null;
    }
    return Arrays.asList(new IntRange(partitionRangeStart, partitionRangeEnd));
  }

  /**
   * Widens the partition range so that it covers the partition of every value in the dictionary.
   */
  void updatePartition() {
    final int numValues = _dictionaryReader.length();
    for (int dictId = 0; dictId < numValues; dictId++) {
      int partition = partitionFunction.getPartition(_dictionaryReader.get(dictId));
      partitionRangeStart = Math.min(partitionRangeStart, partition);
      partitionRangeEnd = Math.max(partitionRangeEnd, partition);
    }
  }
}