/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.segment.creator.impl.stats;
import com.linkedin.pinot.core.segment.creator.StatsCollectorConfig;
import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet;
import it.unimi.dsi.fastutil.doubles.DoubleSet;
import java.util.Arrays;
/**
 * Statistics collector for a column whose values are read as doubles.
 *
 * <p>Values are accumulated into two separate sets: one for raw entries and one for aggregated
 * entries. The collected statistics (min, max, unique values, cardinality) only become available
 * after {@link #seal()} has been called; asking for them earlier throws
 * {@link IllegalStateException}.
 *
 * <p>Min and max are derived from raw entries only, whereas the unique-values array and the
 * cardinality cover both raw and aggregated entries.
 */
public class DoubleColumnPreIndexStatsCollector extends AbstractColumnStatisticsCollector {
  // Min/max over the raw values; null until sealed, and null if no raw value was collected.
  private Double min = null;
  private Double max = null;
  private final DoubleSet rawDoubleSet;
  private final DoubleSet aggregatedDoubleSet;
  // Sorted distinct values (raw + aggregated); populated by seal().
  private double[] sortedDoubleList;
  private boolean sealed = false;

  /**
   * Creates a collector for the given column with empty raw and aggregated value sets.
   *
   * @param column Name of the column, passed through to the parent collector
   * @param statsCollectorConfig Collector configuration, passed through to the parent collector
   */
  public DoubleColumnPreIndexStatsCollector(String column, StatsCollectorConfig statsCollectorConfig) {
    super(column, statsCollectorConfig);
    rawDoubleSet = new DoubleOpenHashSet(INITIAL_HASH_SET_SIZE);
    aggregatedDoubleSet = new DoubleOpenHashSet(INITIAL_HASH_SET_SIZE);
  }

  /**
   * Collect statistics for the given entry.
   * - Add it to the passed in set (which could be raw or aggregated)
   * - Update maximum number of values for multi-valued entries
   * - Update total number of entries
   * - For single-valued entries, hand the value to the parent's sortedness and partition tracking.
   *
   * @param entry Either a {@link Number}, or an {@code Object[]} of {@link Number}s for
   *              multi-valued entries
   * @param set Set the value(s) are added to
   */
  private void collectEntry(Object entry, DoubleSet set) {
    if (entry instanceof Object[]) {
      final Object[] values = (Object[]) entry;
      for (final Object e : values) {
        set.add(((Number) e).doubleValue());
      }
      if (maxNumberOfMultiValues < values.length) {
        maxNumberOfMultiValues = values.length;
      }
      updateTotalNumberOfEntries(values);
    } else {
      double value = ((Number) entry).doubleValue();
      addressSorted(value);
      updatePartition(value);
      set.add(value);
      totalNumberOfEntries++;
    }
  }

  /**
   * {@inheritDoc}
   * @param entry Entry to be collected
   * @param isAggregated True for aggregated, False for raw.
   */
  @Override
  public void collect(Object entry, boolean isAggregated) {
    if (isAggregated) {
      collectEntry(entry, aggregatedDoubleSet);
    } else {
      collectEntry(entry, rawDoubleSet);
    }
  }

  /**
   * {@inheritDoc}
   * @param entry Entry to be collected (treated as a raw, non-aggregated entry)
   */
  @Override
  public void collect(Object entry) {
    collect(entry, false /* isAggregated */);
  }

  /**
   * Returns the smallest raw value collected, or null if no raw value was collected.
   *
   * @throws IllegalStateException if the collector has not been sealed
   */
  @Override
  public Double getMinValue() {
    checkSealed("min value");
    return min;
  }

  /**
   * Returns the largest raw value collected, or null if no raw value was collected.
   *
   * @throws IllegalStateException if the collector has not been sealed
   */
  @Override
  public Double getMaxValue() {
    checkSealed("max value");
    return max;
  }

  /**
   * Returns the sorted {@code double[]} of distinct values (raw and aggregated). The internal
   * array itself is returned, not a copy.
   *
   * @throws IllegalStateException if the collector has not been sealed
   */
  @Override
  public Object getUniqueValuesSet() {
    checkSealed("unique values set");
    return sortedDoubleList;
  }

  /**
   * Returns the number of distinct values (raw and aggregated).
   *
   * @throws IllegalStateException if the collector has not been sealed
   */
  @Override
  public int getCardinality() {
    checkSealed("cardinality");
    return sortedDoubleList.length;
  }

  /** Always reports false for this collector. */
  @Override
  public boolean hasNull() {
    return false;
  }

  /**
   * Finalizes the collected statistics: computes min/max from the raw values and builds the
   * sorted array of distinct values from the raw and aggregated values combined.
   *
   * <p>Note: the aggregated values are merged into the raw set as part of sealing, so sealing a
   * second time would derive min/max from the merged values.
   */
  @Override
  public void seal() {
    sealed = true;

    // Update the min-max values based on raw docs only.
    final double[] sortedRawValues = toSortedArray(rawDoubleSet);
    if (sortedRawValues.length == 0) {
      min = null;
      max = null;
    } else {
      min = sortedRawValues[0];
      max = sortedRawValues[sortedRawValues.length - 1];
    }

    // Merge the raw and aggregated docs, so stats for dictionary creation are collected correctly.
    // This happens even when there are no raw values, so aggregated-only values are not dropped.
    if (aggregatedDoubleSet.isEmpty()) {
      sortedDoubleList = sortedRawValues;
    } else {
      rawDoubleSet.addAll(aggregatedDoubleSet);
      sortedDoubleList = toSortedArray(rawDoubleSet);
    }
  }

  /** Copies the values of the given set into a new array sorted in ascending order. */
  private static double[] toSortedArray(DoubleSet set) {
    final double[] values = new double[set.size()];
    set.toArray(values);
    Arrays.sort(values);
    return values;
  }

  /**
   * Throws if the collector has not been sealed yet.
   *
   * @param requested Description of the statistic being requested, used in the error message
   */
  private void checkSealed(String requested) {
    if (!sealed) {
      throw new IllegalStateException("you must seal the collector first before asking for " + requested);
    }
  }
}