/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *         http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.core.segment.creator.impl.stats;

import com.linkedin.pinot.core.segment.creator.StatsCollectorConfig;
import it.unimi.dsi.fastutil.floats.FloatOpenHashSet;
import it.unimi.dsi.fastutil.floats.FloatSet;
import java.util.Arrays;


/**
 * Pre-index statistics collector for a float column.
 *
 * <p>Values are accumulated into two distinct-value sets, one for raw entries and one for
 * aggregated entries. Calling {@link #seal()} freezes the statistics: min/max are derived from the
 * raw values only, while the unique-value array (and therefore the cardinality) covers raw and
 * aggregated values together. All statistic getters throw {@link IllegalStateException} until
 * {@link #seal()} has been called.
 */
public class FloatColumnPreIndexStatsCollector extends AbstractColumnStatisticsCollector {
  // Placeholder bounds only; both are always overwritten by seal() before they can be read.
  // Note that Float.MIN_VALUE is the smallest *positive* float, so the lower sentinel for max
  // has to be -Float.MAX_VALUE.
  private Float min = Float.MAX_VALUE;
  private Float max = -Float.MAX_VALUE;

  private final FloatSet rawFloatSet;
  private final FloatSet aggregatedFloatSet;
  private float[] sortedFloatList;
  private boolean sealed = false;

  /**
   * Creates a collector for the given column with empty raw and aggregated value sets.
   *
   * @param column Name of the column, passed through to the superclass
   * @param statsCollectorConfig Collector configuration, passed through to the superclass
   */
  public FloatColumnPreIndexStatsCollector(String column, StatsCollectorConfig statsCollectorConfig) {
    super(column, statsCollectorConfig);
    rawFloatSet = new FloatOpenHashSet(INITIAL_HASH_SET_SIZE);
    aggregatedFloatSet = new FloatOpenHashSet(INITIAL_HASH_SET_SIZE);
  }

  /**
   * Collect statistics for the given entry.
   * <ul>
   *   <li>Multi-valued entry ({@code Object[]}): every element is added to the passed in set, the
   *       maximum number of multi-values is raised if this entry is longer, and the total number
   *       of entries is updated via {@code updateTotalNumberOfEntries}. Sortedness and partition
   *       tracking are not touched on this path.</li>
   *   <li>Single-valued entry: the value is passed to {@code addressSorted} and
   *       {@code updatePartition}, added to the passed in set, and the total number of entries is
   *       incremented by one.</li>
   * </ul>
   *
   * @param entry Entry to collect; either a {@link Number} or an {@code Object[]} of Numbers
   * @param set Set to add the value(s) to (raw or aggregated)
   */
  private void collectEntry(Object entry, FloatSet set) {
    if (entry instanceof Object[]) {
      final Object[] values = (Object[]) entry;
      for (final Object value : values) {
        set.add(((Number) value).floatValue());
      }
      if (maxNumberOfMultiValues < values.length) {
        maxNumberOfMultiValues = values.length;
      }
      updateTotalNumberOfEntries(values);
    } else {
      final float value = ((Number) entry).floatValue();
      addressSorted(value);
      updatePartition(value);
      set.add(value);
      totalNumberOfEntries++;
    }
  }

  /**
   * {@inheritDoc}
   * @param entry Entry to be collected
   * @param isAggregated True for aggregated, False for raw.
   */
  @Override
  public void collect(Object entry, boolean isAggregated) {
    collectEntry(entry, isAggregated ? aggregatedFloatSet : rawFloatSet);
  }

  /**
   * {@inheritDoc}
   * @param entry Entry to be collected
   */
  @Override
  public void collect(Object entry) {
    collect(entry, false /* isAggregated */);
  }

  /**
   * Returns the smallest raw value collected.
   *
   * @return Minimum raw value, or {@code null} if no raw values were collected
   * @throws IllegalStateException if the collector has not been sealed
   */
  @Override
  public Float getMinValue() {
    if (sealed) {
      return min;
    }
    throw new IllegalStateException("you must seal the collector first before asking for min value");
  }

  /**
   * Returns the largest raw value collected.
   *
   * @return Maximum raw value, or {@code null} if no raw values were collected
   * @throws IllegalStateException if the collector has not been sealed
   */
  @Override
  public Float getMaxValue() {
    if (sealed) {
      return max;
    }
    throw new IllegalStateException("you must seal the collector first before asking for max value");
  }

  /**
   * Returns the distinct values as an ascending {@code float[]}.
   *
   * <p>The returned array is the collector's internal array, not a copy.
   *
   * @return Sorted array of distinct values (raw plus aggregated, see {@link #seal()})
   * @throws IllegalStateException if the collector has not been sealed
   */
  @Override
  public Object getUniqueValuesSet() {
    if (sealed) {
      return sortedFloatList;
    }
    throw new IllegalStateException("you must seal the collector first before asking for unique values set");
  }

  /**
   * Returns the number of distinct values, i.e. the length of the sorted unique-value array.
   *
   * @return Number of distinct values
   * @throws IllegalStateException if the collector has not been sealed
   */
  @Override
  public int getCardinality() {
    if (sealed) {
      return sortedFloatList.length;
    }
    throw new IllegalStateException("you must seal the collector first before asking for cardinality");
  }

  /**
   * Always reports that the column has no nulls.
   *
   * @return {@code false}
   */
  @Override
  public boolean hasNull() {
    return false;
  }

  /**
   * Finalizes the statistics so that the getters can be used.
   *
   * <p>Min/max are taken from the raw values only. If there are no raw values, min and max become
   * {@code null} and the unique-value array is empty. Otherwise, when aggregated values are
   * present, the unique-value array is rebuilt from the union of raw and aggregated values.
   *
   * <p>The raw set itself is left untouched, so sealing again yields the same min/max.
   */
  @Override
  public void seal() {
    sealed = true;

    sortedFloatList = toSortedArray(rawFloatSet);
    if (sortedFloatList.length == 0) {
      // NOTE(review): aggregated values are not merged on this path (same as before); confirm
      // that aggregated entries can never exist without raw entries.
      min = null;
      max = null;
      return;
    }

    // Update min/max based on raw docs.
    min = sortedFloatList[0];
    max = sortedFloatList[sortedFloatList.length - 1];

    // Merge the raw and aggregated docs, so stats for dictionary creation are collected correctly.
    // The merge is done on a copy so that the raw set keeps holding raw values only.
    if (!aggregatedFloatSet.isEmpty()) {
      final FloatSet mergedFloatSet = new FloatOpenHashSet(rawFloatSet);
      mergedFloatSet.addAll(aggregatedFloatSet);
      sortedFloatList = toSortedArray(mergedFloatSet);
    }
  }

  /** Copies the given set into a new array and sorts it in ascending order. */
  private static float[] toSortedArray(FloatSet values) {
    final float[] sorted = new float[values.size()];
    values.toArray(sorted);
    Arrays.sort(sorted);
    return sorted;
  }
}