/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.query.aggregation.groupby;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.core.common.BlockMetadata;
import com.linkedin.pinot.core.common.BlockValSet;
import com.linkedin.pinot.core.operator.blocks.TransformBlock;
import com.linkedin.pinot.core.query.aggregation.groupby.utils.ValueToIdMap;
import com.linkedin.pinot.core.query.aggregation.groupby.utils.ValueToIdMapFactory;
import com.linkedin.pinot.core.segment.index.readers.Dictionary;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
/**
* Implementation of {@link GroupKeyGenerator} interface using actual value based
* group keys, instead of dictionary ids. This implementation is used for group-by key
* generation when one or more of the group-by columns do not have dictionary.
*
* TODO:
* 1. Add support for multi-valued group-by columns.
* 2. Add support for trimming group-by results.
*/
public class NoDictionaryMultiColumnGroupKeyGenerator implements GroupKeyGenerator {
  /** Names of the group-by columns, in group-key order. */
  private final String[] _groupByColumns;

  /** Maps a per-column id tuple to the group id assigned to it. */
  private final Map<FixedIntArray, Integer> _groupKeyMap;

  /**
   * Per-column flag, captured once in the constructor. It is the single source of truth for whether a column is
   * encoded with its dictionary ({@link #_dictionaries}) or with an on-the-fly dictionary
   * ({@link #_onTheFlyDictionaries}), both when generating keys and when converting them back to strings.
   */
  private final boolean[] _hasDictionary;

  /** Dictionary for each column that has one; {@code null} at the index of columns without a dictionary. */
  private final Dictionary[] _dictionaries;

  /** Value-to-id map for each column without a dictionary; {@code null} at the index of columns with one. */
  private final ValueToIdMap[] _onTheFlyDictionaries;

  /** Next group id to hand out; group ids are assigned sequentially starting at 0. */
  private int _numGroupKeys = 0;

  /**
   * Constructor for the class.
   *
   * @param transformBlock Block whose per-column metadata decides, for each group-by column, whether the column's
   *                       dictionary or an on-the-fly value-to-id map is used
   * @param groupByColumns Columns for which to generate group-by keys
   */
  public NoDictionaryMultiColumnGroupKeyGenerator(TransformBlock transformBlock, String[] groupByColumns) {
    _groupByColumns = groupByColumns;
    _groupKeyMap = new HashMap<>();

    _hasDictionary = new boolean[groupByColumns.length];
    _dictionaries = new Dictionary[groupByColumns.length];
    _onTheFlyDictionaries = new ValueToIdMap[groupByColumns.length];

    for (int i = 0; i < groupByColumns.length; i++) {
      BlockMetadata blockMetadata = transformBlock.getBlockMetadata(groupByColumns[i]);
      if (blockMetadata.hasDictionary()) {
        _dictionaries[i] = blockMetadata.getDictionary();
        _hasDictionary[i] = true;
      } else {
        _onTheFlyDictionaries[i] = ValueToIdMapFactory.get(blockMetadata.getDataType());
        _hasDictionary[i] = false;
      }
    }
  }

  @Override
  public int getGlobalGroupKeyUpperBound() {
    // Since there's no dictionary, we cannot find the cardinality
    return Integer.MAX_VALUE;
  }

  /**
   * Generates a group id for each document of the block and stores it at the document's index in
   * {@code docIdToGroupKey}.
   *
   * @param transformBlock Block to generate the group keys for
   * @param docIdToGroupKey Output array, filled for indices {@code [0, transformBlock.getNumDocs())}
   * @throws IllegalArgumentException if a column without dictionary has a data type other than
   *         INT, LONG, FLOAT, DOUBLE or STRING
   */
  @Override
  public void generateKeysForBlock(TransformBlock transformBlock, int[] docIdToGroupKey) {
    int numGroupByColumns = _groupByColumns.length;
    int numDocs = transformBlock.getNumDocs();

    // Per column: either the int[] of dictionary ids, or the typed array of raw values.
    Object[] values = new Object[numGroupByColumns];
    FieldSpec.DataType[] dataTypes = new FieldSpec.DataType[numGroupByColumns];

    for (int i = 0; i < numGroupByColumns; i++) {
      BlockValSet blockValSet = transformBlock.getBlockValueSet(_groupByColumns[i]);
      dataTypes[i] = blockValSet.getValueType();

      // Use the flags captured in the constructor, so that the encoding chosen here always matches the
      // dictionaries that were allocated there and that buildStringKeyFromIds() uses for decoding.
      if (_hasDictionary[i]) {
        values[i] = blockValSet.getDictionaryIds();
      } else {
        values[i] = getValuesFromBlockValSet(blockValSet, dataTypes[i]);
      }
    }

    for (int i = 0; i < numDocs; i++) {
      // A fresh array is needed per document, as it is retained as the map key when the group is new.
      int[] keys = new int[numGroupByColumns];

      for (int j = 0; j < numGroupByColumns; j++) {
        if (_hasDictionary[j]) {
          int[] dictIds = (int[]) values[j];
          keys[j] = dictIds[i];
        } else {
          keys[j] = getOnTheFlyDictId(j, dataTypes[j], values[j], i);
        }
      }

      docIdToGroupKey[i] = getGroupIdForKey(new FixedIntArray(keys));
    }
  }

  @Override
  public void generateKeysForBlock(TransformBlock transformBlock, int[][] docIdToGroupKeys) {
    // TODO: Support generating keys for multi-valued columns.
    throw new UnsupportedOperationException("Operation not supported");
  }

  @Override
  public int getCurrentGroupKeyUpperBound() {
    return _groupKeyMap.size();
  }

  /**
   * {@inheritDoc}
   *
   * <p>The returned iterator re-uses a single {@link GroupKey} instance across calls to {@code next()}, so callers
   * must not hold on to a returned key beyond the following call.
   */
  @Override
  public Iterator<GroupKey> getUniqueGroupKeys() {
    return new GroupKeyIterator(_groupKeyMap);
  }

  @Override
  public void purgeKeys(int[] keysToPurge) {
    // TODO: Implement purging.
    throw new UnsupportedOperationException("Purging keys not yet supported in GroupKeyGenerator without dictionary.");
  }

  /**
   * Helper method to map the raw value of a no-dictionary column to its on-the-fly dictionary id, adding the value
   * to the on-the-fly dictionary of that column.
   *
   * @param column Index of the group-by column
   * @param dataType Data type of the values array
   * @param values Typed array of raw values, as returned by {@link #getValuesFromBlockValSet}
   * @param docIndex Index of the document within the values array
   * @return Id returned by the on-the-fly dictionary for the value
   */
  private int getOnTheFlyDictId(int column, FieldSpec.DataType dataType, Object values, int docIndex) {
    ValueToIdMap onTheFlyDictionary = _onTheFlyDictionaries[column];

    switch (dataType) {
      case INT:
        int[] intValues = (int[]) values;
        return onTheFlyDictionary.put(intValues[docIndex]);

      case LONG:
        long[] longValues = (long[]) values;
        return onTheFlyDictionary.put(longValues[docIndex]);

      case FLOAT:
        float[] floatValues = (float[]) values;
        return onTheFlyDictionary.put(floatValues[docIndex]);

      case DOUBLE:
        double[] doubleValues = (double[]) values;
        return onTheFlyDictionary.put(doubleValues[docIndex]);

      case STRING:
        String[] stringValues = (String[]) values;
        return onTheFlyDictionary.put(stringValues[docIndex]);

      default:
        throw new IllegalArgumentException("Illegal data type for no-dictionary key generator: " + dataType);
    }
  }

  /**
   * Helper method to get or create group-id for a group key.
   *
   * @param keyList Group key, that is the per-column ids of the values to be grouped
   * @return Group id
   */
  private int getGroupIdForKey(FixedIntArray keyList) {
    Integer groupId = _groupKeyMap.get(keyList);
    if (groupId == null) {
      groupId = _numGroupKeys;
      _groupKeyMap.put(keyList, _numGroupKeys++);
    }
    return groupId;
  }

  /**
   * Iterator for (Group-Key, Group-id) pairs.
   *
   * <p>A single {@link GroupKey} object is allocated per iterator and overwritten on each call to
   * {@link #next()}.
   */
  class GroupKeyIterator implements Iterator<GroupKey> {
    private final Iterator<Map.Entry<FixedIntArray, Integer>> _iterator;
    private final GroupKey _groupKey;

    public GroupKeyIterator(Map<FixedIntArray, Integer> map) {
      _iterator = map.entrySet().iterator();
      _groupKey = new GroupKey(INVALID_ID, null);
    }

    @Override
    public boolean hasNext() {
      return _iterator.hasNext();
    }

    @Override
    public GroupKey next() {
      Map.Entry<FixedIntArray, Integer> entry = _iterator.next();
      _groupKey.setFirst(entry.getValue());
      _groupKey.setSecond(buildStringKeyFromIds(entry.getKey()));
      return _groupKey;
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  }

  /**
   * Helper method to convert a group key of per-column ids into its string form: the string values of all
   * columns, joined by {@code AggregationGroupByTrimmingService.GROUP_KEY_DELIMITER}.
   *
   * @param keyList Group key holding one id per group-by column
   * @return String representation of the group key
   */
  private String buildStringKeyFromIds(FixedIntArray keyList) {
    StringBuilder builder = new StringBuilder();
    int[] keys = keyList.elements();

    for (int i = 0; i < keyList.size(); i++) {
      String key;
      int dictId = keys[i];

      if (_hasDictionary[i]) {
        key = _dictionaries[i].get(dictId).toString();
      } else {
        key = _onTheFlyDictionaries[i].getString(dictId);
      }

      if (i > 0) {
        builder.append(AggregationGroupByTrimmingService.GROUP_KEY_DELIMITER);
      }
      builder.append(key);
    }

    return builder.toString();
  }

  /**
   * Helper method to fetch the single-valued values from a BlockValSet, as an array of the given data type.
   *
   * @param blockValSet Block val set
   * @param dataType Data type
   * @return Values from block val set ({@code int[]}, {@code long[]}, {@code float[]}, {@code double[]} or
   *         {@code String[]}, matching the cast applied per data type when the values are consumed)
   * @throws IllegalArgumentException if the data type is not one of INT, LONG, FLOAT, DOUBLE or STRING
   */
  private Object getValuesFromBlockValSet(BlockValSet blockValSet, FieldSpec.DataType dataType) {
    Object values;

    switch (dataType) {
      case INT:
        values = blockValSet.getIntValuesSV();
        break;

      case LONG:
        values = blockValSet.getLongValuesSV();
        break;

      case FLOAT:
        values = blockValSet.getFloatValuesSV();
        break;

      case DOUBLE:
        values = blockValSet.getDoubleValuesSV();
        break;

      case STRING:
        values = blockValSet.getStringValuesSV();
        break;

      default:
        throw new IllegalArgumentException("Illegal data type for no-dictionary key generator: " + dataType);
    }

    return values;
  }

  /**
   * Wrapper around fixed size int array with hashCode() and equals() implementation.
   * Used as a key in hash-map. The wrapped array is not copied, so it must not be modified
   * after the wrapper has been used as a key.
   */
  private static class FixedIntArray {
    private final int[] _value;

    FixedIntArray(int[] value) {
      _value = value;
    }

    int[] elements() {
      return _value;
    }

    int size() {
      return _value.length;
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (o == null || getClass() != o.getClass()) {
        return false;
      }

      FixedIntArray that = (FixedIntArray) o;
      return Arrays.equals(_value, that._value);
    }

    @Override
    public int hashCode() {
      return Arrays.hashCode(_value);
    }
  }
}