/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.core.operator.filter.predicate; import com.linkedin.pinot.common.data.FieldSpec; import com.linkedin.pinot.core.common.predicate.NotInPredicate; import com.linkedin.pinot.core.segment.index.readers.Dictionary; import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet; import it.unimi.dsi.fastutil.doubles.DoubleSet; import it.unimi.dsi.fastutil.floats.FloatOpenHashSet; import it.unimi.dsi.fastutil.floats.FloatSet; import it.unimi.dsi.fastutil.ints.IntOpenHashSet; import it.unimi.dsi.fastutil.ints.IntSet; import it.unimi.dsi.fastutil.longs.LongOpenHashSet; import it.unimi.dsi.fastutil.longs.LongSet; import java.util.Collections; import java.util.HashSet; import java.util.Set; /** * Factory for Not-in predicate evaluators. */ public class NotInPredicateEvaluatorFactory { // Private constructor private NotInPredicateEvaluatorFactory() { } /** * Returns a new instance of dictionary based equality predicate evaluator. * @param predicate Predicate to evaluate * @param dictionary Dictionary for the column * @return Dictionary based equality predicate evaluator */ public static PredicateEvaluator newDictionaryBasedEvaluator(NotInPredicate predicate, Dictionary dictionary) { return new DictionaryBasedNotInPredicateEvaluator(predicate, dictionary); } /** * Returns a new instance of no-dictionary based equality predicate evaluator. * @param predicate Predicate to evaluate * @param dataType Data type for the column * @return No Dictionary based equality predicate evaluator */ public static PredicateEvaluator newNoDictionaryBasedEvaluator(NotInPredicate predicate, FieldSpec.DataType dataType) { switch (dataType) { case INT: return new IntNoDictionaryBasedNotInEvaluator(predicate); case LONG: return new LongNoDictionaryBasedNotInEvaluator(predicate); case FLOAT: return new FloatNoDictionaryBasedNotInEvaluator(predicate); case DOUBLE: return new DoubleNoDictionaryBasedNotInEvaluator(predicate); case STRING: return new StringNoDictionaryBasedNotInEvaluator(predicate); default: throw new UnsupportedOperationException( "No dictionary based Equals predicate evaluator not supported for datatype:" + dataType); } } /** * Dictionary based implementation of not-in predicate evaluator. */ public static class DictionaryBasedNotInPredicateEvaluator extends BasePredicateEvaluator { private int[] _matchingIds; private int[] _nonMatchingIds; private Dictionary _dictionary; private IntSet _nonMatchingDictIdSet; public DictionaryBasedNotInPredicateEvaluator(NotInPredicate predicate, Dictionary dictionary) { _dictionary = dictionary; final String[] notInValues = predicate.getNotInRange(); _nonMatchingDictIdSet = new IntOpenHashSet(notInValues.length); for (final String notInValue : notInValues) { int dictId = dictionary.indexOf(notInValue); if (dictId >= 0) { _nonMatchingDictIdSet.add(dictId); } } _nonMatchingIds = new int[_nonMatchingDictIdSet.size()]; int index = 0; for (int dictId : _nonMatchingDictIdSet) { _nonMatchingIds[index] = dictId; index = index + 1; } } @Override public boolean apply(int dictionaryId) { return (!_nonMatchingDictIdSet.contains(dictionaryId)); } @Override public boolean apply(int[] dictionaryIds) { for (int dictId : dictionaryIds) { if (_nonMatchingDictIdSet.contains(dictId)) { return false; } } return true; } @Override public int[] getMatchingDictionaryIds() { //This is expensive for NOT IN predicate, some operators need this for now. Eventually we should remove the need for exposing matching dict ids if (_matchingIds == null) { int count = 0; _matchingIds = new int[_dictionary.length() - _nonMatchingDictIdSet.size()]; for (int i = 0; i < _dictionary.length(); i++) { if (!_nonMatchingDictIdSet.contains(i)) { _matchingIds[count] = i; count = count + 1; } } } return _matchingIds; } @Override public int[] getNonMatchingDictionaryIds() { return _nonMatchingIds; } @Override public boolean apply(int[] dictionaryIds, int length) { for (int i = 0; i < length; i++) { int dictId = dictionaryIds[i]; if (_nonMatchingDictIdSet.contains(dictId)) { return false; } } return true; } @Override public boolean alwaysFalse() { return _nonMatchingIds.length == _dictionary.length(); } } /** * No dictionary implementation of not-in predicate evaluator for INT data type. */ private static class IntNoDictionaryBasedNotInEvaluator extends BasePredicateEvaluator { IntSet _nonMatchingValues; public IntNoDictionaryBasedNotInEvaluator(NotInPredicate predicate) { _nonMatchingValues = new IntOpenHashSet(); for (String valueString : predicate.getNotInRange()) { _nonMatchingValues.add(Integer.parseInt(valueString)); } } @Override public boolean apply(int inputValue) { return (!_nonMatchingValues.contains(inputValue)); } @Override public boolean apply(int[] inputValues) { return apply(inputValues, inputValues.length); } @Override public boolean apply(int[] inputValues, int length) { // we cannot do binary search since the multi-value columns are not sorted in the raw segment for (int i = 0; i < length; i++) { int inputValue = inputValues[i]; if (_nonMatchingValues.contains(inputValue)) { return false; } } return true; } } /** * No dictionary implementation of not-in predicate evaluator for LONG data type. */ private static class LongNoDictionaryBasedNotInEvaluator extends BasePredicateEvaluator { LongSet _nonMatchingValues; public LongNoDictionaryBasedNotInEvaluator(NotInPredicate predicate) { _nonMatchingValues = new LongOpenHashSet(); for (String valueString : predicate.getNotInRange()) { _nonMatchingValues.add(Long.parseLong(valueString)); } } @Override public boolean apply(long inputValue) { return (!_nonMatchingValues.contains(inputValue)); } @Override public boolean apply(long[] inputValues) { return apply(inputValues, inputValues.length); } @Override public boolean apply(long[] inputValues, int length) { // we cannot do binary search since the multi-value columns are not sorted in the raw segment for (int i = 0; i < length; i++) { long inputValue = inputValues[i]; if (_nonMatchingValues.contains(inputValue)) { return false; } } return true; } } /** * No dictionary implementation of not-in predicate evaluator for FLOAT data type. */ private static class FloatNoDictionaryBasedNotInEvaluator extends BasePredicateEvaluator { FloatSet _nonMatchingValues; public FloatNoDictionaryBasedNotInEvaluator(NotInPredicate predicate) { _nonMatchingValues = new FloatOpenHashSet(); for (String valueString : predicate.getNotInRange()) { _nonMatchingValues.add(Float.parseFloat(valueString)); } } @Override public boolean apply(float inputValue) { return (!_nonMatchingValues.contains(inputValue)); } @Override public boolean apply(float[] inputValues) { return apply(inputValues, inputValues.length); } @Override public boolean apply(float[] inputValues, int length) { // we cannot do binary search since the multi-value columns are not sorted in the raw segment for (int i = 0; i < length; i++) { float inputValue = inputValues[i]; if (_nonMatchingValues.contains(inputValue)) { return false; } } return true; } } /** * No dictionary implementation of not-in predicate evaluator for DOUBLE data type. */ private static class DoubleNoDictionaryBasedNotInEvaluator extends BasePredicateEvaluator { DoubleSet _nonMatchingValues; public DoubleNoDictionaryBasedNotInEvaluator(NotInPredicate predicate) { _nonMatchingValues = new DoubleOpenHashSet(); for (String valueString : predicate.getNotInRange()) { _nonMatchingValues.add(Double.parseDouble(valueString)); } } @Override public boolean apply(double inputValue) { return (!_nonMatchingValues.contains(inputValue)); } @Override public boolean apply(double[] inputValues) { return apply(inputValues, inputValues.length); } @Override public boolean apply(double[] inputValues, int length) { // we cannot do binary search since the multi-value columns are not sorted in the raw segment for (int i = 0; i < length; i++) { double inputValue = inputValues[i]; if (_nonMatchingValues.contains(inputValue)) { return false; } } return true; } } /** * No dictionary implementation of not-in predicate evaluator for STRING data type. */ private static class StringNoDictionaryBasedNotInEvaluator extends BasePredicateEvaluator { Set<String> _nonMatchingValues; public StringNoDictionaryBasedNotInEvaluator(NotInPredicate predicate) { _nonMatchingValues = new HashSet<>(); Collections.addAll(_nonMatchingValues, predicate.getNotInRange()); } @Override public boolean apply(String inputValue) { return (!_nonMatchingValues.contains(inputValue)); } @Override public boolean apply(String[] inputValues) { return apply(inputValues, inputValues.length); } @Override public boolean apply(String[] inputValues, int length) { // we cannot do binary search since the multi-value columns are not sorted in the raw segment for (int i = 0; i < length; i++) { String inputValue = inputValues[i]; if (_nonMatchingValues.contains(inputValue)) { return false; } } return true; } } }