/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.core.operator.docidsets; import java.util.ArrayList; import java.util.List; import java.util.concurrent.atomic.AtomicLong; import org.roaringbitmap.IntIterator; import org.roaringbitmap.buffer.ImmutableRoaringBitmap; import org.roaringbitmap.buffer.MutableRoaringBitmap; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.linkedin.pinot.common.utils.Pairs.IntPair; import com.linkedin.pinot.core.common.BlockDocIdIterator; import com.linkedin.pinot.core.common.BlockDocIdSet; import com.linkedin.pinot.core.common.Constants; import com.linkedin.pinot.core.operator.dociditerators.AndDocIdIterator; import com.linkedin.pinot.core.operator.dociditerators.BitmapDocIdIterator; import com.linkedin.pinot.core.operator.dociditerators.RangelessBitmapDocIdIterator; import com.linkedin.pinot.core.operator.dociditerators.ScanBasedDocIdIterator; import com.linkedin.pinot.core.operator.filter.AndOperator; import com.linkedin.pinot.core.util.SortedRangeIntersection; public final class AndBlockDocIdSet implements FilterBlockDocIdSet { /** * */ static final Logger LOGGER = LoggerFactory.getLogger(AndOperator.class); public final AtomicLong timeMeasure = new AtomicLong(0); private List<FilterBlockDocIdSet> blockDocIdSets; private int minDocId = Integer.MIN_VALUE; private int maxDocId = Integer.MAX_VALUE; MutableRoaringBitmap answer = null; boolean validate = false; public AndBlockDocIdSet(List<FilterBlockDocIdSet> blockDocIdSets) { this.blockDocIdSets = blockDocIdSets; updateMinMaxRange(); } private void updateMinMaxRange() { for (FilterBlockDocIdSet blockDocIdSet : blockDocIdSets) { minDocId = Math.max(minDocId, blockDocIdSet.getMinDocId()); maxDocId = Math.min(maxDocId, blockDocIdSet.getMaxDocId()); } for (FilterBlockDocIdSet blockDocIdSet : blockDocIdSets) { blockDocIdSet.setStartDocId(minDocId); blockDocIdSet.setEndDocId(maxDocId); } } @Override public BlockDocIdIterator iterator() { //TODO: Remove this validation code once we have enough testing if (validate) { BlockDocIdIterator slowIterator = slowIterator(); BlockDocIdIterator fastIterator = fastIterator(); List<Integer> matchedIds = new ArrayList<>(); while (true) { int docId1 = slowIterator.next(); int docId2 = fastIterator.next(); if (docId1 != docId2) { LOGGER.error("ERROR docId1:" + docId1 + " docId2:" + docId2); } else { matchedIds.add(docId1); } if (docId1 == Constants.EOF || docId2 == Constants.EOF) { break; } } answer = null; } return fastIterator(); } public BlockDocIdIterator slowIterator() { List<BlockDocIdIterator> rawIterators = new ArrayList<>(); boolean useBitmapBasedIntersection = false; for (BlockDocIdSet docIdSet : blockDocIdSets) { if (docIdSet instanceof BitmapDocIdSet) { useBitmapBasedIntersection = true; } } BlockDocIdIterator[] docIdIterators; if (useBitmapBasedIntersection) { List<ImmutableRoaringBitmap> allBitmaps = new ArrayList<ImmutableRoaringBitmap>(); for (BlockDocIdSet docIdSet : blockDocIdSets) { if (docIdSet instanceof SortedDocIdSet) { MutableRoaringBitmap bitmap = new MutableRoaringBitmap(); SortedDocIdSet sortedDocIdSet = (SortedDocIdSet) docIdSet; List<IntPair> pairs = sortedDocIdSet.getRaw(); for (IntPair pair : pairs) { bitmap.add(pair.getLeft(), pair.getRight() + 1); // add takes [start, end) i.e inclusive // start, exclusive end. } allBitmaps.add(bitmap); } else if (docIdSet instanceof BitmapDocIdSet) { BitmapDocIdSet bitmapDocIdSet = (BitmapDocIdSet) docIdSet; ImmutableRoaringBitmap childBitmap = bitmapDocIdSet.getRaw(); allBitmaps.add(childBitmap); } else { BlockDocIdIterator iterator = docIdSet.iterator(); rawIterators.add(iterator); } } IntIterator intIterator; if (allBitmaps.size() > 1) { MutableRoaringBitmap answer = (MutableRoaringBitmap) allBitmaps.get(0).clone(); for (int i = 1; i < allBitmaps.size(); i++) { answer.and(allBitmaps.get(i)); } intIterator = answer.getIntIterator(); } else { intIterator = allBitmaps.get(0).getIntIterator(); } BitmapDocIdIterator singleBitmapBlockIdIterator = new BitmapDocIdIterator(intIterator); singleBitmapBlockIdIterator.setStartDocId(minDocId); singleBitmapBlockIdIterator.setEndDocId(maxDocId); rawIterators.add(0, singleBitmapBlockIdIterator); docIdIterators = new BlockDocIdIterator[rawIterators.size()]; rawIterators.toArray(docIdIterators); } else { docIdIterators = new BlockDocIdIterator[blockDocIdSets.size()]; for (int srcId = 0; srcId < blockDocIdSets.size(); srcId++) { docIdIterators[srcId] = blockDocIdSets.get(srcId).iterator(); } } return new AndDocIdIterator(docIdIterators); } public BlockDocIdIterator fastIterator() { long start = System.currentTimeMillis(); List<List<IntPair>> sortedRangeSets = new ArrayList<>(); List<ImmutableRoaringBitmap> childBitmaps = new ArrayList<ImmutableRoaringBitmap>(); List<FilterBlockDocIdSet> scanBasedDocIdSets = new ArrayList<>(); List<BlockDocIdIterator> remainingIterators = new ArrayList<>(); for (BlockDocIdSet docIdSet : blockDocIdSets) { if (docIdSet instanceof SortedDocIdSet) { SortedDocIdSet sortedDocIdSet = (SortedDocIdSet) docIdSet; List<IntPair> pairs = sortedDocIdSet.getRaw(); sortedRangeSets.add(pairs); } else if (docIdSet instanceof BitmapDocIdSet) { BitmapDocIdSet bitmapDocIdSet = (BitmapDocIdSet) docIdSet; ImmutableRoaringBitmap childBitmap = bitmapDocIdSet.getRaw(); childBitmaps.add(childBitmap); } else if (docIdSet instanceof ScanBasedSingleValueDocIdSet) { scanBasedDocIdSets.add((ScanBasedSingleValueDocIdSet) docIdSet); } else if (docIdSet instanceof ScanBasedMultiValueDocIdSet) { scanBasedDocIdSets.add((ScanBasedMultiValueDocIdSet) docIdSet); } else { // TODO:handle child OR/AND as bitmap if possible remainingIterators.add(docIdSet.iterator()); } } if (childBitmaps.size() == 0 && sortedRangeSets.size() == 0) { // When one or more of the operands are operators themselves, then we don't have a sorted or // bitmap index. In that case, just use the AndDocIdIterator to iterate over all of of the subtree. BlockDocIdIterator[] docIdIterators = new BlockDocIdIterator[blockDocIdSets.size()]; for (int srcId = 0; srcId < blockDocIdSets.size(); srcId++) { docIdIterators[srcId] = blockDocIdSets.get(srcId).iterator(); } return new AndDocIdIterator(docIdIterators); } else { // handle sorted ranges // TODO: will be nice to re-order sorted and bitmap index based on size if (sortedRangeSets.size() > 0) { List<IntPair> pairList; pairList = SortedRangeIntersection.intersectSortedRangeSets(sortedRangeSets); answer = new MutableRoaringBitmap(); for (IntPair pair : pairList) { // end is exclusive answer.add(pair.getLeft(), pair.getRight() + 1); } } // handle bitmaps if (childBitmaps.size() > 0) { if (answer == null) { answer = childBitmaps.get(0).toMutableRoaringBitmap(); for (int i = 1; i < childBitmaps.size(); i++) { answer.and(childBitmaps.get(i)); } } else { for (int i = 0; i < childBitmaps.size(); i++) { answer.and(childBitmaps.get(i)); } } } // At this point, we must have 'answer' to be non-null. assert (answer != null) : "sortedRangeSets=" + sortedRangeSets.size() + ",childBitmaps=" + childBitmaps.size(); // handle raw iterators for (FilterBlockDocIdSet scanBasedDocIdSet : scanBasedDocIdSets) { ScanBasedDocIdIterator iterator = (ScanBasedDocIdIterator) scanBasedDocIdSet.iterator(); MutableRoaringBitmap scanAnswer = iterator.applyAnd(answer); answer.and(scanAnswer); } long end = System.currentTimeMillis(); LOGGER.debug("Time to evaluate and Filter:{}", (end - start)); // if other iterators exists resort to iterator style intersection BlockDocIdIterator answerDocIdIterator = new RangelessBitmapDocIdIterator(answer.getIntIterator()); if (remainingIterators.size() == 0) { return answerDocIdIterator; } else { BlockDocIdIterator[] docIdIterators = new BlockDocIdIterator[remainingIterators.size() + 1]; docIdIterators[0] = answerDocIdIterator; for (int i = 0; i < remainingIterators.size(); i++) { docIdIterators[i + 1] = remainingIterators.get(i); } return new AndDocIdIterator(docIdIterators); } } } @SuppressWarnings("unchecked") @Override public <T> T getRaw() { return (T) answer; } @Override public int getMinDocId() { return minDocId; } @Override public int getMaxDocId() { return maxDocId; } @Override public void setStartDocId(int startDocId) { minDocId = Math.max(minDocId, startDocId); updateMinMaxRange(); } @Override public void setEndDocId(int endDocId) { maxDocId = Math.min(maxDocId, endDocId); updateMinMaxRange(); } @Override public long getNumEntriesScannedInFilter() { long numEntriesScannedInFilter = 0L; for (FilterBlockDocIdSet blockDocIdSet : blockDocIdSets) { numEntriesScannedInFilter += blockDocIdSet.getNumEntriesScannedInFilter(); } return numEntriesScannedInFilter; } }