/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.search.join; import java.io.IOException; import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; import org.apache.lucene.search.SortField; import org.apache.lucene.search.SortedNumericSelector; import org.apache.lucene.search.SortedSetSelector; import org.apache.lucene.util.BitSet; import org.apache.lucene.util.Bits; import org.apache.lucene.util.BytesRef; /** Select a value from a block of documents. * @lucene.internal */ public class BlockJoinSelector { private BlockJoinSelector() {} /** Type of selection to perform. If none of the documents in the block have * a value then no value will be selected. */ public enum Type { /** Only consider the minimum value from the block when sorting. */ MIN, /** Only consider the maximum value from the block when sorting. */ MAX; } /** Return a {@link Bits} instance that returns true if, and only if, any of * the children of the given parent document has a value. */ public static Bits wrap(final Bits docsWithValue, BitSet parents, BitSet children) { return new Bits() { @Override public boolean get(int docID) { assert parents.get(docID) : "this selector may only be used on parent documents"; if (docID == 0) { // no children return false; } final int firstChild = parents.prevSetBit(docID - 1) + 1; for (int child = children.nextSetBit(firstChild); child < docID; child = children.nextSetBit(child + 1)) { if (docsWithValue.get(child)) { return true; } } return false; } @Override public int length() { return docsWithValue.length(); } }; } /** Wraps the provided {@link SortedSetDocValues} in order to only select * one value per parent among its {@code children} using the configured * {@code selection} type. */ public static SortedDocValues wrap(SortedSetDocValues sortedSet, Type selection, BitSet parents, BitSet children) { SortedDocValues values; switch (selection) { case MIN: values = SortedSetSelector.wrap(sortedSet, SortedSetSelector.Type.MIN); break; case MAX: values = SortedSetSelector.wrap(sortedSet, SortedSetSelector.Type.MAX); break; default: throw new AssertionError(); } return wrap(values, selection, parents, children); } /** Wraps the provided {@link SortedDocValues} in order to only select * one value per parent among its {@code children} using the configured * {@code selection} type. */ public static SortedDocValues wrap(final SortedDocValues values, Type selection, BitSet parents, BitSet children) { if (values.docID() != -1) { throw new IllegalArgumentException("values iterator was already consumed: values.docID=" + values.docID()); } return new SortedDocValues() { private int ord = -1; private int docID = -1; @Override public int docID() { return docID; } @Override public int nextDoc() throws IOException { assert docID != NO_MORE_DOCS; if (values.docID() == -1) { if (values.nextDoc() == NO_MORE_DOCS) { docID = NO_MORE_DOCS; return docID; } } if (values.docID() == NO_MORE_DOCS) { docID = NO_MORE_DOCS; return docID; } int nextParentDocID = parents.nextSetBit(values.docID()); ord = values.ordValue(); while (true) { int childDocID = values.nextDoc(); assert childDocID != nextParentDocID; if (childDocID > nextParentDocID) { break; } if (children.get(childDocID) == false) { continue; } if (selection == Type.MIN) { ord = Math.min(ord, values.ordValue()); } else if (selection == Type.MAX) { ord = Math.max(ord, values.ordValue()); } else { throw new AssertionError(); } } docID = nextParentDocID; return docID; } @Override public int advance(int target) throws IOException { if (target >= parents.length()) { docID = NO_MORE_DOCS; return docID; } if (target == 0) { assert docID() == -1; return nextDoc(); } int prevParentDocID = parents.prevSetBit(target-1); if (values.docID() <= prevParentDocID) { values.advance(prevParentDocID+1); } return nextDoc(); } @Override public boolean advanceExact(int targetParentDocID) throws IOException { if (targetParentDocID < docID) { throw new IllegalArgumentException("target must be after the current document: current=" + docID + " target=" + targetParentDocID); } int previousDocId = docID; docID = targetParentDocID; if (targetParentDocID == previousDocId) { return ord != -1; } docID = targetParentDocID; ord = -1; if (parents.get(targetParentDocID) == false) { return false; } int prevParentDocId = docID == 0 ? -1 : parents.prevSetBit(docID - 1); int childDoc = values.docID(); if (childDoc <= prevParentDocId) { childDoc = values.advance(prevParentDocId + 1); } if (childDoc >= docID) { return false; } boolean hasValue = false; for (int doc = values.docID(); doc < docID; doc = values.nextDoc()) { if (children.get(doc)) { ord = values.ordValue(); hasValue = true; values.nextDoc(); break; } } if (hasValue == false) { return false; } for (int doc = values.docID(); doc < docID; doc = values.nextDoc()) { if (children.get(doc)) { switch (selection) { case MIN: ord = Math.min(ord, values.ordValue()); break; case MAX: ord = Math.max(ord, values.ordValue()); break; default: throw new AssertionError(); } } } return true; } @Override public int ordValue() { return ord; } @Override public BytesRef lookupOrd(int ord) throws IOException { return values.lookupOrd(ord); } @Override public int getValueCount() { return values.getValueCount(); } @Override public long cost() { return values.cost(); } }; } /** Wraps the provided {@link SortedNumericDocValues} in order to only select * one value per parent among its {@code children} using the configured * {@code selection} type. */ public static NumericDocValues wrap(SortedNumericDocValues sortedNumerics, Type selection, BitSet parents, BitSet children) { NumericDocValues values; switch (selection) { case MIN: values = SortedNumericSelector.wrap(sortedNumerics, SortedNumericSelector.Type.MIN, SortField.Type.LONG); break; case MAX: values = SortedNumericSelector.wrap(sortedNumerics, SortedNumericSelector.Type.MAX, SortField.Type.LONG); break; default: throw new AssertionError(); } return wrap(values, selection, parents, children); } /** Wraps the provided {@link NumericDocValues}, iterating over only * child documents, in order to only select one value per parent among * its {@code children} using the configured {@code selection} type. */ public static NumericDocValues wrap(final NumericDocValues values, Type selection, BitSet parents, BitSet children) { return new NumericDocValues() { private int parentDocID = -1; private long value; @Override public int nextDoc() throws IOException { if (parentDocID == -1) { values.nextDoc(); } while (true) { // TODO: make this crazy loop more efficient int childDocID = values.docID(); if (childDocID == NO_MORE_DOCS) { parentDocID = NO_MORE_DOCS; return parentDocID; } if (children.get(childDocID) == false) { values.nextDoc(); continue; } assert parents.get(childDocID) == false; parentDocID = parents.nextSetBit(childDocID); value = values.longValue(); while (true) { childDocID = values.nextDoc(); assert childDocID != parentDocID; if (childDocID > parentDocID) { break; } switch (selection) { case MIN: value = Math.min(value, values.longValue()); break; case MAX: value = Math.max(value, values.longValue()); break; default: throw new AssertionError(); } } break; } return parentDocID; } @Override public int advance(int targetParentDocID) throws IOException { if (targetParentDocID <= parentDocID) { throw new IllegalArgumentException("target must be after the current document: current=" + parentDocID + " target=" + targetParentDocID); } if (targetParentDocID == 0) { return nextDoc(); } int firstChild = parents.prevSetBit(targetParentDocID - 1) + 1; if (values.advance(firstChild) == NO_MORE_DOCS) { parentDocID = NO_MORE_DOCS; return parentDocID; } else { return nextDoc(); } } @Override public boolean advanceExact(int targetParentDocID) throws IOException { if (targetParentDocID <= parentDocID) { throw new IllegalArgumentException("target must be after the current document: current=" + parentDocID + " target=" + targetParentDocID); } parentDocID = targetParentDocID; if (parents.get(targetParentDocID) == false) { return false; } int prevParentDocId = parentDocID == 0 ? -1 : parents.prevSetBit(parentDocID - 1); int childDoc = values.docID(); if (childDoc <= prevParentDocId) { childDoc = values.advance(prevParentDocId + 1); } if (childDoc >= parentDocID) { return false; } boolean hasValue = false; for (int doc = values.docID(); doc < parentDocID; doc = values.nextDoc()) { if (children.get(doc)) { value = values.longValue(); hasValue = true; values.nextDoc(); break; } } if (hasValue == false) { return false; } for (int doc = values.docID(); doc < parentDocID; doc = values.nextDoc()) { if (children.get(doc)) { switch (selection) { case MIN: value = Math.min(value, values.longValue()); break; case MAX: value = Math.max(value, values.longValue()); break; default: throw new AssertionError(); } } } return true; } @Override public long longValue() { return value; } @Override public int docID() { return parentDocID; } @Override public long cost() { return values.cost(); } }; } }