/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.spatial.prefix;

import java.io.IOException;
import java.util.Arrays;

import org.locationtech.spatial4j.shape.Shape;
import org.locationtech.spatial4j.shape.SpatialRelation;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.SentinelIntSet;

/**
 * Finds docs where its indexed shape {@link org.apache.lucene.spatial.query.SpatialOperation#Contains
 * CONTAINS} the query shape. For use on {@link RecursivePrefixTreeStrategy}.
 *
 * @lucene.experimental
 */
public class ContainsPrefixTreeQuery extends AbstractPrefixTreeQuery {

  /**
   * If the spatial data for a document is comprised of multiple overlapping or adjacent parts,
   * it might fail to match a query shape when doing the CONTAINS predicate when the sum of
   * those shapes contains the query shape but none do individually. Set this to false to
   * increase performance if you don't care about that circumstance (such as if your indexed
   * data doesn't even have such conditions). See LUCENE-5062.
   */
  protected final boolean multiOverlappingIndexedShapes;

  /**
   * Creates the query.
   *
   * @param queryShape the shape that matching documents must contain
   * @param fieldName the indexed field to search
   * @param grid the prefix tree used to walk cells
   * @param detailLevel the cell level at which traversal stops descending
   * @param multiOverlappingIndexedShapes see {@link #multiOverlappingIndexedShapes}
   */
  public ContainsPrefixTreeQuery(Shape queryShape, String fieldName, SpatialPrefixTree grid,
                                 int detailLevel, boolean multiOverlappingIndexedShapes) {
    super(queryShape, fieldName, grid, detailLevel);
    this.multiOverlappingIndexedShapes = multiOverlappingIndexedShapes;
  }

  /** Equal when the superclass considers {@code o} equal and the multi-overlap flag matches. */
  @Override
  public boolean equals(Object o) {
    if (!super.equals(o))
      return false;
    return multiOverlappingIndexedShapes == ((ContainsPrefixTreeQuery)o).multiOverlappingIndexedShapes;
  }

  /** Superclass hash code, offset by one when {@link #multiOverlappingIndexedShapes} is set. */
  @Override
  public int hashCode() {
    return super.hashCode() + (multiOverlappingIndexedShapes ? 1 : 0);
  }

  /** Describes this query by its simple class name and configuration; {@code field} is unused. */
  @Override
  public String toString(String field) {
    return getClass().getSimpleName() + "(" +
        "fieldName=" + fieldName + "," +
        "queryShape=" + queryShape + "," +
        "detailLevel=" + detailLevel + "," +
        "multiOverlappingIndexedShapes=" + multiOverlappingIndexedShapes +
        ")";
  }

  /**
   * Runs a {@link ContainsVisitor} from the grid's world cell with no initial doc restriction.
   * The result is null when the visitor finds no docs.
   */
  @Override
  protected DocIdSet getDocIdSet(LeafReaderContext context) throws IOException {
    return new ContainsVisitor(context).visit(grid.getWorldCell(), null);
  }

  /**
   * Walks the query shape's cells while moving forward through the indexed terms, collecting
   * the docs that are present for every visited sub-cell.
   */
  private class ContainsVisitor extends BaseTermsEnumTraverser {

    public ContainsVisitor(LeafReaderContext context) throws IOException {
      super(context);
      if (termsEnum != null) {
        nextTerm();//advance to first
      }
    }

    BytesRef seekTerm = new BytesRef();//temp; see seek()
    BytesRef thisTerm;//current term in termsEnum; null signals that there are no more terms
    Cell indexedCell;//the cell wrapper around thisTerm

    /**
     * This is the primary algorithm; recursive. Returns null if finds none.
     *
     * @param cell the cell whose next-level cells are examined
     * @param acceptContains docs eligible to be collected, or null to accept every doc
     * @return the docs found for all examined sub-cells, or null if there are none
     */
    private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException {

      if (thisTerm == null)//signals all done
        return null;

      // Get the AND of all child results (into combinedSubResults)
      SmallDocSet combinedSubResults = null;

      // Optimization: use null subCellsFilter when we know cell is within the query shape.
      Shape subCellsFilter = queryShape;
      if (cell.getLevel() != 0 && ((cell.getShapeRel() == null || cell.getShapeRel() == SpatialRelation.WITHIN))) {
        subCellsFilter = null;
        assert cell.getShape().relate(queryShape) == SpatialRelation.WITHIN;
      }
      CellIterator subCells = cell.getNextLevelCells(subCellsFilter);
      while (subCells.hasNext()) {
        Cell subCell = subCells.next();
        if (!seek(subCell)) {
          // nothing is indexed for this sub-cell, so no doc can cover all sub-cells
          combinedSubResults = null;
        } else if (subCell.getLevel() == detailLevel) {
          combinedSubResults = getDocs(subCell, acceptContains);
        } else if (!multiOverlappingIndexedShapes &&
            subCell.getShapeRel() == SpatialRelation.WITHIN) {
          combinedSubResults = getLeafDocs(subCell, acceptContains);
        } else {
          //OR the leaf docs with all child results
          SmallDocSet leafDocs = getLeafDocs(subCell, acceptContains);
          SmallDocSet subDocs = visit(subCell, acceptContains); //recursion
          combinedSubResults = union(leafDocs, subDocs);
        }

        if (combinedSubResults == null)
          break;
        acceptContains = combinedSubResults;//has the 'AND' effect on next iteration
      }

      return combinedSubResults;
    }

    /**
     * Positions the terms enum for the given cell, only ever moving forward.
     *
     * @return true if the current term is now this cell (as a prefix or a leaf); false if the
     *         enum is already past it, no such term exists, or the terms are exhausted
     */
    private boolean seek(Cell cell) throws IOException {
      if (thisTerm == null)
        return false;
      final int compare = indexedCell.compareToNoLeaf(cell);
      if (compare > 0) {
        return false;//leap-frog effect
      } else if (compare == 0) {
        return true; // already there!
      } else {//compare < 0
        //seek!
        seekTerm = cell.getTokenBytesNoLeaf(seekTerm);
        final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(seekTerm);
        if (seekStatus == TermsEnum.SeekStatus.END) {
          thisTerm = null;//all done
          return false;
        }
        thisTerm = termsEnum.term();
        indexedCell = grid.readCell(thisTerm, indexedCell);
        if (seekStatus == TermsEnum.SeekStatus.FOUND) {
          return true;
        }
        // not an exact hit: it still counts if we landed on the leaf variant of this cell
        return indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0;
      }
    }

    /**
     * Get prefix &amp; leaf docs at this cell. Expects the enum to be positioned on the cell
     * (see {@link #seek(Cell)}) and leaves it on the term following those consumed.
     */
    private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException {
      assert indexedCell.compareToNoLeaf(cell) == 0;
      //called when we've reached detailLevel.
      if (indexedCell.isLeaf()) {//only a leaf
        SmallDocSet result = collectDocs(acceptContains);
        nextTerm();
        return result;
      } else {
        SmallDocSet docsAtPrefix = collectDocs(acceptContains);
        if (!nextTerm()) {
          return docsAtPrefix;
        }
        //collect leaf too
        if (indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0) {
          SmallDocSet docsAtLeaf = collectDocs(acceptContains);
          nextTerm();
          return union(docsAtPrefix, docsAtLeaf);
        } else {
          return docsAtPrefix;
        }
      }
    }

    /**
     * Gets docs on the leaf of the given cell, _if_ there is a leaf cell, otherwise null.
     * Expects the enum to be positioned on the cell (see {@link #seek(Cell)}).
     */
    private SmallDocSet getLeafDocs(Cell cell, Bits acceptContains) throws IOException {
      assert indexedCell.compareToNoLeaf(cell) == 0;
      //Advance past prefix if we're at a prefix; return null if no leaf
      if (!indexedCell.isLeaf()) {
        if (!nextTerm() || !indexedCell.isLeaf() || indexedCell.getLevel() != cell.getLevel()) {
          return null;
        }
      }
      SmallDocSet result = collectDocs(acceptContains);
      nextTerm();
      return result;
    }

    /**
     * Advances to the next term and re-reads {@link #indexedCell} from it.
     *
     * @return false if the terms are exhausted, in which case {@link #thisTerm} is null
     */
    private boolean nextTerm() throws IOException {
      if ((thisTerm = termsEnum.next()) == null)
        return false;
      indexedCell = grid.readCell(thisTerm, indexedCell);
      return true;
    }

    /**
     * ORs two possibly-null sets. A null argument is treated as empty; when both are non-null
     * one of them is modified and returned (see {@link SmallDocSet#union(SmallDocSet)}).
     */
    private SmallDocSet union(SmallDocSet aSet, SmallDocSet bSet) {
      if (bSet != null) {
        if (aSet == null)
          return bSet;
        return aSet.union(bSet);//union is 'or'
      }
      return aSet;
    }

    /**
     * Collects the docs of the current term that pass {@code acceptContains}.
     *
     * @param acceptContains docs eligible to be collected, or null to accept every doc
     * @return the collected docs, or null if none were accepted
     */
    private SmallDocSet collectDocs(Bits acceptContains) throws IOException {
      SmallDocSet set = null;

      postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
      int docid;
      while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (acceptContains != null && !acceptContains.get(docid)) {
          continue;
        }
        if (set == null) {
          // allocate lazily, sized from the term's doc frequency when it is usable
          int size = termsEnum.docFreq();
          if (size <= 0)
            size = 16;
          set = new SmallDocSet(size);
        }
        set.set(docid);
      }
      return set;
    }

  }//class ContainsVisitor

  /** A hash based mutable set of docIds. If this were Solr code then we might
   * use a combination of HashDocSet and SortedIntDocSet instead. */
  // TODO use DocIdSetBuilder?
  private static class SmallDocSet extends DocIdSet implements Bits {

    private final SentinelIntSet intSet;
    private int maxInt = -1;//largest docid added so far; -1 while the set is empty

    public SmallDocSet(int size) {
      intSet = new SentinelIntSet(size, -1);
    }

    /** Returns true if {@code index} has been added to this set. */
    @Override
    public boolean get(int index) {
      return intSet.exists(index);
    }

    /** Adds {@code index} to this set and tracks the largest value added. */
    public void set(int index) {
      intSet.put(index);
      if (index > maxInt)
        maxInt = index;
    }

    /**
     * One more than the largest docid, so that every docid in the set is a valid index for
     * {@link #get(int)}; 0 when the set is empty.
     */
    @Override
    public int length() {
      return maxInt + 1;
    }

    /** Number of docids. */
    public int size() {
      return intSet.size();
    }

    /** NOTE: modifies and returns either "this" or "other" */
    public SmallDocSet union(SmallDocSet other) {
      SmallDocSet bigger;
      SmallDocSet smaller;
      if (other.intSet.size() > this.intSet.size()) {
        bigger = other;
        smaller = this;
      } else {
        bigger = this;
        smaller = other;
      }
      //modify bigger
      for (int v : smaller.intSet.keys) {
        if (v == smaller.intSet.emptyVal)
          continue;
        bigger.set(v);
      }
      return bigger;
    }

    @Override
    public Bits bits() throws IOException {
      //if the # of docids is super small, return null since iteration is going
      // to be faster
      return size() > 4 ? this : null;
    }

    /** Iterates the docids in ascending order; returns null when the set is empty. */
    @Override
    public DocIdSetIterator iterator() throws IOException {
      if (size() == 0)
        return null;
      //copy the unsorted values to a new array then sort them
      int d = 0;
      final int[] docs = new int[intSet.size()];
      for (int v : intSet.keys) {
        if (v == intSet.emptyVal)
          continue;
        docs[d++] = v;
      }
      assert d == intSet.size();
      final int size = d;

      //sort them
      Arrays.sort(docs, 0, size);

      return new DocIdSetIterator() {
        int idx = -1;//position in docs; -1 before the first nextDoc()

        @Override
        public int docID() {
          if (idx < 0) {
            return -1;
          } else if (idx < size) {
            return docs[idx];
          } else {
            return NO_MORE_DOCS;
          }
        }

        @Override
        public int nextDoc() throws IOException {
          if (++idx < size)
            return docs[idx];
          return NO_MORE_DOCS;
        }

        @Override
        public int advance(int target) throws IOException {
          //for this small set this is likely faster vs. a binary search
          // into the sorted array
          return slowAdvance(target);
        }

        @Override
        public long cost() {
          return size;
        }
      };
    }

    /** Estimate covering this object's two fields plus the backing int set. */
    @Override
    public long ramBytesUsed() {
      return RamUsageEstimator.alignObjectSize(
            RamUsageEstimator.NUM_BYTES_OBJECT_REF
          + Integer.BYTES)
          + intSet.ramBytesUsed();
    }

  }//class SmallDocSet

}