/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.spatial.prefix;

import java.io.IOException;
import java.util.Iterator;

import org.locationtech.spatial4j.shape.Shape;
import org.locationtech.spatial4j.shape.SpatialRelation;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.BytesRef;

/**
 * Traverses a {@link SpatialPrefixTree} indexed field, using the template and
 * visitor design patterns for subclasses to guide the traversal and collect
 * matching documents.
 * <p>
 * Subclasses implement {@link #getDocIdSet(org.apache.lucene.index.LeafReaderContext)}
 * by instantiating a custom {@link VisitorTemplate} subclass (i.e. an anonymous inner class)
 * and implementing the required methods.
 *
 * @lucene.internal
 */
public abstract class AbstractVisitingPrefixTreeQuery extends AbstractPrefixTreeQuery {

  //Historical note: this code resulted from a refactoring of RecursivePrefixTreeQuery,
  // which in turn came out of SOLR-2155

  //This class perhaps could have been implemented in terms of FilteredTermsEnum & MultiTermQuery.
  //  Maybe so for simple Intersects predicate but not for when we want to collect terms
  //  differently depending on cell state like IsWithin and for fuzzy/accurate collection planned improvements.  At
  //  least it would just make things more complicated.

  /**
   * Cell level at or beyond which {@code VisitorTemplate} stops dividing &amp; conquering
   * and scans terms instead. Clamped by the constructor to the range
   * {@code [0, grid.getMaxLevels() - 1]}.
   */
  protected final int prefixGridScanLevel;//at least one less than grid.getMaxLevels()

  /**
   * Passes {@code queryShape}, {@code fieldName}, {@code grid} and {@code detailLevel}
   * to the superclass constructor and stores a clamped {@code prefixGridScanLevel}.
   *
   * @param prefixGridScanLevel the requested scan level; values below 0 are raised to 0 and
   *                            values above {@code grid.getMaxLevels() - 1} are lowered to it.
   * @param detailLevel asserted to be no greater than {@code grid.getMaxLevels()}.
   */
  public AbstractVisitingPrefixTreeQuery(Shape queryShape, String fieldName, SpatialPrefixTree grid,
                                         int detailLevel, int prefixGridScanLevel) {
    super(queryShape, fieldName, grid, detailLevel);
    this.prefixGridScanLevel = Math.max(0, Math.min(prefixGridScanLevel, grid.getMaxLevels() - 1));
    assert detailLevel <= grid.getMaxLevels();
  }

  /**
   * An abstract class designed to make it easy to implement predicates or
   * other operations on a {@link SpatialPrefixTree} indexed field. An instance
   * of this class is not designed to be re-used across LeafReaderContext
   * instances so simply create a new one per-leaf.
   * The {@link #getDocIdSet()} method here starts the work. It first checks
   * that there are indexed terms; if not it quickly returns null. Then it calls
   * {@link #start()} so a subclass can set up a return value, like an
   * {@link org.apache.lucene.util.FixedBitSet}. Then it starts the traversal
   * process, calling {@link #findSubCellsToVisit(org.apache.lucene.spatial.prefix.tree.Cell)}
   * which by default finds the top cells that intersect {@code queryShape}. If
   * there isn't an indexed cell for a corresponding cell returned for this
   * method then it's short-circuited until it finds one, at which point
   * {@link #visitPrefix(org.apache.lucene.spatial.prefix.tree.Cell)} is called. At
   * some depth of the tree, the algorithm switches to a scanning mode that
   * calls {@link #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)}
   * for each leaf cell found.
   *
   * @lucene.internal
   */
  public abstract class VisitorTemplate extends BaseTermsEnumTraverser {

  /* Future potential optimizations:

  * Can a polygon query shape be optimized / made-simpler at recursive depths
    (e.g. intersection of shape + cell box)

  * RE "scan" vs divide & conquer performance decision:
    We should use termsEnum.docFreq() as an estimate on the number of places at
    this depth.  It would be nice if termsEnum knew how many terms
    start with the current term without having to repeatedly next() & test to find out.

  * Perhaps don't do intermediate seek()'s to cells above detailLevel that have Intersects
    relation because we won't be collecting those docs any way.  However seeking
    does act as a short-circuit.  So maybe do some percent of the time or when the level
    is above some threshold.

  */

    //
    //  TODO MAJOR REFACTOR SIMPLIFICATION BASED ON TreeCellIterator  TODO
    //

    private VNode curVNode;//current pointer, derived from query shape
    private BytesRef curVNodeTerm = new BytesRef();//curVNode.cell's term, without leaf. in main loop only
    private BytesRef thisTerm;//the result of termsEnum.term()
    private Cell indexedCell;//Cell wrapper of thisTerm. Always updated when thisTerm is.

    public VisitorTemplate(LeafReaderContext context) throws IOException {
      super(context);
    }

    /**
     * Runs the traversal and returns whatever {@link #finish()} returns.
     * Returns null without calling {@link #start()} when {@code termsEnum} is null or
     * yields no terms. An assertion guards against calling this more than once
     * on the same instance.
     */
    public DocIdSet getDocIdSet() throws IOException {
      assert curVNode == null : "Called more than once?";
      if (termsEnum == null)
        return null;
      if (!nextTerm()) {//advances
        return null;
      }

      // The root of the VNode tree wraps the grid's world cell and has no parent.
      curVNode = new VNode(null);
      curVNode.reset(grid.getWorldCell());

      start();

      addIntersectingChildren();

      main: while (thisTerm != null) {//terminates for other reasons too!

        //Advance curVNode pointer
        if (curVNode.children != null) {
          //-- HAVE CHILDREN: DESCEND
          assert curVNode.children.hasNext();//if we put it there then it has something
          preSiblings(curVNode);
          curVNode = curVNode.children.next();
        } else {
          //-- NO CHILDREN: ADVANCE TO NEXT SIBLING
          VNode parentVNode = curVNode.parent;
          while (true) {
            if (parentVNode == null)
              break main; // all done
            if (parentVNode.children.hasNext()) {
              //advance next sibling
              curVNode = parentVNode.children.next();
              break;
            } else {
              //reached end of siblings; pop up
              postSiblings(parentVNode);
              parentVNode.children = null;//GC
              parentVNode = parentVNode.parent;
            }
          }
        }

        //Seek to curVNode's cell (or skip if termsEnum has moved beyond)
        final int compare = indexedCell.compareToNoLeaf(curVNode.cell);
        if (compare > 0) {
          // The indexed cell is after; continue loop to next query cell
          continue;
        }
        if (compare < 0) {
          // The indexed cell is before; seek ahead to query cell:
          //      Seek !
          curVNode.cell.getTokenBytesNoLeaf(curVNodeTerm);
          TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(curVNodeTerm);
          if (seekStatus == TermsEnum.SeekStatus.END)
            break; // all done
          // Keep thisTerm and indexedCell in sync with the enum's new position.
          thisTerm = termsEnum.term();
          indexedCell = grid.readCell(thisTerm, indexedCell);
          if (seekStatus == TermsEnum.SeekStatus.NOT_FOUND) {
            // Did we find a leaf of the cell we were looking for or something after?
            if (!indexedCell.isLeaf() || indexedCell.compareToNoLeaf(curVNode.cell) != 0)
              continue; // The indexed cell is after; continue loop to next query cell
          }
        }
        // indexedCell == queryCell (disregarding leaf).

        // If indexedCell is a leaf then there's no prefix (prefix sorts before) -- just visit and continue
        if (indexedCell.isLeaf()) {
          visitLeaf(indexedCell);//TODO or query cell? Though shouldn't matter.
          if (!nextTerm()) break;
          continue;
        }
        // If a prefix (non-leaf) then visit; see if we descend.
        final boolean descend = visitPrefix(curVNode.cell);//need to use curVNode.cell not indexedCell
        if (!nextTerm()) break;
        // Check for adjacent leaf with the same prefix
        if (indexedCell.isLeaf() && indexedCell.getLevel() == curVNode.cell.getLevel()) {
          visitLeaf(indexedCell);//TODO or query cell? Though shouldn't matter.
          if (!nextTerm()) break;
        }


        if (descend) {
          addIntersectingChildren();
        }

      }//main loop

      return finish();
    }

    /** Called initially, and whenever {@link #visitPrefix(org.apache.lucene.spatial.prefix.tree.Cell)}
     * returns true. Either attaches a children iterator to {@code curVNode} (divide &amp; conquer)
     * or scans the terms beneath its cell, depending on {@code prefixGridScanLevel}.
     *
     * @throws IllegalStateException if the current cell's level is already at or beyond
     *         {@code detailLevel}.
     */
    private void addIntersectingChildren() throws IOException {
      assert thisTerm != null;
      Cell cell = curVNode.cell;
      if (cell.getLevel() >= detailLevel)
        throw new IllegalStateException("Spatial logic error");

      //Decide whether to continue to divide & conquer, or whether it's time to
      // scan through terms beneath this cell.
      // Scanning is a performance optimization trade-off.

      //TODO use termsEnum.docFreq() as heuristic
      boolean scan = cell.getLevel() >= prefixGridScanLevel;//simple heuristic

      if (!scan) {
        //Divide & conquer (ultimately termsEnum.seek())

        Iterator<Cell> subCellsIter = findSubCellsToVisit(cell);
        if (!subCellsIter.hasNext())//not expected
          return;
        // A single child VNode is allocated here; VNodeCellIterator re-uses it for every sub-cell.
        curVNode.children = new VNodeCellIterator(subCellsIter, new VNode(curVNode));

      } else {
        //Scan (loop of termsEnum.next())

        scan(detailLevel);
      }
    }

    /**
     * Called when doing a divide and conquer to find the next intersecting cells
     * of the query shape that are beneath {@code cell}. {@code cell} is
     * guaranteed to have an intersection and thus this must return some number
     * of nodes.
     */
    protected CellIterator findSubCellsToVisit(Cell cell) {
      return cell.getNextLevelCells(queryShape);
    }

    /**
     * Scans ({@code termsEnum.next()}) terms until a term is found that does
     * not start with curVNode's cell, or until the terms are exhausted. If it
     * finds a leaf cell above {@code scanDetailLevel} or any cell at
     * level {@code scanDetailLevel} then it calls {@link
     * #visitScanned(org.apache.lucene.spatial.prefix.tree.Cell)}.
     */
    protected void scan(int scanDetailLevel) throws IOException {
      //note: this can be a do-while instead in 6x; 5x has a back-compat with redundant leaves -- LUCENE-4942
      while (curVNode.cell.isPrefixOf(indexedCell)) {
        if (indexedCell.getLevel() == scanDetailLevel
            || (indexedCell.getLevel() < scanDetailLevel && indexedCell.isLeaf())) {
          visitScanned(indexedCell);
        }
        //advance
        if (!nextTerm()) break;
      }
    }

    /**
     * Advances {@code termsEnum} by one term, storing it in {@code thisTerm} and
     * refreshing {@code indexedCell} from it.
     *
     * @return false if the enum is exhausted (in which case {@code thisTerm} is now null
     *         and {@code indexedCell} is left untouched); true otherwise.
     */
    private boolean nextTerm() throws IOException {
      if ((thisTerm = termsEnum.next()) == null)
        return false;
      indexedCell = grid.readCell(thisTerm, indexedCell);
      return true;
    }

    /**
     * Used for {@link VNode#children}. Adapts an iterator of cells into an iterator of
     * VNodes; note that {@link #next()} always returns the same VNode instance, reset to
     * the next cell, rather than allocating a new one.
     */
    private class VNodeCellIterator implements Iterator<VNode> {

      final Iterator<Cell> cellIter;
      private final VNode vNode;

      VNodeCellIterator(Iterator<Cell> cellIter, VNode vNode) {
        this.cellIter = cellIter;
        this.vNode = vNode;
      }

      @Override
      public boolean hasNext() {
        return cellIter.hasNext();
      }

      @Override
      public VNode next() {
        assert hasNext();
        vNode.reset(cellIter.next());
        return vNode;
      }

      /** Intentionally a no-op. */
      @Override
      public void remove() {//it always removes
      }
    }

    /** Called first to setup things. */
    protected abstract void start() throws IOException;

    /** Called last to return the result. */
    protected abstract DocIdSet finish() throws IOException;

    /**
     * Visit an indexed non-leaf cell. The presence of a prefix cell implies
     * there are leaf cells at further levels. The cell passed should have its
     * {@link org.apache.lucene.spatial.prefix.tree.Cell#getShapeRel()} set
     * relative to the filtered shape.
     *
     * @param cell An intersecting cell; not a leaf.
     * @return true to descend to more levels.
     */
    protected abstract boolean visitPrefix(Cell cell) throws IOException;

    /**
     * Called when an indexed leaf cell is found. An
     * indexed leaf cell usually means associated documents won't be found at
     * further detail levels.  However, if a document has
     * multiple overlapping shapes at different resolutions, then this isn't true.
     */
    protected abstract void visitLeaf(Cell cell) throws IOException;

    /**
     * The cell is either indexed as a leaf or is the last level of detail. It
     * might not even intersect the query shape, so be sure to check for that.
     * The default implementation will check that and if passes then call
     * {@link #visitLeaf(org.apache.lucene.spatial.prefix.tree.Cell)} or
     * {@link #visitPrefix(org.apache.lucene.spatial.prefix.tree.Cell)}.
     * The return value of {@code visitPrefix} is ignored here.
     */
    protected void visitScanned(Cell cell) throws IOException {
      final SpatialRelation relate = cell.getShape().relate(queryShape);
      if (relate.intersects()) {
        cell.setShapeRel(relate);//just being pedantic
        if (cell.isLeaf()) {
          visitLeaf(cell);
        } else {
          visitPrefix(cell);
        }
      }
    }

    /**
     * Hook invoked by {@link #getDocIdSet()} just before the traversal descends into
     * {@code vNode}'s children. Does nothing by default.
     */
    protected void preSiblings(VNode vNode) throws IOException {
    }

    /**
     * Hook invoked by {@link #getDocIdSet()} once {@code vNode}'s children iterator has
     * no more elements, just before that iterator is cleared and the traversal pops up
     * to {@code vNode}'s parent. Does nothing by default.
     */
    protected void postSiblings(VNode vNode) throws IOException {
    }
  }//class VisitorTemplate

  /**
   * A visitor node/cell found via the query shape for {@link VisitorTemplate}.
   * Sometimes these are reset(cell). It's like a LinkedList node but forms a
   * tree.
   *
   * @lucene.internal
   */
  protected static class VNode {
    //Note: The VNode tree adds more code to debug/maintain v.s. a flattened
    // LinkedList that we used to have. There is more opportunity here for
    // custom behavior (see preSiblings & postSiblings) but that's not
    // leveraged yet. Maybe this is slightly more GC friendly.

    final VNode parent;//only null at the root
    Iterator<VNode> children;//null, then sometimes set, then null
    Cell cell;//not null (except initially before reset())

    /**
     * call reset(cell) after to set the cell.
     */
    VNode(VNode parent) { // remember to call reset(cell) after
      this.parent = parent;
    }

    /**
     * Points this node at {@code cell}. Asserts that {@code cell} is not null and that
     * this node currently has no {@code children} iterator attached.
     */
    void reset(Cell cell) {
      assert cell != null;
      this.cell = cell;
      assert children == null;
    }

  }
}