package org.apache.lucene.spatial.prefix; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import com.spatial4j.core.context.SpatialContext; import com.spatial4j.core.distance.DistanceUtils; import com.spatial4j.core.shape.Circle; import com.spatial4j.core.shape.Point; import com.spatial4j.core.shape.Rectangle; import com.spatial4j.core.shape.Shape; import com.spatial4j.core.shape.SpatialRelation; import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.spatial.prefix.tree.Cell; import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree; import org.apache.lucene.util.Bits; import org.apache.lucene.util.FixedBitSet; import java.io.IOException; import java.util.Collection; import java.util.Iterator; /** * Finds docs where its indexed shape is {@link org.apache.lucene.spatial.query.SpatialOperation#IsWithin * WITHIN} the query shape. It works by looking at cells outside of the query * shape to ensure documents there are excluded. By default, it will * examine all cells, and it's fairly slow. If you know that the indexed shapes * are never comprised of multiple disjoint parts (which also means it is not multi-valued), * then you can pass {@code SpatialPrefixTree.getDistanceForLevel(maxLevels)} as * the {@code queryBuffer} constructor parameter to minimally look this distance * beyond the query shape's edge. Even if the indexed shapes are sometimes * comprised of multiple disjoint parts, you might want to use this option with * a large buffer as a faster approximation with minimal false-positives. * * @lucene.experimental */ //TODO LUCENE-4869: implement faster algorithm based on filtering out false-positives of a // minimal query buffer by looking in a DocValues cache holding a representative // point of each disjoint component of a document's shape(s). public class WithinPrefixTreeFilter extends AbstractVisitingPrefixTreeFilter { private final Shape bufferedQueryShape;//if null then the whole world /** * See {@link AbstractVisitingPrefixTreeFilter#AbstractVisitingPrefixTreeFilter(com.spatial4j.core.shape.Shape, String, org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree, int, int)}. * {@code queryBuffer} is the (minimum) distance beyond the query shape edge * where non-matching documents are looked for so they can be excluded. If * -1 is used then the whole world is examined (a good default for correctness). */ public WithinPrefixTreeFilter(Shape queryShape, String fieldName, SpatialPrefixTree grid, int detailLevel, int prefixGridScanLevel, double queryBuffer) { super(queryShape, fieldName, grid, detailLevel, prefixGridScanLevel); if (queryBuffer == -1) this.bufferedQueryShape = null; else this.bufferedQueryShape = bufferShape(queryShape, queryBuffer); } /** Returns a new shape that is larger than shape by at distErr. */ //TODO move this generic code elsewhere? Spatial4j? protected Shape bufferShape(Shape shape, double distErr) { if (distErr <= 0) throw new IllegalArgumentException("distErr must be > 0"); SpatialContext ctx = grid.getSpatialContext(); if (shape instanceof Point) { return ctx.makeCircle((Point)shape, distErr); } else if (shape instanceof Circle) { Circle circle = (Circle) shape; double newDist = circle.getRadius() + distErr; if (ctx.isGeo() && newDist > 180) newDist = 180; return ctx.makeCircle(circle.getCenter(), newDist); } else { Rectangle bbox = shape.getBoundingBox(); double newMinX = bbox.getMinX() - distErr; double newMaxX = bbox.getMaxX() + distErr; double newMinY = bbox.getMinY() - distErr; double newMaxY = bbox.getMaxY() + distErr; if (ctx.isGeo()) { if (newMinY < -90) newMinY = -90; if (newMaxY > 90) newMaxY = 90; if (newMinY == -90 || newMaxY == 90 || bbox.getWidth() + 2*distErr > 360) { newMinX = -180; newMaxX = 180; } else { newMinX = DistanceUtils.normLonDEG(newMinX); newMaxX = DistanceUtils.normLonDEG(newMaxX); } } else { //restrict to world bounds newMinX = Math.max(newMinX, ctx.getWorldBounds().getMinX()); newMaxX = Math.min(newMaxX, ctx.getWorldBounds().getMaxX()); newMinY = Math.max(newMinY, ctx.getWorldBounds().getMinY()); newMaxY = Math.min(newMaxY, ctx.getWorldBounds().getMaxY()); } return ctx.makeRectangle(newMinX, newMaxX, newMinY, newMaxY); } } @Override public DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException { return new VisitorTemplate(context, acceptDocs, true) { private FixedBitSet inside; private FixedBitSet outside; private SpatialRelation visitRelation; @Override protected void start() { inside = new FixedBitSet(maxDoc); outside = new FixedBitSet(maxDoc); } @Override protected DocIdSet finish() { inside.andNot(outside); return inside; } @Override protected Iterator<Cell> findSubCellsToVisit(Cell cell) { //use buffered query shape instead of orig. Works with null too. return cell.getSubCells(bufferedQueryShape).iterator(); } @Override protected boolean visit(Cell cell) throws IOException { //cell.relate is based on the bufferedQueryShape; we need to examine what // the relation is against the queryShape visitRelation = cell.getShape().relate(queryShape); if (visitRelation == SpatialRelation.WITHIN) { collectDocs(inside); return false; } else if (visitRelation == SpatialRelation.DISJOINT) { collectDocs(outside); return false; } else if (cell.getLevel() == detailLevel) { collectDocs(inside); return false; } return true; } @Override protected void visitLeaf(Cell cell) throws IOException { //visitRelation is declared as a field, populated by visit() so we don't recompute it assert detailLevel != cell.getLevel(); assert visitRelation == cell.getShape().relate(queryShape); if (allCellsIntersectQuery(cell, visitRelation)) collectDocs(inside); else collectDocs(outside); } /** Returns true if the provided cell, and all its sub-cells down to * detailLevel all intersect the queryShape. */ private boolean allCellsIntersectQuery(Cell cell, SpatialRelation relate/*cell to query*/) { if (relate == null) relate = cell.getShape().relate(queryShape); if (cell.getLevel() == detailLevel) return relate.intersects(); if (relate == SpatialRelation.WITHIN) return true; if (relate == SpatialRelation.DISJOINT) return false; // Note: Generating all these cells just to determine intersection is not ideal. // It was easy to implement but could be optimized. For example if the docs // in question are already marked in the 'outside' bitset then it can be avoided. Collection<Cell> subCells = cell.getSubCells(null); for (Cell subCell : subCells) { if (!allCellsIntersectQuery(subCell, null))//recursion return false; } return true; } @Override protected void visitScanned(Cell cell) throws IOException { if (allCellsIntersectQuery(cell, null)) { collectDocs(inside); } else { collectDocs(outside); } } }.getDocIdSet(); } }