ContainsPrefixTreeQuery.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.spatial.prefix;

import java.io.IOException;
import java.util.Arrays;

import org.locationtech.spatial4j.shape.Shape;
import org.locationtech.spatial4j.shape.SpatialRelation;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSet;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.spatial.prefix.tree.Cell;
import org.apache.lucene.spatial.prefix.tree.CellIterator;
import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.SentinelIntSet;

/**
 * Finds docs where its indexed shape {@link org.apache.lucene.spatial.query.SpatialOperation#Contains
 * CONTAINS} the query shape. For use on {@link RecursivePrefixTreeStrategy}.
 *
 * @lucene.experimental
 */
public class ContainsPrefixTreeQuery extends AbstractPrefixTreeQuery {

  /**
   * If the spatial data for a document is comprised of multiple overlapping or adjacent parts,
   * it might fail to match a query shape when doing the CONTAINS predicate when the sum of
   * those shapes contain the query shape but none do individually.  Set this to false to
   * increase performance if you don't care about that circumstance (such as if your indexed
   * data doesn't even have such conditions).  See LUCENE-5062.
   */
  protected final boolean multiOverlappingIndexedShapes;

  public ContainsPrefixTreeQuery(Shape queryShape, String fieldName, SpatialPrefixTree grid, int detailLevel, boolean multiOverlappingIndexedShapes) {
    super(queryShape, fieldName, grid, detailLevel);
    this.multiOverlappingIndexedShapes = multiOverlappingIndexedShapes;
  }

  @Override
  public boolean equals(Object o) {
    if (!super.equals(o))
      return false;
    return multiOverlappingIndexedShapes == ((ContainsPrefixTreeQuery)o).multiOverlappingIndexedShapes;
  }

  @Override
  public int hashCode() {
    return super.hashCode() + (multiOverlappingIndexedShapes ? 1 : 0);
  }

  @Override
  public String toString(String field) {
    return getClass().getSimpleName() + "(" +
        "fieldName=" + fieldName + "," +
        "queryShape=" + queryShape + "," +
        "detailLevel=" + detailLevel + "," +
        "multiOverlappingIndexedShapes=" + multiOverlappingIndexedShapes +
        ")";
  }

  @Override
  protected DocIdSet getDocIdSet(LeafReaderContext context) throws IOException {
    return new ContainsVisitor(context).visit(grid.getWorldCell(), null);
  }

  private class ContainsVisitor extends BaseTermsEnumTraverser {

    public ContainsVisitor(LeafReaderContext context) throws IOException {
      super(context);
      if (termsEnum != null) {
        nextTerm();//advance to first
      }
    }

    BytesRef seekTerm = new BytesRef();//temp; see seek()
    BytesRef thisTerm;//current term in termsEnum
    Cell indexedCell;//the cell wrapper around thisTerm

    /** This is the primary algorithm; recursive.  Returns null if finds none. */
    private SmallDocSet visit(Cell cell, Bits acceptContains) throws IOException {

      if (thisTerm == null)//signals all done
        return null;

      // Get the AND of all child results (into combinedSubResults)
      SmallDocSet combinedSubResults = null;
      //   Optimization: use null subCellsFilter when we know cell is within the query shape.
      Shape subCellsFilter = queryShape;
      if (cell.getLevel() != 0 && ((cell.getShapeRel() == null || cell.getShapeRel() == SpatialRelation.WITHIN))) {
        subCellsFilter = null;
        assert cell.getShape().relate(queryShape) == SpatialRelation.WITHIN;
      }
      CellIterator subCells = cell.getNextLevelCells(subCellsFilter);
      while (subCells.hasNext()) {
        Cell subCell = subCells.next();
        if (!seek(subCell)) {
          combinedSubResults = null;
        } else if (subCell.getLevel() == detailLevel) {
          combinedSubResults = getDocs(subCell, acceptContains);
        } else if (!multiOverlappingIndexedShapes &&
            subCell.getShapeRel() == SpatialRelation.WITHIN) {
          combinedSubResults = getLeafDocs(subCell, acceptContains);
        } else {
          //OR the leaf docs with all child results
          SmallDocSet leafDocs = getLeafDocs(subCell, acceptContains);
          SmallDocSet subDocs = visit(subCell, acceptContains); //recursion
          combinedSubResults = union(leafDocs, subDocs);
        }

        if (combinedSubResults == null)
          break;
        acceptContains = combinedSubResults;//has the 'AND' effect on next iteration
      }

      return combinedSubResults;
    }

    private boolean seek(Cell cell) throws IOException {
      if (thisTerm == null)
        return false;
      final int compare = indexedCell.compareToNoLeaf(cell);
      if (compare > 0) {
        return false;//leap-frog effect
      } else if (compare == 0) {
        return true; // already there!
      } else {//compare > 0
        //seek!
        seekTerm = cell.getTokenBytesNoLeaf(seekTerm);
        final TermsEnum.SeekStatus seekStatus = termsEnum.seekCeil(seekTerm);
        if (seekStatus == TermsEnum.SeekStatus.END) {
          thisTerm = null;//all done
          return false;
        }
        thisTerm = termsEnum.term();
        indexedCell = grid.readCell(thisTerm, indexedCell);
        if (seekStatus == TermsEnum.SeekStatus.FOUND) {
          return true;
        }
        return indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0;
      }
    }

    /** Get prefix & leaf docs at this cell. */
    private SmallDocSet getDocs(Cell cell, Bits acceptContains) throws IOException {
      assert indexedCell.compareToNoLeaf(cell) == 0;
      //called when we've reached detailLevel.
      if (indexedCell.isLeaf()) {//only a leaf
        SmallDocSet result = collectDocs(acceptContains);
        nextTerm();
        return result;
      } else {
        SmallDocSet docsAtPrefix = collectDocs(acceptContains);
        if (!nextTerm()) {
          return docsAtPrefix;
        }
        //collect leaf too
        if (indexedCell.isLeaf() && indexedCell.compareToNoLeaf(cell) == 0) {
          SmallDocSet docsAtLeaf = collectDocs(acceptContains);
          nextTerm();
          return union(docsAtPrefix, docsAtLeaf);
        } else {
          return docsAtPrefix;
        }
      }
    }

    /** Gets docs on the leaf of the given cell, _if_ there is a leaf cell, otherwise null. */
    private SmallDocSet getLeafDocs(Cell cell, Bits acceptContains) throws IOException {
      assert indexedCell.compareToNoLeaf(cell) == 0;
      //Advance past prefix if we're at a prefix; return null if no leaf
      if (!indexedCell.isLeaf()) {
        if (!nextTerm() || !indexedCell.isLeaf() || indexedCell.getLevel() != cell.getLevel()) {
          return null;
        }
      }
      SmallDocSet result = collectDocs(acceptContains);
      nextTerm();
      return result;
    }

    private boolean nextTerm() throws IOException {
      if ((thisTerm = termsEnum.next()) == null)
        return false;
      indexedCell = grid.readCell(thisTerm, indexedCell);
      return true;
    }

    private SmallDocSet union(SmallDocSet aSet, SmallDocSet bSet) {
      if (bSet != null) {
        if (aSet == null)
          return bSet;
        return aSet.union(bSet);//union is 'or'
      }
      return aSet;
    }

    private SmallDocSet collectDocs(Bits acceptContains) throws IOException {
      SmallDocSet set = null;

      postingsEnum = termsEnum.postings(postingsEnum, PostingsEnum.NONE);
      int docid;
      while ((docid = postingsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        if (acceptContains != null && acceptContains.get(docid) == false) {
          continue;
        }
        if (set == null) {
          int size = termsEnum.docFreq();
          if (size <= 0)
            size = 16;
          set = new SmallDocSet(size);
        }
        set.set(docid);
      }
      return set;
    }

  }//class ContainsVisitor

  /** A hash based mutable set of docIds. If this were Solr code then we might
   * use a combination of HashDocSet and SortedIntDocSet instead. */
  // TODO use DocIdSetBuilder?
  private static class SmallDocSet extends DocIdSet implements Bits {

    private final SentinelIntSet intSet;
    private int maxInt = 0;

    public SmallDocSet(int size) {
      intSet = new SentinelIntSet(size, -1);
    }

    @Override
    public boolean get(int index) {
      return intSet.exists(index);
    }

    public void set(int index) {
      intSet.put(index);
      if (index > maxInt)
        maxInt = index;
    }

    /** Largest docid. */
    @Override
    public int length() {
      return maxInt;
    }

    /** Number of docids. */
    public int size() {
      return intSet.size();
    }

    /** NOTE: modifies and returns either "this" or "other" */
    public SmallDocSet union(SmallDocSet other) {
      SmallDocSet bigger;
      SmallDocSet smaller;
      if (other.intSet.size() > this.intSet.size()) {
        bigger = other;
        smaller = this;
      } else {
        bigger = this;
        smaller = other;
      }
      //modify bigger
      for (int v : smaller.intSet.keys) {
        if (v == smaller.intSet.emptyVal)
          continue;
        bigger.set(v);
      }
      return bigger;
    }

    @Override
    public Bits bits() throws IOException {
      //if the # of docids is super small, return null since iteration is going
      // to be faster
      return size() > 4 ? this : null;
    }

    @Override
    public DocIdSetIterator iterator() throws IOException {
      if (size() == 0)
        return null;
      //copy the unsorted values to a new array then sort them
      int d = 0;
      final int[] docs = new int[intSet.size()];
      for (int v : intSet.keys) {
        if (v == intSet.emptyVal)
          continue;
        docs[d++] = v;
      }
      assert d == intSet.size();
      final int size = d;

      //sort them
      Arrays.sort(docs, 0, size);

      return new DocIdSetIterator() {
        int idx = -1;
        @Override
        public int docID() {
          if (idx < 0) {
            return -1;
          } else if (idx < size) {
            return docs[idx];
          } else {
            return NO_MORE_DOCS;
          }
        }

        @Override
        public int nextDoc() throws IOException {
          if (++idx < size)
            return docs[idx];
          return NO_MORE_DOCS;
        }

        @Override
        public int advance(int target) throws IOException {
          //for this small set this is likely faster vs. a binary search
          // into the sorted array
          return slowAdvance(target);
        }

        @Override
        public long cost() {
          return size;
        }
      };
    }

    @Override
    public long ramBytesUsed() {
      return RamUsageEstimator.alignObjectSize(
            RamUsageEstimator.NUM_BYTES_OBJECT_REF
          + Integer.BYTES)
          + intSet.ramBytesUsed();
    }

  }//class SmallDocSet

}