GraphTermsCollector.java example

Explorer
lucene-solr-master
- lucene
- solr
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search.join;

import java.io.IOException;

import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;

/**
 * A graph hit collector.  This accumulates the edges for a given graph traversal.
 * <p>
 * For every collected document that is not already marked in the supplied
 * {@code currentResult} bits, the document is recorded in this collector's own bit set
 * and, unless it is listed in {@code leafNodes}, the values of the edge field for that
 * document are added to the collected terms.  Documents that are already marked in
 * {@code currentResult} are skipped and only flag {@link #hasCycles}.
 * @lucene.internal
 */
class GraphTermsCollector extends SimpleCollector implements Collector {

  // the field to collect edge ids from
  private final String field;
  // all the collected terms
  private final BytesRefHash collectorTerms;
  // doc values of the edge field for the segment currently being collected
  private SortedSetDocValues docTermOrds;
  // the result set that is being collected; consulted (never modified here) to skip visited docs
  private final Bits currentResult;
  // known leaf nodes
  private final DocSet leafNodes;
  // number of hits discovered at this level.
  int numHits=0;
  // docs collected by this collector, indexed by (segment doc id + docBase)
  BitSet bits;
  final int maxDoc;
  // docBase of the segment currently being collected
  int base;
  // docBaseInParent of the segment currently being collected (not read inside this class)
  int baseInParent;
  // if we care to track this.
  boolean hasCycles = false;

  /**
   * Creates a collector for one level of a traversal.
   *
   * @param field         name of the doc values field the edge ids are read from
   * @param maxDoc        size of the bit set that records the collected documents
   * @param currentResult documents already part of the result; a collected document whose
   *                      bit is set here is skipped
   * @param leafNodes     documents for which no edge ids are looked up
   */
  GraphTermsCollector(String field,int maxDoc, Bits currentResult, DocSet leafNodes) {
    this.field = field;
    this.maxDoc = maxDoc;
    this.collectorTerms = new BytesRefHash();
    this.currentResult = currentResult;
    this.leafNodes = leafNodes;
    // the bitset that will hold the graph traversal result set; always created up front
    this.bits = new FixedBitSet(maxDoc);
  }

  /**
   * Collects one hit of the current segment.
   *
   * @param doc segment-local document id
   * @throws IOException if reading the doc values fails
   */
  @Override
  public void collect(int doc) throws IOException {
    // currentResult, leafNodes and bits are all addressed with the docBase-adjusted id
    final int docWithBase = doc + base;
    if (currentResult.get(docWithBase)) {
      // cycle detected / already been here.
      // knowing if your graph had a cycle might be useful and it's lightweight to implement here.
      hasCycles = true;
      return;
    }
    // collect the docs
    addDocToResult(docWithBase);
    // Optimization to not look up edges for a document that is a leaf node
    if (!leafNodes.exists(docWithBase)) {
      // the doc values belong to the current segment, so they take the segment-local id
      addEdgeIdsToResult(doc);
    }
    // Note: tracking links in for each result would be a huge memory hog... so not implementing at this time.
  }

  /**
   * Adds every value of the edge field for the given document to the collected terms.
   * Nothing is added when the doc values iterator is not positioned on the document
   * after the optional advance.
   *
   * @param doc segment-local document id
   * @throws IOException if reading the doc values fails
   */
  private void addEdgeIdsToResult(int doc) throws IOException {
    // set the doc to pull the edges ids for; only advance when the iterator is behind it.
    if (doc > docTermOrds.docID()) {
      docTermOrds.advance(doc);
    }
    if (doc == docTermOrds.docID()) {
      long ord;
      while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
        // TODO: handle non string type fields.
        BytesRef edgeValue = docTermOrds.lookupOrd(ord);
        // add the edge id to the collector terms.
        collectorTerms.add(edgeValue);
      }
    }
  }

  /**
   * Marks a document as traversed and counts it as a hit for this level.
   *
   * @param docWithBase document id with the segment docBase already added
   */
  private void addDocToResult(int docWithBase) {
    // this document is part of the traversal. mark it in our bitmap.
    bits.set(docWithBase);
    // increment the hit count so we know how many docs we traversed this time.
    numHits++;
  }

  /**
   * Returns the documents collected so far.
   *
   * @return a {@link BitDocSet} built from this collector's bit set and {@link #numHits}
   */
  public BitDocSet getDocSet() {
    if (bits == null) {
      // TODO: this shouldn't happen; the constructor always creates the bit set.
      // Kept because the field is not final and is visible to the rest of the package.
      bits = new FixedBitSet(maxDoc);
    }
    return new BitDocSet((FixedBitSet)bits,numHits);
  }

  /**
   * Switches the collector to a new segment: fetches the doc values of the edge field
   * for that segment and remembers the segment's doc bases.
   */
  @Override
  public void doSetNextReader(LeafReaderContext context) throws IOException {
    // Grab the updated doc values.
    docTermOrds = DocValues.getSortedSet(context.reader(), field);
    base = context.docBase;
    baseInParent = context.docBaseInParent;
  }

  /**
   * Returns the edge ids gathered so far.
   *
   * @return the hash holding the collected terms (the live instance, not a copy)
   */
  public BytesRefHash getCollectorTerms() {
    return collectorTerms;
  }

  /** Scores are not needed by this collector. */
  @Override
  public boolean needsScores() {
    return false;
  }

}