/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search.join;
import java.io.IOException;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.BitSet;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;
import org.apache.solr.search.BitDocSet;
import org.apache.solr.search.DocSet;
/**
* A graph hit collector. This accumulates the edges for a given graph traversal.
* On each collect method, the collector skips edge extraction for nodes that it has
* already traversed.
* @lucene.internal
*/
class GraphTermsCollector extends SimpleCollector implements Collector {
// the field to collect edge ids from
private String field;
// all the collected terms
private BytesRefHash collectorTerms;
private SortedSetDocValues docTermOrds;
// the result set that is being collected.
private Bits currentResult;
// known leaf nodes
private DocSet leafNodes;
// number of hits discovered at this level.
int numHits=0;
BitSet bits;
final int maxDoc;
int base;
int baseInParent;
// if we care to track this.
boolean hasCycles = false;
GraphTermsCollector(String field,int maxDoc, Bits currentResult, DocSet leafNodes) {
this.field = field;
this.maxDoc = maxDoc;
this.collectorTerms = new BytesRefHash();
this.currentResult = currentResult;
this.leafNodes = leafNodes;
if (bits==null) {
// create a bitset at the start that will hold the graph traversal result set
bits = new FixedBitSet(maxDoc);
}
}
public void collect(int doc) throws IOException {
doc += base;
if (currentResult.get(doc)) {
// cycle detected / already been here.
// knowing if your graph had a cycle might be useful and it's lightweight to implement here.
hasCycles = true;
return;
}
// collect the docs
addDocToResult(doc);
// Optimization to not look up edges for a document that is a leaf node
if (!leafNodes.exists(doc)) {
addEdgeIdsToResult(doc-base);
}
// Note: tracking links in for each result would be a huge memory hog... so not implementing at this time.
}
private void addEdgeIdsToResult(int doc) throws IOException {
// set the doc to pull the edges ids for.
if (doc > docTermOrds.docID()) {
docTermOrds.advance(doc);
}
if (doc == docTermOrds.docID()) {
BytesRef edgeValue = new BytesRef();
long ord;
while ((ord = docTermOrds.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) {
// TODO: handle non string type fields.
edgeValue = docTermOrds.lookupOrd(ord);
// add the edge id to the collector terms.
collectorTerms.add(edgeValue);
}
}
}
private void addDocToResult(int docWithBase) {
// this document is part of the traversal. mark it in our bitmap.
bits.set(docWithBase);
// increment the hit count so we know how many docs we traversed this time.
numHits++;
}
public BitDocSet getDocSet() {
if (bits == null) {
// TODO: this shouldn't happen
bits = new FixedBitSet(maxDoc);
}
return new BitDocSet((FixedBitSet)bits,numHits);
}
@Override
public void doSetNextReader(LeafReaderContext context) throws IOException {
// Grab the updated doc values.
docTermOrds = DocValues.getSortedSet(context.reader(), field);
base = context.docBase;
baseInParent = context.docBaseInParent;
}
public BytesRefHash getCollectorTerms() {
return collectorTerms;
}
@Override
public boolean needsScores() {
return false;
}
}