package org.apache.lucene.search.concordance.charoffsets; import java.io.IOException; import org.apache.lucene.document.Document; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.search.*; import org.apache.lucene.search.spans.SpanBoostQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanWeight; import org.apache.lucene.search.spans.Spans; public class SpansCrawler { public static void crawl(SpanQuery query, Query filter, IndexSearcher searcher, DocTokenOffsetsVisitor visitor) throws IOException, TargetTokenNotFoundException { float boost = (query instanceof SpanBoostQuery) ? ((SpanBoostQuery)query).getBoost() : 1.0f; SpanWeight w = query.createWeight(searcher, false, boost); if (filter == null) { for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS); if (spans == null) { continue; } boolean cont = visitLeafReader(ctx, spans, visitor); if (!cont) { break; } } } else { filter = searcher.rewrite(filter); Weight searcherWeight = searcher.createWeight(filter, false, boost); for (LeafReaderContext ctx : searcher.getIndexReader().leaves()) { Scorer leafReaderContextScorer = searcherWeight.scorer(ctx); if (leafReaderContextScorer == null) { continue; } Spans spans = w.getSpans(ctx, SpanWeight.Postings.POSITIONS); if (spans == null) { continue; } DocIdSetIterator filterItr = leafReaderContextScorer.iterator(); if (filterItr == null || filterItr.equals(DocIdSetIterator.empty())) { continue; } boolean cont = visitLeafReader(ctx, spans, filterItr, visitor); if (!cont) { break; } } } } private static boolean visitLeafReader(LeafReaderContext leafCtx, Spans spans, DocIdSetIterator filterItr, DocTokenOffsetsVisitor visitor) throws IOException, TargetTokenNotFoundException { int filterDoc = -1; int spansDoc = spans.nextDoc(); while (true) { if (spansDoc == DocIdSetIterator.NO_MORE_DOCS) { break; } filterDoc = filterItr.advance(spansDoc); if (filterDoc == DocIdSetIterator.NO_MORE_DOCS) { break; } else if (filterDoc > spansDoc) { while (spansDoc <= filterDoc) { spansDoc = spans.nextDoc(); if (spansDoc == filterDoc) { boolean cont = visit(leafCtx, spans, visitor); if (! cont) { return false; } } else { } } } else if (filterDoc == spansDoc) { boolean cont = visit(leafCtx, spans, visitor); if (! cont) { return false; } //then iterate spans spansDoc = spans.nextDoc(); } else if (filterDoc < spansDoc) { throw new IllegalArgumentException("FILTER doc is < spansdoc!!!"); } else { throw new IllegalArgumentException("Something horrible happened"); } } return true; } private static boolean visitLeafReader(LeafReaderContext leafCtx, Spans spans, DocTokenOffsetsVisitor visitor) throws IOException, TargetTokenNotFoundException { while (spans.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { boolean cont = visit(leafCtx, spans, visitor); if (! cont) { return false; } } return true; } private static boolean visit(LeafReaderContext leafCtx, Spans spans, DocTokenOffsetsVisitor visitor) throws IOException, TargetTokenNotFoundException { Document document = leafCtx.reader().document(spans.docID(), visitor.getFields()); DocTokenOffsets offsets = visitor.getDocTokenOffsets(); offsets.reset(leafCtx.docBase, spans.docID(), document); while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) { offsets.addOffset(spans.startPosition(), spans.endPosition()); } return visitor.visit(offsets); } }