package org.apache.lucene.search; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import org.apache.lucene.search.BooleanClause.Occur; /** Expert: Scoring functionality for phrase queries. * <br>A document is considered matching if it contains the phrase-query terms * at "valid" positions. What "valid positions" are * depends on the type of the phrase query: for an exact phrase query terms are required * to appear in adjacent locations, while for a sloppy phrase query some distance between * the terms is allowed. The abstract method {@link #phraseFreq()} of extending classes * is invoked for each document containing all the phrase query terms, in order to * compute the frequency of the phrase query in that document. A non zero frequency * means a match. */ abstract class PhraseScorer extends Scorer { protected byte[] norms; protected float value; private boolean firstTime = true; private boolean more = true; protected PhraseQueue pq; protected PhrasePositions first, last; private float freq; //phrase frequency in current doc as computed by phraseFreq(). PhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, Similarity similarity, byte[] norms) { super(similarity, weight); this.norms = norms; this.value = weight.getValue(); // convert tps to a list of phrase positions. // note: phrase-position differs from term-position in that its position // reflects the phrase offset: pp.pos = tp.pos - offset. // this allows to easily identify a matching (exact) phrase // when all PhrasePositions have exactly the same position. for (int i = 0; i < postings.length; i++) { PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position); if (last != null) { // add next to end of list last.next = pp; } else { first = pp; } last = pp; } pq = new PhraseQueue(postings.length); // construct empty pq first.doc = -1; } @Override public int docID() { return first.doc; } @Override public int nextDoc() throws IOException { if (firstTime) { init(); firstTime = false; } else if (more) { more = last.next(); // trigger further scanning } if (!doNext()) { first.doc = NO_MORE_DOCS; } return first.doc; } // next without initial increment private boolean doNext() throws IOException { while (more) { while (more && first.doc < last.doc) { // find doc w/ all the terms more = first.skipTo(last.doc); // skip first upto last firstToLast(); // and move it to the end } if (more) { // found a doc with all of the terms freq = phraseFreq(); // check for phrase if (freq == 0.0f) // no match more = last.next(); // trigger further scanning else return true; // found a match } } return false; // no more matches } @Override public float score() throws IOException { //System.out.println("scoring " + first.doc); float raw = getSimilarity().tf(freq) * value; // raw score return norms == null ? raw : raw * getSimilarity().decodeNormValue(norms[first.doc]); // normalize } @Override public int advance(int target) throws IOException { firstTime = false; for (PhrasePositions pp = first; more && pp != null; pp = pp.next) { more = pp.skipTo(target); } if (more) { sort(); // re-sort } if (!doNext()) { first.doc = NO_MORE_DOCS; } return first.doc; } /** * phrase frequency in current doc as computed by phraseFreq(). */ @Override public final float freq() { return freq; } /** * For a document containing all the phrase query terms, compute the * frequency of the phrase in that document. * A non zero frequency means a match. * <br>Note, that containing all phrase terms does not guarantee a match - they have to be found in matching locations. * @return frequency of the phrase in current doc, 0 if not found. */ protected abstract float phraseFreq() throws IOException; private void init() throws IOException { for (PhrasePositions pp = first; more && pp != null; pp = pp.next) { more = pp.next(); } if (more) { sort(); } } private void sort() { pq.clear(); for (PhrasePositions pp = first; pp != null; pp = pp.next) { pq.add(pp); } pqToList(); } protected final void pqToList() { last = first = null; while (pq.top() != null) { PhrasePositions pp = pq.pop(); if (last != null) { // add next to end of list last.next = pp; } else first = pp; last = pp; pp.next = null; } } protected final void firstToLast() { last.next = first; // move first to end of list last = first; first = first.next; last.next = null; } @Override public String toString() { return "scorer(" + weight + ")"; } }