TermsIncludingScoreQuery.java example

Explorer
solr-analytics-master
- lucene
- solr
package org.apache.lucene.search.join;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.ComplexExplanation;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Explanation;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;
import org.apache.lucene.util.FixedBitSet;

import java.io.IOException;
import java.util.Locale;
import java.util.Set;

class TermsIncludingScoreQuery extends Query {

  final String field;
  final boolean multipleValuesPerDocument;
  final BytesRefHash terms;
  final float[] scores;
  final int[] ords;
  final Query originalQuery;
  final Query unwrittenOriginalQuery;

  TermsIncludingScoreQuery(String field, boolean multipleValuesPerDocument, BytesRefHash terms, float[] scores, Query originalQuery) {
    this.field = field;
    this.multipleValuesPerDocument = multipleValuesPerDocument;
    this.terms = terms;
    this.scores = scores;
    this.originalQuery = originalQuery;
    this.ords = terms.sort(BytesRef.getUTF8SortedAsUnicodeComparator());
    this.unwrittenOriginalQuery = originalQuery;
  }

  private TermsIncludingScoreQuery(String field, boolean multipleValuesPerDocument, BytesRefHash terms, float[] scores, int[] ords, Query originalQuery, Query unwrittenOriginalQuery) {
    this.field = field;
    this.multipleValuesPerDocument = multipleValuesPerDocument;
    this.terms = terms;
    this.scores = scores;
    this.originalQuery = originalQuery;
    this.ords = ords;
    this.unwrittenOriginalQuery = unwrittenOriginalQuery;
  }

  public String toString(String string) {
    return String.format(Locale.ROOT, "TermsIncludingScoreQuery{field=%s;originalQuery=%s}", field, unwrittenOriginalQuery);
  }

  @Override
  public void extractTerms(Set<Term> terms) {
    originalQuery.extractTerms(terms);
  }

  @Override
  public Query rewrite(IndexReader reader) throws IOException {
    final Query originalQueryRewrite = originalQuery.rewrite(reader);
    if (originalQueryRewrite != originalQuery) {
      Query rewritten = new TermsIncludingScoreQuery(field, multipleValuesPerDocument, terms, scores,
          ords, originalQueryRewrite, originalQuery);
      rewritten.setBoost(getBoost());
      return rewritten;
    } else {
      return this;
    }
  }

  @Override
  public Weight createWeight(IndexSearcher searcher) throws IOException {
    final Weight originalWeight = originalQuery.createWeight(searcher);
    return new Weight() {

      private TermsEnum segmentTermsEnum;

      public Explanation explain(AtomicReaderContext context, int doc) throws IOException {
        SVInnerScorer scorer = (SVInnerScorer) scorer(context, false, false, context.reader().getLiveDocs());
        if (scorer != null) {
          if (scorer.advanceForExplainOnly(doc) == doc) {
            return scorer.explain();
          }
        }
        return new ComplexExplanation(false, 0.0f, "Not a match");
      }

      public Query getQuery() {
        return TermsIncludingScoreQuery.this;
      }

      public float getValueForNormalization() throws IOException {
        return originalWeight.getValueForNormalization() * TermsIncludingScoreQuery.this.getBoost() * TermsIncludingScoreQuery.this.getBoost();
      }

      public void normalize(float norm, float topLevelBoost) {
        originalWeight.normalize(norm, topLevelBoost * TermsIncludingScoreQuery.this.getBoost());
      }

      public Scorer scorer(AtomicReaderContext context, boolean scoreDocsInOrder, boolean topScorer, Bits acceptDocs) throws IOException {
        Terms terms = context.reader().terms(field);
        if (terms == null) {
          return null;
        }

        segmentTermsEnum = terms.iterator(segmentTermsEnum);
        if (scoreDocsInOrder) {
          if (multipleValuesPerDocument) {
            return new MVInOrderScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc());
          } else {
            return new SVInOrderScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc());
          }
        } else if (multipleValuesPerDocument) {
          return new MVInnerScorer(this, acceptDocs, segmentTermsEnum, context.reader().maxDoc());
        } else {
          return new SVInnerScorer(this, acceptDocs, segmentTermsEnum);
        }
      }
    };
  }

  // This impl assumes that the 'join' values are used uniquely per doc per field. Used for one to many relations.
  class SVInnerScorer extends Scorer {

    final BytesRef spare = new BytesRef();
    final Bits acceptDocs;
    final TermsEnum termsEnum;

    int upto;
    DocsEnum docsEnum;
    DocsEnum reuse;
    int scoreUpto;

    SVInnerScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum) {
      super(weight);
      this.acceptDocs = acceptDocs;
      this.termsEnum = termsEnum;
    }

    public float score() throws IOException {
      return scores[ords[scoreUpto]];
    }

    public Explanation explain() throws IOException {
      return new ComplexExplanation(true, score(), "Score based on join value " + termsEnum.term().utf8ToString());
    }

    public int docID() {
      return docsEnum != null ? docsEnum.docID() : DocIdSetIterator.NO_MORE_DOCS;
    }

    public int nextDoc() throws IOException {
      if (docsEnum != null) {
        int docId = docsEnum.nextDoc();
        if (docId == DocIdSetIterator.NO_MORE_DOCS) {
          docsEnum = null;
        } else {
          return docId;
        }
      }

      do {
        if (upto == terms.size()) {
          return DocIdSetIterator.NO_MORE_DOCS;
        }

        scoreUpto = upto;
        if (termsEnum.seekExact(terms.get(ords[upto++], spare), true)) {
          docsEnum = reuse = termsEnum.docs(acceptDocs, reuse, 0);
        }
      } while (docsEnum == null);

      return docsEnum.nextDoc();
    }

    public int advance(int target) throws IOException {
      throw new UnsupportedOperationException("advance() isn't supported because doc ids are emitted out of order");
    }

    private int advanceForExplainOnly(int target) throws IOException {
      int docId;
      do {
        docId = nextDoc();
        if (docId < target) {
          int tempDocId = docsEnum.advance(target);
          if (tempDocId == target) {
            docId = tempDocId;
            break;
          }
        } else if (docId == target) {
          break;
        }
        docsEnum = null; // goto the next ord.
      } while (docId != DocIdSetIterator.NO_MORE_DOCS);
      return docId;
    }

    @Override
    public float freq() {
      return 1;
    }
  }

  // This impl that tracks whether a docid has already been emitted. This check makes sure that docs aren't emitted
  // twice for different join values. This means that the first encountered join value determines the score of a document
  // even if other join values yield a higher score.
  class MVInnerScorer extends SVInnerScorer {

    final FixedBitSet alreadyEmittedDocs;

    MVInnerScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, int maxDoc) {
      super(weight, acceptDocs, termsEnum);
      alreadyEmittedDocs = new FixedBitSet(maxDoc);
    }

    public int nextDoc() throws IOException {
      if (docsEnum != null) {
        int docId;
        do {
          docId = docsEnum.nextDoc();
          if (docId == DocIdSetIterator.NO_MORE_DOCS) {
            break;
          }
        } while (alreadyEmittedDocs.get(docId));
        if (docId == DocIdSetIterator.NO_MORE_DOCS) {
          docsEnum = null;
        } else {
          alreadyEmittedDocs.set(docId);
          return docId;
        }
      }

      for (;;) {
        do {
          if (upto == terms.size()) {
            return DocIdSetIterator.NO_MORE_DOCS;
          }

          scoreUpto = upto;
          if (termsEnum.seekExact(terms.get(ords[upto++], spare), true)) {
            docsEnum = reuse = termsEnum.docs(acceptDocs, reuse, 0);
          }
        } while (docsEnum == null);

        int docId;
        do {
          docId = docsEnum.nextDoc();
          if (docId == DocIdSetIterator.NO_MORE_DOCS) {
            break;
          }
        } while (alreadyEmittedDocs.get(docId));
        if (docId == DocIdSetIterator.NO_MORE_DOCS) {
          docsEnum = null;
        } else {
          alreadyEmittedDocs.set(docId);
          return docId;
        }
      }
    }
  }

  class SVInOrderScorer extends Scorer {

    final DocIdSetIterator matchingDocsIterator;
    final float[] scores;

    int currentDoc = -1;

    SVInOrderScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, int maxDoc) throws IOException {
      super(weight);
      FixedBitSet matchingDocs = new FixedBitSet(maxDoc);
      this.scores = new float[maxDoc];
      fillDocsAndScores(matchingDocs, acceptDocs, termsEnum);
      this.matchingDocsIterator = matchingDocs.iterator();
    }

    protected void fillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum) throws IOException {
      BytesRef spare = new BytesRef();
      DocsEnum docsEnum = null;
      for (int i = 0; i < terms.size(); i++) {
        if (termsEnum.seekExact(terms.get(ords[i], spare), true)) {
          docsEnum = termsEnum.docs(acceptDocs, docsEnum, 0);
          float score = TermsIncludingScoreQuery.this.scores[ords[i]];
          for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
            matchingDocs.set(doc);
            // In the case the same doc is also related to a another doc, a score might be overwritten. I think this
            // can only happen in a many-to-many relation
            scores[doc] = score;
          }
        }
      }
    }

    public float score() throws IOException {
      return scores[currentDoc];
    }

    public float freq() throws IOException {
      return 1;
    }

    public int docID() {
      return currentDoc;
    }

    public int nextDoc() throws IOException {
      return currentDoc = matchingDocsIterator.nextDoc();
    }

    public int advance(int target) throws IOException {
      return currentDoc = matchingDocsIterator.advance(target);
    }
  }

  // This scorer deals with the fact that a document can have more than one score from multiple related documents.
  class MVInOrderScorer extends SVInOrderScorer {

    MVInOrderScorer(Weight weight, Bits acceptDocs, TermsEnum termsEnum, int maxDoc) throws IOException {
      super(weight, acceptDocs, termsEnum, maxDoc);
    }

    @Override
    protected void fillDocsAndScores(FixedBitSet matchingDocs, Bits acceptDocs, TermsEnum termsEnum) throws IOException {
      BytesRef spare = new BytesRef();
      DocsEnum docsEnum = null;
      for (int i = 0; i < terms.size(); i++) {
        if (termsEnum.seekExact(terms.get(ords[i], spare), true)) {
          docsEnum = termsEnum.docs(acceptDocs, docsEnum, 0);
          float score = TermsIncludingScoreQuery.this.scores[ords[i]];
          for (int doc = docsEnum.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docsEnum.nextDoc()) {
            // I prefer this:
            /*if (scores[doc] < score) {
              scores[doc] = score;
              matchingDocs.set(doc);
            }*/
            // But this behaves the same as MVInnerScorer and only then the tests will pass:
            if (!matchingDocs.get(doc)) {
              scores[doc] = score;
              matchingDocs.set(doc);
            }
          }
        }
      }
    }
  }

}