package org.apache.lucene.queryparser.spans;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
/**
 * Shared helpers for span query parser tests: counts the spans and documents matched by a
 * parsed query and converts regular {@link Query} objects into {@link SpanQuery} objects.
 * <p>
 * The static {@link #reader}, {@link #searcher} and {@link #directory} fields are not assigned
 * in this class; presumably concrete tests initialize them before calling the helpers.
 */
public class SQPTestBase extends LuceneTestCase {

  /** Reader over the test index; the counting helpers require it to have exactly one leaf. */
  static IndexReader reader;

  /** Searcher used to create span weights and to cross-check document counts. */
  static IndexSearcher searcher;

  /** Directory of the test index; not used by the helpers in this class. */
  static Directory directory;

  /**
   * Parses {@code s} and asserts both the number of spans and the number of documents matched.
   *
   * @param p parser used to parse {@code s}
   * @param field field the spans are counted in
   * @param s query string
   * @param spanCount expected number of spans
   * @param docCount expected number of matching documents
   */
  void countSpansDocs(AbstractSpanQueryParser p, String field, String s, int spanCount,
                      int docCount) throws Exception {
    Query q = p.parse(s);
    assertEquals("spanCount: " + s, spanCount, countSpans(field, q));
    assertEquals("docCount: " + s, docCount, countDocs(field, q));
  }

  /**
   * Counts every span (start position) the query produces in {@code field}.
   *
   * @param field field to extract spans for
   * @param q query to convert and execute
   * @return total number of spans over all documents; 0 if there are no spans
   */
  long countSpans(String field, Query q) throws Exception {
    final Spans spans = getSpans(field, q);
    long count = 0;
    if (spans != null) {
      while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
          count++;
        }
      }
    }
    return count;
  }

  /**
   * Returns the boost carried by a top-level {@link BoostQuery} or {@link SpanBoostQuery}.
   *
   * @param q query to inspect
   * @return the wrapper's boost, or {@code 1.0f} if {@code q} is neither wrapper type
   */
  float getBoost(Query q) {
    if (q instanceof BoostQuery) {
      return ((BoostQuery) q).getBoost();
    } else if (q instanceof SpanBoostQuery) {
      return ((SpanBoostQuery) q).getBoost();
    }
    return 1.0f;
  }

  /**
   * Counts the documents that contain at least one span for the query in {@code field}, and
   * asserts that this equals the hit count of running the original query through
   * {@link #searcher}.
   *
   * @param field field to extract spans for
   * @param q query to convert and execute
   * @return number of documents with at least one span
   */
  long countDocs(String field, Query q) throws Exception {
    BitSet docs = new BitSet();
    final Spans spans = getSpans(field, q);
    if (spans != null) {
      while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
        // one position is enough to mark the document; no need to exhaust the positions
        if (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
          docs.set(spans.docID());
        }
      }
    }
    long spanDocHits = docs.cardinality();
    // double check with a regular searcher and original query
    TotalHitCountCollector coll = new TotalHitCountCollector();
    searcher.search(q, coll);
    assertEquals("span doc count should equal searcher hit count",
        coll.getTotalHits(), spanDocHits);
    return spanDocHits;
  }

  /**
   * Converts {@code q} to a span query, rewrites it once against {@link #reader}, and returns
   * its position-level spans for the single leaf of the reader.
   *
   * @return the spans as returned by the span weight; may be {@code null} (callers check)
   */
  private Spans getSpans(String field, Query q) throws IOException {
    List<LeafReaderContext> ctxs = reader.leaves();
    assertEquals("these helpers require a single-leaf reader", 1, ctxs.size());
    SpanQuery sq = (SpanQuery) convert(field, q).rewrite(reader);
    SpanWeight sw = sq.createWeight(searcher, false, getBoost(q));
    return sw.getSpans(ctxs.get(0), SpanWeight.Postings.POSITIONS);
  }

  /**
   * Converts a regular query to a {@link org.apache.lucene.search.spans.SpanQuery} for use in a
   * highlighter.
   * <p>
   * NOTE: this is admittedly ugly duplication; this logic exists here, in LUCENE-5317 and in the
   * highlighter package. It should be factored out into a standalone helper class until the
   * difference between Query and SpanQueries disappears.
   * <p>
   * Because of subtle differences in {@link org.apache.lucene.search.spans.SpanQuery} and
   * {@link org.apache.lucene.search.Query}, the returned SpanQuery will not necessarily return
   * the same documents as the initial Query. For example, the generated SpanQuery will not
   * include clauses of type BooleanClause.Occur.MUST_NOT. Also, the SpanQuery will only cover a
   * single field, whereas the Query might contain multiple fields.
   * <p>
   * {@link BoostQuery} and {@link ConstantScoreQuery} wrappers are unwrapped and their inner
   * query is converted; the wrapper's boost is not carried into the returned SpanQuery
   * (see {@link #getBoost(Query)} for retrieving a top-level boost).
   * <p>
   * Returns an empty SpanQuery if the Query is a class that is handled, but for some reason
   * can't be converted to a SpanQuery. This can happen for many reasons: e.g. if the Query
   * contains no terms in the requested "field" or the Query is a MatchAllDocsQuery.
   * <p>
   * This method does not rewrite the SpanQuery before returning it.
   * Clients are required to rewrite if necessary.
   * <p>
   * Much of this code is copied nearly verbatim from
   * org.apache.lucene.search.highlight.WeightedSpanTermExtractor. Beware: there are some subtle
   * differences. TODO: refactor to avoid duplication of code if possible.
   *
   * @param field single field to extract SpanQueries for
   * @param query query to convert
   * @return SpanQuery for use in highlighting; can return empty SpanQuery
   * @throws java.io.IOException declared for an underlying IOException in the IndexReader
   * @throws IllegalArgumentException if the query type is not recognized
   */
  public SpanQuery convert(String field, Query query) throws IOException {
    if (query instanceof SpanQuery) {
      SpanQuery sq = (SpanQuery) query;
      if (sq.getField() != null && sq.getField().equals(field)) {
        return sq;
      }
      return getEmptySpanQuery();
    } else if (query instanceof BoostQuery) {
      // unwrap; previously a BoostQuery fell through to the IllegalArgumentException below
      return convert(field, ((BoostQuery) query).getQuery());
    } else if (query instanceof BooleanQuery) {
      List<SpanQuery> spanQs = new ArrayList<>();
      for (BooleanClause clause : ((BooleanQuery) query).clauses()) {
        // MUST_NOT clauses cannot be represented and are skipped
        if (!clause.isProhibited()) {
          tryToAdd(field, convert(field, clause.getQuery()), spanQs);
        }
      }
      return toSpanOr(spanQs);
    } else if (query instanceof PhraseQuery) {
      return convertPhrase(field, (PhraseQuery) query);
    } else if (query instanceof TermQuery) {
      TermQuery tq = (TermQuery) query;
      if (tq.getTerm().field().equals(field)) {
        return new SpanTermQuery(tq.getTerm());
      }
      return getEmptySpanQuery();
    } else if (query instanceof ConstantScoreQuery) {
      return convert(field, ((ConstantScoreQuery) query).getQuery());
    } else if (query instanceof DisjunctionMaxQuery) {
      List<SpanQuery> spanQs = new ArrayList<>();
      for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator();
           iterator.hasNext();) {
        tryToAdd(field, convert(field, iterator.next()), spanQs);
      }
      return toSpanOr(spanQs);
    } else if (query instanceof MatchAllDocsQuery) {
      return getEmptySpanQuery();
    } else if (query instanceof MultiPhraseQuery) {
      return convertMultiPhrase(field, (MultiPhraseQuery) query);
    } else if (query instanceof MultiTermQuery) {
      MultiTermQuery mtq = (MultiTermQuery) query;
      // honor the single-field contract: a multi-term query on another field yields nothing
      if (mtq.getField() == null || !mtq.getField().equals(field)) {
        return getEmptySpanQuery();
      }
      return new SpanMultiTermQueryWrapper<>(mtq);
    } else if (query instanceof SynonymQuery) {
      List<SpanQuery> clauses = new ArrayList<>();
      for (Term term : ((SynonymQuery) query).getTerms()) {
        // only keep terms in the requested field
        if (term.field().equals(field)) {
          clauses.add(new SpanTermQuery(term));
        }
      }
      if (clauses.isEmpty()) {
        return getEmptySpanQuery();
      }
      return new SpanOrQuery(clauses.toArray(new SpanQuery[clauses.size()]));
    }
    throw new IllegalArgumentException("Can't convert query of type: " + query.getClass());
  }

  /**
   * Collapses converted clauses: empty list gives the empty SpanQuery, a single clause is
   * returned as is, several clauses are combined in a SpanOrQuery.
   */
  private SpanQuery toSpanOr(List<SpanQuery> spanQs) {
    if (spanQs.isEmpty()) {
      return getEmptySpanQuery();
    } else if (spanQs.size() == 1) {
      return spanQs.get(0);
    }
    return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()]));
  }

  /**
   * Converts a PhraseQuery to a SpanNearQuery over its terms; returns the empty SpanQuery if the
   * phrase has no terms or its first term is not in {@code field}.
   */
  private SpanQuery convertPhrase(String field, PhraseQuery phraseQuery) {
    Term[] phraseQueryTerms = phraseQuery.getTerms();
    if (phraseQueryTerms.length == 0 || !phraseQueryTerms[0].field().equals(field)) {
      return getEmptySpanQuery();
    }
    SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length];
    for (int i = 0; i < phraseQueryTerms.length; i++) {
      clauses[i] = new SpanTermQuery(phraseQueryTerms[i]);
    }
    // widen the slop by the gap between each pair of consecutive positions
    // (position difference minus one)
    int slop = phraseQuery.getSlop();
    int[] positions = phraseQuery.getPositions();
    for (int i = 1; i < positions.length; i++) {
      slop += positions[i] - positions[i - 1] - 1;
    }
    // an exact phrase (original slop of 0) must match in order
    boolean inorder = phraseQuery.getSlop() == 0;
    return new SpanNearQuery(clauses, slop, inorder);
  }

  /**
   * Converts a MultiPhraseQuery to a SpanNearQuery whose clauses are the terms (or a SpanOrQuery
   * of the terms) at each distinct position; returns the empty SpanQuery if the query has no
   * term arrays or its first term is not in {@code field}.
   */
  private SpanQuery convertMultiPhrase(String field, MultiPhraseQuery mpq) {
    final Term[][] termArrays = mpq.getTermArrays();
    // test for empty or wrong field (also when there is only a single term array)
    if (termArrays.length == 0) {
      return getEmptySpanQuery();
    }
    final Term[] firstTerms = termArrays[0];
    if (firstTerms.length > 0 && !firstTerms[0].field().equals(field)) {
      return getEmptySpanQuery();
    }
    final int[] positions = mpq.getPositions();
    if (positions.length == 0) {
      // defensive: nothing to build a near query from
      return getEmptySpanQuery();
    }
    int maxPosition = positions[0];
    for (int pos : positions) {
      maxPosition = Math.max(maxPosition, pos);
    }

    // group the term queries by position; slots for unused positions stay null
    @SuppressWarnings("unchecked")
    final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
    int distinctPositions = 0;
    for (int i = 0; i < termArrays.length; ++i) {
      final Term[] termArray = termArrays[i];
      List<SpanQuery> disjuncts = disjunctLists[positions[i]];
      if (disjuncts == null) {
        disjuncts = new ArrayList<>(termArray.length);
        disjunctLists[positions[i]] = disjuncts;
        ++distinctPositions;
      }
      for (Term term : termArray) {
        disjuncts.add(new SpanTermQuery(term));
      }
    }

    // one clause per used position; every unused slot counts as a gap added to the slop
    int positionGaps = 0;
    int position = 0;
    final SpanQuery[] clauses = new SpanQuery[distinctPositions];
    for (List<SpanQuery> disjuncts : disjunctLists) {
      if (disjuncts == null) {
        ++positionGaps;
      } else if (disjuncts.size() == 1) {
        clauses[position++] = disjuncts.get(0);
      } else {
        clauses[position++] = new SpanOrQuery(
            disjuncts.toArray(new SpanQuery[disjuncts.size()]));
      }
    }
    final int slop = mpq.getSlop();
    // an exact phrase (original slop of 0) must match in order
    final boolean inorder = (slop == 0);
    return new SpanNearQuery(clauses, slop + positionGaps, inorder);
  }

  /**
   * Adds {@code q} to {@code qs} unless it is null, empty, or not in {@code field}.
   */
  private void tryToAdd(String field, SpanQuery q, List<SpanQuery> qs) {
    if (q == null || isEmptyQuery(q)) {
      return;
    }
    String queryField = q.getField();
    if (queryField == null || !queryField.equals(field)) {
      return;
    }
    qs.add(q);
  }

  /**
   * @return an empty SpanQuery (SpanOrQuery with no clauses)
   */
  private SpanQuery getEmptySpanQuery() {
    return new SpanOrQuery(new SpanTermQuery[0]);
  }

  /**
   * Is this a null or empty SpanQuery? A SpanOrQuery is empty if it has no clauses or if all of
   * its clauses are themselves empty.
   *
   * @param q query to test
   * @return whether a null or empty SpanQuery
   */
  private boolean isEmptyQuery(SpanQuery q) {
    if (q == null) {
      return true;
    }
    if (q instanceof SpanOrQuery) {
      for (SpanQuery sq : ((SpanOrQuery) q).getClauses()) {
        if (!isEmptyQuery(sq)) {
          return false;
        }
      }
      return true;
    }
    return false;
  }
}