package org.apache.lucene.queryparser.spans;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.*;
import org.apache.lucene.search.spans.SpanBoostQuery;
import org.apache.lucene.search.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.search.spans.SpanWeight;
import org.apache.lucene.search.spans.Spans;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;

/**
 * Shared helpers for span query parser tests: parse a query string, convert
 * the result to a single-field {@link SpanQuery} and count the spans and
 * documents it matches.
 * <p>
 * The static {@code reader}, {@code searcher} and {@code directory} fields are
 * not initialised here; presumably the concrete test class sets them up
 * before any helper is called.
 */
public class SQPTestBase extends LuceneTestCase {

  static IndexReader reader;
  static IndexSearcher searcher;
  static Directory directory;

  /**
   * Parses {@code s} with {@code p} and asserts both the number of spans and
   * the number of documents matched in {@code field}.
   *
   * @param p parser under test
   * @param field field the spans are counted in
   * @param s query string to parse
   * @param spanCount expected total number of spans
   * @param docCount expected number of matching documents
   */
  void countSpansDocs(AbstractSpanQueryParser p, String field, String s,
                      int spanCount, int docCount) throws Exception {
    Query q = p.parse(s);
    assertEquals("spanCount: " + s, spanCount, countSpans(field, q));
    assertEquals("docCount: " + s, docCount, countDocs(field, q));
  }

  /**
   * Counts every span that {@code q}, converted to a span query on
   * {@code field}, produces in the (single-leaf) reader.
   *
   * @param field field to extract spans for
   * @param q query to convert and execute
   * @return total number of spans over all documents
   */
  long countSpans(String field, Query q) throws Exception {
    final Spans spans = openSpans(field, q);
    long count = 0;
    // getSpans may return null; that case is treated as "no matches"
    if (spans != null) {
      while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
          count++;
        }
      }
    }
    return count;
  }

  /**
   * Returns the boost carried by a top-level {@link BoostQuery} or
   * {@link SpanBoostQuery}, or {@code 1.0f} for any other query.
   *
   * @param q query to inspect
   * @return the boost of {@code q}, defaulting to {@code 1.0f}
   */
  float getBoost(Query q) {
    if (q instanceof BoostQuery) {
      return ((BoostQuery) q).getBoost();
    } else if (q instanceof SpanBoostQuery) {
      return ((SpanBoostQuery) q).getBoost();
    }
    return 1.0f;
  }

  /**
   * Counts the documents that have at least one span for {@code q} converted
   * to a span query on {@code field}, and asserts that this equals the hit
   * count of the original query run through the regular searcher.
   *
   * @param field field to extract spans for
   * @param q query to convert and execute
   * @return number of distinct documents with at least one span
   */
  long countDocs(String field, Query q) throws Exception {
    BitSet docs = new BitSet();
    final Spans spans = openSpans(field, q);
    if (spans != null) {
      while (spans.nextDoc() != Spans.NO_MORE_DOCS) {
        while (spans.nextStartPosition() != Spans.NO_MORE_POSITIONS) {
          docs.set(spans.docID());
        }
      }
    }
    long spanDocHits = docs.cardinality();

    // double check with a regular searcher and the original query
    TotalHitCountCollector coll = new TotalHitCountCollector();
    searcher.search(q, coll);
    assertEquals("doc hits from spans differ from regular search",
        coll.getTotalHits(), spanDocHits);
    return spanDocHits;
  }

  /**
   * Converts {@code q} to a span query on {@code field}, rewrites it against
   * the reader and opens its {@link Spans} on the reader's only leaf.
   *
   * @return the spans, exactly as returned by {@link SpanWeight#getSpans}
   */
  private Spans openSpans(String field, Query q) throws IOException {
    List<LeafReaderContext> leaves = reader.leaves();
    // the counting helpers only look at one leaf, so insist there is only one
    assertEquals("expected a single-leaf reader", 1, leaves.size());
    SpanQuery sq = (SpanQuery) convert(field, q).rewrite(reader);
    SpanWeight weight = sq.createWeight(searcher, false, getBoost(q));
    return weight.getSpans(leaves.get(0), SpanWeight.Postings.POSITIONS);
  }

  /**
   * THIS IS AN UNHOLY ABOMINATION: this exists here and in LUCENE-5317 and
   * in the highlighter package.  Please, please forgive me.
   *
   * We need to factor this out into a standalone helper class until
   * the difference between Query and SpanQueries disappears.
   *
   * Converts a regular query to a {@link org.apache.lucene.search.spans.SpanQuery} for use in a highlighter.
   * Because of subtle differences in {@link org.apache.lucene.search.spans.SpanQuery} and
   * {@link org.apache.lucene.search.Query}, this {@link org.apache.lucene.search.spans.SpanQuery}
   * will not necessarily return the same documents as the initial Query.
   * For example, the generated SpanQuery will not include clauses of type
   * BooleanClause.Occur.MUST_NOT.  Also, the
   * {@link org.apache.lucene.search.spans.SpanQuery} will only cover a single field,
   * whereas the {@link org.apache.lucene.search.Query} might contain multiple fields.
   * <p>
   * Returns an empty SpanQuery if the {@link org.apache.lucene.search.Query} is a class that
   * is handled, but for some reason can't be converted from a
   * {@link org.apache.lucene.search.Query} to a
   * {@link org.apache.lucene.search.spans.SpanQuery}.  This can happen for many reasons:
   * e.g. if the Query contains no terms in the requested "field" or the Query
   * is a MatchAllDocsQuery.
   * <p>
   * A {@link BoostQuery} is converted by converting the query it wraps and,
   * unless that result is empty, wrapping it in a {@link SpanBoostQuery} with
   * the same boost.
   * <p>
   * Throws IllegalArgumentException if the Query is a class that
   * is not yet handled.
   * <p>
   * This method does not rewrite the SpanQuery before returning it.
   * Clients are required to rewrite if necessary.
   * <p>
   * Much of this code is copied directly from
   * oal.search.highlight.WeightedSpanTermExtractor.  There are some subtle
   * differences.
   *
   * @param field single field to extract SpanQueries for
   * @param query query to convert
   * @return SpanQuery for use in highlighting; can return empty SpanQuery
   * @throws java.io.IOException for an underlying IOException in the IndexReader
   * @throws IllegalArgumentException if the query type is not recognized
   */
  public SpanQuery convert(String field, Query query) throws IOException {
    /*
     * copied nearly verbatim from
     * org.apache.lucene.search.highlight.WeightedSpanTermExtractor
     * TODO:refactor to avoid duplication of code if possible.
     * Beware: there are some subtle differences.
     */
    if (query instanceof SpanQuery) {
      SpanQuery sq = (SpanQuery) query;
      if (sq.getField() != null && sq.getField().equals(field)) {
        return sq;
      }
      return getEmptySpanQuery();
    } else if (query instanceof BoostQuery) {
      // A BoostQuery can never also be a PhraseQuery/MultiPhraseQuery, so the
      // boost has to be unwrapped here rather than inside those branches.
      BoostQuery boosted = (BoostQuery) query;
      SpanQuery inner = convert(field, boosted.getQuery());
      if (isEmptyQuery(inner)) {
        // keep "empty" recognisable to isEmptyQuery/tryToAdd
        return inner;
      }
      return new SpanBoostQuery(inner, boosted.getBoost());
    } else if (query instanceof BooleanQuery) {
      List<SpanQuery> spanQs = new ArrayList<>();
      for (BooleanClause clause : ((BooleanQuery) query).clauses()) {
        // MUST_NOT clauses have no span equivalent and are dropped
        if (!clause.isProhibited()) {
          tryToAdd(field, convert(field, clause.getQuery()), spanQs);
        }
      }
      return combineDisjuncts(spanQs);
    } else if (query instanceof PhraseQuery) {
      return convertPhraseQuery(field, (PhraseQuery) query);
    } else if (query instanceof TermQuery) {
      Term term = ((TermQuery) query).getTerm();
      if (term.field().equals(field)) {
        return new SpanTermQuery(term);
      }
      return getEmptySpanQuery();
    } else if (query instanceof ConstantScoreQuery) {
      return convert(field, ((ConstantScoreQuery) query).getQuery());
    } else if (query instanceof DisjunctionMaxQuery) {
      List<SpanQuery> spanQs = new ArrayList<>();
      Iterator<Query> disjuncts = ((DisjunctionMaxQuery) query).iterator();
      while (disjuncts.hasNext()) {
        tryToAdd(field, convert(field, disjuncts.next()), spanQs);
      }
      return combineDisjuncts(spanQs);
    } else if (query instanceof MatchAllDocsQuery) {
      return getEmptySpanQuery();
    } else if (query instanceof MultiPhraseQuery) {
      return convertMultiPhraseQuery(field, (MultiPhraseQuery) query);
    } else if (query instanceof MultiTermQuery) {
      MultiTermQuery mtq = (MultiTermQuery) query;
      // honour the single-field contract, like the TermQuery branch does
      if (mtq.getField() == null || !mtq.getField().equals(field)) {
        return getEmptySpanQuery();
      }
      return new SpanMultiTermQueryWrapper<>(mtq);
    } else if (query instanceof SynonymQuery) {
      List<SpanQuery> clauses = new ArrayList<>();
      for (Term term : ((SynonymQuery) query).getTerms()) {
        // terms from another field are skipped; if none remain the result
        // is a SpanOrQuery without clauses, i.e. the empty span query
        if (term.field().equals(field)) {
          clauses.add(new SpanTermQuery(term));
        }
      }
      return new SpanOrQuery(clauses.toArray(new SpanQuery[clauses.size()]));
    }
    throw new IllegalArgumentException("Can't convert query of type: "+query.getClass());
  }

  /**
   * Collapses already-filtered alternatives into one query: the empty span
   * query for no alternatives, the alternative itself when there is exactly
   * one, otherwise a {@link SpanOrQuery} over all of them.
   */
  private SpanQuery combineDisjuncts(List<SpanQuery> spanQs) {
    if (spanQs.isEmpty()) {
      return getEmptySpanQuery();
    } else if (spanQs.size() == 1) {
      return spanQs.get(0);
    }
    return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()]));
  }

  /**
   * Converts a {@link PhraseQuery} to a {@link SpanNearQuery}, or to the empty
   * span query if the phrase has no terms or its first term is in another field.
   */
  private SpanQuery convertPhraseQuery(String field, PhraseQuery phraseQuery) {
    Term[] terms = phraseQuery.getTerms();
    if (terms.length == 0 || !terms[0].field().equals(field)) {
      return getEmptySpanQuery();
    }
    SpanQuery[] clauses = new SpanQuery[terms.length];
    for (int i = 0; i < terms.length; i++) {
      clauses[i] = new SpanTermQuery(terms[i]);
    }

    // For each consecutive pair of positions add (increment - 1) to the slop.
    // Note that this is deliberately left as in the original: two terms at
    // the same position contribute -1.
    int slop = phraseQuery.getSlop();
    int[] positions = phraseQuery.getPositions();
    for (int i = 1; i < positions.length; i++) {
      slop += positions[i] - positions[i - 1] - 1;
    }

    // only an exact phrase (original slop of 0) is required to be in order
    boolean inorder = phraseQuery.getSlop() == 0;
    return new SpanNearQuery(clauses, slop, inorder);
  }

  /**
   * Converts a {@link MultiPhraseQuery} to a {@link SpanNearQuery} whose
   * clauses are, per distinct position, either a single {@link SpanTermQuery}
   * or a {@link SpanOrQuery} over the terms at that position.  Returns the
   * empty span query if there are no term arrays or the first term is in
   * another field.
   */
  private SpanQuery convertMultiPhraseQuery(String field, MultiPhraseQuery mpq) {
    final Term[][] termArrays = mpq.getTermArrays();
    // test for empty or wrong field; this must also run when there is
    // exactly one term array
    if (termArrays.length == 0) {
      return getEmptySpanQuery();
    }
    Term[] firstTerms = termArrays[0];
    if (firstTerms.length > 0 && !firstTerms[0].field().equals(field)) {
      return getEmptySpanQuery();
    }

    final int[] positions = mpq.getPositions();
    if (positions.length == 0) {
      // nothing to build clauses from
      return getEmptySpanQuery();
    }
    int maxPosition = positions[0];
    for (int i = 1; i < positions.length; ++i) {
      if (positions[i] > maxPosition) {
        maxPosition = positions[i];
      }
    }

    // bucket the terms by position; a null slot means no term at that position
    @SuppressWarnings("unchecked")
    final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1];
    int distinctPositions = 0;
    for (int i = 0; i < termArrays.length; ++i) {
      final Term[] termArray = termArrays[i];
      List<SpanQuery> disjuncts = disjunctLists[positions[i]];
      if (disjuncts == null) {
        disjuncts = new ArrayList<SpanQuery>(termArray.length);
        disjunctLists[positions[i]] = disjuncts;
        ++distinctPositions;
      }
      for (int j = 0; j < termArray.length; ++j) {
        disjuncts.add(new SpanTermQuery(termArray[j]));
      }
    }

    int positionGaps = 0;
    int position = 0;
    final SpanQuery[] clauses = new SpanQuery[distinctPositions];
    for (int i = 0; i < disjunctLists.length; ++i) {
      List<SpanQuery> disjuncts = disjunctLists[i];
      if (disjuncts == null) {
        // every unused position widens the allowed slop by one
        ++positionGaps;
      } else if (disjuncts.size() == 1) {
        clauses[position++] = disjuncts.get(0);
      } else {
        clauses[position++] = new SpanOrQuery(
            disjuncts.toArray(new SpanQuery[disjuncts.size()]));
      }
    }

    final int slop = mpq.getSlop();
    final boolean inorder = (slop == 0);
    return new SpanNearQuery(clauses, slop + positionGaps, inorder);
  }

  /**
   * Adds {@code q} to {@code qs} unless it is null, empty, or reports a field
   * other than {@code field}.
   */
  private void tryToAdd(String field, SpanQuery q, List<SpanQuery> qs) {
    // isEmptyQuery also covers null, so getField() is only called on real queries
    if (isEmptyQuery(q) || !q.getField().equals(field)) {
      return;
    }
    qs.add(q);
  }

  /**
   * @return an empty SpanQuery (SpanOrQuery with no clauses)
   */
  private SpanQuery getEmptySpanQuery() {
    return new SpanOrQuery(new SpanTermQuery[0]);
  }

  /**
   * Is this a null or empty SpanQuery?  A {@link SpanOrQuery} counts as empty
   * when it has no clauses or all of its clauses are themselves empty.
   *
   * @param q query to test
   * @return whether a null or empty SpanQuery
   */
  private boolean isEmptyQuery(SpanQuery q) {
    if (q == null) {
      return true;
    }
    if (q instanceof SpanOrQuery) {
      for (SpanQuery clause : ((SpanOrQuery) q).getClauses()) {
        if (!isEmptyQuery(clause)) {
          return false;
        }
      }
      return true;
    }
    return false;
  }
}