package org.apache.lucene.search.spans; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.IOException; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import org.apache.lucene.index.Term; import org.apache.lucene.search.*; public class SimpleSpanQueryConverter { /** * Converts a regular query to a {@link org.apache.lucene.search.spans.SpanQuery} for use in a highlighter. * Because of subtle differences in {@link org.apache.lucene.search.spans.SpanQuery} and {@link org.apache.lucene.search.Query}, this * {@link org.apache.lucene.search.spans.SpanQuery} will not necessarily return the same documents as the * initial Query. For example, the generated SpanQuery will not include * clauses of type BooleanClause.Occur.MUST_NOT. Also, the * {@link org.apache.lucene.search.spans.SpanQuery} will only cover a single field, whereas the {@link org.apache.lucene.search.Query} * might contain multiple fields. * <p> * Returns an empty SpanQuery if the {@link org.apache.lucene.search.Query} is a class that * is handled, but for some reason can't be converted from a {@link org.apache.lucene.search.Query} to a * {@link org.apache.lucene.search.spans.SpanQuery}. This can happen for many reasons: e.g. if the Query * contains no terms in the requested "field" or the Query is a MatchAllDocsQuery. * <p> * Throws IllegalArgumentException if the Query is a class that is * is not yet handled. * <p> * This class does not rewrite the SpanQuery before returning it. * Clients are required to rewrite if necessary. * <p> * Much of this code is copied directly from * oal.search.highlight.WeightedSpanTermExtractor. There are some subtle * differences. * <p> * Throws IllegalArgumentException if an unknown query type is passed in. * * @param field single field to extract SpanQueries for * @param queryToConvert query to convert * @return SpanQuery for use in highlighting; can return empty SpanQuery * @throws java.io.IOException if encountered during parse */ public SpanQuery convert(String field, Query queryToConvert) throws IOException { Float boost = null; Query query = queryToConvert; if (queryToConvert instanceof BoostQuery) { query = ((BoostQuery)query).getQuery(); boost = ((BoostQuery)query).getBoost(); } /* * copied nearly verbatim from * org.apache.lucene.search.highlight.WeightedSpanTermExtractor * TODO:refactor to avoid duplication of code if possible. * Beware: there are some subtle differences. */ if (query instanceof SpanQuery) { SpanQuery sq = (SpanQuery) query; if (sq.getField().equals(field)) { return (SpanQuery) query; } else { return getEmptySpanQuery(); } } else if (query instanceof BooleanQuery) { List<BooleanClause> queryClauses = ((BooleanQuery) query).clauses(); List<SpanQuery> spanQs = new ArrayList<SpanQuery>(); for (int i = 0; i < queryClauses.size(); i++) { if (!queryClauses.get(i).isProhibited()) { tryToAdd(field, convert(field, queryClauses.get(i).getQuery()), spanQs); } } return addBoost(buildSpanOr(spanQs), boost); } else if (query instanceof PhraseQuery) { PhraseQuery phraseQuery = ((PhraseQuery) query); Term[] phraseQueryTerms = phraseQuery.getTerms(); if (phraseQueryTerms.length == 0) { return getEmptySpanQuery(); } else if (!phraseQueryTerms[0].field().equals(field)) { return getEmptySpanQuery(); } SpanQuery[] clauses = new SpanQuery[phraseQueryTerms.length]; for (int i = 0; i < phraseQueryTerms.length; i++) { clauses[i] = new SpanTermQuery(phraseQueryTerms[i]); } int slop = phraseQuery.getSlop(); int[] positions = phraseQuery.getPositions(); // sum position increments (>1) and add to slop if (positions.length > 0) { int lastPos = positions[0]; int sz = positions.length; for (int i = 1; i < sz; i++) { int pos = positions[i]; int inc = pos - lastPos - 1; slop += inc; lastPos = pos; } } boolean inorder = false; if (phraseQuery.getSlop() == 0) { inorder = true; } SpanQuery sp = new SpanNearQuery(clauses, slop, inorder); return addBoost(sp, boost); } else if (query instanceof TermQuery) { TermQuery tq = (TermQuery) query; if (tq.getTerm().field().equals(field)) { return addBoost(new SpanTermQuery(tq.getTerm()), boost); } else { return getEmptySpanQuery(); } } else if (query instanceof ConstantScoreQuery) { return convert(field, ((ConstantScoreQuery) query).getQuery()); } else if (query instanceof DisjunctionMaxQuery) { List<SpanQuery> spanQs = new ArrayList<>(); for (Iterator<Query> iterator = ((DisjunctionMaxQuery) query).iterator(); iterator .hasNext(); ) { tryToAdd(field, convert(field, iterator.next()), spanQs); } if (spanQs.size() == 0) { return getEmptySpanQuery(); } else if (spanQs.size() == 1) { return addBoost(spanQs.get(0), boost); } else { return addBoost(new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()])), boost); } } else if (query instanceof MatchAllDocsQuery) { return getEmptySpanQuery(); } else if (query instanceof MultiPhraseQuery) { final MultiPhraseQuery mpq = (MultiPhraseQuery) query; final Term[][] termArrays = mpq.getTermArrays(); //test for empty or wrong field if (termArrays.length == 0) { return getEmptySpanQuery(); } else if (termArrays.length > 1) { Term[] ts = termArrays[0]; if (ts.length > 0) { Term t = ts[0]; if (!t.field().equals(field)) { return getEmptySpanQuery(); } } } final int[] positions = mpq.getPositions(); if (positions.length > 0) { int maxPosition = positions[positions.length - 1]; for (int i = 0; i < positions.length - 1; ++i) { if (positions[i] > maxPosition) { maxPosition = positions[i]; } } @SuppressWarnings("unchecked") final List<SpanQuery>[] disjunctLists = new List[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < termArrays.length; ++i) { final Term[] termArray = termArrays[i]; List<SpanQuery> disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { disjuncts = (disjunctLists[positions[i]] = new ArrayList<SpanQuery>( termArray.length)); ++distinctPositions; } for (int j = 0; j < termArray.length; ++j) { disjuncts.add(new SpanTermQuery(termArray[j])); } } int positionGaps = 0; int position = 0; final SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.length; ++i) { List<SpanQuery> disjuncts = disjunctLists[i]; if (disjuncts != null) { if (disjuncts.size() == 1) { clauses[position++] = disjuncts.get(0); } else { clauses[position++] = new SpanOrQuery( disjuncts.toArray(new SpanQuery[disjuncts.size()])); } } else { ++positionGaps; } } final int slop = mpq.getSlop(); final boolean inorder = (slop == 0); SpanNearQuery sp = new SpanNearQuery(clauses, slop + positionGaps, inorder); return addBoost(sp, boost); } } else if (query instanceof MultiTermQuery) { MultiTermQuery tq = (MultiTermQuery) query; if (! tq.getField().equals(field)) { return getEmptySpanQuery(); } return addBoost( new SpanMultiTermQueryWrapper<>((MultiTermQuery) query), boost); } else if (query instanceof SynonymQuery) { SynonymQuery sq = (SynonymQuery)query; List<SpanQuery> spanQs = new ArrayList<>(); for (Term t : sq.getTerms()) { spanQs.add(new SpanTermQuery(t)); } return addBoost(buildSpanOr(spanQs), boost); } return convertUnknownQuery(field, queryToConvert); } private SpanQuery buildSpanOr(List<SpanQuery> spanQs) { if (spanQs.size() == 0) { return getEmptySpanQuery(); } else if (spanQs.size() == 1) { return spanQs.get(0); } else { return new SpanOrQuery(spanQs.toArray(new SpanQuery[spanQs.size()])); } } private SpanQuery addBoost(SpanQuery sq, Float boost) { if (boost == null) { return sq; } return new SpanBoostQuery(sq, boost); } private void tryToAdd(String field, SpanQuery q, List<SpanQuery> qs) { if (q == null || isEmptyQuery(q) || !q.getField().equals(field)) { return; } qs.add(q); } /** * Extend this to handle queries that are not currently handled. * Might consider extending SpanQueryConverter in the queries compilation unit; * that includes CommonTermsQuery. * <p> * In this class, this always throws an IllegalArgumentException * * @param field field to convert * @param query query to convert * @return nothing. Throws IllegalArgumentException */ protected SpanQuery convertUnknownQuery(String field, Query query) { throw new IllegalArgumentException("SpanQueryConverter is unable to convert this class " + query.getClass().toString()); } /** * @return an empty SpanQuery (SpanOrQuery with no cluases) */ protected SpanQuery getEmptySpanQuery() { return new SpanOrQuery(new SpanTermQuery[0]); } /** * Is this a null or empty SpanQuery * * @param q query to test * @return whether a null or empty SpanQuery */ private boolean isEmptyQuery(SpanQuery q) { if (q == null) { return true; } if (q instanceof SpanOrQuery) { SpanOrQuery soq = (SpanOrQuery) q; for (SpanQuery sq : soq.getClauses()) { if (!isEmptyQuery(sq)) { return false; } } return true; } return false; } }