package org.apache.lucene.bigram; /** * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.util.HashSet; import java.util.Iterator; import java.util.Set; import java.util.Vector; import org.apache.lucene.chunk.SpanChunkedNotQuery; import org.apache.lucene.chunk.SpanDechunkingQuery; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.QueryRewriter; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanNotNearQuery; import org.apache.lucene.search.spans.SpanOrNearQuery; import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; /** * Rewrites a query to eliminate stop words by combining them with * adjacent non-stop-words, forming "bi-grams" (or bi-grams with 2 words). * This is a fairly in-depth process, as bi-gramming across NEAR and OR * queries is complex. */ public class BigramQueryRewriter extends QueryRewriter { /** Set of stop-words (e.g. "the", "a", "and", etc.) to remove */ protected Set stopSet; /** Maximum slop to allow in a query, based on the index being queried */ protected int maxSlop; /** Keeps track of all stop-words removed from the query */ protected HashSet removedTerms = new HashSet(); /** * Constructs a rewriter using the given stopword set. * * @param stopSet Set of stopwords to remove or bi-gram. This can be * constructed easily by calling * {@link #makeStopSet(String)}. * @param maxSlop Maximum slop to allow in a query, based on the index * being queried. */ public BigramQueryRewriter(Set stopSet, int maxSlop) { this.stopSet = stopSet; this.maxSlop = maxSlop; } // constructor /** * Make a stop set given a space, comma, or semicolon delimited list of * stop words. * * @param stopWords String of words to make into a set * * @return A stop word set suitable for use when constructing * an {@link BigramQueryRewriter}. */ public static Set makeStopSet(String stopWords) { return BigramStopFilter.makeStopSet(stopWords); } // makeStopSet() /** * Determines if the given string is an bi-gram of a real word with a * stop-word. * * @param stopWords The set of stop-words * @param str The string to check * @return true if it's an bi-gram */ public static boolean isBigram(Set stopWords, String str) { int pos = str.indexOf('~'); // A tilde tells us if it's a bigram. if (pos < 0) return false; // Let's do some sanity checking if (pos < 0 || pos >= str.length() - 1) return false; String before = str.substring(0, pos); String after = str.substring(pos + 1); // Sometimes the original token contains a bi-gram. Don't barf on it. if (!stopWords.contains(before) && !stopWords.contains(after)) // This wasn't correctly inverted before! return false; // It's a bi-gram. return true; } // isBigram() /** * Rewrite a BooleanQuery. Prohibited or allowed (not required) clauses * that are single stop words will be removed. Required clauses will not * have bi-gramming applied. * * @param bq The query to rewrite * @return Rewritten version, or 'bq' unchanged if no changed needed. */ protected Query rewrite(BooleanQuery bq) { // Classify all the clauses as required, prohibited, or just allowed. // Rewrite them along the way. // Vector required = new Vector(); Vector prohibited = new Vector(); Vector allowed = new Vector(); // Process each clause in turn BooleanClause[] clauses = bq.getClauses(); boolean anyChange = false; for (int i = 0; i < clauses.length; i++) { // Single stop words must be removed. Make sure to add them to the // removed list so the user will be notified. // if (stopSet.contains(extractTermText(clauses[i].getQuery()))) { removedTerms.add(extractTermText(clauses[i].getQuery())); anyChange = true; continue; } // Rewrite the clause and/or its descendants Query rewrittenQuery = rewriteQuery(clauses[i].getQuery()); if (rewrittenQuery != clauses[i].getQuery()) anyChange = true; // And add it to the appropriate vector. if (rewrittenQuery == null) continue; else if (clauses[i].getOccur() == BooleanClause.Occur.MUST_NOT) prohibited.add(rewrittenQuery); else if (clauses[i].getOccur() == BooleanClause.Occur.MUST) required.add(rewrittenQuery); else allowed.add(rewrittenQuery); } // for i // NOTE: // Do NOT bi-gram the required clauses, because they don't have any real // order, and besides, they might be from entirely different fields. // If no changes were needed, return the original query unchanged. if (!anyChange) return bq; // If we ended up with nothing, let the caller know. if (required.isEmpty() && prohibited.isEmpty() && allowed.isEmpty()) return null; // If we ended up with a single required clause and no other clauses, return // just that. // if (required.size() == 1 && prohibited.isEmpty() && allowed.isEmpty()) return combineBoost(bq, (Query)required.elementAt(0)); // Otherwise, we need to construct a new one. bq = (BooleanQuery)copyBoost(bq, new BooleanQuery(bq.isCoordDisabled())); for (Iterator iter = required.iterator(); iter.hasNext();) bq.add((Query)iter.next(), BooleanClause.Occur.MUST); for (Iterator iter = prohibited.iterator(); iter.hasNext();) bq.add((Query)iter.next(), BooleanClause.Occur.MUST_NOT); for (Iterator iter = allowed.iterator(); iter.hasNext();) bq.add((Query)iter.next(), BooleanClause.Occur.SHOULD); return bq; } // rewrite() /** * Rewrite a span NEAR query. Stop words will be bi-grammed into adjacent * terms. * * @param q The query to rewrite * @return Rewritten version, or 'q' unchanged if no changed needed. */ protected Query rewrite(final SpanNearQuery q) { // Rewrite each clause. Allow single clauses to be promoted, and // do perform bi-gramming. // return rewriteClauses(q, q.getClauses(), true, true, q.getSlop(), new SpanClauseJoiner() { public SpanQuery join(SpanQuery[] clauses) { return new SpanNearQuery(clauses, q.getSlop(), false); } }); } // rewrite() /** * Rewrite a span OR-NEAR query. Stop words will be bi-grammed into adjacent * terms. * * @param q The query to rewrite * @return Rewritten version, or 'q' unchanged if no changed needed. */ protected Query rewrite(final SpanOrNearQuery q) { // Rewrite each clause. Allow single clauses to be promoted, and // do perform bi-gramming. // return rewriteClauses(q, q.getClauses(), true, true, q.getSlop(), new SpanClauseJoiner() { public SpanQuery join(SpanQuery[] clauses) { return new SpanOrNearQuery(clauses, q.getSlop(), false); } }); } // rewrite() /** * Rewrite a span-based OR query. The procedure in this case is simple: * remove all stop words, with no bi-gramming performed. * * @param q The query to rewrite * @return Rewritten version, or 'q' unchanged if no changed needed. */ protected Query rewrite(final SpanOrQuery q) { // Rewrite each clause. Allow single clauses to be promoted, and // avoid bi-gramming. // return rewriteClauses(q, q.getClauses(), true, false, 0, new SpanClauseJoiner() { public SpanQuery join(SpanQuery[] clauses) { return new SpanOrQuery(clauses); } }); } // rewrite() /** * Utility function that takes care of rewriting a series of span query * clauses. * * @param oldQuery Query being rewritten * @param oldClauses Clauses to rewrite * @param shuntSingle true to allow single-clause result to be returned, * false to force wrapping. * @param bigram true to bigram stop-words, false to simply remove them * @param slop if bigramming, 0 for phrase, non-zero for near * @param joiner Handles joining new clauses into wrapper query * @return New rewritten query, or 'oldQuery' if no changes. */ protected Query rewriteClauses(Query oldQuery, SpanQuery[] oldClauses, boolean shuntSingle, boolean bigram, int slop, SpanClauseJoiner joiner) { Vector newClauseVec = new Vector(); boolean anyChanges = false; for (int i = 0; i < oldClauses.length; i++) { SpanQuery clause = (SpanQuery)rewriteQuery(oldClauses[i]); if (clause != oldClauses[i]) anyChanges = true; // If the clause ended up null, skip it. if (clause == null) continue; // Skip stop-words if we're not bigramming. if (!bigram && stopSet.contains(extractTermText(clause))) { removedTerms.add(extractTermText(clause)); anyChanges = true; continue; } // Retain everything else. newClauseVec.add(clause); } // for i SpanQuery[] newClauses = (SpanQuery[])newClauseVec.toArray( new SpanQuery[newClauseVec.size()]); // Apply bi-gramming to the rewritten clauses if requested. boolean alreadyJoined = false; if (bigram) { SpanQuery[] bigrammedClauses = bigramQueries(newClauses, slop, joiner); if (bigrammedClauses != newClauses) { assert bigrammedClauses.length <= 1 : "bigramQueries should result in one clause if any bigramming performed"; newClauses = bigrammedClauses; anyChanges = true; alreadyJoined = true; } } // If no changes, just return the original clauses. boolean force = forceRewrite(oldQuery); if (!anyChanges && !force) return oldQuery; // If we ended up with zero clauses, let the caller know they can delete // the query. // if (newClauses.length == 0) return null; // If only one clause (and we're allowed to shunt), just return the single // clause instead of a wrapping query. // if (newClauses.length == 1 && (alreadyJoined || (shuntSingle && !force))) { // Since we're getting rid of the parent, pass on its boost to the // child. // return combineBoost(oldQuery, newClauses[0]); } // Construct a new query joining all the rewritten clauses. Query newQuery = joiner.join(newClauses); return copyBoost(oldQuery, newQuery); } /** * Removes stop words from a set of consecutive queries by combining * them with adjacent non-stop-words. * * @param clauses array of queries to work on * @param slop zero for exact matching, non-zero for 'near' matching. * @param joiner used to join the resulting bi-grammed clauses * @return original list, or a new query containing bi-grams */ protected SpanQuery[] bigramQueries(SpanQuery[] clauses, int slop, SpanClauseJoiner joiner) { assert clauses.length > 0 : "cannot bigram empty list"; // Extract the term text from each query. String[] terms = new String[clauses.length]; for (int i = 0; i < clauses.length; i++) terms[i] = extractTermText(clauses[i]); // If there's only one query, and it's not a stop word, the we have // nothing to do. // if (clauses.length == 1 && !stopSet.contains(terms[0])) return clauses; // Find out if none of the queries are stop words (so we can take the easy // way out). // // Along the way, make note of the stop words so we can later // inform the user (since we're going to remove them one way or another.) // // Also, count the max # of consecutive stop words. // int nStopWords = 0; int consecStopWords = 0; int maxConsecStopWords = 0; for (int i = 0; i < clauses.length; i++) { if (!stopSet.contains(terms[i])) { consecStopWords = 0; continue; } removedTerms.add(terms[i]); nStopWords++; consecStopWords++; if (consecStopWords > maxConsecStopWords) maxConsecStopWords = consecStopWords; } // No stop words? Nothing to do. if (nStopWords == 0) return clauses; // If the query is entirely stop words, it's not going to produce // anything useful. Just clear the query list and let the caller know // we have made a change. // if (nStopWords == clauses.length) return new SpanQuery[0]; // At this point, we know the query has at least one stop word and // at least one real word. // // We have three cases to worry about: // (1) Phrase search // (2) Near search with max 2 consecutive stop words // (3) Near search with 3 or more consecutive stop words. // // Case (1): Phrase search // SpanQuery ret; if (slop == 0) ret = bigramTermsExact(clauses, terms, joiner); // Case (2): Near search with max 2 consecutive stop words else if (maxConsecStopWords <= 2) ret = bigramTermsInexact(clauses, terms, joiner); // Case (3): Near search with 3 or more consecutive stop words else { // This case is a bit strange. Since doing an inexact query will end // up eliminating at least one stop word, we also do an exact query, // and let the best match win. Give boost priority to the exact one. // SpanQuery[] both = new SpanQuery[2]; both[0] = bigramTermsExact(clauses, terms, joiner); both[1] = bigramTermsInexact(clauses, terms, joiner); reduceBoost(both[1]); ret = new SpanOrQuery(both); } // We definitely made changes SpanQuery[] retArray = new SpanQuery[1]; retArray[0] = ret; return retArray; } // bigramQueries() /** * Given a sequence of terms consisting of mixed stop and real words, * figure out the bigrammed sequence that will give hits on at least * the real words, and give priority to ones that are near the closest * stop words. * * Examples: "man of the world" * -> "(man or man-of) near (the-world or world)" * "hello there" * -> "hello there" * "it is not a problem" * -> "(a-problem or problem)" * * @param queries Original queries in the sequence * @param terms Corresponding term text of each query * @param joiner Used to join the resulting bi-grammed clauses * * @return A new query possibly containing bi-grams */ protected SpanQuery bigramTermsInexact(Query[] queries, String[] terms, SpanClauseJoiner joiner) { SpanQuery[] clauses = new SpanQuery[terms.length * 2]; int nClauses = 0; // Process each term in turn, looking at its relation to the next term. for (int i = 0; i < terms.length; i++) { // There are six cases to consider: // (1) real followed by nothing // (2) real followed by real // (3) real followed by stop // (4) stop followed by nothing // (5) stop followed by real // (6) stop followed by stop // // First, handle cases (1), (2) and (3), which all start with a // real word. // if (!stopSet.contains(terms[i])) { // If the previous term was a stop-word, then this real // word has already been incorporated. Skip it. // if (i > 0 && stopSet.contains(terms[i - 1])) continue; // Case 1 and 2: Real followed by nothing or another real word. // In these cases, there's no need to glom. // if (i == terms.length - 1 || !stopSet.contains(terms[i + 1])) { clauses[nClauses++] = convertToSpanQuery(queries[i]); continue; } // Case 3: Real followed by stop. In this case, we make an // OR-query, like this: (real OR real-stop). Slightly reduce the // boost factor on the real alone, so that the real-stop pair // will be scored higher. // SpanQuery[] both = new SpanQuery[2]; both[0] = convertToSpanQuery(queries[i]); both[1] = convertToSpanQuery(glomQueries(queries[i], queries[i + 1])); reduceBoost(both[0]); clauses[nClauses++] = new SpanOrQuery(both); continue; } // Now handle cases (4), (5) and (6) that begin with a stop word. // // Case (4): Stop followed by nothing. Just drop the stop word. // if (i == terms.length - 1) continue; // Case (5): Stop followed by real. In this case, we make an OR // query, like this: (stop-real OR real). Reduce the // boost factor on the real word alone, so that the // stop-real pair will score higher. // if (!stopSet.contains(terms[i + 1])) { SpanQuery[] both = new SpanQuery[2]; both[0] = convertToSpanQuery(glomQueries(queries[i], queries[i + 1])); both[1] = convertToSpanQuery(queries[i + 1]); reduceBoost(both[1]); clauses[nClauses++] = new SpanOrQuery(both); continue; } // Case (6): Stop followed by stop. Throw it away. continue; } // for i // If we ended up with only one clause, just return that. if (nClauses == 1) return clauses[0]; // Otherwise, join them all together in a "near" query. SpanQuery[] resized = new SpanQuery[nClauses]; System.arraycopy(clauses, 0, resized, 0, nClauses); return joiner.join(resized); } // bigramTermsInexact() /** * Converts non-span queries to span queries, and passes span queries through * unchanged. * * @param q Query to convert (span or non-span) * @return Equivalent SpanQuery. */ protected SpanQuery convertToSpanQuery(Query q) { if (q instanceof SpanQuery) return (SpanQuery)q; if (q instanceof TermQuery) { Term t = ((TermQuery)q).getTerm(); int termLength = isBigram(stopSet, t.text()) ? 2 : 1; return (SpanQuery)copyBoost(q, new SpanTermQuery(t, termLength)); } assert false : "case not handled"; return null; } // convertToSpanQuery() /** * Construct a term given its text and field name. This function is used * instead of Term's constructor to add an extra check that the text * is never a stop word. * * @param text Text for the new term * @param field Field being queried * * @return A properly constructed Term, never a stop-word. */ protected Term newTerm(String field, String text) { assert !stopSet.contains(text) : "cannot directly query a stop-word"; return new Term(field, text); } // newTerm() /** * Given a sequence of terms consisting of mixed stop and real words, * figure out the bigrammed sequence required to get an exact match with * the index. * * Examples: "man of the world" -> "man-of of-the the-world" * "hello there" -> "hello there" * "it is not a problem" -> "it-is is-not not-a a-problem" * * @param queries Original queries in the sequence * @param terms Corresponding term text of each query * @param joiner Used to join the resulting bi-grammed clauses * * @return A new query possibly containing bi-grams */ protected SpanQuery bigramTermsExact(Query[] queries, String[] terms, SpanClauseJoiner joiner) { Vector newQueries = new Vector(queries.length * 2); // Process each term in turn, looking at its relation to the next term. for (int i = 0; i < terms.length; i++) { // There are six cases to consider: // (1) real followed by nothing // (2) real followed by real // (3) real followed by stop // (4) stop followed by nothing // (5) stop followed by real // (6) stop followed by stop // // First, handle cases (1), (2) and (3), which all start with a // real word. // if (!stopSet.contains(terms[i])) { // Cases 1 and 2: Real followed by nothing or another real // word. In this case, there's no need to glom. // if (i == terms.length - 1 || !stopSet.contains(terms[i + 1])) { // If the previous term was a stop-word, then this real // word has already been incorporated. Skip it. // if (i > 0 && stopSet.contains(terms[i - 1])) continue; newQueries.add(queries[i]); continue; } // Case 3: Real followed by stop. In this case, we stick the // real and the stop together. // newQueries.add(glomQueries(queries[i], queries[i + 1])); continue; } // Now handle cases (4), (5) and (6) that start with a stop word. // // Case (4): stop word followed by nothing. Just throw it away. // Don't worry, it should have been incorporated into // the previous glommed term. // if (i == terms.length - 1) continue; // Cases (5) and (6): stop word followed by anything else. Just // glom the stop word with whatever comes after. // newQueries.add(glomQueries(queries[i], queries[i + 1])); } // for i // Convert the vector of queries to a handy array. SpanQuery[] newArray = new SpanQuery[newQueries.size()]; newQueries.toArray(newArray); // And finally, make the "near" query that will join them all. return joiner.join(newArray); } // bigramTermsExact() /** * Joins a stop word to a real word, or vice-versa. Also handles more complex * cases, like joining a stop-word to an OR query. * * Examples: the rabbit -> the-rabbit * the (white OR beige) -> the-white OR the-beige * * @param q1 First query * @param q2 Second query * @return A query representing the join. */ protected Query glomQueries(Query q1, Query q2) { // If they're both terms, our work is easy. if (q1 instanceof SpanTermQuery && q2 instanceof SpanTermQuery) { SpanTermQuery st1 = (SpanTermQuery)q1; SpanTermQuery st2 = (SpanTermQuery)q2; Term t = newTerm(st1.getField(), st1.getTerm().text() + "~" + st2.getTerm().text()); int termLength = isBigram(stopSet, t.text()) ? 2 : 1; return copyBoost(st1, st2, new SpanTermQuery(t, termLength)); } if (q1 instanceof TermQuery && q2 instanceof TermQuery) { TermQuery t1 = (TermQuery)q1; TermQuery t2 = (TermQuery)q2; Term t = newTerm(t1.getTerm().field(), t1.getTerm().text() + "~" + t2.getTerm().text()); return copyBoost(t1, t2, new TermQuery(t)); } // If joining a term to an OR query or vice-versa, we have a bunch to do. if (q1 instanceof SpanTermQuery && q2 instanceof SpanOrQuery) return glomInside((SpanOrQuery)q2, (SpanTermQuery)q1, true); if (q1 instanceof SpanOrQuery && q2 instanceof SpanTermQuery) return glomInside((SpanOrQuery)q1, (SpanTermQuery)q2, false); // If joining a term to a NOT query, only glom it's include clause (the // exclude clause is independent.) // if (q1 instanceof SpanTermQuery && q2 instanceof SpanChunkedNotQuery) return glomInside((SpanChunkedNotQuery)q2, (SpanTermQuery)q1, true); if (q1 instanceof SpanChunkedNotQuery && q2 instanceof SpanTermQuery) return glomInside((SpanChunkedNotQuery)q1, (SpanTermQuery)q2, false); if (q1 instanceof SpanTermQuery && q2 instanceof SpanNotNearQuery) return glomInside((SpanNotNearQuery)q2, (SpanTermQuery)q1, true); if (q1 instanceof SpanNotNearQuery && q2 instanceof SpanTermQuery) return glomInside((SpanNotNearQuery)q1, (SpanTermQuery)q2, false); // Don't mess with near queries. if (q1 instanceof SpanTermQuery && q2 instanceof SpanNearQuery) return q2; if (q1 instanceof SpanNearQuery && q2 instanceof SpanTermQuery) return q1; assert false : "case not handled yet"; return null; } // glomQueries() /** * Gloms the term onto each clause within an OR query. * * @param oq Query to glom into * @param term Term to glom on * @param before true to prepend the term, false to append. * @return A new glommed query. */ protected SpanQuery glomInside(SpanOrQuery oq, SpanTermQuery term, boolean before) { SpanQuery[] clauses = oq.getClauses(); boolean anyChanges = false; for (int i = 0; i < clauses.length; i++) { if (clauses[i] instanceof SpanTermQuery) { String ctText = extractTermText(clauses[i]); String newText = before ? (extractTermText(term) + "~" + ctText) : (ctText + "~" + extractTermText(term)); SpanQuery oldClause = clauses[i]; int termLength = isBigram(stopSet, newText) ? 2 : 1; clauses[i] = new SpanTermQuery(newTerm(term.getTerm().field(), newText), termLength); copyBoost(oldClause, term, clauses[i]); anyChanges = true; } else if (clauses[i] instanceof SpanOrQuery) { SpanQuery newq = glomInside((SpanOrQuery)clauses[i], term, before); if (newq != oq) { clauses[i] = newq; anyChanges = true; } } else assert false : "case not handled"; } // for i // No changes? Return the unaltered original query. if (!anyChanges) return oq; // All done! return (SpanQuery)copyBoost(oq, new SpanOrQuery(clauses)); } // glomInside() /** * Gloms the term onto each clause within a NOT query. * * @param nq Query to glom into * @param term Term to glom on * @param before true to prepend the term, false to append. * @return A new glommed query. */ protected SpanQuery glomInside(SpanChunkedNotQuery nq, SpanTermQuery term, boolean before) { // Only glom into the 'include' clause. The 'exclude' clause is entirely // independent. // SpanQuery newInclude; if (before) newInclude = (SpanQuery)glomQueries(term, nq.getInclude()); else newInclude = (SpanQuery)glomQueries(nq.getInclude(), term); // If no change was made to the 'include' clause, then we needn't change // the NOT query. // if (newInclude == nq.getInclude()) return nq; // Make a new NOT query then. return (SpanQuery)copyBoost(nq, new SpanChunkedNotQuery( newInclude, nq.getExclude(), nq.getSlop())); } // glomInside() /** * Gloms the term onto each clause within a NOT query. * * @param nq Query to glom into * @param term Term to glom on * @param before true to prepend the term, false to append. * @return A new glommed query. */ protected SpanQuery glomInside(SpanNotNearQuery nq, SpanTermQuery term, boolean before) { // Only glom into the 'include' clause. The 'exclude' clause is entirely // independent. // SpanQuery newInclude; if (before) newInclude = (SpanQuery)glomQueries(term, nq.getInclude()); else newInclude = (SpanQuery)glomQueries(nq.getInclude(), term); // If no change was made to the 'include' clause, then we needn't change // the NOT query. // if (newInclude == nq.getInclude()) return nq; // Make a new NOT query then. return (SpanQuery)copyBoost(nq, new SpanNotNearQuery(newInclude, nq.getExclude(), nq.getSlop())); } // glomInside() /** * Given a term, term query, span term query (or plain string), extract * the term text. This method is handy so we don't have to sprinkle if * statements everywhere we need to get the text. * * @param obj String, Term, TermQuery, or SpanTermQuery to check * @return text of the term */ protected String extractTermText(Object obj) { if (obj instanceof String) return (String)obj; Term t = extractTerm(obj); if (t == null) return ""; return t.text(); } // extractText() /** * Given a term query, span term query (or plain term), extract * the Term itself. This method is handy so we don't have to sprinkle if * statements everywhere we need to get the term from a query. * * @param obj Term, TermQuery, or SpanTermQuery to check * @return the Term */ protected Term extractTerm(Object obj) { if (obj instanceof Term) return (Term)obj; if (obj instanceof TermQuery) return ((TermQuery)obj).getTerm(); if (obj instanceof SpanTermQuery) return ((SpanTermQuery)obj).getTerm(); if (obj instanceof SpanDechunkingQuery) return extractTerm(((SpanDechunkingQuery)obj).getWrapped()); return null; } // extractTerm() /** * Reduces the boost factor of a query (typically the non-bigram of a pair in * an OR) so that the bigram will get scored higher. */ protected void reduceBoost(Query query) { query.setBoost(query.getBoost() * 0.8f); } // reduceBoost() } // class BigramQueryRewriter