package org.apache.lucene.spelt; /** * Copyright 2007 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.util.ArrayList; import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; /** * <p> * Traverses and rewrites simple Lucene queries. This includes boolean * and phrase queries, but not much else. Very useful for extracting and/or * changing the terms in a query. Usually you derive a class and then * override {@link #rewrite(Term)} to do what you need. * </p> * <p> * If you need to handle other types of queries, derive a class and * provide a {@link #rewriteQuery(Query)} method that dispatches to your * custom rewriting methods. * </p. * * @author Martin Haye */ public abstract class SimpleQueryRewriter { /** * Rewrite a query of any supported type. * * @param q Query to rewrite * @return A new query, or 'q' unchanged if no change was needed. */ public Query rewriteQuery(Query q) { if (q instanceof BooleanQuery) return rewrite((BooleanQuery)q); if (q instanceof PhraseQuery) return rewrite((PhraseQuery)q); if (q instanceof TermQuery) return rewrite((TermQuery)q); // Unknown type: do nothing return q; } // rewriteQuery() /** * Rewrite a BooleanQuery. * * @param bq The query to rewrite * @return Rewritten version, or 'bq' unchanged if no changed needed. */ protected Query rewrite(BooleanQuery bq) { ArrayList<BooleanClause> newClauses = new ArrayList<BooleanClause>(); boolean anyChange = false; for (BooleanClause oldClause : bq.getClauses()) { // Rewrite the clause and its descendants Query rewritten = rewriteQuery(oldClause.getQuery()); if (rewritten != oldClause.getQuery()) { anyChange = true; if (rewritten != null) newClauses.add(new BooleanClause(rewritten, oldClause.getOccur())); } else newClauses.add(oldClause); } // If no clauses changed, then the BooleanQuery doesn't change either. if (!anyChange) return bq; // If we ended up with nothing, let the caller know. if (newClauses.isEmpty()) return null; // If we ended up with a single clause, return just that. if (newClauses.size() == 1) { BooleanClause clause = newClauses.get(0); if (clause.getOccur() != BooleanClause.Occur.MUST_NOT) { Query newq = clause.getQuery(); newq.setBoost(Math.max(bq.getBoost(), newq.getBoost())); return newq; } } // Otherwise, we need to construct a new BooleanQuery. bq = new BooleanQuery(bq.isCoordDisabled()); bq.setBoost(bq.getBoost()); for (BooleanClause newClause : newClauses) bq.add(newClause); return bq; } /** * Rewrite a phrase query. The base class does nothing. * * @param pq The query to rewrite * @return Rewritten version, or 'pq' unchanged if no change needed. */ protected Query rewrite(PhraseQuery pq) { Term[] oldTerms = pq.getTerms(); ArrayList<Term> newTerms = new ArrayList<Term>(); // Rewrite each term in turn. boolean anyChange = false; for (int i=0; i<oldTerms.length; i++) { Term newTerm = rewrite(oldTerms[i]); if (newTerm != oldTerms[i]) { anyChange = true; if (newTerm != null) { // If the term is splitting, make it into two terms. int spacePos = newTerm.text().indexOf(' '); if (oldTerms[i].text().indexOf(' ') < 0 && spacePos >= 0) { newTerms.add(new Term(newTerm.field(), newTerm.text().substring(0, spacePos))); newTerms.add(new Term(newTerm.field(), newTerm.text().substring(spacePos+1))); } else newTerms.add(newTerm); } } else { newTerms.add(oldTerms[i]); } } // If no changes, return the original. if (!anyChange) return pq; // If all terms disappeared, inform the caller. if (newTerms.size() == 0) return null; // If only one term, convert to a term query. if (newTerms.size() == 1) { TermQuery newq = new TermQuery(newTerms.get(0)); newq.setBoost(pq.getBoost()); return newq; } // Make a new phrase query. PhraseQuery newq = new PhraseQuery(); newq.setBoost(pq.getBoost()); newq.setSlop(pq.getSlop()); for (int i=0; i<newTerms.size(); i++) newq.add(newTerms.get(i)); return newq; } /** * Rewrite a term query. The base class rewrites the term itself. * * @param q The query to rewrite * @return Rewritten version, or 'q' unchanged if no change needed. */ protected Query rewrite(TermQuery q) { Term oldTerm = q.getTerm(); Term newTerm = rewrite(oldTerm); // If the term is unchanged, don't change the query. if (oldTerm == newTerm) return q; // If the term is going away, inform the caller if (newTerm == null) return null; // If the term is splitting, make it into two terms. int spacePos = newTerm.text().indexOf(' '); if (oldTerm.text().indexOf(' ') < 0 && spacePos >= 0) { PhraseQuery pq = new PhraseQuery(); pq.add(new Term(newTerm.field(), newTerm.text().substring(0, spacePos))); pq.add(new Term(newTerm.field(), newTerm.text().substring(spacePos+1))); pq.setBoost(q.getBoost()); return pq; } // Make a new query for the new term. TermQuery newQuery = new TermQuery(newTerm); newQuery.setBoost(q.getBoost()); return newQuery; } /** * Rewrite a term (e.g. part of a TermQuery or PhraseQuery). The base * class does nothing. * * @param t The term to rewrite * @return Rewritten version, or 't' unchanged if no change needed. */ protected Term rewrite(Term t) { return t; } }