/** * Copyright (c) 2008--2015 Red Hat, Inc. * * This software is licensed to you under the GNU General Public License, * version 2 (GPLv2). There is NO WARRANTY for this software, express or * implied, including the implied warranties of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2 * along with this software; if not, see * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. * * Red Hat trademarks are not licensed under GPLv2. No permission is * granted to use or replicate Red Hat trademarks that are incorporated * in this software or its documentation. */ package com.redhat.satellite.search.index.ngram; import org.apache.lucene.analysis.Token; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.index.Term; import java.io.IOException; import java.io.StringReader; /** * NGramQuery * A custom BooleanQuery, it takes each ngram-token and adds as an OR term. * @version $Rev$ */ public class NGramQuery extends BooleanQuery { private static final long serialVersionUID = 1L; /** * Constructor * @param field name of the field * @param queryTerms String containing a term or a series of terms to search. * The string will be parsed and will be broken up into a series of NGrams. * @throws IOException something went wrong parsing queryTerms * */ public NGramQuery(String field, String queryTerms, int min, int max) throws IOException { NGramAnalyzer nga = new NGramAnalyzer(min, max); TokenStream ngrams = nga.tokenStream(new StringReader(queryTerms)); Token token; while ((token = ngrams.next()) != null) { Term t = new Term(field, new String(token.termBuffer()).trim()); add(new TermQuery(t), BooleanClause.Occur.SHOULD); } } /** * * @param pq PhraseQuery to break up and convert to NGramQuery * Forms a BooleanQuery with each term in the original PhraseQuery OR'd. * Note: Assumes that each term has already been tokenized into a ngram, * this method will not re-tokenize terms. * @param useMust controls if BooleanClause.Occur SHOULD or MUST is used. */ public NGramQuery(PhraseQuery pq, boolean useMust) { Term[] terms = pq.getTerms(); for (int i = 0; i < terms.length; i++) { BooleanClause.Occur occur = BooleanClause.Occur.SHOULD; if (useMust) { occur = BooleanClause.Occur.MUST; } add(new TermQuery(terms[i]), occur); } } }