NGramQueryParser.java example

Explorer
spacewalk-master
/**
 * Copyright (c) 2008--2015 Red Hat, Inc.
 *
 * This software is licensed to you under the GNU General Public License,
 * version 2 (GPLv2). There is NO WARRANTY for this software, express or
 * implied, including the implied warranties of MERCHANTABILITY or FITNESS
 * FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2
 * along with this software; if not, see
 * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt.
 *
 * Red Hat trademarks are not licensed under GPLv2. No permission is
 * granted to use or replicate Red Hat trademarks that are incorporated
 * in this software or its documentation.
 */
package com.redhat.satellite.search.index.ngram;

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.search.ConstantScoreRangeQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.NumberTools;

import org.apache.log4j.Logger;

/**
 * NGramQueryParser
 * Creates a custom query parser of ngram-tokenized search terms
 *
 * @version $Rev$
 */
public class NGramQueryParser extends QueryParser {

    private static Logger log = Logger.getLogger(NGramQueryParser.class);
    private boolean useMust = false;
    /**
     * Constructor
     * @param f field name
     * @param a analyzer
     * @param useMustIn boolean option set to true when doing a Free Form or
     * advanced search, it will force each NGramQuery to be constructed using
     * the BooleanClause.Occur.MUST.  This narrows returned search results and
     * is not recommended for regular searches which want spelling variances
     * to be tolerated.
     */
    public NGramQueryParser(String f, Analyzer a, boolean useMustIn) {
        super(f, a);
        this.setDateResolution(DateTools.Resolution.DAY);
        this.useMust = useMustIn;
    }

    /**
     * Constructor
     * @param f field name
     * @param a analyzer
     */
    public NGramQueryParser(String f, Analyzer a) {
        super(f, a);
        this.setDateResolution(DateTools.Resolution.DAY);
    }

    /**
     *
     * @return value for useMust
     */
    public boolean getUseMust() {
        return useMust;
    }

    /**
     *
     * @param value for useMust
     */
    public void setUseMust(boolean value) {
        useMust = value;
    }

    protected Query getFieldQuery(String defaultField,
            String queryText) throws ParseException {
        Query orig = super.getFieldQuery(defaultField, queryText);
        if (!(orig instanceof PhraseQuery)) {
            log.debug("Returning default query.  No phrase query translation.");
            return orig;
        }
        /**
         * A ngram when parsed will become a series of smaller search terms,
         * these terms are grouped together into a PhraseQuery.  We are taking
         * that PhraseQuery and breaking out each ngram term then combining all
         * ngrams together to form a BooleanQuery.
         */
        PhraseQuery pq = (PhraseQuery)orig;
        return new NGramQuery(pq, useMust);
    }

    /**
     *
     * @param field
     * @return return   true if this looks to be a date string
     *                  false if this is not a date string
     */
    protected boolean isDate(String field) {
        if (field.length() == 12) {
            return true;
        }
        return false;
    }
    /**
     * This will look to see if "part1" or "part2" are strings of all digits,
     * if they are, then they will be converted to a lexicographically safe string
     * representation, then passed into the inherited getRangeQuery().  This is needed when
     * comparing something like "4" to be less than "10".
     * If the strings don't fit the pattern of all digits, then they get passed through
     * to the inherited getRangeQuery().
     */
    protected Query getRangeQuery(String field,
            String part1,
            String part2,
            boolean inclusive) throws ParseException {
        if (isDate(part1) && isDate(part2)) {
            if (log.isDebugEnabled()) {
                log.debug("Detected passed in terms are dates, creating " +
                    "ConstantScoreRangeQuery(" + field + ", " + part1 + ", " +
                    part2 + ", " + inclusive + ", " + inclusive);
            }
            return new ConstantScoreRangeQuery(field, part1, part2, inclusive,
                    inclusive);
        }
        String newPart1 = part1;
        String newPart2 = part2;
        String regEx = "(\\d)*";
        Pattern pattern = Pattern.compile(regEx);
        Matcher matcher1 = pattern.matcher(part1);
        Matcher matcher2 = pattern.matcher(part2);
        if (matcher1.matches() && matcher2.matches()) {
            newPart1 = NumberTools.longToString(Long.parseLong(part1));
            newPart2 = NumberTools.longToString(Long.parseLong(part2));
            if (log.isDebugEnabled()) {
                log.debug("NGramQueryParser.getRangeQuery() Converted " + part1 + " to " +
                    newPart1 + ", Converted " + part2 + " to " + newPart2);
            }
        }
        return super.getRangeQuery(field, newPart1, newPart2, inclusive);
    }
}