/** * Copyright (c) 2008--2015 Red Hat, Inc. * * This software is licensed to you under the GNU General Public License, * version 2 (GPLv2). There is NO WARRANTY for this software, express or * implied, including the implied warranties of MERCHANTABILITY or FITNESS * FOR A PARTICULAR PURPOSE. You should have received a copy of GPLv2 * along with this software; if not, see * http://www.gnu.org/licenses/old-licenses/gpl-2.0.txt. * * Red Hat trademarks are not licensed under GPLv2. No permission is * granted to use or replicate Red Hat trademarks that are incorporated * in this software or its documentation. */ package com.redhat.satellite.search.index.ngram; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.lucene.search.ConstantScoreRangeQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.queryParser.ParseException; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.document.DateTools; import org.apache.lucene.document.NumberTools; import org.apache.log4j.Logger; /** * NGramQueryParser * Creates a custom query parser of ngram-tokenized search terms * * @version $Rev$ */ public class NGramQueryParser extends QueryParser { private static Logger log = Logger.getLogger(NGramQueryParser.class); private boolean useMust = false; /** * Constructor * @param f field name * @param a analyzer * @param useMustIn boolean option set to true when doing a Free Form or * advanced search, it will force each NGramQuery to be constructed using * the BooleanClause.Occur.MUST. This narrows returned search results and * is not recommended for regular searches which want spelling variances * to be tolerated. */ public NGramQueryParser(String f, Analyzer a, boolean useMustIn) { super(f, a); this.setDateResolution(DateTools.Resolution.DAY); this.useMust = useMustIn; } /** * Constructor * @param f field name * @param a analyzer */ public NGramQueryParser(String f, Analyzer a) { super(f, a); this.setDateResolution(DateTools.Resolution.DAY); } /** * * @return value for useMust */ public boolean getUseMust() { return useMust; } /** * * @param value for useMust */ public void setUseMust(boolean value) { useMust = value; } protected Query getFieldQuery(String defaultField, String queryText) throws ParseException { Query orig = super.getFieldQuery(defaultField, queryText); if (!(orig instanceof PhraseQuery)) { log.debug("Returning default query. No phrase query translation."); return orig; } /** * A ngram when parsed will become a series of smaller search terms, * these terms are grouped together into a PhraseQuery. We are taking * that PhraseQuery and breaking out each ngram term then combining all * ngrams together to form a BooleanQuery. */ PhraseQuery pq = (PhraseQuery)orig; return new NGramQuery(pq, useMust); } /** * * @param field * @return return true if this looks to be a date string * false if this is not a date string */ protected boolean isDate(String field) { if (field.length() == 12) { return true; } return false; } /** * This will look to see if "part1" or "part2" are strings of all digits, * if they are, then they will be converted to a lexicographically safe string * representation, then passed into the inherited getRangeQuery(). This is needed when * comparing something like "4" to be less than "10". * If the strings don't fit the pattern of all digits, then they get passed through * to the inherited getRangeQuery(). */ protected Query getRangeQuery(String field, String part1, String part2, boolean inclusive) throws ParseException { if (isDate(part1) && isDate(part2)) { if (log.isDebugEnabled()) { log.debug("Detected passed in terms are dates, creating " + "ConstantScoreRangeQuery(" + field + ", " + part1 + ", " + part2 + ", " + inclusive + ", " + inclusive); } return new ConstantScoreRangeQuery(field, part1, part2, inclusive, inclusive); } String newPart1 = part1; String newPart2 = part2; String regEx = "(\\d)*"; Pattern pattern = Pattern.compile(regEx); Matcher matcher1 = pattern.matcher(part1); Matcher matcher2 = pattern.matcher(part2); if (matcher1.matches() && matcher2.matches()) { newPart1 = NumberTools.longToString(Long.parseLong(part1)); newPart2 = NumberTools.longToString(Long.parseLong(part2)); if (log.isDebugEnabled()) { log.debug("NGramQueryParser.getRangeQuery() Converted " + part1 + " to " + newPart1 + ", Converted " + part2 + " to " + newPart2); } } return super.getRangeQuery(field, newPart1, newPart2, inclusive); } }