/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.queryparser.flexible.standard.processors; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.queryparser.flexible.core.QueryNodeException; import org.apache.lucene.queryparser.flexible.core.nodes.FieldQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.FuzzyQueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.QueryNode; import org.apache.lucene.queryparser.flexible.core.nodes.QuotedFieldQueryNode; import org.apache.lucene.queryparser.flexible.core.processors.QueryNodeProcessorImpl; import org.apache.lucene.queryparser.flexible.core.util.UnescapedCharSequence; import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys; import org.apache.lucene.queryparser.flexible.standard.nodes.PrefixWildcardQueryNode; import org.apache.lucene.queryparser.flexible.standard.nodes.TermRangeQueryNode; import org.apache.lucene.queryparser.flexible.standard.nodes.WildcardQueryNode; import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser; import org.apache.lucene.search.PrefixQuery; import org.apache.lucene.util.BytesRef; /** * The {@link StandardSyntaxParser} creates {@link PrefixWildcardQueryNode} nodes which * have values containing the prefixed wildcard. However, Lucene * {@link PrefixQuery} cannot contain the prefixed wildcard. So, this processor * basically removed the prefixed wildcard from the * {@link PrefixWildcardQueryNode} value. * * @see PrefixQuery * @see PrefixWildcardQueryNode */ public class WildcardQueryNodeProcessor extends QueryNodeProcessorImpl { private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\.)|([?*]+)"); // because we call utf8ToString, this will only work with the default TermToBytesRefAttribute private static String analyzeWildcard(Analyzer a, String field, String wildcard) { // best effort to not pass the wildcard characters through #normalize Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(wildcard); StringBuilder sb = new StringBuilder(); int last = 0; while (wildcardMatcher.find()){ // continue if escaped char if (wildcardMatcher.group(1) != null){ continue; } if (wildcardMatcher.start() > 0){ String chunk = wildcard.substring(last, wildcardMatcher.start()); BytesRef normalized = a.normalize(field, chunk); sb.append(normalized.utf8ToString()); } //append the wildcard character sb.append(wildcardMatcher.group(2)); last = wildcardMatcher.end(); } if (last < wildcard.length()){ String chunk = wildcard.substring(last); BytesRef normalized = a.normalize(field, chunk); sb.append(normalized.utf8ToString()); } return sb.toString(); } public WildcardQueryNodeProcessor() { // empty constructor } @Override protected QueryNode postProcessNode(QueryNode node) throws QueryNodeException { // the old Lucene Parser ignores FuzzyQueryNode that are also PrefixWildcardQueryNode or WildcardQueryNode // we do the same here, also ignore empty terms if (node instanceof FieldQueryNode || node instanceof FuzzyQueryNode) { FieldQueryNode fqn = (FieldQueryNode) node; CharSequence text = fqn.getText(); // do not process wildcards for TermRangeQueryNode children and // QuotedFieldQueryNode to reproduce the old parser behavior if (fqn.getParent() instanceof TermRangeQueryNode || fqn instanceof QuotedFieldQueryNode || text.length() <= 0){ // Ignore empty terms return node; } // Code below simulates the old lucene parser behavior for wildcards if (isWildcard(text)) { Analyzer analyzer = getQueryConfigHandler().get(ConfigurationKeys.ANALYZER); if (analyzer != null) { text = analyzeWildcard(analyzer, fqn.getFieldAsString(), text.toString()); } if (isPrefixWildcard(text)) { return new PrefixWildcardQueryNode(fqn.getField(), text, fqn.getBegin(), fqn.getEnd()); } else { return new WildcardQueryNode(fqn.getField(), text, fqn.getBegin(), fqn.getEnd()); } } } return node; } private boolean isWildcard(CharSequence text) { if (text ==null || text.length() <= 0) return false; // If a un-escaped '*' or '?' if found return true // start at the end since it's more common to put wildcards at the end for(int i=text.length()-1; i>=0; i--){ if ((text.charAt(i) == '*' || text.charAt(i) == '?') && !UnescapedCharSequence.wasEscaped(text, i)){ return true; } } return false; } private boolean isPrefixWildcard(CharSequence text) { if (text == null || text.length() <= 0 || !isWildcard(text)) return false; // Validate last character is a '*' and was not escaped // If single '*' is is a wildcard not prefix to simulate old queryparser if (text.charAt(text.length()-1) != '*') return false; if (UnescapedCharSequence.wasEscaped(text, text.length()-1)) return false; if (text.length() == 1) return false; // Only make a prefix if there is only one single star at the end and no '?' or '*' characters // If single wildcard return false to mimic old queryparser for(int i=0; i<text.length(); i++){ if (text.charAt(i) == '?') return false; if (text.charAt(i) == '*' && !UnescapedCharSequence.wasEscaped(text, i)){ if (i == text.length()-1) return true; else return false; } } return false; } @Override protected QueryNode preProcessNode(QueryNode node) throws QueryNodeException { return node; } @Override protected List<QueryNode> setChildrenOrder(List<QueryNode> children) throws QueryNodeException { return children; } }