package org.apache.solr.analysis; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.util.Map; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.reverse.ReverseStringFilter; /** * Factory for {@link ReversedWildcardFilter}-s. When this factory is * added to an analysis chain, it will be used both for filtering the * tokens during indexing, and to determine the query processing of * this field during search. * <p>This class supports the following init arguments: * <ul> * <li><code>withOriginal</code> - if true, then produce both original and reversed tokens at * the same positions. If false, then produce only reversed tokens.</li> * <li><code>maxPosAsterisk</code> - maximum position (1-based) of the asterisk wildcard * ('*') that triggers the reversal of query term. Asterisk that occurs at * positions higher than this value will not cause the reversal of query term. * Defaults to 2, meaning that asterisks on positions 1 and 2 will cause * a reversal.</li> * <li><code>maxPosQuestion</code> - maximum position (1-based) of the question * mark wildcard ('?') that triggers the reversal of query term. Defaults to 1. * Set this to 0, and <code>maxPosAsterisk</code> to 1 to reverse only * pure suffix queries (i.e. ones with a single leading asterisk).</li> * <li><code>maxFractionAsterisk</code> - additional parameter that * triggers the reversal if asterisk ('*') position is less than this * fraction of the query token length. Defaults to 0.0f (disabled).</li> * <li><code>minTrailing</code> - minimum number of trailing characters in query * token after the last wildcard character. For good performance this should be * set to a value larger than 1. Defaults to 2. * </ul> * Note 1: This filter always reverses input tokens during indexing. * Note 2: Query tokens without wildcard characters will never be reversed. */ public class ReversedWildcardFilterFactory extends BaseTokenFilterFactory { private char markerChar = ReverseStringFilter.START_OF_HEADING_MARKER; private boolean withOriginal; private int maxPosAsterisk; private int maxPosQuestion; private int minTrailing; private float maxFractionAsterisk; @Override public void init(Map<String, String> args) { super.init(args); withOriginal = getBoolean("withOriginal", true); maxPosAsterisk = getInt("maxPosAsterisk", 2); maxPosQuestion = getInt("maxPosQuestion", 1); minTrailing = getInt("minTrailing", 2); maxFractionAsterisk = getFloat("maxFractionAsterisk", 0.0f); } public TokenStream create(TokenStream input) { return new ReversedWildcardFilter(input, withOriginal, markerChar); } /** * This method encapsulates the logic that determines whether * a query token should be reversed in order to use the * reversed terms in the index. * @param token input token. * @return true if input token should be reversed, false otherwise. */ public boolean shouldReverse(String token) { int posQ = token.indexOf('?'); int posA = token.indexOf('*'); if (posQ == -1 && posA == -1) { // not a wildcard query return false; } int pos; int lastPos; int len = token.length(); lastPos = token.lastIndexOf('?'); pos = token.lastIndexOf('*'); if (pos > lastPos) lastPos = pos; if (posQ != -1) { pos = posQ; if (posA != -1) { pos = Math.min(posQ, posA); } } else { pos = posA; } if (len - lastPos < minTrailing) { // too few trailing chars return false; } if (posQ != -1 && posQ < maxPosQuestion) { // leading '?' return true; } if (posA != -1 && posA < maxPosAsterisk) { // leading '*' return true; } // '*' in the leading part if (maxFractionAsterisk > 0.0f && pos < (float)token.length() * maxFractionAsterisk) { return true; } return false; } public char getMarkerChar() { return markerChar; } protected float getFloat(String name, float defValue) { String val = args.get(name); if (val == null) { return defValue; } else { return Float.parseFloat(val); } } }