/**
* Copyright 2014 National University of Ireland, Galway.
*
* This file is part of the SIREn project. Project and contact information:
*
* https://github.com/rdelbru/SIREn
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sindice.siren.search.node;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.AutomatonQuery;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.util.ToStringUtils;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.BasicAutomata;
import org.apache.lucene.util.automaton.BasicOperations;
/**
* A {@link NodePrimitiveQuery} that implements the wildcard search query.
*
* <p>
*
* Supported wildcards are <code>*</code>, which
* matches any character sequence (including the empty one), and <code>?</code>,
* which matches any single character. Note this query can be slow, as it
* needs to iterate over many terms. In order to prevent extremely slow wildcard queries,
* a wildcard term should not start with the wildcard <code>*</code>.
*
* <p>This query uses the {@link
* MultiNodeTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}
* rewrite method.
*
* <p> Code taken from {@link WildcardQuery} and adapted for SIREn.
*
* @see AutomatonQuery
**/
public class NodeWildcardQuery extends NodeAutomatonQuery {

  /** Wildcard that stands for any sequence of characters, including the empty one. */
  public static final char WILDCARD_STRING = '*';

  /** Wildcard that stands for exactly one character. */
  public static final char WILDCARD_CHAR = '?';

  /** Escape character: the code point following it is taken literally. */
  public static final char WILDCARD_ESCAPE = '\\';

  /**
   * Creates a wildcard query for the given pattern term.
   *
   * @param term the term whose text is the wildcard pattern
   */
  public NodeWildcardQuery(final Term term) {
    super(term, toAutomaton(term));
  }

  /**
   * Translates the wildcard pattern held by a term into an automaton.
   *
   * <p>The pattern is read one Unicode code point at a time. Each code point
   * contributes one automaton, and the pieces are concatenated in pattern
   * order.
   *
   * @param wildcardquery the term whose text is the wildcard pattern
   * @return the concatenation of the automata built for each pattern element
   * @lucene.internal
   */
  public static Automaton toAutomaton(final Term wildcardquery) {
    final List<Automaton> parts = new ArrayList<Automaton>();
    final String pattern = wildcardquery.text();
    final int end = pattern.length();

    int pos = 0;
    while (pos < end) {
      final int codePoint = pattern.codePointAt(pos);
      // Step past the current code point right away, so that 'pos' already
      // designates the position of the following one (if any).
      pos += Character.charCount(codePoint);

      if (codePoint == WILDCARD_STRING) {
        parts.add(BasicAutomata.makeAnyString());
      } else if (codePoint == WILDCARD_CHAR) {
        parts.add(BasicAutomata.makeAnyChar());
      } else if (codePoint == WILDCARD_ESCAPE && pos < end) {
        // Escape sequence: consume the next code point and match it literally.
        final int escaped = pattern.codePointAt(pos);
        pos += Character.charCount(escaped);
        parts.add(BasicAutomata.makeChar(escaped));
      } else {
        // Ordinary code point, or a trailing escape character with nothing
        // after it, which is leniently matched as a literal.
        parts.add(BasicAutomata.makeChar(codePoint));
      }
    }

    return BasicOperations.concatenate(parts);
  }

  /**
   * Returns the pattern term.
   *
   * @return the term holding the wildcard pattern
   */
  public Term getTerm() {
    return term;
  }

  /**
   * Prints a user-readable version of this query: the pattern text followed by
   * the boost string, passed through {@code wrapToStringWithDatatype}.
   *
   * @param field not used by this implementation
   */
  @Override
  public String toString(final String field) {
    final StringBuffer text = new StringBuffer()
        .append(term.text())
        .append(ToStringUtils.boost(getBoost()));
    return wrapToStringWithDatatype(text).toString();
  }
}