/*
* ModeShape (http://www.modeshape.org)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.modeshape.jcr.index.lucene.query;
import java.text.CharacterIterator;
import java.text.StringCharacterIterator;
import java.util.regex.Pattern;
import org.apache.lucene.search.Query;
import org.modeshape.common.annotation.Immutable;
/**
 * A Lucene {@link Query} implementation that accepts a document when the value stored in the document's field, interpreted
 * as a LIKE pattern, matches a fixed value (i.e. the reverse of a LIKE comparison: the pattern lives in the index and the
 * candidate value is supplied with the query).
 * <p>
 * In a stored pattern {@code '%'} stands for any sequence of characters, {@code '_'} stands for exactly one character and
 * a backslash makes the following character literal. A backslash that is the last character of the pattern is itself
 * treated as a literal backslash.
 * </p>
 *
 * @since 4.5
 */
@Immutable
public class RelikeQuery extends ConstantScoreWeightQuery {

    private final String relikeValue;

    /**
     * Create a query which checks the patterns stored in the given field against a fixed value.
     *
     * @param field the name of the document field holding the LIKE pattern
     * @param relikeValue the value which the stored patterns are matched against
     */
    protected RelikeQuery( String field, String relikeValue ) {
        super(field);
        this.relikeValue = relikeValue;
    }

    /**
     * Decide whether a stored field value, taken as a LIKE pattern, matches this query's value.
     *
     * @param value the field value read from the document; may be null
     * @return true if {@code value} is not null and, used as a LIKE pattern, matches the query's value
     */
    @Override
    protected boolean accepts( String value ) {
        return value != null && like(relikeValue, value);
    }

    public String toString( String field ) {
        return field + " RELIKE " + relikeValue;
    }

    @Override
    public Query clone() {
        return new RelikeQuery(field(), relikeValue);
    }

    /**
     * Check whether a value matches a LIKE pattern. Patterns without wildcards, or with a single leading or trailing
     * {@code '%'}, are evaluated with plain string operations; everything else is translated to a regular expression.
     *
     * @param value the candidate value; may not be null
     * @param pattern the LIKE pattern; may not be null
     * @return true if the value matches the pattern
     */
    private static boolean like( String value, String pattern ) {
        switch (getCompareType(pattern)) {
            case EQ:
                // The pattern may still contain escaped characters (e.g. "50\%"), so compare against the literal text
                return value.equals(unescape(pattern));
            case ENDS_WITH:
                // Drop the leading '%' and compare the literal remainder
                return value.endsWith(unescape(pattern.substring(1)));
            case STARTS_WITH:
                // Drop the trailing '%' and compare the literal remainder
                return value.startsWith(unescape(pattern.substring(0, pattern.length() - 1)));
            case REGEXP:
            default:
                return Pattern.compile(toRegularExpression(pattern)).matcher(value).matches();
        }
    }

    private enum CompareType {
        EQ,
        STARTS_WITH,
        ENDS_WITH,
        REGEXP
    }

    /**
     * Determine the compare type given the expression. Characters preceded by a backslash are escaped and are never
     * treated as wildcards. This method returns:
     * <ul>
     * <li>CompareType.EQ if the expression does not contain an unescaped '_' or '%';</li>
     * <li>CompareType.ENDS_WITH if the expression has exactly one unescaped '%', at the start, and no unescaped '_';</li>
     * <li>CompareType.STARTS_WITH if the expression has exactly one unescaped '%', at the end, and no unescaped '_';</li>
     * <li>CompareType.REGEXP otherwise</li>
     * </ul>
     *
     * @param expression the expression for which the {@link CompareType} is to be found
     * @return the compare type; never null
     */
    private static CompareType getCompareType( String expression ) {
        final int firstIndex = 0;
        final int lastIndex = expression.length() - 1;
        CompareType result = CompareType.EQ;
        boolean escaped = false;
        CharacterIterator iter = new StringCharacterIterator(expression);
        // Terminate on the index rather than on CharacterIterator.DONE, since DONE ('\uFFFF') may legally occur in the text
        for (char c = iter.first(); iter.getIndex() < iter.getEndIndex(); c = iter.next()) {
            if (escaped) {
                // This character was preceded by a backslash, so it is a literal
                escaped = false;
                continue;
            }
            if (c == '\\') {
                escaped = true;
                continue;
            }
            if (c == '_') {
                return CompareType.REGEXP;
            }
            if (c == '%') {
                if (result != CompareType.EQ) {
                    // pattern like '%abcdfe%' -> only regexp can handle this;
                    return CompareType.REGEXP;
                }
                int index = iter.getIndex();
                if (index == firstIndex) {
                    result = CompareType.ENDS_WITH;
                } else if (index == lastIndex) {
                    result = CompareType.STARTS_WITH;
                } else {
                    return CompareType.REGEXP;
                }
            }
        }
        return result;
    }

    /**
     * Remove the escaping backslashes from a LIKE expression fragment, leaving the escaped characters in place. A
     * backslash that is the last character of the fragment has nothing to escape and is kept as is.
     *
     * @param likeFragment the fragment of a LIKE expression; may not be null
     * @return the literal text represented by the fragment; never null
     */
    private static String unescape( String likeFragment ) {
        if (likeFragment.indexOf('\\') < 0) {
            return likeFragment;
        }
        final int length = likeFragment.length();
        StringBuilder literal = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            char c = likeFragment.charAt(i);
            if (c == '\\' && i + 1 < length) {
                c = likeFragment.charAt(++i);
            }
            literal.append(c);
        }
        return literal.toString();
    }

    /**
     * Translate a LIKE expression into an equivalent regular expression: an unescaped '%' becomes '.*', an unescaped '_'
     * becomes '.', and every other character (including escaped wildcards and all regular expression metacharacters)
     * is matched literally.
     *
     * @param likeExpression the LIKE expression; may not be null
     * @return the regular expression; never null
     */
    private static String toRegularExpression( String likeExpression ) {
        final int length = likeExpression.length();
        StringBuilder regex = new StringBuilder(length + 16);
        StringBuilder literal = new StringBuilder();
        for (int i = 0; i < length; i++) {
            char c = likeExpression.charAt(i);
            if (c == '\\' && i + 1 < length) {
                // Escaped character: always literal, even if it is '%' or '_'
                literal.append(likeExpression.charAt(++i));
            } else if (c == '%') {
                appendQuoted(regex, literal);
                regex.append(".*");
            } else if (c == '_') {
                appendQuoted(regex, literal);
                regex.append('.');
            } else {
                literal.append(c);
            }
        }
        appendQuoted(regex, literal);
        return regex.toString();
    }

    /**
     * Append the pending literal text to the regular expression in quoted form and reset the literal buffer.
     *
     * @param regex the regular expression being built
     * @param literal the pending literal text; cleared by this method
     */
    private static void appendQuoted( StringBuilder regex, StringBuilder literal ) {
        if (literal.length() > 0) {
            // Pattern.quote neutralizes every metacharacter, including '{', '}' and ']'
            regex.append(Pattern.quote(literal.toString()));
            literal.setLength(0);
        }
    }
}