package org.apache.lucene.search.regex;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;
import org.apache.regexp.CharacterIterator;
import org.apache.regexp.RE;
import org.apache.regexp.REProgram;
import java.lang.reflect.Field;
import java.lang.reflect.Method;
/**
* Implementation tying <a href="http://jakarta.apache.org/regexp">Jakarta
* Regexp</a> to RegexQuery. Jakarta Regexp internally supports a
* {@link #prefix} implementation which can offer performance gains under
* certain circumstances. Yet, the implementation appears to be rather shaky as
* it doesn't always provide a prefix even if one would exist.
*/
public class JakartaRegexpCapabilities implements RegexCapabilities {

  /**
   * Handle on the public {@code REProgram.getPrefix()} method, resolved once
   * when this class is loaded; {@code null} if the lookup failed.
   */
  private static final Method getPrefixMethod = lookupGetPrefixMethod();

  /**
   * Handle on the declared {@code REProgram.prefix} field, made accessible
   * once when this class is loaded; {@code null} if the lookup failed.
   */
  private static final Field prefixField = lookupPrefixField();

  /**
   * Flag to specify normal, case-sensitive matching behaviour. This is the default.
   */
  public static final int FLAG_MATCH_NORMAL = RE.MATCH_NORMAL;

  /**
   * Flag to specify that matching should be case-independent (folded)
   */
  public static final int FLAG_MATCH_CASEINDEPENDENT = RE.MATCH_CASEINDEPENDENT;

  // The match flags are mirrored by the FLAG_* constants above so that
  // callers never have to reference the RE class themselves.
  private final int flags;

  /**
   * Constructs a RegexCapabilities with the default MATCH_NORMAL match style.
   */
  public JakartaRegexpCapabilities() {
    this(RE.MATCH_NORMAL);
  }

  /**
   * Constructs a RegexCapabilities with the provided match flags.
   * Multiple flags should be ORed together.
   *
   * @param flags The matching style
   */
  public JakartaRegexpCapabilities(int flags) {
    this.flags = flags;
  }

  /** Resolves {@code REProgram.getPrefix()}, yielding {@code null} on any failure. */
  private static Method lookupGetPrefixMethod() {
    try {
      return REProgram.class.getMethod("getPrefix");
    } catch (Exception e) {
      return null;
    }
  }

  /**
   * Resolves the declared {@code REProgram.prefix} field and marks it
   * accessible, yielding {@code null} if either step fails.
   */
  private static Field lookupPrefixField() {
    try {
      Field field = REProgram.class.getDeclaredField("prefix");
      field.setAccessible(true);
      return field;
    } catch (Exception e) {
      return null;
    }
  }

  /**
   * Builds a matcher for the given expression using this instance's flags.
   *
   * @param regex the regular expression to compile
   * @return a new matcher wrapping the compiled expression
   */
  public RegexCapabilities.RegexMatcher compile(String regex) {
    return new JakartaRegexMatcher(regex, flags);
  }

  /** Hash code derived solely from the match flags. */
  @Override
  public int hashCode() {
    return 31 + flags;
  }

  /**
   * Two instances are equal when they are of exactly the same class and
   * carry the same match flags.
   */
  @Override
  public boolean equals(Object obj) {
    if (this == obj) {
      return true;
    }
    if (obj == null || getClass() != obj.getClass()) {
      return false;
    }
    return flags == ((JakartaRegexpCapabilities) obj).flags;
  }

  /**
   * Matcher backed by a Jakarta Regexp {@link RE}. Each call to
   * {@link #match(BytesRef)} decodes the term into one UTF-16 buffer that is
   * reused across calls.
   * NOTE(review): because that buffer is per-instance state, sharing one
   * matcher between threads looks unsafe - confirm against callers.
   */
  class JakartaRegexMatcher implements RegexCapabilities.RegexMatcher {

    private final RE regexp;

    /** Scratch buffer receiving the UTF-16 form of the term being matched. */
    private final UnicodeUtil.UTF16Result utf16 = new UnicodeUtil.UTF16Result();

    /** Exposes the valid portion of {@link #utf16} to the regexp engine. */
    private final CharacterIterator utf16wrapper = new CharacterIterator() {
      public char charAt(int pos) {
        return utf16.result[pos];
      }

      public boolean isEnd(int pos) {
        // Only the first utf16.length chars of the backing array are valid.
        return pos >= utf16.length;
      }

      public String substring(int beginIndex) {
        return new String(utf16.result, beginIndex, utf16.length - beginIndex);
      }

      public String substring(int beginIndex, int endIndex) {
        return new String(utf16.result, beginIndex, endIndex - beginIndex);
      }
    };

    /**
     * Compiles the expression with the given Jakarta Regexp match flags.
     *
     * @param regex the regular expression
     * @param flags the match flags handed to {@link RE}
     */
    public JakartaRegexMatcher(String regex, int flags) {
      regexp = new RE(regex, flags);
    }

    /**
     * Decodes the UTF-8 bytes of {@code term} into the scratch buffer and
     * runs the compiled expression against it starting at position 0.
     *
     * @param term the UTF-8 encoded term
     * @return the result of {@code RE.match} on the decoded term
     */
    public boolean match(BytesRef term) {
      UnicodeUtil.UTF8toUTF16(term.bytes, term.offset, term.length, utf16);
      return regexp.match(utf16wrapper, 0);
    }

    /**
     * Reads the prefix of the compiled program reflectively, preferring the
     * {@code getPrefix()} method and falling back to the {@code prefix} field.
     *
     * @return the prefix as a String, or {@code null} when no reflective
     *         handle is available, the program has no prefix, or the
     *         reflective access fails
     */
    public String prefix() {
      if (getPrefixMethod == null && prefixField == null) {
        return null;
      }
      try {
        final Object raw;
        if (getPrefixMethod != null) {
          raw = getPrefixMethod.invoke(regexp.getProgram());
        } else {
          raw = prefixField.get(regexp.getProgram());
        }
        final char[] chars = (char[]) raw;
        if (chars == null) {
          return null;
        }
        return new String(chars);
      } catch (Exception e) {
        // if we cannot get the prefix, return none
        return null;
      }
    }
  }
}