package org.exist.indexing.range;
import org.apache.logging.log4j.LogManager;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.apache.lucene.collation.CollationAttributeFactory;
import org.apache.lucene.util.AttributeFactory;
import org.exist.util.Collations;
import org.exist.util.DatabaseConfigurationException;
import org.apache.logging.log4j.Logger;
import org.exist.xquery.XPathException;
import org.w3c.dom.Element;
import java.io.Reader;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.text.Collator;
import java.util.ArrayList;
import java.util.List;
/**
* Lucene analyzer used by the range index. Based on {@link KeywordTokenizer}, it allows additional
* filters to be added to the pipeline through the collection.xconf configuration. A collation may be
* specified as well.
*/
public class RangeIndexAnalyzer extends Analyzer {
private final static Logger LOG = LogManager.getLogger(RangeIndexAnalyzer.class);
private static class FilterConfig {
Constructor<?> constructor;
FilterConfig(Element config) throws DatabaseConfigurationException {
final String className = config.getAttribute("class");
if (className == null) {
throw new DatabaseConfigurationException("No class specified for filter");
}
try {
Class clazz = Class.forName(className);
if (!TokenFilter.class.isAssignableFrom(clazz)) {
throw new DatabaseConfigurationException("Filter " + className + " is not a subclass of " +
TokenFilter.class.getName());
}
constructor = clazz.getConstructor(TokenStream.class);
} catch (ClassNotFoundException e) {
throw new DatabaseConfigurationException("Filter not found: " + className, e);
} catch (NoSuchMethodException e) {
throw new DatabaseConfigurationException("Filter class " + className + " has non-default " +
"constructor", e);
}
}
}
private List<FilterConfig> filterConfigs = new ArrayList<>();
private Collator collator = null;
public RangeIndexAnalyzer() {
}
public void addFilter(Element filter) throws DatabaseConfigurationException {
filterConfigs.add(new FilterConfig(filter));
}
public void addCollation(String uri) throws DatabaseConfigurationException {
try {
collator = Collations.getCollationFromURI(null, uri);
} catch (XPathException e) {
throw new DatabaseConfigurationException(e.getMessage(), e);
}
}
@Override
protected TokenStreamComponents createComponents(final String fieldName, final Reader reader) {
AttributeFactory factory = AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY;
if (collator != null) {
factory = new CollationAttributeFactory(collator);
}
Tokenizer src = new KeywordTokenizer(factory, reader, 256);
TokenStream tok = src;
for (FilterConfig filter: filterConfigs) {
try {
tok = (TokenStream) filter.constructor.newInstance(tok);
} catch (InstantiationException | IllegalAccessException | InvocationTargetException e) {
LOG.warn(e.getMessage(), e);
}
}
return new TokenStreamComponents(src, tok);
}
}