/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-2015 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.exist.indexing.lucene;
import java.io.Reader;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.apache.commons.lang3.StringUtils;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.util.CharArraySet;
import org.apache.lucene.util.Version;
import org.exist.collections.CollectionConfiguration;
import org.exist.util.DatabaseConfigurationException;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
/**
 * Holds the Lucene analyzers that are configured through {@code <analyzer/>}
 * elements of a collection configuration (.xconf) document.
 *
 * An analyzer element without an {@code id} attribute becomes the default
 * analyzer; an element with an {@code id} is registered under that id.
 * Analyzers are instantiated reflectively, the (optional) {@code <param/>}
 * child elements are used as constructor arguments in document order.
 */
public class AnalyzerConfig {

    /*
     Supported configurations

     <analyzer class="org.apache.lucene.analysis.standard.StandardAnalyzer"/>

     <analyzer id="ws" class="org.apache.lucene.analysis.core.WhitespaceAnalyzer"/>

     <analyzer id="stdstops" class="org.apache.lucene.analysis.standard.StandardAnalyzer">
     ..<param name="stopwords" type="java.io.File" value="/tmp/stop.txt"/>
     </analyzer>

     <analyzer id="stdstops" class="org.apache.lucene.analysis.standard.StandardAnalyzer">
     ..<param name="stopwords" type="java.util.Set">
     ....<value>the</value>
     ....<value>this</value>
     ....<value>and</value>
     ....<value>that</value>
     ..</param>
     </analyzer>

     <analyzer id="sbstops" class="org.apache.lucene.analysis.snowball.SnowballAnalyzer">
     ..<param name="name" value="English"/>
     ..<param name="stopwords" type="java.util.Set">
     ....<value>the</value>
     ....<value>this</value>
     ....<value>and</value>
     ....<value>that</value>
     ..</param>
     </analyzer>

     Recognized values of the 'type' attribute of <param/>:
     - (absent) or java.lang.String
     - java.lang.reflect.Field (value = fully qualified class name + '.' + field name)
     - java.io.File
     - java.io.FileReader or file
     - java.util.Set
     - org.apache.lucene.analysis.util.CharArraySet or set
     - java.lang.Integer or int
     - java.lang.Boolean or boolean
     - the fully qualified name of an enum class (value = name of the constant)
     */

    private static final Logger LOG = LogManager.getLogger(AnalyzerConfig.class);

    private static final String ID_ATTRIBUTE = "id";
    private static final String NAME_ATTRIBUTE = "name";
    private static final String TYPE_ATTRIBUTE = "type";
    private static final String CLASS_ATTRIBUTE = "class";
    private static final String PARAM_VALUE_ENTRY = "value";
    private static final String PARAM_ELEMENT_NAME = "param";

    /** Analyzers that were configured with an id, keyed by that id. */
    private final Map<String, Analyzer> analyzers = new TreeMap<>();

    /** The analyzer that was configured without an id, if any. */
    private Analyzer defaultAnalyzer = null;

    /**
     * Look up an analyzer by the id it was registered with.
     *
     * @param id The id of the analyzer
     * @return The analyzer, or NULL when no analyzer is registered for the id.
     */
    public Analyzer getAnalyzerById(String id) {
        return analyzers.get(id);
    }

    /**
     * Get the default analyzer.
     *
     * @return The default analyzer, or NULL when none has been set.
     */
    public Analyzer getDefaultAnalyzer() {
        return defaultAnalyzer;
    }

    /**
     * Parse {@code <analyzer/>} element and register configured analyzer.
     * Nothing is registered when the analyzer could not be created.
     *
     * @param config The analyzer element from .xconf file.
     *
     * @throws DatabaseConfigurationException Something unexpected happened.
     */
    public void addAnalyzer(Element config) throws DatabaseConfigurationException {

        // Configure lucene analyzer with configuration
        final Analyzer analyzer = configureAnalyzer(config);
        if (analyzer == null) {
            return;
        }

        // Get (optional) id-attribute of analyzer
        final String id = config.getAttribute(ID_ATTRIBUTE);

        // If no ID is provided, register as default analyzer
        // else register analyzer
        if (StringUtils.isBlank(id)) {
            setDefaultAnalyzer(analyzer);
        } else {
            analyzers.put(id, analyzer);
        }
    }

    /**
     * Set the default analyzer.
     *
     * @param analyzer Lucene analyzer
     */
    public void setDefaultAnalyzer(Analyzer analyzer) {
        defaultAnalyzer = analyzer;
    }

    /**
     * Parse {@code <analyzer/>} element from xconf and initialize an analyzer
     * with the parameters.
     *
     * When the parameters cannot be parsed, the problem is logged and the
     * analyzer is created as if no parameters were configured.
     *
     * @param config The analyzer element
     * @return Initialized Analyzer object, or NULL when the analyzer could not
     * be created (the reason is logged).
     *
     * @throws DatabaseConfigurationException Something unexpected happened.
     */
    protected static Analyzer configureAnalyzer(Element config) throws DatabaseConfigurationException {

        // Get classname from attribute
        final String className = config.getAttribute(CLASS_ATTRIBUTE);

        if (StringUtils.isBlank(className)) {
            // No classname is defined.
            LOG.error("Missing class attribute or attribute is empty.");
            // DW: throw exception?
            LOG.error(String.format("Unable to create analyzer '%s'", className));
            return null;
        }

        // Probe class
        final Class<?> clazz;
        try {
            clazz = Class.forName(className);

        } catch (ClassNotFoundException e) {
            LOG.error(String.format("Lucene index: analyzer class %s not found. (%s)", className, e.getMessage()));
            return null;
        }

        // Check if class is an Analyzer
        if (!Analyzer.class.isAssignableFrom(clazz)) {
            LOG.error(String.format("Lucene index: analyzer class has to be a subclass of %s", Analyzer.class.getName()));
            return null;
        }

        // Get list of parameters
        List<KeyTypedValue> cParams;
        try {
            cParams = getAllConstructorParameters(config);

        } catch (ParameterException pe) {
            // Unable to parse parameters, continue without parameters.
            LOG.error(String.format("Unable to get parameters for %s: %s", className, pe.getMessage()), pe);
            cParams = new ArrayList<>();
        }

        // Iterate over all parameters, convert data to two arrays
        // that can be used in the reflection code
        final Class<?>[] cParamClasses = new Class<?>[cParams.size()];
        final Object[] cParamValues = new Object[cParams.size()];
        for (int i = 0; i < cParams.size(); i++) {
            final KeyTypedValue ktv = cParams.get(i);
            cParamClasses[i] = ktv.getValueClass();
            cParamValues[i] = ktv.getValue();
        }

        // Create new analyzer
        Analyzer newAnalyzer;
        if (cParamClasses.length > 0 && cParamClasses[0] == Version.class) {

            if (LOG.isDebugEnabled()) {
                final Version version = (Version) cParamValues[0];
                LOG.debug(String.format("An explicit Version %s of lucene has been specified.", version.toString()));
            }

            // A lucene Version object has been provided, so it shall be used
            newAnalyzer = createInstance(clazz, cParamClasses, cParamValues);

        } else {
            // The first parameter (if any) is not a lucene Version object:
            // extend arrays with (default) Version object info, add to front.
            final Class<?>[] vcParamClasses = addVersionToClasses(cParamClasses);
            final Object[] vcParamValues = addVersionValueToValues(cParamValues);

            // Finally create Analyzer
            newAnalyzer = createInstance(clazz, vcParamClasses, vcParamValues);

            // Fallback scenario: a special (not standard type of) Analyzer has been specified without
            // a 'Version' argument on purpose. For this (try) to create the Analyzer with
            // the original parameters.
            if (newAnalyzer == null) {
                newAnalyzer = createInstance(clazz, cParamClasses, cParamValues);
            }
        }

        if (newAnalyzer == null) {
            // No analyzer took ownership of the readers that were opened
            // for file parameters, release them here.
            closeReaders(cParams);
            LOG.error(String.format("Unable to create analyzer '%s'", className));
        }

        return newAnalyzer;
    }

    /**
     * Create instance of the lucene analyzer with provided arguments. The
     * constructor is looked up with exactly the provided parameter classes.
     *
     * @param clazz The analyzer class
     * @param vcParamClasses The parameter classes
     * @param vcParamValues The parameter values
     * @return The lucene analyzer, or NULL when no matching constructor exists
     * or the instantiation failed (the reason is logged).
     */
    private static Analyzer createInstance(Class<?> clazz, Class<?>[] vcParamClasses, Object[] vcParamValues) {

        final String className = clazz.getName();

        try {
            final Constructor<?> cstr = clazz.getDeclaredConstructor(vcParamClasses);
            cstr.setAccessible(true);

            if (LOG.isDebugEnabled()) {
                LOG.debug(String.format("Using analyzer %s", className));
            }

            return (Analyzer) cstr.newInstance(vcParamValues);

        } catch (IllegalArgumentException | IllegalAccessException | InstantiationException | InvocationTargetException | SecurityException e) {
            LOG.error(String.format("Exception while instantiating analyzer class %s: %s", className, e.getMessage()), e);

        } catch (NoSuchMethodException ex) {
            LOG.error(String.format("Could not find matching analyzer class constructor for %s: %s", className, ex.getMessage()), ex);
        }

        return null;
    }

    /**
     * Extend list of values, add version-value to front
     *
     * @param cParamValues The original values, is not modified
     * @return New array: the lucene version in use followed by the original values
     */
    private static Object[] addVersionValueToValues(final Object[] cParamValues) {
        final Object[] vcParamValues = new Object[cParamValues.length + 1];
        vcParamValues[0] = LuceneIndex.LUCENE_VERSION_IN_USE;
        System.arraycopy(cParamValues, 0, vcParamValues, 1, cParamValues.length);
        return vcParamValues;
    }

    /**
     * Extend list of classes, add version-class to front
     *
     * @param cParamClasses The original classes, is not modified
     * @return New array: the Version class followed by the original classes
     */
    private static Class<?>[] addVersionToClasses(final Class<?>[] cParamClasses) {
        final Class<?>[] vcParamClasses = new Class<?>[cParamClasses.length + 1];
        vcParamClasses[0] = Version.class;
        System.arraycopy(cParamClasses, 0, vcParamClasses, 1, cParamClasses.length);
        return vcParamClasses;
    }

    /**
     * Retrieve parameter info from all {@code <param/>} elements.
     *
     * @param config The {@code <analyzer/>} element from the provided configuration
     * @return List of triples key-value-valueType, in document order
     * @throws org.exist.indexing.lucene.AnalyzerConfig.ParameterException
     * when one of the parameters could not be parsed.
     */
    private static List<KeyTypedValue> getAllConstructorParameters(Element config) throws ParameterException {
        final List<KeyTypedValue> parameters = new ArrayList<>();
        final NodeList params = config.getElementsByTagNameNS(CollectionConfiguration.NAMESPACE, PARAM_ELEMENT_NAME);

        try {
            // iterate over all <param/> elements
            for (int i = 0; i < params.getLength(); i++) {
                parameters.add(getConstructorParameter((Element) params.item(i)));
            }
        } catch (ParameterException pe) {
            // The partial result is discarded, so release the readers
            // that were already opened for preceding file parameters.
            closeReaders(parameters);
            throw pe;
        }

        return parameters;
    }

    /**
     * Retrieve configuration information from one {@code <param/>} element.
     * Type information is used to construct actual data containing objects.
     *
     * @param param Element that represents {@code <param/>}
     * @return Triple key-value-value-type, never NULL
     * @throws org.exist.indexing.lucene.AnalyzerConfig.ParameterException
     * when the value is missing or cannot be converted to the requested type.
     */
    private static KeyTypedValue getConstructorParameter(Element param) throws ParameterException {

        // Get attributes
        final NamedNodeMap attrs = param.getAttributes();

        // Name, value type and value of the parameter; each is NULL when
        // the corresponding attribute is not present.
        final String name = getAttributeValue(attrs, NAME_ATTRIBUTE);
        final String type = getAttributeValue(attrs, TYPE_ATTRIBUTE);
        final String value = getAttributeValue(attrs, PARAM_VALUE_ENTRY);

        if (StringUtils.isBlank(type) || "java.lang.String".equals(type)) {
            // String or no type is provided, assume string.
            requireValue(value, "The 'value' attribute must exist and must contain String value.");
            return new KeyTypedValue(name, value);
        }

        switch (type) {
            case "java.lang.reflect.Field": {
                requireValue(value, "The 'value' attribute must exist and must contain a full classname.");

                // The value has the form <classname>.<fieldname>
                final int lastDot = value.lastIndexOf('.');
                if (lastDot <= 0 || lastDot == value.length() - 1) {
                    throw new ParameterException(String.format(
                            "The value '%s' must consist of a full classname, a '.' and a field name.", value));
                }
                final String clazzName = value.substring(0, lastDot);
                final String fieldName = value.substring(lastDot + 1);

                try {
                    // Retrieve value from Field
                    final Class<?> fieldClazz = Class.forName(clazzName);
                    final Field field = fieldClazz.getField(fieldName);
                    field.setAccessible(true);

                    // A static field (constant) is read without an instance, so it also works
                    // for enums and classes without a public no-arg constructor.
                    final Object receiver = java.lang.reflect.Modifier.isStatic(field.getModifiers())
                            ? null : fieldClazz.newInstance();
                    final Object fValue = field.get(receiver);
                    if (fValue == null) {
                        throw new ParameterException(String.format("The field '%s' does not contain a value.", value));
                    }
                    return new KeyTypedValue(name, fValue);

                } catch (NoSuchFieldException | ClassNotFoundException | InstantiationException | IllegalAccessException nsfe) {
                    throw new ParameterException(nsfe.getMessage(), nsfe);
                }
            }

            case "java.io.File": {
                requireValue(value, "The 'value' attribute must exist and must contain a file name.");

                LOG.info(String.format("Type '%s' has been deprecated in recent Lucene versions, "
                        + "please use 'java.io.FileReader' (short 'file') instead.", type));

                return new KeyTypedValue(name, new java.io.File(value), java.io.File.class);
            }

            case "java.io.FileReader":
            case "file": {
                requireValue(value, "The 'value' attribute must exist and must contain a file name.");

                try {
                    // The reader is handed over to the analyzer constructor; it is only
                    // closed by this class when the parameters end up not being used.
                    final Reader fileReader = new java.io.FileReader(value);
                    return new KeyTypedValue(name, fileReader, Reader.class);

                } catch (java.io.FileNotFoundException ex) {
                    throw new ParameterException(String.format("File '%s' could not be found.", value), ex);
                }
            }

            case "java.util.Set": {
                LOG.info(String.format("Type '%s' has been deprecated in recent Lucene versions, "
                        + "please use 'org.apache.lucene.analysis.util.CharArraySet' (short 'set') instead.", type));

                final Set<String> s = getConstructorParameterSetValues(param);
                return new KeyTypedValue(name, s, Set.class);
            }

            case "org.apache.lucene.analysis.util.CharArraySet":
            case "set": {
                // This is mandatory to use iso a normal Set since Lucene 4
                final CharArraySet s = getConstructorParameterCharArraySetValues(param);
                return new KeyTypedValue(name, s, CharArraySet.class);
            }

            case "java.lang.Integer":
            case "int": {
                requireValue(value, "The 'value' attribute must exist and must contain an integer value.");

                try {
                    // NOTE: the value class is the boxed type; the exact constructor lookup
                    // does not match a constructor that declares a primitive 'int'.
                    final Integer n = Integer.parseInt(value);
                    return new KeyTypedValue(name, n, Integer.class);

                } catch (NumberFormatException ex) {
                    throw new ParameterException(
                            String.format("Value %s could not be converted to an integer. %s", value, ex.getMessage()), ex);
                }
            }

            case "java.lang.Boolean":
            case "boolean": {
                requireValue(value, "The 'value' attribute must exist and must contain a boolean value.");

                // NOTE: the value class is the boxed type; the exact constructor lookup
                // does not match a constructor that declares a primitive 'boolean'.
                final Boolean b = Boolean.parseBoolean(value);
                return new KeyTypedValue(name, b, Boolean.class);
            }

            default: {
                // FallBack there was no match
                requireValue(value, "The 'value' attribute must exist and must contain a value.");

                try {
                    //if the type is an Enum then use valueOf()
                    final Class<?> clazz = Class.forName(type);
                    if (clazz.isEnum()) {
                        return new KeyTypedValue(name, toEnumConstant(clazz, value), clazz);
                    }

                    //default, assume java.lang.String
                    return new KeyTypedValue(name, value);

                } catch (ClassNotFoundException cnfe) {
                    throw new ParameterException(String.format("Class for type: %s not found. %s", type, cnfe.getMessage()), cnfe);

                } catch (IllegalArgumentException iae) {
                    // Thrown by Enum.valueOf() when the enum has no constant with this name
                    throw new ParameterException(String.format("Value %s is not a constant of enum %s. %s", value, type, iae.getMessage()), iae);
                }
            }
        }
    }

    /**
     * Get the value of an attribute.
     *
     * @param attrs The attributes of an element
     * @param attributeName The name of the attribute
     * @return The value of the attribute, or NULL when the attribute is not present.
     */
    private static String getAttributeValue(final NamedNodeMap attrs, final String attributeName) {
        final Node namedItem = attrs.getNamedItem(attributeName);
        return (namedItem == null) ? null : namedItem.getNodeValue();
    }

    /**
     * Verify that the 'value' attribute of a parameter is present.
     *
     * @param value The value of the attribute, NULL when not present
     * @param message The message of the exception that is thrown when the value is not present
     * @throws org.exist.indexing.lucene.AnalyzerConfig.ParameterException
     * when the value is NULL.
     */
    private static void requireValue(final String value, final String message) throws ParameterException {
        if (value == null) {
            throw new ParameterException(message);
        }
    }

    /**
     * Resolve an enum constant by name. The raw types are required because the
     * actual enum type is only known at runtime.
     *
     * @param enumClass The enum class, the caller has verified {@link Class#isEnum()}
     * @param constantName The name of the constant
     * @return The enum constant
     * @throws IllegalArgumentException when the enum has no constant with the name.
     */
    @SuppressWarnings({"unchecked", "rawtypes"})
    private static Object toEnumConstant(final Class enumClass, final String constantName) {
        return Enum.valueOf(enumClass, constantName);
    }

    /**
     * Close all parameter values that are a {@link Reader}. A failure to close
     * a reader is logged and does not prevent closing the remaining readers.
     *
     * @param parameters The parameters of which the readers must be closed.
     */
    private static void closeReaders(final List<KeyTypedValue> parameters) {
        for (final KeyTypedValue parameter : parameters) {
            final Object value = parameter.getValue();
            if (value instanceof Reader) {
                try {
                    ((Reader) value).close();
                } catch (java.io.IOException ioe) {
                    LOG.warn(String.format("Unable to close reader of parameter '%s': %s", parameter.getKey(), ioe.getMessage()), ioe);
                }
            }
        }
    }

    /**
     * Get parameter configuration data as standard Java (Hash)Set. The data is
     * read from the {@code <value/>} elements below the parameter element.
     *
     * @param param The parameter-configuration element.
     * @return Set of parameter values
     */
    private static Set<String> getConstructorParameterSetValues(Element param) {
        final Set<String> set = new HashSet<>();
        final NodeList values = param.getElementsByTagNameNS(CollectionConfiguration.NAMESPACE, PARAM_VALUE_ENTRY);
        for (int i = 0; i < values.getLength(); i++) {
            final Element value = (Element) values.item(i);

            //TODO getNodeValue() on org.exist.dom.persistent.ElementImpl should return null according to W3C spec!
            if (value instanceof org.exist.dom.persistent.ElementImpl) {
                set.add(value.getNodeValue());
            } else {
                set.add(value.getTextContent());
            }
        }

        return set;
    }

    /**
     * Get parameter configuration data as a Lucene CharArraySet.
     *
     * @param param The parameter-configuration element.
     * @return Parameter data as Lucene CharArraySet
     */
    private static CharArraySet getConstructorParameterCharArraySetValues(Element param) {
        final Set<String> set = getConstructorParameterSetValues(param);
        return CharArraySet.copy(LuceneIndex.LUCENE_VERSION_IN_USE, set);
    }

    /**
     * Class for containing the Triple : key (name), corresponding value and
     * class type of value.
     */
    private static class KeyTypedValue {

        private final String key;
        private final Object value;
        private final Class<?> valueClass;

        /**
         * Create a triple of which the class type is the runtime class of the value.
         *
         * @param key The name of the parameter, can be NULL
         * @param value The value, must not be NULL
         */
        public KeyTypedValue(String key, Object value) {
            this(key, value, value.getClass());
        }

        /**
         * Create a triple with an explicit class type.
         *
         * @param key The name of the parameter, can be NULL
         * @param value The value
         * @param valueClass The class that is used to find a matching constructor
         */
        public KeyTypedValue(String key, Object value, Class<?> valueClass) {
            this.key = key;
            this.value = value;
            this.valueClass = valueClass;
        }

        public String getKey() {
            return key;
        }

        public Object getValue() {
            return value;
        }

        public Class<?> getValueClass() {
            return valueClass;
        }
    }

    /**
     * Exception class for reporting problems with the parameters.
     */
    private static class ParameterException extends Exception {

        private static final long serialVersionUID = -4823392401966008877L;

        public ParameterException(String message) {
            super(message);
        }

        public ParameterException(String message, Throwable cause) {
            super(message, cause);
        }
    }
}