/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.core.query.lucene;
import java.io.IOException;
import java.io.Reader;
import java.lang.reflect.Constructor;
import java.util.Collections;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* This is the global jackrabbit lucene analyzer. By default, all
* properties are indexed with the <code>StandardAnalyzer(new String[]{})</code>,
* unless in the <SearchIndex> configuration a global analyzer is defined.
*
* In the indexing configuration, properties can be configured to be
* indexed with a specific analyzer. If configured, this analyzer is used to
* index the text of the property and to parse searchtext for this property.
*/
public class JackrabbitAnalyzer extends Analyzer {
private static Logger log =
LoggerFactory.getLogger(JackrabbitAnalyzer.class);
private static final Analyzer DEFAULT_ANALYZER = new ClassicAnalyzer(
Version.LUCENE_36, Collections.emptySet());
/**
* Returns a new instance of the named Lucene {@link Analyzer} class,
* or the default analyzer if the given class can not be instantiated.
*
* @param className name of the analyzer class
* @return new analyzer instance, or the default analyzer
*/
static Analyzer getAnalyzerInstance(String className) {
Class<?> analyzerClass;
try {
analyzerClass = Class.forName(className);
} catch (ClassNotFoundException e) {
log.warn(className + " could not be found", e);
return DEFAULT_ANALYZER;
}
if (!Analyzer.class.isAssignableFrom(analyzerClass)) {
log.warn(className + " is not a Lucene Analyzer");
return DEFAULT_ANALYZER;
} else if (JackrabbitAnalyzer.class.isAssignableFrom(analyzerClass)) {
log.warn(className + " can not be used as a JackrabbitAnalyzer component");
return DEFAULT_ANALYZER;
}
Exception cause = null;
Constructor<?>[] constructors = analyzerClass.getConstructors();
for (Constructor<?> constructor : constructors) {
Class<?>[] types = constructor.getParameterTypes();
if (types.length == 1 && types[0] == Version.class) {
try {
return (Analyzer) constructor.newInstance(Version.LUCENE_36);
} catch (Exception e) {
cause = e;
}
}
}
for (Constructor<?> constructor : constructors) {
if (constructor.getParameterTypes().length == 0) {
try {
return (Analyzer) constructor.newInstance();
} catch (Exception e) {
cause = e;
}
}
}
log.warn(className + " could not be instantiated", cause);
return DEFAULT_ANALYZER;
}
/**
* The default Jackrabbit analyzer if none is configured in
* <code><SearchIndex></code> configuration.
*/
private Analyzer defaultAnalyzer = DEFAULT_ANALYZER;
/**
* The indexing configuration.
*/
private IndexingConfiguration indexingConfig;
/**
* A param indexingConfig the indexing configuration.
*/
protected void setIndexingConfig(IndexingConfiguration indexingConfig) {
this.indexingConfig = indexingConfig;
}
/**
* @param analyzer the default jackrabbit analyzer
*/
protected void setDefaultAnalyzer(Analyzer analyzer) {
defaultAnalyzer = analyzer;
}
String getDefaultAnalyzerClass() {
return defaultAnalyzer.getClass().getName();
}
void setDefaultAnalyzerClass(String className) {
setDefaultAnalyzer(getAnalyzerInstance(className));
}
/**
* Creates a TokenStream which tokenizes all the text in the provided
* Reader. If the fieldName (property) is configured to have a different
* analyzer than the default, this analyzer is used for tokenization
*/
public final TokenStream tokenStream(String fieldName, Reader reader) {
if (indexingConfig != null) {
Analyzer propertyAnalyzer = indexingConfig.getPropertyAnalyzer(fieldName);
if (propertyAnalyzer != null) {
return propertyAnalyzer.tokenStream(fieldName, reader);
}
}
return defaultAnalyzer.tokenStream(fieldName, reader);
}
@Override
public final TokenStream reusableTokenStream(String fieldName, Reader reader)
throws IOException {
if (indexingConfig != null) {
Analyzer propertyAnalyzer = indexingConfig.getPropertyAnalyzer(fieldName);
if (propertyAnalyzer != null) {
return propertyAnalyzer.reusableTokenStream(fieldName, reader);
}
}
return defaultAnalyzer.reusableTokenStream(fieldName, reader);
}
}