JackrabbitAnalyzer.java example

Explorer
jackrabbit-master
- jackrabbit-trunk
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.core.query.lucene;

import java.io.IOException;
import java.io.Reader;
import java.lang.reflect.Constructor;
import java.util.Collections;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.ClassicAnalyzer;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This is the global jackrabbit lucene analyzer. By default, all
 * properties are indexed with the <code>StandardAnalyzer(new String[]{})</code>,
 * unless in the <SearchIndex> configuration a global analyzer is defined.
 *
 * In the indexing configuration, properties can be configured to be
 * indexed with a specific analyzer. If configured, this analyzer is used to
 * index the text of the property and to parse searchtext for this property.
 */
public class JackrabbitAnalyzer extends Analyzer {

    private static Logger log =
            LoggerFactory.getLogger(JackrabbitAnalyzer.class);

    private static final Analyzer DEFAULT_ANALYZER = new ClassicAnalyzer(
            Version.LUCENE_36, Collections.emptySet());

    /**
     * Returns a new instance of the named Lucene {@link Analyzer} class,
     * or the default analyzer if the given class can not be instantiated.
     *
     * @param className name of the analyzer class
     * @return new analyzer instance, or the default analyzer
     */
    static Analyzer getAnalyzerInstance(String className) {
        Class<?> analyzerClass;
        try {
            analyzerClass = Class.forName(className);
        } catch (ClassNotFoundException e) {
            log.warn(className + " could not be found", e);
            return DEFAULT_ANALYZER;
        }
        if (!Analyzer.class.isAssignableFrom(analyzerClass)) {
            log.warn(className + " is not a Lucene Analyzer");
            return DEFAULT_ANALYZER;
        } else if (JackrabbitAnalyzer.class.isAssignableFrom(analyzerClass)) {
            log.warn(className + " can not be used as a JackrabbitAnalyzer component");
            return DEFAULT_ANALYZER;
        }

        Exception cause = null;
        Constructor<?>[] constructors = analyzerClass.getConstructors();
        for (Constructor<?> constructor : constructors) {
            Class<?>[] types = constructor.getParameterTypes();
            if (types.length == 1 && types[0] == Version.class) {
                try {
                    return (Analyzer) constructor.newInstance(Version.LUCENE_36);
                } catch (Exception e) {
                    cause = e;
                }
            }
        }
        for (Constructor<?> constructor : constructors) {
            if (constructor.getParameterTypes().length == 0) {
                try {
                    return (Analyzer) constructor.newInstance();
                } catch (Exception e) {
                    cause = e;
                }
            }
        }

        log.warn(className + " could not be instantiated", cause);
        return DEFAULT_ANALYZER;
    }

    /**
     * The default Jackrabbit analyzer if none is configured in
     * <code><SearchIndex></code> configuration.
     */
    private Analyzer defaultAnalyzer = DEFAULT_ANALYZER;

    /**
     * The indexing configuration.
     */
    private IndexingConfiguration indexingConfig;

    /**
     * A param indexingConfig the indexing configuration.
     */
    protected void setIndexingConfig(IndexingConfiguration indexingConfig) {
        this.indexingConfig = indexingConfig;
    }

    /**
     * @param analyzer the default jackrabbit analyzer
     */
    protected void setDefaultAnalyzer(Analyzer analyzer) {
        defaultAnalyzer = analyzer;
    }

    String getDefaultAnalyzerClass() {
        return defaultAnalyzer.getClass().getName();
    }

    void setDefaultAnalyzerClass(String className) {
        setDefaultAnalyzer(getAnalyzerInstance(className));
    }

    /**
     * Creates a TokenStream which tokenizes all the text in the provided
     * Reader. If the fieldName (property) is configured to have a different
     * analyzer than the default, this analyzer is used for tokenization
     */
    public final TokenStream tokenStream(String fieldName, Reader reader) {
        if (indexingConfig != null) {
            Analyzer propertyAnalyzer = indexingConfig.getPropertyAnalyzer(fieldName);
            if (propertyAnalyzer != null) {
                return propertyAnalyzer.tokenStream(fieldName, reader);
            }
        }
        return defaultAnalyzer.tokenStream(fieldName, reader);
    }

    @Override
    public final TokenStream reusableTokenStream(String fieldName, Reader reader)
            throws IOException {
        if (indexingConfig != null) {
            Analyzer propertyAnalyzer = indexingConfig.getPropertyAnalyzer(fieldName);
            if (propertyAnalyzer != null) {
                return propertyAnalyzer.reusableTokenStream(fieldName, reader);
            }
        }
        return defaultAnalyzer.reusableTokenStream(fieldName, reader);
    }
}