/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.util;
import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
/**
* An convenience subclass of Analyzer that makes it easy to implement
* {@link TokenStream} reuse.
* <p>
* ReusableAnalyzerBase is a simplification of Analyzer that supports easy reuse
* for the most common use-cases. Analyzers such as
* {@link PerFieldAnalyzerWrapper} that behave differently depending upon the
* field name need to subclass Analyzer directly instead.
* </p>
* <p>
* To prevent consistency problems, this class does not allow subclasses to
* extend {@link #reusableTokenStream(String, Reader)} or
* {@link #tokenStream(String, Reader)} directly. Instead, subclasses must
* implement {@link #createComponents(String, Reader)}.
* </p>
*/
public abstract class ReusableAnalyzerBase extends Analyzer {
/**
* Creates a new {@link TokenStreamComponents} instance for this analyzer.
*
* @param fieldName
* the name of the fields content passed to the
* {@link TokenStreamComponents} sink as a reader
* @param aReader
* the reader passed to the {@link Tokenizer} constructor
* @return the {@link TokenStreamComponents} for this analyzer.
*/
protected abstract TokenStreamComponents createComponents(String fieldName,
Reader aReader);
/**
* This method uses {@link #createComponents(String, Reader)} to obtain an
* instance of {@link TokenStreamComponents}. It returns the sink of the
* components and stores the components internally. Subsequent calls to this
* method will reuse the previously stored components if and only if the
* {@link TokenStreamComponents#reset(Reader)} method returned
* <code>true</code>. Otherwise a new instance of
* {@link TokenStreamComponents} is created.
*
* @param fieldName the name of the field the created TokenStream is used for
* @param reader the reader the streams source reads from
*/
@Override
public final TokenStream reusableTokenStream(final String fieldName,
final Reader reader) throws IOException {
TokenStreamComponents streamChain = (TokenStreamComponents)
getPreviousTokenStream();
if (streamChain == null || !streamChain.reset(reader)) {
streamChain = createComponents(fieldName, reader);
setPreviousTokenStream(streamChain);
}
return streamChain.getTokenStream();
}
/**
* This method uses {@link #createComponents(String, Reader)} to obtain an
* instance of {@link TokenStreamComponents} and returns the sink of the
* components. Each calls to this method will create a new instance of
* {@link TokenStreamComponents}. Created {@link TokenStream} instances are
* never reused.
*
* @param fieldName the name of the field the created TokenStream is used for
* @param reader the reader the streams source reads from
*/
@Override
public final TokenStream tokenStream(final String fieldName,
final Reader reader) {
return createComponents(fieldName, reader).getTokenStream();
}
/**
* This class encapsulates the outer components of a token stream. It provides
* access to the source ({@link Tokenizer}) and the outer end (sink), an
* instance of {@link TokenFilter} which also serves as the
* {@link TokenStream} returned by
* {@link Analyzer#tokenStream(String, Reader)} and
* {@link Analyzer#reusableTokenStream(String, Reader)}.
*/
public static class TokenStreamComponents {
protected final Tokenizer source;
protected final TokenStream sink;
/**
* Creates a new {@link TokenStreamComponents} instance.
*
* @param source
* the analyzer's tokenizer
* @param result
* the analyzer's resulting token stream
*/
public TokenStreamComponents(final Tokenizer source,
final TokenStream result) {
this.source = source;
this.sink = result;
}
/**
* Creates a new {@link TokenStreamComponents} instance.
*
* @param source
* the analyzer's tokenizer
*/
public TokenStreamComponents(final Tokenizer source) {
this.source = source;
this.sink = source;
}
/**
* Resets the encapsulated components with the given reader. This method by
* default returns <code>true</code> indicating that the components have
* been reset successfully. Subclasses of {@link ReusableAnalyzerBase} might use
* their own {@link TokenStreamComponents} returning <code>false</code> if
* the components cannot be reset.
*
* @param reader
* a reader to reset the source component
* @return <code>true</code> if the components were reset, otherwise
* <code>false</code>
* @throws IOException
* if the component's reset method throws an {@link IOException}
*/
protected boolean reset(final Reader reader) throws IOException {
source.reset(reader);
if(sink != source)
sink.reset(); // only reset if the sink reference is different from source
return true;
}
/**
* Returns the sink {@link TokenStream}
*
* @return the sink {@link TokenStream}
*/
protected TokenStream getTokenStream() {
return sink;
}
}
}