/**
* Copyright (C) 2009-2013 FoundationDB, LLC
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.foundationdb.server.service.text;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.LowerCaseFilter;
import org.apache.lucene.analysis.core.StopAnalyzer;
import org.apache.lucene.analysis.core.StopFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.apache.lucene.analysis.standard.StandardTokenizer;
import org.apache.lucene.util.Version;
import java.io.Reader;
import java.util.Set;
/**
 * Analyzer that lower-cases tokens for every field except those whose names
 * appear in a caller-supplied set.
 *
 * <p>The chain built for each field is: {@link StandardTokenizer}, then
 * {@link StandardFilter}, then (only for fields that are <em>not</em> in the
 * case-preserving set) {@link LowerCaseFilter}, and finally a
 * {@link StopFilter} using {@link StopAnalyzer#ENGLISH_STOP_WORDS_SET}.
 *
 * <p>NOTE(review): for case-preserving fields the stop filter sees tokens that
 * were not lower-cased; whether capitalized stop words are still removed
 * depends on how the stop set handles case -- confirm this is intended.
 */
public class SelectiveCaseAnalyzer extends Analyzer
{
    /** Lucene compatibility version handed to every tokenizer and filter created here. */
    protected static final Version matchVersion = Version.LUCENE_40;

    /** Names of the fields whose tokens must keep their original case. */
    private final Set<String> casePreservingFieldNames;

    /**
     * Reuse strategy that keeps two sets of components in the stored value:
     * one for case-preserving fields and one for all remaining fields.
     */
    static class CaseReuseStrategy extends ReuseStrategy {
        /** Index of the components shared by case-preserving fields. */
        private static final int CASE_PRESERVING_SLOT = 0;
        /** Index of the components shared by all other (lower-cased) fields. */
        private static final int LOWER_CASING_SLOT = 1;
        /** Number of distinct component chains this strategy tracks. */
        private static final int SLOT_COUNT = 2;

        private final Set<String> casePreservingFieldNames;

        /**
         * @param casePreservingFieldNames names of the fields that use the
         *        case-preserving components; the set is kept by reference
         */
        public CaseReuseStrategy(Set<String> casePreservingFieldNames) {
            this.casePreservingFieldNames = casePreservingFieldNames;
        }

        /**
         * Looks up previously stored components for the given field.
         *
         * @param fieldName name of the field being analyzed
         * @return the components stored for the field's category, or
         *         {@code null} if nothing has been stored yet (either no
         *         array exists or the category's slot is still empty)
         */
        @Override
        public TokenStreamComponents getReusableComponents(String fieldName) {
            final TokenStreamComponents[] slots = (TokenStreamComponents[]) getStoredValue();
            if (slots == null) {
                return null;
            }
            return slots[slotFor(fieldName)];
        }

        /**
         * Stores components for the category the given field belongs to,
         * creating the backing two-element array on first use.
         *
         * @param fieldName name of the field the components were built for
         * @param components components to remember for that field's category
         */
        @Override
        public void setReusableComponents(String fieldName, TokenStreamComponents components) {
            TokenStreamComponents[] slots = (TokenStreamComponents[]) getStoredValue();
            if (slots == null) {
                slots = new TokenStreamComponents[SLOT_COUNT];
                setStoredValue(slots);
            }
            slots[slotFor(fieldName)] = components;
        }

        /** Maps a field name to the array index holding its category's components. */
        private int slotFor(String fieldName) {
            return casePreservingFieldNames.contains(fieldName)
                    ? CASE_PRESERVING_SLOT
                    : LOWER_CASING_SLOT;
        }
    }

    /**
     * Creates an analyzer that preserves case for the named fields only.
     *
     * @param casePreservingFieldNames names of the fields whose tokens are
     *        not lower-cased; the same set instance is shared with the reuse
     *        strategy and is not copied
     */
    public SelectiveCaseAnalyzer(Set<String> casePreservingFieldNames) {
        super(new CaseReuseStrategy(casePreservingFieldNames));
        this.casePreservingFieldNames = casePreservingFieldNames;
    }

    /**
     * Builds the tokenizer and filter chain for a field.
     *
     * @param fieldName name of the field being analyzed; decides whether a
     *        {@link LowerCaseFilter} is inserted
     * @param reader source of the text to tokenize
     * @return components whose source is the tokenizer and whose sink is the
     *         stop filter at the end of the chain
     */
    @Override
    protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        final Tokenizer tokenizer = new StandardTokenizer(matchVersion, reader);
        final TokenStream standardized = new StandardFilter(matchVersion, tokenizer);

        // Lower-casing is skipped only for the explicitly listed fields.
        final boolean preserveCase = casePreservingFieldNames.contains(fieldName);
        final TokenStream cased = preserveCase
                ? standardized
                : new LowerCaseFilter(matchVersion, standardized);

        final TokenStream withoutStopWords =
                new StopFilter(matchVersion, cased, StopAnalyzer.ENGLISH_STOP_WORDS_SET);
        return new TokenStreamComponents(tokenizer, withoutStopWords);
    }
}