package org.xbib.elasticsearch.index.analysis.sortform;
import com.ibm.icu.text.Collator;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.core.KeywordTokenizer;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenizerFactory;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.CustomAnalyzer;
import org.elasticsearch.index.analysis.CustomAnalyzerProvider;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.xbib.elasticsearch.index.analysis.icu.IcuCollationKeyAnalyzerProvider;
import org.xbib.elasticsearch.index.analysis.icu.IcuCollationAttributeFactory;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
/**
 * Analyzer provider that produces "sort form" tokens: the entire input is emitted as a
 * single keyword token whose term attribute is built through an ICU collation attribute
 * factory, with optional char filters and token filters configured by name.
 */
public class SortformAnalyzerProvider extends CustomAnalyzerProvider {

    /** Raw analyzer settings; read again in {@link #build} for filter names and gaps. */
    private final Settings analyzerSettings;

    /** Keyword tokenizer factory that emits ICU collation keys as term bytes. */
    private final TokenizerFactory tokenizerFactory;

    /** Assembled by {@link #build}; {@code null} until then (see note on {@link #get()}). */
    private CustomAnalyzer customAnalyzer;

    /**
     * Creates the provider and the sort-form tokenizer factory from the analyzer settings.
     *
     * @param indexSettings index-level settings, forwarded to the superclass and factory
     * @param environment   node environment (required by the provider signature, unused here)
     * @param name          analyzer name, used in error messages via {@code name()}
     * @param settings      analyzer settings: {@code char_filter}, {@code filter},
     *                      {@code position_offset_gap}, {@code offset_gap}, collator options
     */
    public SortformAnalyzerProvider(IndexSettings indexSettings, Environment environment, String name,
            Settings settings) {
        super(indexSettings, name, settings);
        this.tokenizerFactory = new SortformTokenizerFactory(indexSettings, name, settings);
        this.analyzerSettings = settings;
    }

    /**
     * Resolves the configured char filters and token filters by name and assembles the
     * {@link CustomAnalyzer} later returned by {@link #get()}. The {@code tokenizers}
     * argument is intentionally ignored: this provider always uses its own sort-form
     * tokenizer factory.
     *
     * @throws IllegalArgumentException if a configured char_filter or filter name is unknown
     */
    @Override
    public void build(final Map<String, TokenizerFactory> tokenizers,
            final Map<String, CharFilterFactory> charFilters,
            final Map<String, TokenFilterFactory> tokenFilters) {
        List<CharFilterFactory> myCharFilters = new ArrayList<>();
        for (String charFilterName : analyzerSettings.getAsArray("char_filter")) {
            CharFilterFactory charFilter = charFilters.get(charFilterName);
            if (charFilter == null) {
                throw new IllegalArgumentException("Sortform Analyzer [" + name() + "] failed to find char_filter under name [" + charFilterName + "]");
            }
            myCharFilters.add(charFilter);
        }
        List<TokenFilterFactory> myTokenFilters = new ArrayList<>();
        for (String tokenFilterName : analyzerSettings.getAsArray("filter")) {
            TokenFilterFactory tokenFilter = tokenFilters.get(tokenFilterName);
            if (tokenFilter == null) {
                throw new IllegalArgumentException("Sortform Analyzer [" + name() + "] failed to find filter under name [" + tokenFilterName + "]");
            }
            myTokenFilters.add(tokenFilter);
        }
        int positionOffsetGap = analyzerSettings.getAsInt("position_offset_gap", 0);
        int offsetGap = analyzerSettings.getAsInt("offset_gap", -1);
        this.customAnalyzer = new CustomAnalyzer(tokenizerFactory,
                myCharFilters.toArray(new CharFilterFactory[0]),
                myTokenFilters.toArray(new TokenFilterFactory[0]),
                positionOffsetGap,
                offsetGap
        );
    }

    /**
     * Returns the analyzer assembled by {@link #build}.
     * NOTE(review): returns {@code null} if {@code build} has not run yet — presumably the
     * framework guarantees build-before-get; confirm before adding a guard.
     */
    @Override
    public CustomAnalyzer get() {
        return this.customAnalyzer;
    }

    /**
     * Tokenizer factory that emits the whole input as one keyword token, constructed with
     * an {@link IcuCollationAttributeFactory} so term bytes follow ICU collator order.
     * Static nested class: it never uses the enclosing provider instance, so keeping it
     * non-static would only retain a hidden reference to the outer object.
     */
    static class SortformTokenizerFactory extends AbstractTokenizerFactory {

        private final IcuCollationAttributeFactory factory;
        private final int bufferSize;

        SortformTokenizerFactory(IndexSettings indexSettings, String name, Settings settings) {
            super(indexSettings, name, settings);
            Collator collator = IcuCollationKeyAnalyzerProvider.createCollator(settings);
            factory = new IcuCollationAttributeFactory(collator);
            // 256 mirrors KeywordTokenizer's default term buffer size.
            bufferSize = settings.getAsInt("bufferSize", 256);
        }

        @Override
        public Tokenizer create() {
            return new KeywordTokenizer(factory, bufferSize);
        }
    }
}