/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.indices.analysis;
import org.apache.lucene.analysis.LowerCaseFilter;
import org.apache.lucene.analysis.standard.StandardFilter;
import org.elasticsearch.Version;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.NamedRegistry;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.AnalyzerProvider;
import org.elasticsearch.index.analysis.ApostropheFilterFactory;
import org.elasticsearch.index.analysis.ArabicAnalyzerProvider;
import org.elasticsearch.index.analysis.ArabicNormalizationFilterFactory;
import org.elasticsearch.index.analysis.ArabicStemTokenFilterFactory;
import org.elasticsearch.index.analysis.ArmenianAnalyzerProvider;
import org.elasticsearch.index.analysis.BasqueAnalyzerProvider;
import org.elasticsearch.index.analysis.BrazilianAnalyzerProvider;
import org.elasticsearch.index.analysis.BrazilianStemTokenFilterFactory;
import org.elasticsearch.index.analysis.BulgarianAnalyzerProvider;
import org.elasticsearch.index.analysis.CJKBigramFilterFactory;
import org.elasticsearch.index.analysis.CJKWidthFilterFactory;
import org.elasticsearch.index.analysis.CatalanAnalyzerProvider;
import org.elasticsearch.index.analysis.CharFilterFactory;
import org.elasticsearch.index.analysis.ChineseAnalyzerProvider;
import org.elasticsearch.index.analysis.CjkAnalyzerProvider;
import org.elasticsearch.index.analysis.ClassicFilterFactory;
import org.elasticsearch.index.analysis.ClassicTokenizerFactory;
import org.elasticsearch.index.analysis.CommonGramsTokenFilterFactory;
import org.elasticsearch.index.analysis.CzechAnalyzerProvider;
import org.elasticsearch.index.analysis.CzechStemTokenFilterFactory;
import org.elasticsearch.index.analysis.DanishAnalyzerProvider;
import org.elasticsearch.index.analysis.DecimalDigitFilterFactory;
import org.elasticsearch.index.analysis.DelimitedPayloadTokenFilterFactory;
import org.elasticsearch.index.analysis.DutchAnalyzerProvider;
import org.elasticsearch.index.analysis.DutchStemTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenFilterFactory;
import org.elasticsearch.index.analysis.EdgeNGramTokenizerFactory;
import org.elasticsearch.index.analysis.ElisionTokenFilterFactory;
import org.elasticsearch.index.analysis.EnglishAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintAnalyzerProvider;
import org.elasticsearch.index.analysis.FingerprintTokenFilterFactory;
import org.elasticsearch.index.analysis.FinnishAnalyzerProvider;
import org.elasticsearch.index.analysis.FlattenGraphTokenFilterFactory;
import org.elasticsearch.index.analysis.FrenchAnalyzerProvider;
import org.elasticsearch.index.analysis.FrenchStemTokenFilterFactory;
import org.elasticsearch.index.analysis.GalicianAnalyzerProvider;
import org.elasticsearch.index.analysis.GermanAnalyzerProvider;
import org.elasticsearch.index.analysis.GermanNormalizationFilterFactory;
import org.elasticsearch.index.analysis.GermanStemTokenFilterFactory;
import org.elasticsearch.index.analysis.GreekAnalyzerProvider;
import org.elasticsearch.index.analysis.HindiAnalyzerProvider;
import org.elasticsearch.index.analysis.HindiNormalizationFilterFactory;
import org.elasticsearch.index.analysis.HungarianAnalyzerProvider;
import org.elasticsearch.index.analysis.HunspellTokenFilterFactory;
import org.elasticsearch.index.analysis.IndicNormalizationFilterFactory;
import org.elasticsearch.index.analysis.IndonesianAnalyzerProvider;
import org.elasticsearch.index.analysis.IrishAnalyzerProvider;
import org.elasticsearch.index.analysis.ItalianAnalyzerProvider;
import org.elasticsearch.index.analysis.KStemTokenFilterFactory;
import org.elasticsearch.index.analysis.KeepTypesFilterFactory;
import org.elasticsearch.index.analysis.KeepWordFilterFactory;
import org.elasticsearch.index.analysis.KeywordAnalyzerProvider;
import org.elasticsearch.index.analysis.KeywordMarkerTokenFilterFactory;
import org.elasticsearch.index.analysis.KeywordTokenizerFactory;
import org.elasticsearch.index.analysis.LatvianAnalyzerProvider;
import org.elasticsearch.index.analysis.LengthTokenFilterFactory;
import org.elasticsearch.index.analysis.LetterTokenizerFactory;
import org.elasticsearch.index.analysis.LimitTokenCountFilterFactory;
import org.elasticsearch.index.analysis.LithuanianAnalyzerProvider;
import org.elasticsearch.index.analysis.LowerCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.LowerCaseTokenizerFactory;
import org.elasticsearch.index.analysis.MinHashTokenFilterFactory;
import org.elasticsearch.index.analysis.NGramTokenFilterFactory;
import org.elasticsearch.index.analysis.NGramTokenizerFactory;
import org.elasticsearch.index.analysis.NorwegianAnalyzerProvider;
import org.elasticsearch.index.analysis.PathHierarchyTokenizerFactory;
import org.elasticsearch.index.analysis.PatternAnalyzerProvider;
import org.elasticsearch.index.analysis.PatternCaptureGroupTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternReplaceTokenFilterFactory;
import org.elasticsearch.index.analysis.PatternTokenizerFactory;
import org.elasticsearch.index.analysis.PersianAnalyzerProvider;
import org.elasticsearch.index.analysis.PersianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.PorterStemTokenFilterFactory;
import org.elasticsearch.index.analysis.PortugueseAnalyzerProvider;
import org.elasticsearch.index.analysis.PreConfiguredTokenFilter;
import org.elasticsearch.index.analysis.ReverseTokenFilterFactory;
import org.elasticsearch.index.analysis.RomanianAnalyzerProvider;
import org.elasticsearch.index.analysis.RussianAnalyzerProvider;
import org.elasticsearch.index.analysis.RussianStemTokenFilterFactory;
import org.elasticsearch.index.analysis.ScandinavianFoldingFilterFactory;
import org.elasticsearch.index.analysis.ScandinavianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.SerbianNormalizationFilterFactory;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.analysis.SimpleAnalyzerProvider;
import org.elasticsearch.index.analysis.SnowballAnalyzerProvider;
import org.elasticsearch.index.analysis.SnowballTokenFilterFactory;
import org.elasticsearch.index.analysis.SoraniAnalyzerProvider;
import org.elasticsearch.index.analysis.SoraniNormalizationFilterFactory;
import org.elasticsearch.index.analysis.SpanishAnalyzerProvider;
import org.elasticsearch.index.analysis.StandardAnalyzerProvider;
import org.elasticsearch.index.analysis.StandardHtmlStripAnalyzerProvider;
import org.elasticsearch.index.analysis.StandardTokenFilterFactory;
import org.elasticsearch.index.analysis.StandardTokenizerFactory;
import org.elasticsearch.index.analysis.StemmerOverrideTokenFilterFactory;
import org.elasticsearch.index.analysis.StemmerTokenFilterFactory;
import org.elasticsearch.index.analysis.StopAnalyzerProvider;
import org.elasticsearch.index.analysis.StopTokenFilterFactory;
import org.elasticsearch.index.analysis.SwedishAnalyzerProvider;
import org.elasticsearch.index.analysis.ThaiAnalyzerProvider;
import org.elasticsearch.index.analysis.ThaiTokenizerFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.analysis.TokenizerFactory;
import org.elasticsearch.index.analysis.TrimTokenFilterFactory;
import org.elasticsearch.index.analysis.TruncateTokenFilterFactory;
import org.elasticsearch.index.analysis.TurkishAnalyzerProvider;
import org.elasticsearch.index.analysis.UAX29URLEmailTokenizerFactory;
import org.elasticsearch.index.analysis.UniqueTokenFilterFactory;
import org.elasticsearch.index.analysis.UpperCaseTokenFilterFactory;
import org.elasticsearch.index.analysis.WhitespaceAnalyzerProvider;
import org.elasticsearch.index.analysis.WhitespaceTokenizerFactory;
import org.elasticsearch.index.analysis.compound.DictionaryCompoundWordTokenFilterFactory;
import org.elasticsearch.index.analysis.compound.HyphenationCompoundWordTokenFilterFactory;
import org.elasticsearch.indices.analysis.PreBuiltCacheFactory.CachingStrategy;
import org.elasticsearch.plugins.AnalysisPlugin;
import java.io.IOException;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import static java.util.Collections.unmodifiableMap;
import static org.elasticsearch.plugins.AnalysisPlugin.requriesAnalysisSettings;
/**
* Sets up {@link AnalysisRegistry}.
*/
public final class AnalysisModule {
static {
Settings build = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).put(IndexMetaData
.SETTING_NUMBER_OF_REPLICAS, 1).put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, 1).build();
IndexMetaData metaData = IndexMetaData.builder("_na_").settings(build).build();
NA_INDEX_SETTINGS = new IndexSettings(metaData, Settings.EMPTY);
}
private static final IndexSettings NA_INDEX_SETTINGS;
private final HunspellService hunspellService;
private final AnalysisRegistry analysisRegistry;
public AnalysisModule(Environment environment, List<AnalysisPlugin> plugins) throws IOException {
NamedRegistry<AnalysisProvider<CharFilterFactory>> charFilters = setupCharFilters(plugins);
NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> hunspellDictionaries = setupHunspellDictionaries(plugins);
hunspellService = new HunspellService(environment.settings(), environment, hunspellDictionaries.getRegistry());
NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = setupTokenFilters(plugins, hunspellService);
NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = setupTokenizers(plugins);
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = setupAnalyzers(plugins);
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = setupNormalizers(plugins);
Map<String, PreConfiguredTokenFilter> preConfiguredTokenFilters = setupPreConfiguredTokenFilters(plugins);
analysisRegistry = new AnalysisRegistry(environment, charFilters.getRegistry(), tokenFilters.getRegistry(), tokenizers
.getRegistry(), analyzers.getRegistry(), normalizers.getRegistry(), preConfiguredTokenFilters);
}
HunspellService getHunspellService() {
return hunspellService;
}
public AnalysisRegistry getAnalysisRegistry() {
return analysisRegistry;
}
private NamedRegistry<AnalysisProvider<CharFilterFactory>> setupCharFilters(List<AnalysisPlugin> plugins) {
NamedRegistry<AnalysisProvider<CharFilterFactory>> charFilters = new NamedRegistry<>("char_filter");
charFilters.extractAndRegister(plugins, AnalysisPlugin::getCharFilters);
return charFilters;
}
public NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> setupHunspellDictionaries(List<AnalysisPlugin> plugins) {
NamedRegistry<org.apache.lucene.analysis.hunspell.Dictionary> hunspellDictionaries = new NamedRegistry<>("dictionary");
hunspellDictionaries.extractAndRegister(plugins, AnalysisPlugin::getHunspellDictionaries);
return hunspellDictionaries;
}
private NamedRegistry<AnalysisProvider<TokenFilterFactory>> setupTokenFilters(List<AnalysisPlugin> plugins, HunspellService
hunspellService) {
NamedRegistry<AnalysisProvider<TokenFilterFactory>> tokenFilters = new NamedRegistry<>("token_filter");
tokenFilters.register("stop", StopTokenFilterFactory::new);
tokenFilters.register("reverse", ReverseTokenFilterFactory::new);
tokenFilters.register("length", LengthTokenFilterFactory::new);
tokenFilters.register("lowercase", LowerCaseTokenFilterFactory::new);
tokenFilters.register("uppercase", UpperCaseTokenFilterFactory::new);
tokenFilters.register("porter_stem", PorterStemTokenFilterFactory::new);
tokenFilters.register("kstem", KStemTokenFilterFactory::new);
tokenFilters.register("standard", StandardTokenFilterFactory::new);
tokenFilters.register("nGram", NGramTokenFilterFactory::new);
tokenFilters.register("ngram", NGramTokenFilterFactory::new);
tokenFilters.register("edgeNGram", EdgeNGramTokenFilterFactory::new);
tokenFilters.register("edge_ngram", EdgeNGramTokenFilterFactory::new);
tokenFilters.register("shingle", ShingleTokenFilterFactory::new);
tokenFilters.register("min_hash", MinHashTokenFilterFactory::new);
tokenFilters.register("unique", UniqueTokenFilterFactory::new);
tokenFilters.register("truncate", requriesAnalysisSettings(TruncateTokenFilterFactory::new));
tokenFilters.register("trim", TrimTokenFilterFactory::new);
tokenFilters.register("limit", LimitTokenCountFilterFactory::new);
tokenFilters.register("common_grams", requriesAnalysisSettings(CommonGramsTokenFilterFactory::new));
tokenFilters.register("snowball", SnowballTokenFilterFactory::new);
tokenFilters.register("stemmer", StemmerTokenFilterFactory::new);
tokenFilters.register("delimited_payload_filter", DelimitedPayloadTokenFilterFactory::new);
tokenFilters.register("elision", ElisionTokenFilterFactory::new);
tokenFilters.register("flatten_graph", FlattenGraphTokenFilterFactory::new);
tokenFilters.register("keep", requriesAnalysisSettings(KeepWordFilterFactory::new));
tokenFilters.register("keep_types", requriesAnalysisSettings(KeepTypesFilterFactory::new));
tokenFilters.register("pattern_capture", requriesAnalysisSettings(PatternCaptureGroupTokenFilterFactory::new));
tokenFilters.register("pattern_replace", requriesAnalysisSettings(PatternReplaceTokenFilterFactory::new));
tokenFilters.register("dictionary_decompounder", requriesAnalysisSettings(DictionaryCompoundWordTokenFilterFactory::new));
tokenFilters.register("hyphenation_decompounder", requriesAnalysisSettings(HyphenationCompoundWordTokenFilterFactory::new));
tokenFilters.register("arabic_stem", ArabicStemTokenFilterFactory::new);
tokenFilters.register("brazilian_stem", BrazilianStemTokenFilterFactory::new);
tokenFilters.register("czech_stem", CzechStemTokenFilterFactory::new);
tokenFilters.register("dutch_stem", DutchStemTokenFilterFactory::new);
tokenFilters.register("french_stem", FrenchStemTokenFilterFactory::new);
tokenFilters.register("german_stem", GermanStemTokenFilterFactory::new);
tokenFilters.register("russian_stem", RussianStemTokenFilterFactory::new);
tokenFilters.register("keyword_marker", requriesAnalysisSettings(KeywordMarkerTokenFilterFactory::new));
tokenFilters.register("stemmer_override", requriesAnalysisSettings(StemmerOverrideTokenFilterFactory::new));
tokenFilters.register("arabic_normalization", ArabicNormalizationFilterFactory::new);
tokenFilters.register("german_normalization", GermanNormalizationFilterFactory::new);
tokenFilters.register("hindi_normalization", HindiNormalizationFilterFactory::new);
tokenFilters.register("indic_normalization", IndicNormalizationFilterFactory::new);
tokenFilters.register("sorani_normalization", SoraniNormalizationFilterFactory::new);
tokenFilters.register("persian_normalization", PersianNormalizationFilterFactory::new);
tokenFilters.register("scandinavian_normalization", ScandinavianNormalizationFilterFactory::new);
tokenFilters.register("scandinavian_folding", ScandinavianFoldingFilterFactory::new);
tokenFilters.register("serbian_normalization", SerbianNormalizationFilterFactory::new);
tokenFilters.register("hunspell", requriesAnalysisSettings((indexSettings, env, name, settings) -> new HunspellTokenFilterFactory
(indexSettings, name, settings, hunspellService)));
tokenFilters.register("cjk_bigram", CJKBigramFilterFactory::new);
tokenFilters.register("cjk_width", CJKWidthFilterFactory::new);
tokenFilters.register("apostrophe", ApostropheFilterFactory::new);
tokenFilters.register("classic", ClassicFilterFactory::new);
tokenFilters.register("decimal_digit", DecimalDigitFilterFactory::new);
tokenFilters.register("fingerprint", FingerprintTokenFilterFactory::new);
tokenFilters.extractAndRegister(plugins, AnalysisPlugin::getTokenFilters);
return tokenFilters;
}
static Map<String, PreConfiguredTokenFilter> setupPreConfiguredTokenFilters(List<AnalysisPlugin> plugins) {
NamedRegistry<PreConfiguredTokenFilter> preConfiguredTokenFilters = new NamedRegistry<>("pre-configured token_filter");
// Add filters available in lucene-core
preConfiguredTokenFilters.register("lowercase", PreConfiguredTokenFilter.singleton("lowercase", true, LowerCaseFilter::new));
preConfiguredTokenFilters.register("standard", PreConfiguredTokenFilter.singleton("standard", false, StandardFilter::new));
/* Note that "stop" is available in lucene-core but it's pre-built
* version uses a set of English stop words that are in
* lucene-analyzers-common so "stop" is defined in the analysis-common
* module. */
// Add token filters declared in PreBuiltTokenFilters until they have all been migrated
for (PreBuiltTokenFilters preBuilt : PreBuiltTokenFilters.values()) {
switch (preBuilt) {
case LOWERCASE:
// This has been migrated but has to stick around until PreBuiltTokenizers is removed.
continue;
default:
if (CachingStrategy.ONE != preBuilt.getCachingStrategy()) {
throw new UnsupportedOperationException("shim not available for " + preBuilt.getCachingStrategy());
}
String name = preBuilt.name().toLowerCase(Locale.ROOT);
preConfiguredTokenFilters.register(name, PreConfiguredTokenFilter.singleton(name, preBuilt.isMultiTermAware(),
tokenStream -> preBuilt.create(tokenStream, Version.CURRENT)));
}
}
for (AnalysisPlugin plugin: plugins) {
for (PreConfiguredTokenFilter filter : plugin.getPreConfiguredTokenFilters()) {
preConfiguredTokenFilters.register(filter.getName(), filter);
}
}
return unmodifiableMap(preConfiguredTokenFilters.getRegistry());
}
private NamedRegistry<AnalysisProvider<TokenizerFactory>> setupTokenizers(List<AnalysisPlugin> plugins) {
NamedRegistry<AnalysisProvider<TokenizerFactory>> tokenizers = new NamedRegistry<>("tokenizer");
tokenizers.register("standard", StandardTokenizerFactory::new);
tokenizers.register("uax_url_email", UAX29URLEmailTokenizerFactory::new);
tokenizers.register("path_hierarchy", PathHierarchyTokenizerFactory::new);
tokenizers.register("PathHierarchy", PathHierarchyTokenizerFactory::new);
tokenizers.register("keyword", KeywordTokenizerFactory::new);
tokenizers.register("letter", LetterTokenizerFactory::new);
tokenizers.register("lowercase", LowerCaseTokenizerFactory::new);
tokenizers.register("whitespace", WhitespaceTokenizerFactory::new);
tokenizers.register("nGram", NGramTokenizerFactory::new);
tokenizers.register("ngram", NGramTokenizerFactory::new);
tokenizers.register("edgeNGram", EdgeNGramTokenizerFactory::new);
tokenizers.register("edge_ngram", EdgeNGramTokenizerFactory::new);
tokenizers.register("pattern", PatternTokenizerFactory::new);
tokenizers.register("classic", ClassicTokenizerFactory::new);
tokenizers.register("thai", ThaiTokenizerFactory::new);
tokenizers.extractAndRegister(plugins, AnalysisPlugin::getTokenizers);
return tokenizers;
}
private NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> setupAnalyzers(List<AnalysisPlugin> plugins) {
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> analyzers = new NamedRegistry<>("analyzer");
analyzers.register("default", StandardAnalyzerProvider::new);
analyzers.register("standard", StandardAnalyzerProvider::new);
analyzers.register("standard_html_strip", StandardHtmlStripAnalyzerProvider::new);
analyzers.register("simple", SimpleAnalyzerProvider::new);
analyzers.register("stop", StopAnalyzerProvider::new);
analyzers.register("whitespace", WhitespaceAnalyzerProvider::new);
analyzers.register("keyword", KeywordAnalyzerProvider::new);
analyzers.register("pattern", PatternAnalyzerProvider::new);
analyzers.register("snowball", SnowballAnalyzerProvider::new);
analyzers.register("arabic", ArabicAnalyzerProvider::new);
analyzers.register("armenian", ArmenianAnalyzerProvider::new);
analyzers.register("basque", BasqueAnalyzerProvider::new);
analyzers.register("brazilian", BrazilianAnalyzerProvider::new);
analyzers.register("bulgarian", BulgarianAnalyzerProvider::new);
analyzers.register("catalan", CatalanAnalyzerProvider::new);
analyzers.register("chinese", ChineseAnalyzerProvider::new);
analyzers.register("cjk", CjkAnalyzerProvider::new);
analyzers.register("czech", CzechAnalyzerProvider::new);
analyzers.register("danish", DanishAnalyzerProvider::new);
analyzers.register("dutch", DutchAnalyzerProvider::new);
analyzers.register("english", EnglishAnalyzerProvider::new);
analyzers.register("finnish", FinnishAnalyzerProvider::new);
analyzers.register("french", FrenchAnalyzerProvider::new);
analyzers.register("galician", GalicianAnalyzerProvider::new);
analyzers.register("german", GermanAnalyzerProvider::new);
analyzers.register("greek", GreekAnalyzerProvider::new);
analyzers.register("hindi", HindiAnalyzerProvider::new);
analyzers.register("hungarian", HungarianAnalyzerProvider::new);
analyzers.register("indonesian", IndonesianAnalyzerProvider::new);
analyzers.register("irish", IrishAnalyzerProvider::new);
analyzers.register("italian", ItalianAnalyzerProvider::new);
analyzers.register("latvian", LatvianAnalyzerProvider::new);
analyzers.register("lithuanian", LithuanianAnalyzerProvider::new);
analyzers.register("norwegian", NorwegianAnalyzerProvider::new);
analyzers.register("persian", PersianAnalyzerProvider::new);
analyzers.register("portuguese", PortugueseAnalyzerProvider::new);
analyzers.register("romanian", RomanianAnalyzerProvider::new);
analyzers.register("russian", RussianAnalyzerProvider::new);
analyzers.register("sorani", SoraniAnalyzerProvider::new);
analyzers.register("spanish", SpanishAnalyzerProvider::new);
analyzers.register("swedish", SwedishAnalyzerProvider::new);
analyzers.register("turkish", TurkishAnalyzerProvider::new);
analyzers.register("thai", ThaiAnalyzerProvider::new);
analyzers.register("fingerprint", FingerprintAnalyzerProvider::new);
analyzers.extractAndRegister(plugins, AnalysisPlugin::getAnalyzers);
return analyzers;
}
private NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> setupNormalizers(List<AnalysisPlugin> plugins) {
NamedRegistry<AnalysisProvider<AnalyzerProvider<?>>> normalizers = new NamedRegistry<>("normalizer");
// TODO: provide built-in normalizer providers?
// TODO: pluggability?
return normalizers;
}
/**
* The basic factory interface for analysis components.
*/
public interface AnalysisProvider<T> {
/**
* Creates a new analysis provider.
*
* @param indexSettings the index settings for the index this provider is created for
* @param environment the nodes environment to load resources from persistent storage
* @param name the name of the analysis component
* @param settings the component specific settings without context prefixes
* @return a new provider instance
* @throws IOException if an {@link IOException} occurs
*/
T get(IndexSettings indexSettings, Environment environment, String name, Settings settings) throws IOException;
/**
* Creates a new global scope analysis provider without index specific settings not settings for the provider itself.
* This can be used to get a default instance of an analysis factory without binding to an index.
*
* @param environment the nodes environment to load resources from persistent storage
* @param name the name of the analysis component
* @return a new provider instance
* @throws IOException if an {@link IOException} occurs
* @throws IllegalArgumentException if the provider requires analysis settings ie. if {@link #requiresAnalysisSettings()} returns
* <code>true</code>
*/
default T get(Environment environment, String name) throws IOException {
if (requiresAnalysisSettings()) {
throw new IllegalArgumentException("Analysis settings required - can't instantiate analysis factory");
}
return get(NA_INDEX_SETTINGS, environment, name, NA_INDEX_SETTINGS.getSettings());
}
/**
* If <code>true</code> the analysis component created by this provider requires certain settings to be instantiated.
* it can't be created with defaults. The default is <code>false</code>.
*/
default boolean requiresAnalysisSettings() {
return false;
}
}
}