/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.index.analysis; import com.google.common.collect.ImmutableMap; import org.apache.lucene.analysis.Analyzer; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.logging.DeprecationLogger; import org.elasticsearch.common.logging.Loggers; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.index.AbstractIndexComponent; import org.elasticsearch.index.Index; import org.elasticsearch.index.mapper.core.StringFieldMapper; import org.elasticsearch.index.settings.IndexSettingsService; import org.elasticsearch.indices.analysis.IndicesAnalysisService; import java.io.Closeable; import java.util.Map; import static com.google.common.collect.Maps.newHashMap; /** * Manages configuring and storing analyzers, tokenizers, char filters and token filters. */ public class AnalysisService extends AbstractIndexComponent implements Closeable { private static final DeprecationLogger DEPRECATION_LOGGER = new DeprecationLogger(Loggers.getLogger(AnalysisService.class)); private final ImmutableMap<String, NamedAnalyzer> analyzers; private final ImmutableMap<String, TokenizerFactory> tokenizers; private final ImmutableMap<String, CharFilterFactory> charFilters; private final ImmutableMap<String, TokenFilterFactory> tokenFilters; private final NamedAnalyzer defaultIndexAnalyzer; private final NamedAnalyzer defaultSearchAnalyzer; private final NamedAnalyzer defaultSearchQuoteAnalyzer; public AnalysisService(Index index, Settings indexSettings) { this(index, indexSettings, null, null, null, null, null); } @Inject public AnalysisService(Index index, IndexSettingsService indexSettingsService, @Nullable IndicesAnalysisService indicesAnalysisService, @Nullable Map<String, AnalyzerProviderFactory> analyzerFactoryFactories, @Nullable Map<String, TokenizerFactoryFactory> tokenizerFactoryFactories, @Nullable Map<String, CharFilterFactoryFactory> charFilterFactoryFactories, @Nullable Map<String, TokenFilterFactoryFactory> tokenFilterFactoryFactories) { this(index, indexSettingsService.getSettings(), indicesAnalysisService, analyzerFactoryFactories, tokenizerFactoryFactories, charFilterFactoryFactories, tokenFilterFactoryFactories); } //package private for testing AnalysisService(Index index, Settings indexSettings, @Nullable IndicesAnalysisService indicesAnalysisService, @Nullable Map<String, AnalyzerProviderFactory> analyzerFactoryFactories, @Nullable Map<String, TokenizerFactoryFactory> tokenizerFactoryFactories, @Nullable Map<String, CharFilterFactoryFactory> charFilterFactoryFactories, @Nullable Map<String, TokenFilterFactoryFactory> tokenFilterFactoryFactories) { super(index, indexSettings); Settings defaultSettings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, Version.indexCreated(indexSettings)).build(); Map<String, TokenizerFactory> tokenizers = newHashMap(); if (tokenizerFactoryFactories != null) { Map<String, Settings> tokenizersSettings = indexSettings.getGroups("index.analysis.tokenizer"); for (Map.Entry<String, TokenizerFactoryFactory> entry : tokenizerFactoryFactories.entrySet()) { String tokenizerName = entry.getKey(); TokenizerFactoryFactory tokenizerFactoryFactory = entry.getValue(); Settings tokenizerSettings = tokenizersSettings.get(tokenizerName); if (tokenizerSettings == null) { tokenizerSettings = defaultSettings; } TokenizerFactory tokenizerFactory = tokenizerFactoryFactory.create(tokenizerName, tokenizerSettings); tokenizers.put(tokenizerName, tokenizerFactory); } } if (indicesAnalysisService != null) { for (Map.Entry<String, PreBuiltTokenizerFactoryFactory> entry : indicesAnalysisService.tokenizerFactories().entrySet()) { String name = entry.getKey(); if (!tokenizers.containsKey(name)) { tokenizers.put(name, entry.getValue().create(name, defaultSettings)); } } } this.tokenizers = ImmutableMap.copyOf(tokenizers); Map<String, CharFilterFactory> charFilters = newHashMap(); if (charFilterFactoryFactories != null) { Map<String, Settings> charFiltersSettings = indexSettings.getGroups("index.analysis.char_filter"); for (Map.Entry<String, CharFilterFactoryFactory> entry : charFilterFactoryFactories.entrySet()) { String charFilterName = entry.getKey(); CharFilterFactoryFactory charFilterFactoryFactory = entry.getValue(); Settings charFilterSettings = charFiltersSettings.get(charFilterName); if (charFilterSettings == null) { charFilterSettings = defaultSettings; } CharFilterFactory tokenFilterFactory = charFilterFactoryFactory.create(charFilterName, charFilterSettings); charFilters.put(charFilterName, tokenFilterFactory); } } if (indicesAnalysisService != null) { for (Map.Entry<String, PreBuiltCharFilterFactoryFactory> entry : indicesAnalysisService.charFilterFactories().entrySet()) { String name = entry.getKey(); if (!charFilters.containsKey(name)) { charFilters.put(name, entry.getValue().create(name, defaultSettings)); } } } this.charFilters = ImmutableMap.copyOf(charFilters); Map<String, TokenFilterFactory> tokenFilters = newHashMap(); if (tokenFilterFactoryFactories != null) { Map<String, Settings> tokenFiltersSettings = indexSettings.getGroups("index.analysis.filter"); for (Map.Entry<String, TokenFilterFactoryFactory> entry : tokenFilterFactoryFactories.entrySet()) { String tokenFilterName = entry.getKey(); TokenFilterFactoryFactory tokenFilterFactoryFactory = entry.getValue(); Settings tokenFilterSettings = tokenFiltersSettings.get(tokenFilterName); if (tokenFilterSettings == null) { tokenFilterSettings = defaultSettings; } TokenFilterFactory tokenFilterFactory = tokenFilterFactoryFactory.create(tokenFilterName, tokenFilterSettings); tokenFilters.put(tokenFilterName, tokenFilterFactory); } } // pre initialize the globally registered ones into the map if (indicesAnalysisService != null) { for (Map.Entry<String, PreBuiltTokenFilterFactoryFactory> entry : indicesAnalysisService.tokenFilterFactories().entrySet()) { String name = entry.getKey(); if (!tokenFilters.containsKey(name)) { tokenFilters.put(name, entry.getValue().create(name, defaultSettings)); } } } this.tokenFilters = ImmutableMap.copyOf(tokenFilters); Map<String, AnalyzerProvider> analyzerProviders = newHashMap(); if (analyzerFactoryFactories != null) { Map<String, Settings> analyzersSettings = indexSettings.getGroups("index.analysis.analyzer"); for (Map.Entry<String, AnalyzerProviderFactory> entry : analyzerFactoryFactories.entrySet()) { String analyzerName = entry.getKey(); AnalyzerProviderFactory analyzerFactoryFactory = entry.getValue(); Settings analyzerSettings = analyzersSettings.get(analyzerName); if (analyzerSettings == null) { analyzerSettings = defaultSettings; } AnalyzerProvider analyzerFactory = analyzerFactoryFactory.create(analyzerName, analyzerSettings); analyzerProviders.put(analyzerName, analyzerFactory); } } if (indicesAnalysisService != null) { for (Map.Entry<String, PreBuiltAnalyzerProviderFactory> entry : indicesAnalysisService.analyzerProviderFactories().entrySet()) { String name = entry.getKey(); Version indexVersion = Version.indexCreated(indexSettings); if (!analyzerProviders.containsKey(name)) { analyzerProviders.put(name, entry.getValue().create(name, Settings.settingsBuilder().put(IndexMetaData.SETTING_VERSION_CREATED, indexVersion).build())); } } } if (!analyzerProviders.containsKey("default")) { analyzerProviders.put("default", new StandardAnalyzerProvider(index, indexSettings, null, "default", Settings.Builder.EMPTY_SETTINGS)); } if (!analyzerProviders.containsKey("default_index")) { analyzerProviders.put("default_index", analyzerProviders.get("default")); } if (!analyzerProviders.containsKey("default_search")) { analyzerProviders.put("default_search", analyzerProviders.get("default")); } if (!analyzerProviders.containsKey("default_search_quoted")) { analyzerProviders.put("default_search_quoted", analyzerProviders.get("default_search")); } Map<String, NamedAnalyzer> analyzers = newHashMap(); for (AnalyzerProvider analyzerFactory : analyzerProviders.values()) { /* * Lucene defaults positionIncrementGap to 0 in all analyzers but * Elasticsearch defaults them to 0 only before version 2.0 * and 100 afterwards so we override the positionIncrementGap if it * doesn't match here. */ int overridePositionIncrementGap = StringFieldMapper.Defaults.positionIncrementGap(Version.indexCreated(indexSettings)); if (analyzerFactory instanceof CustomAnalyzerProvider) { ((CustomAnalyzerProvider) analyzerFactory).build(this); /* * Custom analyzers already default to the correct, version * dependent positionIncrementGap and the user is be able to * configure the positionIncrementGap directly on the analyzer so * we disable overriding the positionIncrementGap to preserve the * user's setting. */ overridePositionIncrementGap = Integer.MIN_VALUE; } Analyzer analyzerF = analyzerFactory.get(); if (analyzerF == null) { throw new IllegalArgumentException("analyzer [" + analyzerFactory.name() + "] created null analyzer"); } NamedAnalyzer analyzer; if (analyzerF instanceof NamedAnalyzer) { // if we got a named analyzer back, use it... analyzer = (NamedAnalyzer) analyzerF; if (overridePositionIncrementGap >= 0 && analyzer.getPositionIncrementGap(analyzer.name()) != overridePositionIncrementGap) { // unless the positionIncrementGap needs to be overridden analyzer = new NamedAnalyzer(analyzer, overridePositionIncrementGap); } } else { analyzer = new NamedAnalyzer(analyzerFactory.name(), analyzerFactory.scope(), analyzerF, overridePositionIncrementGap); } analyzers.put(analyzerFactory.name(), analyzer); String strAliases = indexSettings.get("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); if (strAliases != null) { for (String alias : Strings.commaDelimitedListToStringArray(strAliases)) { analyzers.put(alias, analyzer); } } String[] aliases = indexSettings.getAsArray("index.analysis.analyzer." + analyzerFactory.name() + ".alias"); for (String alias : aliases) { analyzers.put(alias, analyzer); } } NamedAnalyzer defaultAnalyzer = analyzers.get("default"); if (defaultAnalyzer == null) { throw new IllegalArgumentException("no default analyzer configured"); } if (analyzers.containsKey("default_index")) { deprecationLogger.deprecated("setting [index.analysis.analyzer.default_index] is deprecated, use [index.analysis.analyzer.default] instead for index [{}]", index.getName()); } defaultIndexAnalyzer = analyzers.containsKey("default_index") ? analyzers.get("default_index") : defaultAnalyzer; defaultSearchAnalyzer = analyzers.containsKey("default_search") ? analyzers.get("default_search") : defaultAnalyzer; defaultSearchQuoteAnalyzer = analyzers.containsKey("default_search_quote") ? analyzers.get("default_search_quote") : defaultSearchAnalyzer; for (Map.Entry<String, NamedAnalyzer> analyzer : analyzers.entrySet()) { if (analyzer.getKey().startsWith("_")) { throw new IllegalArgumentException("analyzer name must not start with '_'. got \"" + analyzer.getKey() + "\""); } } this.analyzers = ImmutableMap.copyOf(analyzers); } @Override public void close() { for (NamedAnalyzer analyzer : analyzers.values()) { if (analyzer.scope() == AnalyzerScope.INDEX) { try { analyzer.close(); } catch (NullPointerException e) { // because analyzers are aliased, they might be closed several times // an NPE is thrown in this case, so ignore.... } catch (Exception e) { logger.debug("failed to close analyzer " + analyzer); } } } } public NamedAnalyzer analyzer(String name) { NamedAnalyzer analyzer = analyzers.get(name); if (analyzer != null) { return analyzer; } String underscoreName = Strings.toUnderscoreCase(name); analyzer = analyzers.get(underscoreName); if (analyzer != null) { DEPRECATION_LOGGER.deprecated("Deprecated analyzer name [" + name + "], use [" + underscoreName + "] instead"); } return analyzer; } public NamedAnalyzer defaultIndexAnalyzer() { return defaultIndexAnalyzer; } public NamedAnalyzer defaultSearchAnalyzer() { return defaultSearchAnalyzer; } public NamedAnalyzer defaultSearchQuoteAnalyzer() { return defaultSearchQuoteAnalyzer; } public TokenizerFactory tokenizer(String name) { TokenizerFactory tokenizerFactory = tokenizers.get(name); if (tokenizerFactory != null) { return tokenizerFactory; } String underscoreName = Strings.toUnderscoreCase(name); tokenizerFactory = tokenizers.get(underscoreName); if (tokenizerFactory != null) { DEPRECATION_LOGGER.deprecated("Deprecated tokenizer name [" + name + "], use [" + underscoreName + "] instead"); } return tokenizerFactory; } public CharFilterFactory charFilter(String name) { CharFilterFactory charFilterFactory = charFilters.get(name); if (charFilterFactory != null) { return charFilterFactory; } String underscoreName = Strings.toUnderscoreCase(name); charFilterFactory = charFilters.get(underscoreName); if (charFilterFactory != null) { DEPRECATION_LOGGER.deprecated("Deprecated char_filter name [" + name + "], use [" + underscoreName + "] instead"); } return charFilterFactory; } public TokenFilterFactory tokenFilter(String name) { TokenFilterFactory tokenFilterFactory = tokenFilters.get(name); if (tokenFilterFactory != null) { return tokenFilterFactory; } String underscoreName = Strings.toUnderscoreCase(name); tokenFilterFactory = tokenFilters.get(underscoreName); if (tokenFilterFactory != null) { DEPRECATION_LOGGER.deprecated("Deprecated token_filter name [" + name + "], use [" + underscoreName + "] instead"); } return tokenFilterFactory; } }