/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.index.analysis; import com.carrotsearch.randomizedtesting.generators.RandomPicks; import org.apache.lucene.analysis.MockTokenFilter; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.analysis.tokenattributes.CharTermAttribute; import org.elasticsearch.Version; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.index.IndexSettings; import org.elasticsearch.indices.analysis.AnalysisModule; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.indices.analysis.PreBuiltAnalyzers; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.IndexSettingsModule; import org.elasticsearch.test.VersionUtils; import java.io.IOException; import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; import static java.util.Collections.emptyMap; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.instanceOf; public class AnalysisRegistryTests extends ESTestCase { private Environment emptyEnvironment; private AnalysisRegistry emptyRegistry; private IndexSettings emptyIndexSettingsOfCurrentVersion; private static AnalyzerProvider<?> analyzerProvider(final String name) { return new PreBuiltAnalyzerProvider(name, AnalyzerScope.INDEX, new EnglishAnalyzer()); } @Override public void setUp() throws Exception { super.setUp(); emptyEnvironment = new Environment(Settings.builder() .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build()); emptyRegistry = new AnalysisRegistry(emptyEnvironment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()); emptyIndexSettingsOfCurrentVersion = IndexSettingsModule.newIndexSettings("index", Settings.builder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .build()); } public void testDefaultAnalyzers() throws IOException { Version version = VersionUtils.randomVersion(random()); Settings settings = Settings .builder() .put(IndexMetaData.SETTING_VERSION_CREATED, version) .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); IndexAnalyzers indexAnalyzers = emptyRegistry.build(idxSettings); assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class)); assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class)); assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class)); } public void testOverrideDefaultAnalyzer() throws IOException { Version version = VersionUtils.randomVersion(random()); Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); IndexAnalyzers indexAnalyzers = emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings), singletonMap("default", analyzerProvider("default")) , emptyMap(), emptyMap(), emptyMap(), emptyMap()); assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class)); assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class)); assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class)); } public void testOverrideDefaultIndexAnalyzerIsUnsupported() { Version version = VersionUtils.randomVersionBetween(random(), Version.V_5_0_0_alpha1, Version.CURRENT); Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); AnalyzerProvider<?> defaultIndex = new PreBuiltAnalyzerProvider("default_index", AnalyzerScope.INDEX, new EnglishAnalyzer()); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings), singletonMap("default_index", defaultIndex), emptyMap(), emptyMap(), emptyMap(), emptyMap())); assertTrue(e.getMessage().contains("[index.analysis.analyzer.default_index] is not supported")); } public void testOverrideDefaultSearchAnalyzer() { Version version = VersionUtils.randomVersion(random()); Settings settings = Settings.builder().put(IndexMetaData.SETTING_VERSION_CREATED, version).build(); IndexAnalyzers indexAnalyzers = emptyRegistry.build(IndexSettingsModule.newIndexSettings("index", settings), singletonMap("default_search", analyzerProvider("default_search")), emptyMap(), emptyMap(), emptyMap(), emptyMap()); assertThat(indexAnalyzers.getDefaultIndexAnalyzer().analyzer(), instanceOf(StandardAnalyzer.class)); assertThat(indexAnalyzers.getDefaultSearchAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class)); assertThat(indexAnalyzers.getDefaultSearchQuoteAnalyzer().analyzer(), instanceOf(EnglishAnalyzer.class)); } /** * Tests that {@code camelCase} filter names and {@code snake_case} filter names don't collide. */ public void testConfigureCamelCaseTokenFilter() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build(); Settings indexSettings = Settings.builder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT) .put("index.analysis.filter.testFilter.type", "mock") .put("index.analysis.filter.test_filter.type", "mock") .put("index.analysis.analyzer.custom_analyzer_with_camel_case.tokenizer", "standard") .putArray("index.analysis.analyzer.custom_analyzer_with_camel_case.filter", "lowercase", "testFilter") .put("index.analysis.analyzer.custom_analyzer_with_snake_case.tokenizer", "standard") .putArray("index.analysis.analyzer.custom_analyzer_with_snake_case.filter", "lowercase", "test_filter").build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); /* The snake_case version of the name should not filter out any stopwords while the * camelCase version will filter out English stopwords. */ AnalysisPlugin plugin = new AnalysisPlugin() { class MockFactory extends AbstractTokenFilterFactory { MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) { super(indexSettings, name, settings); } @Override public TokenStream create(TokenStream tokenStream) { if (name().equals("test_filter")) { return new MockTokenFilter(tokenStream, MockTokenFilter.EMPTY_STOPSET); } return new MockTokenFilter(tokenStream, MockTokenFilter.ENGLISH_STOPSET); } } @Override public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() { return singletonMap("mock", MockFactory::new); } }; IndexAnalyzers indexAnalyzers = new AnalysisModule(new Environment(settings), singletonList(plugin)).getAnalysisRegistry() .build(idxSettings); // This shouldn't contain English stopwords try (NamedAnalyzer custom_analyser = indexAnalyzers.get("custom_analyzer_with_camel_case")) { assertNotNull(custom_analyser); TokenStream tokenStream = custom_analyser.tokenStream("foo", "has a foo"); tokenStream.reset(); CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); assertTrue(tokenStream.incrementToken()); assertEquals("has", charTermAttribute.toString()); assertTrue(tokenStream.incrementToken()); assertEquals("foo", charTermAttribute.toString()); assertFalse(tokenStream.incrementToken()); } // This *should* contain English stopwords try (NamedAnalyzer custom_analyser = indexAnalyzers.get("custom_analyzer_with_snake_case")) { assertNotNull(custom_analyser); TokenStream tokenStream = custom_analyser.tokenStream("foo", "has a foo"); tokenStream.reset(); CharTermAttribute charTermAttribute = tokenStream.addAttribute(CharTermAttribute.class); assertTrue(tokenStream.incrementToken()); assertEquals("has", charTermAttribute.toString()); assertTrue(tokenStream.incrementToken()); assertEquals("a", charTermAttribute.toString()); assertTrue(tokenStream.incrementToken()); assertEquals("foo", charTermAttribute.toString()); assertFalse(tokenStream.incrementToken()); } } public void testBuiltInAnalyzersAreCached() throws IOException { Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build(); Settings indexSettings = Settings.builder() .put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT).build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings); IndexAnalyzers indexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()) .build(idxSettings); IndexAnalyzers otherIndexAnalyzers = new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()) .build(idxSettings); final int numIters = randomIntBetween(5, 20); for (int i = 0; i < numIters; i++) { PreBuiltAnalyzers preBuiltAnalyzers = RandomPicks.randomFrom(random(), PreBuiltAnalyzers.values()); assertSame(indexAnalyzers.get(preBuiltAnalyzers.name()), otherIndexAnalyzers.get(preBuiltAnalyzers.name())); } } public void testPreConfiguredTokenFiltersAreCached() throws IOException { AtomicBoolean built = new AtomicBoolean(false); PreConfiguredTokenFilter assertsBuiltOnce = PreConfiguredTokenFilter.singleton("asserts_built_once", false, tokenStream -> { if (false == built.compareAndSet(false, true)) { fail("Attempted to build the token filter twice when it should have been cached"); } return new MockTokenFilter(tokenStream, MockTokenFilter.EMPTY_STOPSET); }); try (AnalysisRegistry registryWithPreBuiltTokenFilter = new AnalysisRegistry(emptyEnvironment, emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), singletonMap("asserts_built_once", assertsBuiltOnce))) { IndexAnalyzers indexAnalyzers = registryWithPreBuiltTokenFilter.build(emptyIndexSettingsOfCurrentVersion); IndexAnalyzers otherIndexAnalyzers = registryWithPreBuiltTokenFilter.build(emptyIndexSettingsOfCurrentVersion); assertSame(indexAnalyzers.get("asserts_built_once"), otherIndexAnalyzers.get("asserts_built_once")); } } public void testNoTypeOrTokenizerErrorMessage() throws IOException { Version version = VersionUtils.randomVersion(random()); Settings settings = Settings .builder() .put(IndexMetaData.SETTING_VERSION_CREATED, version) .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .putArray("index.analysis.analyzer.test_analyzer.filter", new String[] {"lowercase", "stop", "shingle"}) .putArray("index.analysis.analyzer.test_analyzer.char_filter", new String[] {"html_strip"}) .build(); IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", settings); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> new AnalysisRegistry(new Environment(settings), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap(), emptyMap()) .build(idxSettings)); assertThat(e.getMessage(), equalTo("analyzer [test_analyzer] must specify either an analyzer type, or a tokenizer")); } public void testCloseIndexAnalyzersMultipleTimes() throws IOException { IndexAnalyzers indexAnalyzers = emptyRegistry.build(emptyIndexSettingsOfCurrentVersion); indexAnalyzers.close(); indexAnalyzers.close(); } }