/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.index.analysis; import org.apache.lucene.analysis.MockLowerCaseFilter; import org.apache.lucene.util.BytesRef; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.env.Environment; import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider; import org.elasticsearch.plugins.AnalysisPlugin; import org.elasticsearch.test.ESTestCase; import org.elasticsearch.test.ESTokenStreamTestCase; import java.io.IOException; import java.io.Reader; import java.util.List; import java.util.Map; import static java.util.Collections.singletonList; import static java.util.Collections.singletonMap; public class CustomNormalizerTests extends ESTokenStreamTestCase { private static final AnalysisPlugin MOCK_ANALYSIS_PLUGIN = new MockAnalysisPlugin(); public void testBasics() throws IOException { Settings settings = Settings.builder() .putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, MOCK_ANALYSIS_PLUGIN); assertNull(analysis.indexAnalyzers.get("my_normalizer")); NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer"); assertNotNull(normalizer); assertEquals("my_normalizer", normalizer.name()); assertTokenStreamContents(normalizer.tokenStream("foo", "Cet été-là"), new String[] {"cet été-là"}); assertEquals(new BytesRef("cet été-là"), normalizer.normalize("foo", "Cet été-là")); } public void testUnknownType() { Settings settings = Settings.builder() .put("index.analysis.normalizer.my_normalizer.type", "foobar") .putArray("index.analysis.normalizer.my_normalizer.filter", "lowercase", "asciifolding") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings)); assertEquals("Unknown normalizer type [foobar] for [my_normalizer]", e.getMessage()); } public void testTokenizer() throws IOException { Settings settings = Settings.builder() .put("index.analysis.normalizer.my_normalizer.tokenizer", "keyword") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings)); assertEquals("Custom normalizer [my_normalizer] cannot configure a tokenizer", e.getMessage()); } public void testCharFilters() throws IOException { Settings settings = Settings.builder() .put("index.analysis.char_filter.my_mapping.type", "mock_char_filter") .putArray("index.analysis.normalizer.my_normalizer.char_filter", "my_mapping") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); ESTestCase.TestAnalysis analysis = AnalysisTestsHelper.createTestAnalysisFromSettings(settings, MOCK_ANALYSIS_PLUGIN); assertNull(analysis.indexAnalyzers.get("my_normalizer")); NamedAnalyzer normalizer = analysis.indexAnalyzers.getNormalizer("my_normalizer"); assertNotNull(normalizer); assertEquals("my_normalizer", normalizer.name()); assertTokenStreamContents(normalizer.tokenStream("foo", "abc"), new String[] {"zbc"}); assertEquals(new BytesRef("zbc"), normalizer.normalize("foo", "abc")); } public void testIllegalFilters() throws IOException { Settings settings = Settings.builder() .putArray("index.analysis.normalizer.my_normalizer.filter", "mock_forbidden") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings, MOCK_ANALYSIS_PLUGIN)); assertEquals("Custom normalizer [my_normalizer] may not use filter [mock_forbidden]", e.getMessage()); } public void testIllegalCharFilters() throws IOException { Settings settings = Settings.builder() .putArray("index.analysis.normalizer.my_normalizer.char_filter", "html_strip") .put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()) .build(); IllegalArgumentException e = expectThrows(IllegalArgumentException.class, () -> AnalysisTestsHelper.createTestAnalysisFromSettings(settings)); assertEquals("Custom normalizer [my_normalizer] may not use char filter [html_strip]", e.getMessage()); } private static class MockAnalysisPlugin implements AnalysisPlugin { @Override public List<PreConfiguredTokenFilter> getPreConfiguredTokenFilters() { return singletonList(PreConfiguredTokenFilter.singleton("mock_forbidden", false, MockLowerCaseFilter::new)); } @Override public Map<String, AnalysisProvider<CharFilterFactory>> getCharFilters() { return singletonMap("mock_char_filter", (indexSettings, env, name, settings) -> { class Factory implements CharFilterFactory, MultiTermAwareComponent { @Override public String name() { return name; } @Override public Reader create(Reader reader) { return new Reader() { @Override public int read(char[] cbuf, int off, int len) throws IOException { int result = reader.read(cbuf, off, len); for (int i = off; i < result; i++) { if (cbuf[i] == 'a') { cbuf[i] = 'z'; } } return result; } @Override public void close() throws IOException { reader.close(); } }; } @Override public Object getMultiTermComponent() { return this; } } return new Factory(); }); } } }