/*
 * Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
 * license agreements. See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership. Crate licenses
 * this file to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * However, if you have executed another commercial license agreement
 * with Crate these terms will supersede the license and you may use the
 * software solely pursuant to the terms of the relevant commercial agreement.
 */

package io.crate.analyze;

import io.crate.test.integration.CrateDummyClusterServiceUnitTest;
import io.crate.testing.SQLExecutor;
import org.elasticsearch.common.settings.Settings;
import org.junit.Before;
import org.junit.Test;

import static org.hamcrest.Matchers.*;

public class CreateAnalyzerAnalyzerTest extends CrateDummyClusterServiceUnitTest {

    private SQLExecutor e;

    @Before
    public void prepare() {
        e = SQLExecutor.builder(clusterService).enableDefaultTables().build();
    }

    @Test
    public void testCreateAnalyzerSimple() throws Exception {
        AnalyzedStatement analyzedStatement = e.analyze("CREATE ANALYZER a1 (tokenizer lowercase)");
        assertThat(analyzedStatement, instanceOf(CreateAnalyzerAnalyzedStatement.class));
        CreateAnalyzerAnalyzedStatement createAnalyzerAnalysis = (CreateAnalyzerAnalyzedStatement) analyzedStatement;

        assertEquals("a1", createAnalyzerAnalysis.ident());
        assertEquals("lowercase", createAnalyzerAnalysis.tokenizerDefinition().v1());
        assertEquals(Settings.EMPTY, createAnalyzerAnalysis.tokenizerDefinition().v2());

        // make sure the build succeeds
        createAnalyzerAnalysis.buildSettings();
    }

    @Test
    public void testCreateAnalyzerWithCustomTokenizer() throws Exception {
        AnalyzedStatement analyzedStatement = e.analyze(
            "CREATE ANALYZER a2 (" +
            "  tokenizer tok2 with (" +
            "    type='ngram'," +
            "    \"min_ngram\"=2," +
            "    \"token_chars\"=['letter', 'digits']" +
            "  )" +
            ")");
        assertThat(analyzedStatement, instanceOf(CreateAnalyzerAnalyzedStatement.class));
        CreateAnalyzerAnalyzedStatement createAnalyzerAnalysis = (CreateAnalyzerAnalyzedStatement) analyzedStatement;

        assertEquals("a2", createAnalyzerAnalysis.ident());
        assertEquals("a2_tok2", createAnalyzerAnalysis.tokenizerDefinition().v1());
        assertThat(
            createAnalyzerAnalysis.tokenizerDefinition().v2().getAsMap(),
            allOf(
                hasEntry("index.analysis.tokenizer.a2_tok2.type", "ngram"),
                hasEntry("index.analysis.tokenizer.a2_tok2.min_ngram", "2"),
                hasEntry("index.analysis.tokenizer.a2_tok2.token_chars.0", "letter"),
                hasEntry("index.analysis.tokenizer.a2_tok2.token_chars.1", "digits")
            )
        );

        // make sure the build succeeds
        createAnalyzerAnalysis.buildSettings();
    }

    @Test
    public void testCreateAnalyzerWithCharFilters() throws Exception {
        AnalyzedStatement analyzedStatement = e.analyze(
            "CREATE ANALYZER a3 (" +
            "  tokenizer lowercase," +
            "  char_filters (" +
            "    \"html_strip\"," +
            "    my_mapping WITH (" +
            "      type='mapping'," +
            "      mappings=['ph=>f', 'ß=>ss', 'ö=>oe']" +
            "    )" +
            "  )" +
            ")");
        assertThat(analyzedStatement, instanceOf(CreateAnalyzerAnalyzedStatement.class));
        CreateAnalyzerAnalyzedStatement createAnalyzerAnalysis = (CreateAnalyzerAnalyzedStatement) analyzedStatement;

        assertEquals("a3", createAnalyzerAnalysis.ident());
        assertEquals("lowercase", createAnalyzerAnalysis.tokenizerDefinition().v1());
        assertThat(
            createAnalyzerAnalysis.charFilters().keySet(),
            containsInAnyOrder("html_strip", "a3_my_mapping")
        );
        assertThat(
            createAnalyzerAnalysis.charFilters().get("a3_my_mapping").getAsMap(),
            hasEntry("index.analysis.char_filter.a3_my_mapping.type", "mapping")
        );
        assertThat(
            createAnalyzerAnalysis.charFilters().get("a3_my_mapping")
                .getAsArray("index.analysis.char_filter.a3_my_mapping.mappings"),
            arrayContainingInAnyOrder("ph=>f", "ß=>ss", "ö=>oe")
        );

        // make sure the build succeeds
        createAnalyzerAnalysis.buildSettings();
    }
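
    /**
     * A hedged sketch, not part of the original suite: it combines a builtin
     * char filter with a custom token filter in a single statement. The
     * analyzer ident "a5", the filter name "myfilter" and the expected
     * prefixed name "a5_myfilter" are illustrative assumptions that follow
     * the naming convention asserted in the surrounding tests.
     */
    @Test
    public void testCreateAnalyzerWithCharAndTokenFilters() throws Exception {
        CreateAnalyzerAnalyzedStatement analysis = e.analyze(
            "CREATE ANALYZER a5 (" +
            "  tokenizer lowercase," +
            "  char_filters (" +
            "    \"html_strip\"" +
            "  )," +
            "  token_filters (" +
            "    myfilter WITH (" +
            "      type='stop'," +
            "      stopword=['a', 'an']" +
            "    )" +
            "  )" +
            ")");

        assertEquals("a5", analysis.ident());
        // builtin filter names are kept as-is, while custom ones are prefixed
        // with the analyzer ident (the convention the tests above assert)
        assertThat(analysis.charFilters().keySet(), containsInAnyOrder("html_strip"));
        assertThat(analysis.tokenFilters().keySet(), containsInAnyOrder("a5_myfilter"));

        // make sure the build succeeds
        analysis.buildSettings();
    }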

    @Test
    public void testCreateAnalyzerWithTokenFilters() throws Exception {
        AnalyzedStatement analyzedStatement = e.analyze(
            "CREATE ANALYZER a11 (" +
            "  TOKENIZER standard," +
            "  TOKEN_FILTERS (" +
            "    lowercase," +
            "    mystop WITH (" +
            "      type='stop'," +
            "      stopword=['the', 'over']" +
            "    )" +
            "  )" +
            ")");
        assertThat(analyzedStatement, instanceOf(CreateAnalyzerAnalyzedStatement.class));
        CreateAnalyzerAnalyzedStatement createAnalyzerAnalysis = (CreateAnalyzerAnalyzedStatement) analyzedStatement;

        assertEquals("a11", createAnalyzerAnalysis.ident());
        assertEquals("standard", createAnalyzerAnalysis.tokenizerDefinition().v1());
        assertThat(
            createAnalyzerAnalysis.tokenFilters().keySet(),
            containsInAnyOrder("lowercase", "a11_mystop")
        );
        assertThat(
            createAnalyzerAnalysis.tokenFilters().get("a11_mystop").getAsMap(),
            hasEntry("index.analysis.filter.a11_mystop.type", "stop")
        );
        assertThat(
            createAnalyzerAnalysis.tokenFilters().get("a11_mystop")
                .getAsArray("index.analysis.filter.a11_mystop.stopword"),
            arrayContainingInAnyOrder("the", "over")
        );

        // make sure the build succeeds
        createAnalyzerAnalysis.buildSettings();
    }

    @Test
    public void testCreateAnalyzerExtendingBuiltin() throws Exception {
        AnalyzedStatement analyzedStatement = e.analyze(
            "CREATE ANALYZER a4 EXTENDS german WITH (" +
            "  \"stop_words\"=['der', 'die', 'das']" +
            ")");
        assertThat(analyzedStatement, instanceOf(CreateAnalyzerAnalyzedStatement.class));
        CreateAnalyzerAnalyzedStatement createAnalyzerAnalysis = (CreateAnalyzerAnalyzedStatement) analyzedStatement;

        assertEquals("a4", createAnalyzerAnalysis.ident());
        assertEquals("german", createAnalyzerAnalysis.extendedAnalyzerName());
        assertThat(
            createAnalyzerAnalysis.genericAnalyzerSettings().getAsArray("index.analysis.analyzer.a4.stop_words"),
            arrayContainingInAnyOrder("der", "die", "das")
        );

        // make sure the build succeeds
        createAnalyzerAnalysis.buildSettings();
    }

    @Test
    public void createAnalyzerWithoutTokenizer() throws Exception {
        CreateAnalyzerAnalyzedStatement analysis = e.analyze(
            "CREATE ANALYZER a6 (" +
            "  char_filters (" +
            "    \"html_strip\"" +
            "  )," +
            "  token_filters (" +
            "    lowercase" +
            "  )" +
            ")");
        expectedException.expect(UnsupportedOperationException.class);
        expectedException.expectMessage("Tokenizer missing from non-extended analyzer");
        analysis.buildSettings();
    }

    @Test
    public void overrideDefaultAnalyzer() {
        expectedException.expect(IllegalArgumentException.class);
        expectedException.expectMessage("Overriding the default analyzer is forbidden");
        e.analyze(
            "CREATE ANALYZER \"default\" (" +
            "  TOKENIZER whitespace" +
            ")");
    }

    @Test
    public void overrideBuiltInAnalyzer() {
        expectedException.expect(IllegalArgumentException.class);
        expectedException.expectMessage("Cannot override builtin analyzer 'keyword'");
        e.analyze(
            "CREATE ANALYZER \"keyword\" (" +
            "  char_filters (" +
            "    html_strip" +
            "  )," +
            "  tokenizer standard" +
            ")");
    }
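
    /**
     * A hedged sketch, not part of the original suite: the override guard
     * should apply to any builtin analyzer name, not just 'keyword'. The
     * choice of 'standard' is illustrative, and the message format is assumed
     * to match the 'keyword' case above.
     */
    @Test
    public void overrideBuiltInAnalyzerStandard() {
        expectedException.expect(IllegalArgumentException.class);
        // assumed message: same template as the 'keyword' case, interpolating the name
        expectedException.expectMessage("Cannot override builtin analyzer 'standard'");
        e.analyze(
            "CREATE ANALYZER \"standard\" (" +
            "  TOKENIZER whitespace" +
            ")");
    }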

    @Test
    public void missingParameterInCharFilter() throws Exception {
        expectedException.expect(IllegalArgumentException.class);
        expectedException.expectMessage("CHAR_FILTER of type 'mapping' needs additional parameters");
        CreateAnalyzerAnalyzedStatement analysis = e.analyze(
            "CREATE ANALYZER my_mapping_analyzer (" +
            "  char_filters (" +
            "    \"mapping\"" +
            "  )," +
            "  TOKENIZER whitespace" +
            ")");
        analysis.buildSettings();
    }
}