/*
 * Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
 * license agreements. See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership. Crate licenses
 * this file to you under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may
 * obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * However, if you have executed another commercial license agreement
 * with Crate these terms will supersede the license and you may use the
 * software solely pursuant to the terms of the relevant commercial agreement.
 */

package io.crate.analyze;

import io.crate.test.integration.CrateDummyClusterServiceUnitTest;
import io.crate.testing.SQLExecutor;
import org.elasticsearch.common.settings.Settings;
import org.junit.Before;
import org.junit.Test;

import static org.hamcrest.Matchers.*;

public class CreateAnalyzerAnalyzerTest extends CrateDummyClusterServiceUnitTest {

    private SQLExecutor e;

    @Before
    public void prepare() {
        e = SQLExecutor.builder(clusterService).enableDefaultTables().build();
    }

    @Test
    public void testCreateAnalyzerSimple() throws Exception {
        AnalyzedStatement analyzedStatement = e.analyze("CREATE ANALYZER a1 (tokenizer lowercase)");
        assertThat(analyzedStatement, instanceOf(CreateAnalyzerAnalyzedStatement.class));
        CreateAnalyzerAnalyzedStatement createAnalyzerAnalysis = (CreateAnalyzerAnalyzedStatement) analyzedStatement;

        assertEquals("a1", createAnalyzerAnalysis.ident());
        assertEquals("lowercase", createAnalyzerAnalysis.tokenizerDefinition().v1());
        assertEquals(Settings.EMPTY, createAnalyzerAnalysis.tokenizerDefinition().v2());

        // make sure the build succeeds
        createAnalyzerAnalysis.buildSettings();
    }

    @Test
    public void testCreateAnalyzerWithCustomTokenizer() throws Exception {
        AnalyzedStatement analyzedStatement = e.analyze(
            "CREATE ANALYZER a2 (" +
            "  tokenizer tok2 with (" +
            "    type='ngram'," +
            "    \"min_ngram\"=2," +
            "    \"token_chars\"=['letter', 'digits']" +
            "  )" +
            ")");
        assertThat(analyzedStatement, instanceOf(CreateAnalyzerAnalyzedStatement.class));
        CreateAnalyzerAnalyzedStatement createAnalyzerAnalysis = (CreateAnalyzerAnalyzedStatement) analyzedStatement;

        assertEquals("a2", createAnalyzerAnalysis.ident());
        assertEquals("a2_tok2", createAnalyzerAnalysis.tokenizerDefinition().v1());
        assertThat(
            createAnalyzerAnalysis.tokenizerDefinition().v2().getAsMap(),
            allOf(
                hasEntry("index.analysis.tokenizer.a2_tok2.type", "ngram"),
                hasEntry("index.analysis.tokenizer.a2_tok2.min_ngram", "2"),
                hasEntry("index.analysis.tokenizer.a2_tok2.token_chars.0", "letter"),
                hasEntry("index.analysis.tokenizer.a2_tok2.token_chars.1", "digits")
            )
        );

        // make sure the build succeeds
        createAnalyzerAnalysis.buildSettings();
    }

    @Test
    public void testCreateAnalyzerWithCharFilters() throws Exception {
        AnalyzedStatement analyzedStatement = e.analyze(
            "CREATE ANALYZER a3 (" +
            "  tokenizer lowercase," +
            "  char_filters (" +
            "    \"html_strip\"," +
            "    my_mapping WITH (" +
            "      type='mapping'," +
            "      mappings=['ph=>f', 'ß=>ss', 'ö=>oe']" +
            "    )" +
            "  )" +
            ")");
        assertThat(analyzedStatement, instanceOf(CreateAnalyzerAnalyzedStatement.class));
        CreateAnalyzerAnalyzedStatement createAnalyzerAnalysis = (CreateAnalyzerAnalyzedStatement) analyzedStatement;

        assertEquals("a3", createAnalyzerAnalysis.ident());
        assertEquals("lowercase", createAnalyzerAnalysis.tokenizerDefinition().v1());
        assertThat(
            createAnalyzerAnalysis.charFilters().keySet(),
            containsInAnyOrder("html_strip", "a3_my_mapping")
        );
        assertThat(
            createAnalyzerAnalysis.charFilters().get("a3_my_mapping").getAsMap(),
            hasEntry("index.analysis.char_filter.a3_my_mapping.type", "mapping")
        );
        assertThat(
            createAnalyzerAnalysis.charFilters().get("a3_my_mapping")
                .getAsArray("index.analysis.char_filter.a3_my_mapping.mappings"),
            arrayContainingInAnyOrder("ph=>f", "ß=>ss", "ö=>oe")
        );

        // make sure the build succeeds
        createAnalyzerAnalysis.buildSettings();
    }
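
    /**
     * A hedged sketch, not part of the original suite: it combines a builtin
     * char filter with a custom token filter in a single statement. The
     * analyzer ident "a5", the filter name "myfilter" and the expected
     * prefixed name "a5_myfilter" are illustrative assumptions that follow
     * the naming convention asserted in the surrounding tests.
     */
    @Test
    public void testCreateAnalyzerWithCharAndTokenFilters() throws Exception {
        CreateAnalyzerAnalyzedStatement analysis = e.analyze(
            "CREATE ANALYZER a5 (" +
            "  tokenizer lowercase," +
            "  char_filters (" +
            "    \"html_strip\"" +
            "  )," +
            "  token_filters (" +
            "    myfilter WITH (" +
            "      type='stop'," +
            "      stopword=['a', 'an']" +
            "    )" +
            "  )" +
            ")");

        assertEquals("a5", analysis.ident());
        // builtin filter names are kept as-is, while custom ones are prefixed
        // with the analyzer ident (the convention the tests above assert)
        assertThat(analysis.charFilters().keySet(), containsInAnyOrder("html_strip"));
        assertThat(analysis.tokenFilters().keySet(), containsInAnyOrder("a5_myfilter"));

        // make sure the build succeeds
        analysis.buildSettings();
    }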

    @Test
    public void testCreateAnalyzerWithTokenFilters() throws Exception {
        AnalyzedStatement analyzedStatement = e.analyze(
            "CREATE ANALYZER a11 (" +
            "  TOKENIZER standard," +
            "  TOKEN_FILTERS (" +
            "    lowercase," +
            "    mystop WITH (" +
            "      type='stop'," +
            "      stopword=['the', 'over']" +
            "    )" +
            "  )" +
            ")");
        assertThat(analyzedStatement, instanceOf(CreateAnalyzerAnalyzedStatement.class));
        CreateAnalyzerAnalyzedStatement createAnalyzerAnalysis = (CreateAnalyzerAnalyzedStatement) analyzedStatement;

        assertEquals("a11", createAnalyzerAnalysis.ident());
        assertEquals("standard", createAnalyzerAnalysis.tokenizerDefinition().v1());
        assertThat(
            createAnalyzerAnalysis.tokenFilters().keySet(),
            containsInAnyOrder("lowercase", "a11_mystop")
        );
        assertThat(
            createAnalyzerAnalysis.tokenFilters().get("a11_mystop").getAsMap(),
            hasEntry("index.analysis.filter.a11_mystop.type", "stop")
        );
        assertThat(
            createAnalyzerAnalysis.tokenFilters().get("a11_mystop")
                .getAsArray("index.analysis.filter.a11_mystop.stopword"),
            arrayContainingInAnyOrder("the", "over")
        );

        // make sure the build succeeds
        createAnalyzerAnalysis.buildSettings();
    }

    @Test
    public void testCreateAnalyzerExtendingBuiltin() throws Exception {
        AnalyzedStatement analyzedStatement = e.analyze(
            "CREATE ANALYZER a4 EXTENDS german WITH (" +
            "  \"stop_words\"=['der', 'die', 'das']" +
            ")");
        assertThat(analyzedStatement, instanceOf(CreateAnalyzerAnalyzedStatement.class));
        CreateAnalyzerAnalyzedStatement createAnalyzerAnalysis = (CreateAnalyzerAnalyzedStatement) analyzedStatement;

        assertEquals("a4", createAnalyzerAnalysis.ident());
        assertEquals("german", createAnalyzerAnalysis.extendedAnalyzerName());
        assertThat(
            createAnalyzerAnalysis.genericAnalyzerSettings().getAsArray("index.analysis.analyzer.a4.stop_words"),
            arrayContainingInAnyOrder("der", "die", "das")
        );

        // make sure the build succeeds
        createAnalyzerAnalysis.buildSettings();
    }

    @Test
    public void createAnalyzerWithoutTokenizer() throws Exception {
        CreateAnalyzerAnalyzedStatement analysis = e.analyze(
            "CREATE ANALYZER a6 (" +
            "  char_filters (" +
            "    \"html_strip\"" +
            "  )," +
            "  token_filters (" +
            "    lowercase" +
            "  )" +
            ")");
        expectedException.expect(UnsupportedOperationException.class);
        expectedException.expectMessage("Tokenizer missing from non-extended analyzer");
        analysis.buildSettings();
    }

    @Test
    public void overrideDefaultAnalyzer() {
        expectedException.expect(IllegalArgumentException.class);
        expectedException.expectMessage("Overriding the default analyzer is forbidden");
        e.analyze(
            "CREATE ANALYZER \"default\" (" +
            "  TOKENIZER whitespace" +
            ")");
    }

    @Test
    public void overrideBuiltInAnalyzer() {
        expectedException.expect(IllegalArgumentException.class);
        expectedException.expectMessage("Cannot override builtin analyzer 'keyword'");
        e.analyze(
            "CREATE ANALYZER \"keyword\" (" +
            "  char_filters (" +
            "    html_strip" +
            "  )," +
            "  tokenizer standard" +
            ")");
    }
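
    /**
     * A hedged sketch, not part of the original suite: the override guard
     * should apply to any builtin analyzer name, not just 'keyword'. The
     * choice of 'standard' is illustrative, and the message format is assumed
     * to match the 'keyword' case above.
     */
    @Test
    public void overrideBuiltInAnalyzerStandard() {
        expectedException.expect(IllegalArgumentException.class);
        // assumed message: same template as the 'keyword' case, interpolating the name
        expectedException.expectMessage("Cannot override builtin analyzer 'standard'");
        e.analyze(
            "CREATE ANALYZER \"standard\" (" +
            "  TOKENIZER whitespace" +
            ")");
    }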

    @Test
    public void missingParameterInCharFilter() throws Exception {
        expectedException.expect(IllegalArgumentException.class);
        expectedException.expectMessage("CHAR_FILTER of type 'mapping' needs additional parameters");
        CreateAnalyzerAnalyzedStatement analysis = e.analyze(
            "CREATE ANALYZER my_mapping_analyzer (" +
            "  char_filters (" +
            "    \"mapping\"" +
            "  )," +
            "  TOKENIZER whitespace" +
            ")");
        analysis.buildSettings();
    }
}