/*
* Licensed to CRATE Technology GmbH ("Crate") under one or more contributor
* license agreements. See the NOTICE file distributed with this work for
* additional information regarding copyright ownership. Crate licenses
* this file to you under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may
* obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations
* under the License.
*
* However, if you have executed another commercial license agreement
* with Crate these terms will supersede the license and you may use the
* software solely pursuant to the terms of the relevant commercial agreement.
*/
package io.crate.analyze;
import io.crate.metadata.FulltextAnalyzerResolver;
import org.elasticsearch.common.Nullable;
import org.elasticsearch.common.collect.Tuple;
import org.elasticsearch.common.settings.Settings;

import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;

import static io.crate.metadata.settings.AnalyzerSettings.CUSTOM_ANALYSIS_SETTINGS_PREFIX;
/**
 * Analyzed form of a <tt>CREATE ANALYZER</tt> statement.
 * <p>
 * Collects the analyzer name, an optional analyzer to extend, a tokenizer definition,
 * char filters, token filters and generic analyzer properties, and turns them into
 * {@link Settings} via {@link #buildSettings()}.
 * <p>
 * Instances are mutable and populated step by step through the setter-style methods.
 */
public class CreateAnalyzerAnalyzedStatement extends AbstractDDLAnalyzedStatement {

    /** Analyzer type written for analyzers that do not extend a built-in analyzer. */
    private static final String CUSTOM_ANALYZER_TYPE = "custom";

    private final FulltextAnalyzerResolver fulltextAnalyzerResolver;
    private String ident;
    private String extendedAnalyzerName = null;
    private Settings extendedCustomAnalyzer = null;
    private Settings genericAnalyzerSettings = null;
    private final Settings.Builder genericAnalyzerSettingsBuilder = Settings.builder();
    private Tuple<String, Settings> tokenizerDefinition = null;
    // Insertion-ordered: the key sets are emitted as the analyzer's "char_filter" and
    // "filter" arrays, so the order in which filters were added must be preserved.
    private final Map<String, Settings> charFilters = new LinkedHashMap<>();
    private final Map<String, Settings> tokenFilters = new LinkedHashMap<>();

    /**
     * @param fulltextAnalyzerResolver resolver used to look up existing analyzers,
     *                                 tokenizers, token filters and char filters
     */
    public CreateAnalyzerAnalyzedStatement(FulltextAnalyzerResolver fulltextAnalyzerResolver) {
        this.fulltextAnalyzerResolver = fulltextAnalyzerResolver;
    }

    @Override
    public <C, R> R accept(AnalyzedStatementVisitor<C, R> analyzedStatementVisitor, C context) {
        return analyzedStatementVisitor.visitCreateAnalyzerStatement(this, context);
    }

    /**
     * @return the resolver passed to the constructor
     */
    public FulltextAnalyzerResolver analyzerService() {
        return fulltextAnalyzerResolver;
    }

    /**
     * Sets the name of the analyzer to create.
     *
     * @param ident the analyzer name
     * @throws IllegalArgumentException if the name is "default" (case-insensitive) or
     *                                  the resolver reports a built-in analyzer of that name
     */
    public void ident(String ident) {
        if (ident.equalsIgnoreCase("default")) {
            throw new IllegalArgumentException("Overriding the default analyzer is forbidden");
        }
        if (fulltextAnalyzerResolver.hasBuiltInAnalyzer(ident)) {
            throw new IllegalArgumentException(String.format(Locale.ENGLISH,
                "Cannot override builtin analyzer '%s'", ident));
        }
        this.ident = ident;
    }

    /**
     * @return the analyzer name set via {@link #ident(String)}, or null if none was set yet
     */
    public String ident() {
        return ident;
    }

    /**
     * Declares the analyzer this statement extends and loads its custom settings, if any.
     *
     * @param name name of the analyzer to extend
     * @throws IllegalArgumentException if the resolver does not know an analyzer of that name
     */
    public void extendedAnalyzer(String name) {
        if (!fulltextAnalyzerResolver.hasAnalyzer(name)) {
            throw new IllegalArgumentException(String.format(Locale.ENGLISH,
                "Extended Analyzer '%s' does not exist", name));
        }
        extendedAnalyzerName = name;
        // resolve custom Analyzer, if any
        extendedCustomAnalyzer = fulltextAnalyzerResolver.getCustomAnalyzer(name);
    }

    /**
     * @return the name of the extended analyzer, or null if no analyzer is extended
     */
    @Nullable
    public String extendedAnalyzerName() {
        return extendedAnalyzerName;
    }

    /**
     * Sets the tokenizer of the analyzer, replacing any previous definition.
     *
     * @param name     tokenizer name
     * @param settings tokenizer settings
     */
    public void tokenDefinition(String name, Settings settings) {
        tokenizerDefinition = new Tuple<>(name, settings);
    }

    /**
     * @return the tokenizer as (name, settings), or null if none is defined. Note that
     *         {@link #buildSettings()} may fill this in from the extended analyzer.
     */
    @Nullable
    public Tuple<String, Settings> tokenizerDefinition() {
        return tokenizerDefinition;
    }

    /**
     * Returns the generic analyzer settings. They are built from
     * {@link #genericAnalyzerSettingsBuilder()} on the first call and cached, so changes
     * made to the builder afterwards are not reflected.
     *
     * @return the generic analyzer settings, never null
     */
    public Settings genericAnalyzerSettings() {
        if (genericAnalyzerSettings == null) {
            genericAnalyzerSettings = genericAnalyzerSettingsBuilder.build();
        }
        return genericAnalyzerSettings;
    }

    /**
     * @return the builder that backs {@link #genericAnalyzerSettings()}
     */
    public Settings.Builder genericAnalyzerSettingsBuilder() {
        return genericAnalyzerSettingsBuilder;
    }

    /**
     * Registers a token filter. Filters are emitted in the order they were first added;
     * adding an already known name replaces its settings.
     *
     * @param name     token filter name
     * @param settings token filter settings
     */
    public void addTokenFilter(String name, Settings settings) {
        tokenFilters.put(name, settings);
    }

    /**
     * @return the live, insertion-ordered map of token filter name to settings; never null
     */
    public Map<String, Settings> tokenFilters() {
        return tokenFilters;
    }

    /**
     * Registers a char filter. Filters are emitted in the order they were first added;
     * adding an already known name replaces its settings.
     *
     * @param name     char filter name
     * @param settings char filter settings
     */
    public void addCharFilter(String name, Settings settings) {
        charFilters.put(name, settings);
    }

    /**
     * @return the live, insertion-ordered map of char filter name to settings; never null
     */
    public Map<String, Settings> charFilters() {
        return charFilters;
    }

    /**
     * @return true if an analyzer is extended, custom settings exist for it and their
     *         type is "custom"
     */
    public boolean extendsCustomAnalyzer() {
        return extendedAnalyzerName != null
               && CUSTOM_ANALYZER_TYPE.equals(extendedAnalyzerType());
    }

    /**
     * @return true if an analyzer is extended and it is not a "custom" typed analyzer,
     *         i.e. either no custom settings exist for it or their type is not "custom"
     */
    public boolean extendsBuiltInAnalyzer() {
        return extendedAnalyzerName != null
               && !CUSTOM_ANALYZER_TYPE.equals(extendedAnalyzerType());
    }

    /**
     * Reads the type recorded in the extended analyzer's custom settings.
     *
     * @return the type, or null if there are no custom settings or no type entry
     */
    @Nullable
    private String extendedAnalyzerType() {
        if (extendedCustomAnalyzer == null) {
            return null;
        }
        return extendedCustomAnalyzer.get(
            getSettingsKey("index.analysis.analyzer.%s.type", extendedAnalyzerName));
    }

    /**
     * create analyzer settings - possibly referencing charFilters, tokenFilters, tokenizers defined here
     * <p>
     * Side effect: when a custom analyzer is extended, a missing tokenizer and empty
     * filter maps are filled from the extended analyzer. {@link #buildSettings()} relies
     * on this having happened before it writes the tokenizer and filter definitions.
     *
     * @return Settings describing a custom or extended builtin-analyzer
     * @throws UnsupportedOperationException if no tokenizer is defined and no built-in
     *                                       analyzer is extended
     * @throws IllegalStateException         if the extended analyzer's settings lack a type
     */
    private Settings analyzerSettings() {
        Settings.Builder builder = Settings.builder();

        if (extendsCustomAnalyzer()) {
            inheritFromExtendedCustomAnalyzer(builder);
        } else if (extendsBuiltInAnalyzer()) {
            // generic properties for extending builtin analyzers
            builder.put(genericAnalyzerSettings());
        }

        // written after the inherited settings so they take precedence over copied keys
        builder.put(
            getSettingsKey("index.analysis.analyzer.%s.type", ident),
            resolveAnalyzerType()
        );

        if (tokenizerDefinition != null) {
            builder.put(
                getSettingsKey("index.analysis.analyzer.%s.tokenizer", ident),
                tokenizerDefinition.v1()
            );
        } else if (!extendsBuiltInAnalyzer()) {
            throw new UnsupportedOperationException("Tokenizer missing from non-extended analyzer");
        }

        if (!charFilters.isEmpty()) {
            builder.putArray(
                getSettingsKey("index.analysis.analyzer.%s.char_filter", ident),
                charFilters.keySet().toArray(new String[charFilters.size()])
            );
        }
        if (!tokenFilters.isEmpty()) {
            builder.putArray(
                getSettingsKey("index.analysis.analyzer.%s.filter", ident),
                tokenFilters.keySet().toArray(new String[tokenFilters.size()])
            );
        }
        return builder.build();
    }

    /**
     * Copies the extended custom analyzer's settings under this analyzer's name and
     * inherits its tokenizer, token filters and char filters where this statement
     * does not define its own.
     */
    private void inheritFromExtendedCustomAnalyzer(Settings.Builder builder) {
        // use analyzer-settings from extended analyzer only; getByPrefix strips the prefix,
        // so the remaining keys are appended directly to this analyzer's prefix
        Settings inherited = extendedCustomAnalyzer.getByPrefix(
            getSettingsKey("index.analysis.analyzer.%s", extendedAnalyzerName));
        for (Map.Entry<String, String> entry : inherited.getAsMap().entrySet()) {
            builder.put(
                getSettingsKey("index.analysis.analyzer.%s%s", ident, entry.getKey()),
                entry.getValue());
        }

        if (tokenizerDefinition == null) {
            // set tokenizer if not defined in extending analyzer
            String tokenizerName = extendedCustomAnalyzer.get(
                getSettingsKey("index.analysis.analyzer.%s.tokenizer", extendedAnalyzerName));
            if (tokenizerName != null) {
                tokenizerDefinition = new Tuple<>(
                    tokenizerName,
                    settingsOrEmpty(fulltextAnalyzerResolver.getCustomTokenizer(tokenizerName)));
            }
        }

        if (tokenFilters.isEmpty()) {
            // only use inherited tokenfilters if none are defined in extending analyzer
            String[] filterNames = extendedCustomAnalyzer.getAsArray(
                getSettingsKey("index.analysis.analyzer.%s.filter", extendedAnalyzerName));
            for (String filterName : filterNames) {
                tokenFilters.put(
                    filterName,
                    settingsOrEmpty(fulltextAnalyzerResolver.getCustomTokenFilter(filterName)));
            }
        }

        if (charFilters.isEmpty()) {
            // only use inherited charfilters if none are defined in extending analyzer
            String[] filterNames = extendedCustomAnalyzer.getAsArray(
                getSettingsKey("index.analysis.analyzer.%s.char_filter", extendedAnalyzerName));
            for (String filterName : filterNames) {
                charFilters.put(
                    filterName,
                    settingsOrEmpty(fulltextAnalyzerResolver.getCustomCharFilter(filterName)));
            }
        }
    }

    /**
     * Determines the value of the analyzer's "type" setting.
     *
     * @return "custom" unless a built-in analyzer is extended; otherwise the type stored
     *         with the extended analyzer, or its name if it has no stored settings
     * @throws IllegalStateException if stored settings exist but carry no type
     */
    private String resolveAnalyzerType() {
        if (!extendsBuiltInAnalyzer()) {
            return CUSTOM_ANALYZER_TYPE;
        }
        if (extendedCustomAnalyzer == null) {
            // direct extending builtin analyzer, use name as type
            return extendedAnalyzerName;
        }
        String type = extendedAnalyzerType();
        if (type == null) {
            throw new IllegalStateException(String.format(Locale.ENGLISH,
                "Extended analyzer '%s' has no analyzer type configured", extendedAnalyzerName));
        }
        return type;
    }

    /** Substitutes {@link Settings#EMPTY} for a null lookup result. */
    private static Settings settingsOrEmpty(@Nullable Settings settings) {
        return settings != null ? settings : Settings.EMPTY;
    }

    /**
     * build settings ready for putting into clusterstate
     * <p>
     * The encoded analyzer definition is always written; tokenizer, token filter and
     * char filter definitions are only written when their settings are not empty.
     *
     * @return the analyzer settings corresponding to the analyzed <tt>CREATE ANALYZER</tt> statement
     * @throws IOException if encoding a settings object fails
     * @throws org.elasticsearch.common.settings.SettingsException in case we can't build the settings yet
     */
    public Settings buildSettings() throws IOException {
        Settings.Builder builder = Settings.builder();

        // must run first: it may inherit tokenizer and filters from the extended analyzer
        String encodedAnalyzerSettings =
            FulltextAnalyzerResolver.encodeSettings(analyzerSettings()).utf8ToString();
        builder.put(
            CUSTOM_ANALYSIS_SETTINGS_PREFIX + "analyzer." + ident,
            encodedAnalyzerSettings
        );

        // TODO: save original SQL statement, so it can be displayed at information_schema.routines

        if (tokenizerDefinition != null) {
            putEncodedIfNotEmpty(
                builder,
                CUSTOM_ANALYSIS_SETTINGS_PREFIX + "tokenizer." + tokenizerDefinition.v1(),
                tokenizerDefinition.v2());
        }
        for (Map.Entry<String, Settings> tokenFilter : tokenFilters.entrySet()) {
            putEncodedIfNotEmpty(
                builder,
                CUSTOM_ANALYSIS_SETTINGS_PREFIX + "filter." + tokenFilter.getKey(),
                tokenFilter.getValue());
        }
        for (Map.Entry<String, Settings> charFilter : charFilters.entrySet()) {
            putEncodedIfNotEmpty(
                builder,
                CUSTOM_ANALYSIS_SETTINGS_PREFIX + "char_filter." + charFilter.getKey(),
                charFilter.getValue());
        }
        return builder.build();
    }

    /** Stores the encoded form of {@code settings} under {@code key} unless they are empty. */
    private static void putEncodedIfNotEmpty(Settings.Builder builder, String key, Settings settings)
        throws IOException {
        if (!settings.getAsMap().isEmpty()) {
            builder.put(key, FulltextAnalyzerResolver.encodeSettings(settings).utf8ToString());
        }
    }

    /**
     * Formats a settings key.
     *
     * @param suffix     format string of the key (see {@link String#format})
     * @param formatArgs arguments for the format string; if the array itself is null the
     *                   format string is returned unchanged
     * @return the formatted key
     */
    public static String getSettingsKey(String suffix, Object... formatArgs) {
        if (formatArgs != null) {
            suffix = String.format(Locale.ENGLISH, suffix, formatArgs);
        }
        return suffix;
    }
}