/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.action.admin.indices;
import org.apache.lucene.analysis.MockTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.elasticsearch.Version;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequest;
import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse;
import org.elasticsearch.action.admin.indices.analyze.TransportAnalyzeAction;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.UUIDs;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.env.Environment;
import org.elasticsearch.index.IndexSettings;
import org.elasticsearch.index.analysis.AbstractTokenFilterFactory;
import org.elasticsearch.index.analysis.AnalysisRegistry;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.mapper.AllFieldMapper;
import org.elasticsearch.indices.analysis.AnalysisModule;
import org.elasticsearch.indices.analysis.AnalysisModule.AnalysisProvider;
import org.elasticsearch.plugins.AnalysisPlugin;
import org.elasticsearch.test.ESTestCase;
import org.elasticsearch.test.IndexSettingsModule;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import static java.util.Collections.singletonList;
import static java.util.Collections.singletonMap;
/**
* Tests for {@link TransportAnalyzeAction}. See the rest tests in the {@code analysis-common} module for places where this code gets a ton
* more exercise.
*/
public class TransportAnalyzeActionTests extends ESTestCase {
private IndexAnalyzers indexAnalyzers;
private AnalysisRegistry registry;
private Environment environment;
@Override
public void setUp() throws Exception {
super.setUp();
Settings settings = Settings.builder().put(Environment.PATH_HOME_SETTING.getKey(), createTempDir().toString()).build();
Settings indexSettings = Settings.builder()
.put(IndexMetaData.SETTING_VERSION_CREATED, Version.CURRENT)
.put(IndexMetaData.SETTING_INDEX_UUID, UUIDs.randomBase64UUID())
.put("index.analysis.analyzer.custom_analyzer.tokenizer", "standard")
.put("index.analysis.analyzer.custom_analyzer.filter", "mock").build();
IndexSettings idxSettings = IndexSettingsModule.newIndexSettings("index", indexSettings);
environment = new Environment(settings);
AnalysisPlugin plugin = new AnalysisPlugin() {
class MockFactory extends AbstractTokenFilterFactory {
MockFactory(IndexSettings indexSettings, Environment env, String name, Settings settings) {
super(indexSettings, name, settings);
}
@Override
public TokenStream create(TokenStream tokenStream) {
return new MockTokenFilter(tokenStream, MockTokenFilter.ENGLISH_STOPSET);
}
}
@Override
public Map<String, AnalysisProvider<TokenFilterFactory>> getTokenFilters() {
return singletonMap("mock", MockFactory::new);
}
};
registry = new AnalysisModule(environment, singletonList(plugin)).getAnalysisRegistry();
indexAnalyzers = registry.build(idxSettings);
}
/**
* Test behavior when the named analysis component isn't defined on the index. In that case we should build with defaults.
*/
public void testNoIndexAnalyzers() throws IOException {
// Refer to an analyzer by its type so we get its default configuration
AnalyzeRequest request = new AnalyzeRequest();
request.analyzer("standard");
request.text("the quick brown fox");
AnalyzeResponse analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, null, registry, environment);
List<AnalyzeResponse.AnalyzeToken> tokens = analyze.getTokens();
assertEquals(4, tokens.size());
// Refer to a token filter by its type so we get its default configuration
request.analyzer(null);
request.tokenizer("whitespace");
request.addTokenFilter("mock");
request.text("the qu1ck brown fox");
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? indexAnalyzers : null, registry, environment);
tokens = analyze.getTokens();
assertEquals(3, tokens.size());
assertEquals("qu1ck", tokens.get(0).getTerm());
assertEquals("brown", tokens.get(1).getTerm());
assertEquals("fox", tokens.get(2).getTerm());
// Refer to a char filter by its type so we get its default configuration
request.analyzer(null);
request.tokenizer("whitespace");
request.addCharFilter("html_strip");
request.addTokenFilter("mock");
request.text("<p>the qu1ck brown fox</p>");
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, randomBoolean() ? indexAnalyzers : null, registry, environment);
tokens = analyze.getTokens();
assertEquals(3, tokens.size());
assertEquals("qu1ck", tokens.get(0).getTerm());
assertEquals("brown", tokens.get(1).getTerm());
assertEquals("fox", tokens.get(2).getTerm());
}
public void testFillsAttributes() throws IOException {
AnalyzeRequest request = new AnalyzeRequest();
request.analyzer("standard");
request.text("the 1 brown fox");
AnalyzeResponse analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, null, registry, environment);
List<AnalyzeResponse.AnalyzeToken> tokens = analyze.getTokens();
assertEquals(4, tokens.size());
assertEquals("the", tokens.get(0).getTerm());
assertEquals(0, tokens.get(0).getStartOffset());
assertEquals(3, tokens.get(0).getEndOffset());
assertEquals(0, tokens.get(0).getPosition());
assertEquals("<ALPHANUM>", tokens.get(0).getType());
assertEquals("1", tokens.get(1).getTerm());
assertEquals(4, tokens.get(1).getStartOffset());
assertEquals(5, tokens.get(1).getEndOffset());
assertEquals(1, tokens.get(1).getPosition());
assertEquals("<NUM>", tokens.get(1).getType());
assertEquals("brown", tokens.get(2).getTerm());
assertEquals(6, tokens.get(2).getStartOffset());
assertEquals(11, tokens.get(2).getEndOffset());
assertEquals(2, tokens.get(2).getPosition());
assertEquals("<ALPHANUM>", tokens.get(2).getType());
assertEquals("fox", tokens.get(3).getTerm());
assertEquals(12, tokens.get(3).getStartOffset());
assertEquals(15, tokens.get(3).getEndOffset());
assertEquals(3, tokens.get(3).getPosition());
assertEquals("<ALPHANUM>", tokens.get(3).getType());
}
public void testWithIndexAnalyzers() throws IOException {
AnalyzeRequest request = new AnalyzeRequest();
request.text("the quick brown fox");
request.analyzer("custom_analyzer");
AnalyzeResponse analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
List<AnalyzeResponse.AnalyzeToken> tokens = analyze.getTokens();
assertEquals(3, tokens.size());
assertEquals("quick", tokens.get(0).getTerm());
assertEquals("brown", tokens.get(1).getTerm());
assertEquals("fox", tokens.get(2).getTerm());
request.analyzer("standard");
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
tokens = analyze.getTokens();
assertEquals(4, tokens.size());
assertEquals("the", tokens.get(0).getTerm());
assertEquals("quick", tokens.get(1).getTerm());
assertEquals("brown", tokens.get(2).getTerm());
assertEquals("fox", tokens.get(3).getTerm());
// Switch the analyzer out for just a tokenizer
request.analyzer(null);
request.tokenizer("standard");
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
tokens = analyze.getTokens();
assertEquals(4, tokens.size());
assertEquals("the", tokens.get(0).getTerm());
assertEquals("quick", tokens.get(1).getTerm());
assertEquals("brown", tokens.get(2).getTerm());
assertEquals("fox", tokens.get(3).getTerm());
// Now try applying our token filter
request.addTokenFilter("mock");
analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
tokens = analyze.getTokens();
assertEquals(3, tokens.size());
assertEquals("quick", tokens.get(0).getTerm());
assertEquals("brown", tokens.get(1).getTerm());
assertEquals("fox", tokens.get(2).getTerm());
}
public void testGetIndexAnalyserWithoutIndexAnalyzers() throws IOException {
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> TransportAnalyzeAction.analyze(
new AnalyzeRequest()
.analyzer("custom_analyzer")
.text("the qu1ck brown fox-dog"),
AllFieldMapper.NAME, null, null, registry, environment));
assertEquals(e.getMessage(), "failed to find global analyzer [custom_analyzer]");
}
public void testUnknown() throws IOException {
boolean notGlobal = randomBoolean();
IllegalArgumentException e = expectThrows(IllegalArgumentException.class,
() -> TransportAnalyzeAction.analyze(
new AnalyzeRequest()
.analyzer("foobar")
.text("the qu1ck brown fox"),
AllFieldMapper.NAME, null, notGlobal ? indexAnalyzers : null, registry, environment));
if (notGlobal) {
assertEquals(e.getMessage(), "failed to find analyzer [foobar]");
} else {
assertEquals(e.getMessage(), "failed to find global analyzer [foobar]");
}
e = expectThrows(IllegalArgumentException.class,
() -> TransportAnalyzeAction.analyze(
new AnalyzeRequest()
.tokenizer("foobar")
.text("the qu1ck brown fox"),
AllFieldMapper.NAME, null, notGlobal ? indexAnalyzers : null, registry, environment));
if (notGlobal) {
assertEquals(e.getMessage(), "failed to find tokenizer under [foobar]");
} else {
assertEquals(e.getMessage(), "failed to find global tokenizer under [foobar]");
}
e = expectThrows(IllegalArgumentException.class,
() -> TransportAnalyzeAction.analyze(
new AnalyzeRequest()
.tokenizer("whitespace")
.addTokenFilter("foobar")
.text("the qu1ck brown fox"),
AllFieldMapper.NAME, null, notGlobal ? indexAnalyzers : null, registry, environment));
if (notGlobal) {
assertEquals(e.getMessage(), "failed to find token filter under [foobar]");
} else {
assertEquals(e.getMessage(), "failed to find global token filter under [foobar]");
}
e = expectThrows(IllegalArgumentException.class,
() -> TransportAnalyzeAction.analyze(
new AnalyzeRequest()
.tokenizer("whitespace")
.addTokenFilter("lowercase")
.addCharFilter("foobar")
.text("the qu1ck brown fox"),
AllFieldMapper.NAME, null, notGlobal ? indexAnalyzers : null, registry, environment));
if (notGlobal) {
assertEquals(e.getMessage(), "failed to find char filter under [foobar]");
} else {
assertEquals(e.getMessage(), "failed to find global char filter under [foobar]");
}
}
public void testNonPreBuildTokenFilter() throws IOException {
AnalyzeRequest request = new AnalyzeRequest();
request.tokenizer("whitespace");
request.addTokenFilter("min_hash");
request.text("the quick brown fox");
AnalyzeResponse analyze = TransportAnalyzeAction.analyze(request, AllFieldMapper.NAME, null, indexAnalyzers, registry, environment);
List<AnalyzeResponse.AnalyzeToken> tokens = analyze.getTokens();
int default_hash_count = 1;
int default_bucket_size = 512;
int default_hash_set_size = 1;
assertEquals(default_hash_count * default_bucket_size * default_hash_set_size, tokens.size());
}
}