/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.indices.analyze; import org.elasticsearch.action.admin.indices.alias.Alias; import org.elasticsearch.action.admin.indices.analyze.AnalyzeRequestBuilder; import org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse; import org.elasticsearch.test.ESIntegTestCase; import org.junit.Test; import java.io.IOException; import static org.elasticsearch.common.settings.Settings.settingsBuilder; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; import static org.hamcrest.Matchers.*; /** * */ public class AnalyzeActionIT extends ESIntegTestCase { @Test public void simpleAnalyzerTests() throws Exception { assertAcked(prepareCreate("test").addAlias(new Alias("alias"))); ensureGreen(); for (int i = 0; i < 10; i++) { AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "this is a test").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(4)); AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0); assertThat(token.getTerm(), equalTo("this")); assertThat(token.getStartOffset(), equalTo(0)); assertThat(token.getEndOffset(), equalTo(4)); assertThat(token.getPosition(), equalTo(0)); token = analyzeResponse.getTokens().get(1); assertThat(token.getTerm(), equalTo("is")); assertThat(token.getStartOffset(), equalTo(5)); assertThat(token.getEndOffset(), equalTo(7)); assertThat(token.getPosition(), equalTo(1)); token = analyzeResponse.getTokens().get(2); assertThat(token.getTerm(), equalTo("a")); assertThat(token.getStartOffset(), equalTo(8)); assertThat(token.getEndOffset(), equalTo(9)); assertThat(token.getPosition(), equalTo(2)); token = analyzeResponse.getTokens().get(3); assertThat(token.getTerm(), equalTo("test")); assertThat(token.getStartOffset(), equalTo(10)); assertThat(token.getEndOffset(), equalTo(14)); assertThat(token.getPosition(), equalTo(3)); } } @Test public void analyzeNumericField() throws IOException { assertAcked(prepareCreate("test").addAlias(new Alias("alias")).addMapping("test", "long", "type=long", "double", "type=double")); ensureGreen("test"); try { client().admin().indices().prepareAnalyze(indexOrAlias(), "123").setField("long").get(); fail("shouldn't get here"); } catch (IllegalArgumentException ex) { //all good } try { client().admin().indices().prepareAnalyze(indexOrAlias(), "123.0").setField("double").get(); fail("shouldn't get here"); } catch (IllegalArgumentException ex) { //all good } } @Test public void analyzeWithNoIndex() throws Exception { AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setAnalyzer("simple").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(4)); analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("keyword").setTokenFilters("lowercase").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(1)); assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test")); analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST").setTokenizer("standard").setTokenFilters("lowercase", "reverse").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(4)); AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0); assertThat(token.getTerm(), equalTo("siht")); token = analyzeResponse.getTokens().get(1); assertThat(token.getTerm(), equalTo("si")); token = analyzeResponse.getTokens().get(2); assertThat(token.getTerm(), equalTo("a")); token = analyzeResponse.getTokens().get(3); assertThat(token.getTerm(), equalTo("tset")); analyzeResponse = client().admin().indices().prepareAnalyze("of course").setTokenizer("standard").setTokenFilters("stop").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(1)); assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("course")); assertThat(analyzeResponse.getTokens().get(0).getPosition(), equalTo(1)); assertThat(analyzeResponse.getTokens().get(0).getStartOffset(), equalTo(3)); assertThat(analyzeResponse.getTokens().get(0).getEndOffset(), equalTo(9)); } @Test public void analyzeWithCharFilters() throws Exception { assertAcked(prepareCreate("test").addAlias(new Alias("alias")) .setSettings(settingsBuilder().put(indexSettings()) .put("index.analysis.char_filter.custom_mapping.type", "mapping") .putArray("index.analysis.char_filter.custom_mapping.mappings", "ph=>f", "qu=>q") .put("index.analysis.analyzer.custom_with_char_filter.tokenizer", "standard") .putArray("index.analysis.analyzer.custom_with_char_filter.char_filter", "custom_mapping"))); ensureGreen(); AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("<h2><b>THIS</b> IS A</h2> <a href=\"#\">TEST</a>").setTokenizer("standard").setCharFilters("html_strip").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(4)); analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A <b>TEST</b>").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("html_strip").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(1)); assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("this is a test")); analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "jeff quit phish").setTokenizer("keyword").setTokenFilters("lowercase").setCharFilters("custom_mapping").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(1)); assertThat(analyzeResponse.getTokens().get(0).getTerm(), equalTo("jeff qit fish")); analyzeResponse = client().admin().indices().prepareAnalyze(indexOrAlias(), "<a href=\"#\">jeff quit fish</a>").setTokenizer("standard").setCharFilters("html_strip", "custom_mapping").get(); assertThat(analyzeResponse.getTokens().size(), equalTo(3)); AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(0); assertThat(token.getTerm(), equalTo("jeff")); token = analyzeResponse.getTokens().get(1); assertThat(token.getTerm(), equalTo("qit")); token = analyzeResponse.getTokens().get(2); assertThat(token.getTerm(), equalTo("fish")); } @Test public void analyzerWithFieldOrTypeTests() throws Exception { assertAcked(prepareCreate("test").addAlias(new Alias("alias"))); ensureGreen(); client().admin().indices().preparePutMapping("test") .setType("document").setSource("simple", "type=string,analyzer=simple").get(); for (int i = 0; i < 10; i++) { final AnalyzeRequestBuilder requestBuilder = client().admin().indices().prepareAnalyze("THIS IS A TEST"); requestBuilder.setIndex(indexOrAlias()); requestBuilder.setField("document.simple"); AnalyzeResponse analyzeResponse = requestBuilder.get(); assertThat(analyzeResponse.getTokens().size(), equalTo(4)); AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(3); assertThat(token.getTerm(), equalTo("test")); assertThat(token.getStartOffset(), equalTo(10)); assertThat(token.getEndOffset(), equalTo(14)); } } @Test // issue #5974 public void testThatStandardAndDefaultAnalyzersAreSame() throws Exception { AnalyzeResponse response = client().admin().indices().prepareAnalyze("this is a test").setAnalyzer("standard").get(); assertTokens(response, "this", "is", "a", "test"); response = client().admin().indices().prepareAnalyze("this is a test").setAnalyzer("default").get(); assertTokens(response, "this", "is", "a", "test"); response = client().admin().indices().prepareAnalyze("this is a test").get(); assertTokens(response, "this", "is", "a", "test"); } private void assertTokens(AnalyzeResponse response, String ... tokens) { assertThat(response.getTokens(), hasSize(tokens.length)); for (int i = 0; i < tokens.length; i++) { assertThat(response.getTokens().get(i).getTerm(), is(tokens[i])); } } private static String indexOrAlias() { return randomBoolean() ? "test" : "alias"; } public void testAnalyzerWithMultiValues() throws Exception { assertAcked(prepareCreate("test").addAlias(new Alias("alias"))); ensureGreen(); client().admin().indices().preparePutMapping("test") .setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get(); String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"}; final AnalyzeRequestBuilder requestBuilder = client().admin().indices().prepareAnalyze(); requestBuilder.setText(texts); requestBuilder.setIndex(indexOrAlias()); requestBuilder.setField("simple"); AnalyzeResponse analyzeResponse = requestBuilder.get(); assertThat(analyzeResponse.getTokens().size(), equalTo(7)); AnalyzeResponse.AnalyzeToken token = analyzeResponse.getTokens().get(3); assertThat(token.getTerm(), equalTo("test")); assertThat(token.getPosition(), equalTo(3)); assertThat(token.getStartOffset(), equalTo(10)); assertThat(token.getEndOffset(), equalTo(14)); token = analyzeResponse.getTokens().get(5); assertThat(token.getTerm(), equalTo("second")); assertThat(token.getPosition(), equalTo(105)); assertThat(token.getStartOffset(), equalTo(19)); assertThat(token.getEndOffset(), equalTo(25)); } @Test public void testDetailAnalyze() throws Exception { assertAcked(prepareCreate("test").addAlias(new Alias("alias")) .setSettings( settingsBuilder() .put("index.analysis.char_filter.my_mapping.type", "mapping") .putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F") .put("index.analysis.analyzer.test_analyzer.type", "custom") .put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100") .put("index.analysis.analyzer.test_analyzer.tokenizer", "standard") .putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping") .putArray("index.analysis.analyzer.test_analyzer.filter", "snowball"))); ensureGreen(); for (int i = 0; i < 10; i++) { AnalyzeResponse analyzeResponse = admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText("THIS IS A PHISH") .setExplain(true).setCharFilters("my_mapping").setTokenizer("keyword").setTokenFilters("lowercase").get(); assertThat(analyzeResponse.detail().analyzer(), nullValue()); //charfilters // global charfilter is not change text. assertThat(analyzeResponse.detail().charfilters().length, equalTo(1)); assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping")); assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1)); assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("THIS IS A FISH")); //tokenizer assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword")); assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("THIS IS A FISH")); assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getStartOffset(), equalTo(0)); assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getEndOffset(), equalTo(15)); //tokenfilters assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase")); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("this is a fish")); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getPosition(), equalTo(0)); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getStartOffset(), equalTo(0)); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getEndOffset(), equalTo(15)); } } @Test public void testDetailAnalyzeWithNoIndex() throws Exception { //analyzer only AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST") .setExplain(true).setAnalyzer("simple").get(); assertThat(analyzeResponse.detail().tokenizer(), nullValue()); assertThat(analyzeResponse.detail().tokenfilters(), nullValue()); assertThat(analyzeResponse.detail().charfilters(), nullValue()); assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple")); assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(4)); } @Test public void testDetailAnalyzeCustomAnalyzerWithNoIndex() throws Exception { //analyzer only AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("THIS IS A TEST") .setExplain(true).setAnalyzer("simple").get(); assertThat(analyzeResponse.detail().tokenizer(), nullValue()); assertThat(analyzeResponse.detail().tokenfilters(), nullValue()); assertThat(analyzeResponse.detail().charfilters(), nullValue()); assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple")); assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(4)); //custom analyzer analyzeResponse = client().admin().indices().prepareAnalyze("<text>THIS IS A TEST</text>") .setExplain(true).setCharFilters("html_strip").setTokenizer("keyword").setTokenFilters("lowercase").get(); assertThat(analyzeResponse.detail().analyzer(), nullValue()); //charfilters // global charfilter is not change text. assertThat(analyzeResponse.detail().charfilters().length, equalTo(1)); assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("html_strip")); assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(1)); assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("\nTHIS IS A TEST\n")); //tokenizer assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("keyword")); assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenizer().getTokens()[0].getTerm(), equalTo("\nTHIS IS A TEST\n")); //tokenfilters assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("lowercase")); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[0].getTerm(), equalTo("\nthis is a test\n")); //check other attributes analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled") .setExplain(true).setTokenizer("standard").setTokenFilters("snowball").get(); assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball")); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3)); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl")); String[] expectedAttributesKey = { "bytes", "positionLength", "keyword"}; assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length)); Object extendedAttribute; for (String key : expectedAttributesKey) { extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key); assertThat(extendedAttribute, notNullValue()); } } @Test public void testDetailAnalyzeSpecifyAttributes() throws Exception { AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze("This is troubled") .setExplain(true).setTokenizer("standard").setTokenFilters("snowball").setAttributes("keyword").get(); assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(1)); assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball")); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(3)); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getTerm(), equalTo("troubl")); String[] expectedAttributesKey = { "keyword"}; assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().size(), equalTo(expectedAttributesKey.length)); Object extendedAttribute; for (String key : expectedAttributesKey) { extendedAttribute = analyzeResponse.detail().tokenfilters()[0].getTokens()[2].getAttributes().get(key); assertThat(extendedAttribute, notNullValue()); } } @Test public void testDetailAnalyzeWithMultiValues() throws Exception { assertAcked(prepareCreate("test").addAlias(new Alias("alias"))); ensureGreen(); client().admin().indices().preparePutMapping("test") .setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get(); String[] texts = new String[]{"THIS IS A TEST", "THE SECOND TEXT"}; AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts) .setExplain(true).setField("simple").setText(texts).execute().get(); assertThat(analyzeResponse.detail().analyzer().getName(), equalTo("simple")); assertThat(analyzeResponse.detail().analyzer().getTokens().length, equalTo(7)); AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().analyzer().getTokens()[3]; assertThat(token.getTerm(), equalTo("test")); assertThat(token.getPosition(), equalTo(3)); assertThat(token.getStartOffset(), equalTo(10)); assertThat(token.getEndOffset(), equalTo(14)); token = analyzeResponse.detail().analyzer().getTokens()[5]; assertThat(token.getTerm(), equalTo("second")); assertThat(token.getPosition(), equalTo(105)); assertThat(token.getStartOffset(), equalTo(19)); assertThat(token.getEndOffset(), equalTo(25)); } @Test public void testDetailAnalyzeWithMultiValuesWithCustomAnalyzer() throws Exception { assertAcked(prepareCreate("test").addAlias(new Alias("alias")) .setSettings( settingsBuilder() .put("index.analysis.char_filter.my_mapping.type", "mapping") .putArray("index.analysis.char_filter.my_mapping.mappings", "PH=>F") .put("index.analysis.analyzer.test_analyzer.type", "custom") .put("index.analysis.analyzer.test_analyzer.position_increment_gap", "100") .put("index.analysis.analyzer.test_analyzer.tokenizer", "standard") .putArray("index.analysis.analyzer.test_analyzer.char_filter", "my_mapping") .putArray("index.analysis.analyzer.test_analyzer.filter", "snowball", "lowercase"))); ensureGreen(); client().admin().indices().preparePutMapping("test") .setType("document").setSource("simple", "type=string,analyzer=simple,position_increment_gap=100").get(); //only analyzer = String[] texts = new String[]{"this is a PHISH", "the troubled text"}; AnalyzeResponse analyzeResponse = client().admin().indices().prepareAnalyze().setIndex(indexOrAlias()).setText(texts) .setExplain(true).setAnalyzer("test_analyzer").setText(texts).execute().get(); // charfilter assertThat(analyzeResponse.detail().charfilters().length, equalTo(1)); assertThat(analyzeResponse.detail().charfilters()[0].getName(), equalTo("my_mapping")); assertThat(analyzeResponse.detail().charfilters()[0].getTexts().length, equalTo(2)); assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[0], equalTo("this is a FISH")); assertThat(analyzeResponse.detail().charfilters()[0].getTexts()[1], equalTo("the troubled text")); // tokenizer assertThat(analyzeResponse.detail().tokenizer().getName(), equalTo("standard")); assertThat(analyzeResponse.detail().tokenizer().getTokens().length, equalTo(7)); AnalyzeResponse.AnalyzeToken token = analyzeResponse.detail().tokenizer().getTokens()[3]; assertThat(token.getTerm(), equalTo("FISH")); assertThat(token.getPosition(), equalTo(3)); assertThat(token.getStartOffset(), equalTo(10)); assertThat(token.getEndOffset(), equalTo(15)); token = analyzeResponse.detail().tokenizer().getTokens()[5]; assertThat(token.getTerm(), equalTo("troubled")); assertThat(token.getPosition(), equalTo(105)); assertThat(token.getStartOffset(), equalTo(20)); assertThat(token.getEndOffset(), equalTo(28)); // tokenfilter(snowball) assertThat(analyzeResponse.detail().tokenfilters().length, equalTo(2)); assertThat(analyzeResponse.detail().tokenfilters()[0].getName(), equalTo("snowball")); assertThat(analyzeResponse.detail().tokenfilters()[0].getTokens().length, equalTo(7)); token = analyzeResponse.detail().tokenfilters()[0].getTokens()[3]; assertThat(token.getTerm(), equalTo("FISH")); assertThat(token.getPosition(), equalTo(3)); assertThat(token.getStartOffset(), equalTo(10)); assertThat(token.getEndOffset(), equalTo(15)); token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5]; assertThat(token.getTerm(), equalTo("troubl")); assertThat(token.getPosition(), equalTo(105)); assertThat(token.getStartOffset(), equalTo(20)); assertThat(token.getEndOffset(), equalTo(28)); // tokenfilter(lowercase) assertThat(analyzeResponse.detail().tokenfilters()[1].getName(), equalTo("lowercase")); assertThat(analyzeResponse.detail().tokenfilters()[1].getTokens().length, equalTo(7)); token = analyzeResponse.detail().tokenfilters()[1].getTokens()[3]; assertThat(token.getTerm(), equalTo("fish")); assertThat(token.getPosition(), equalTo(3)); assertThat(token.getStartOffset(), equalTo(10)); assertThat(token.getEndOffset(), equalTo(15)); token = analyzeResponse.detail().tokenfilters()[0].getTokens()[5]; assertThat(token.getTerm(), equalTo("troubl")); assertThat(token.getPosition(), equalTo(105)); assertThat(token.getStartOffset(), equalTo(20)); assertThat(token.getEndOffset(), equalTo(28)); } public void testNonExistTokenizer() { try { AnalyzeResponse analyzeResponse = client().admin().indices() .prepareAnalyze("this is a test") .setAnalyzer("not_exist_analyzer") .get(); fail("shouldn't get here"); } catch (Throwable t) { assertThat(t, instanceOf(IllegalArgumentException.class)); assertThat(t.getMessage(), startsWith("failed to find analyzer")); } } }