/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.search.suggest; import com.google.common.base.Charsets; import com.google.common.io.Resources; import org.elasticsearch.ElasticsearchException; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.*; import org.elasticsearch.action.suggest.SuggestRequestBuilder; import org.elasticsearch.action.suggest.SuggestResponse; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.search.suggest.SuggestBuilder.SuggestionBuilder; import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder; import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder.DirectCandidateGenerator; import org.elasticsearch.search.suggest.term.TermSuggestionBuilder; import org.elasticsearch.test.ESIntegTestCase; import org.elasticsearch.test.hamcrest.ElasticsearchAssertions; import org.junit.Test; import java.io.IOException; import java.util.*; import java.util.concurrent.ExecutionException; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.common.settings.Settings.settingsBuilder; import static org.elasticsearch.index.query.QueryBuilders.matchQuery; import static org.elasticsearch.search.suggest.SuggestBuilders.phraseSuggestion; import static org.elasticsearch.search.suggest.SuggestBuilders.termSuggestion; import static org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder.candidateGenerator; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*; import static org.hamcrest.Matchers.*; /** * Integration tests for term and phrase suggestions. Many of these tests many requests that vary only slightly from one another. Where * possible these tests should declare for the first request, make the request, modify the configuration for the next request, make that * request, modify again, request again, etc. This makes it very obvious what changes between requests. */ public class SuggestSearchIT extends ESIntegTestCase { @Test // see #3196 public void testSuggestAcrossMultipleIndices() throws IOException { createIndex("test"); ensureGreen(); index("test", "type1", "1", "text", "abcd"); index("test", "type1", "2", "text", "aacd"); index("test", "type1", "3", "text", "abbd"); index("test", "type1", "4", "text", "abcc"); refresh(); TermSuggestionBuilder termSuggest = termSuggestion("test") .suggestMode("always") // Always, otherwise the results can vary between requests. .text("abcd") .field("text"); logger.info("--> run suggestions with one index"); searchSuggest( termSuggest); createIndex("test_1"); ensureGreen(); index("test_1", "type1", "1", "text", "ab cd"); index("test_1", "type1", "2", "text", "aa cd"); index("test_1", "type1", "3", "text", "ab bd"); index("test_1", "type1", "4", "text", "ab cc"); refresh(); termSuggest = termSuggestion("test") .suggestMode("always") // Always, otherwise the results can vary between requests. .text("ab cd") .minWordLength(1) .field("text"); logger.info("--> run suggestions with two indices"); searchSuggest( termSuggest); XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties") .startObject("text").field("type", "string").field("analyzer", "keyword").endObject() .endObject() .endObject().endObject(); assertAcked(prepareCreate("test_2").addMapping("type1", mapping)); ensureGreen(); index("test_2", "type1", "1", "text", "ab cd"); index("test_2", "type1", "2", "text", "aa cd"); index("test_2", "type1", "3", "text", "ab bd"); index("test_2", "type1", "4", "text", "ab cc"); index("test_2", "type1", "1", "text", "abcd"); index("test_2", "type1", "2", "text", "aacd"); index("test_2", "type1", "3", "text", "abbd"); index("test_2", "type1", "4", "text", "abcc"); refresh(); termSuggest = termSuggestion("test") .suggestMode("always") // Always, otherwise the results can vary between requests. .text("ab cd") .minWordLength(1) .field("text"); logger.info("--> run suggestions with three indices"); try { searchSuggest( termSuggest); fail(" can not suggest across multiple indices with different analysis chains"); } catch (ReduceSearchPhaseException ex) { assertThat(ex.getCause(), instanceOf(IllegalStateException.class)); assertThat(ex.getCause().getMessage(), anyOf(endsWith("Suggest entries have different sizes actual [1] expected [2]"), endsWith("Suggest entries have different sizes actual [2] expected [1]"))); } catch (IllegalStateException ex) { assertThat(ex.getMessage(), anyOf(endsWith("Suggest entries have different sizes actual [1] expected [2]"), endsWith("Suggest entries have different sizes actual [2] expected [1]"))); } termSuggest = termSuggestion("test") .suggestMode("always") // Always, otherwise the results can vary between requests. .text("ABCD") .minWordLength(1) .field("text"); logger.info("--> run suggestions with four indices"); try { searchSuggest( termSuggest); fail(" can not suggest across multiple indices with different analysis chains"); } catch (ReduceSearchPhaseException ex) { assertThat(ex.getCause(), instanceOf(IllegalStateException.class)); assertThat(ex.getCause().getMessage(), anyOf(endsWith("Suggest entries have different text actual [ABCD] expected [abcd]"), endsWith("Suggest entries have different text actual [abcd] expected [ABCD]"))); } catch (IllegalStateException ex) { assertThat(ex.getMessage(), anyOf(endsWith("Suggest entries have different text actual [ABCD] expected [abcd]"), endsWith("Suggest entries have different text actual [abcd] expected [ABCD]"))); } } @Test // see #3037 public void testSuggestModes() throws IOException { CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() .put(SETTING_NUMBER_OF_SHARDS, 1) .put(SETTING_NUMBER_OF_REPLICAS, 0) .put("index.analysis.analyzer.biword.tokenizer", "standard") .putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase") .put("index.analysis.filter.shingler.type", "shingle") .put("index.analysis.filter.shingler.min_shingle_size", 2) .put("index.analysis.filter.shingler.max_shingle_size", 3)); XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties") .startObject("name") .field("type", "multi_field") .startObject("fields") .startObject("name") .field("type", "string") .endObject() .startObject("shingled") .field("type", "string") .field("analyzer", "biword") .field("search_analyzer", "standard") .endObject() .endObject() .endObject() .endObject() .endObject().endObject(); assertAcked(builder.addMapping("type1", mapping)); ensureGreen(); index("test", "type1", "1", "name", "I like iced tea"); index("test", "type1", "2", "name", "I like tea."); index("test", "type1", "3", "name", "I like ice cream."); refresh(); DirectCandidateGenerator generator = candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2); PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("did_you_mean").field("name.shingled") .addCandidateGenerator(generator) .gramSize(3); Suggest searchSuggest = searchSuggest( "ice tea", phraseSuggestion); assertSuggestion(searchSuggest, 0, "did_you_mean", "iced tea"); generator.suggestMode(null); searchSuggest = searchSuggest( "ice tea", phraseSuggestion); assertSuggestionSize(searchSuggest, 0, 0, "did_you_mean"); } @Test // see #2729 public void testSizeOneShard() throws Exception { prepareCreate("test").setSettings( SETTING_NUMBER_OF_SHARDS, 1, SETTING_NUMBER_OF_REPLICAS, 0).get(); ensureGreen(); for (int i = 0; i < 15; i++) { index("test", "type1", Integer.toString(i), "text", "abc" + i); } refresh(); SearchResponse search = client().prepareSearch().setQuery(matchQuery("text", "spellchecker")).get(); assertThat("didn't ask for suggestions but got some", search.getSuggest(), nullValue()); TermSuggestionBuilder termSuggestion = termSuggestion("test") .suggestMode("always") // Always, otherwise the results can vary between requests. .text("abcd") .field("text") .size(10); Suggest suggest = searchSuggest( termSuggestion); assertSuggestion(suggest, 0, "test", 10, "abc0"); termSuggestion.text("abcd").shardSize(5); suggest = searchSuggest( termSuggestion); assertSuggestion(suggest, 0, "test", 5, "abc0"); } @Test public void testUnmappedField() throws IOException, InterruptedException, ExecutionException { CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() .put(indexSettings()) .put("index.analysis.analyzer.biword.tokenizer", "standard") .putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase") .put("index.analysis.filter.shingler.type", "shingle") .put("index.analysis.filter.shingler.min_shingle_size", 2) .put("index.analysis.filter.shingler.max_shingle_size", 3)); XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("properties") .startObject("name") .field("type", "multi_field") .startObject("fields") .startObject("name") .field("type", "string") .endObject() .startObject("shingled") .field("type", "string") .field("analyzer", "biword") .field("search_analyzer", "standard") .endObject() .endObject() .endObject() .endObject() .endObject().endObject(); assertAcked(builder.addMapping("type1", mapping)); ensureGreen(); indexRandom(true, client().prepareIndex("test", "type1").setSource("name", "I like iced tea"), client().prepareIndex("test", "type1").setSource("name", "I like tea."), client().prepareIndex("test", "type1").setSource("name", "I like ice cream.")); refresh(); PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("did_you_mean").field("name.shingled") .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2)) .gramSize(3); Suggest searchSuggest = searchSuggest( "ice tea", phraseSuggestion); assertSuggestion(searchSuggest, 0, 0, "did_you_mean", "iced tea"); phraseSuggestion.field("nosuchField"); { SearchRequestBuilder suggestBuilder = client().prepareSearch().setSize(0); suggestBuilder.setSuggestText("tetsting sugestion"); suggestBuilder.addSuggestion(phraseSuggestion); assertThrows(suggestBuilder, SearchPhaseExecutionException.class); } { SearchRequestBuilder suggestBuilder = client().prepareSearch().setSize(0); suggestBuilder.setSuggestText("tetsting sugestion"); suggestBuilder.addSuggestion(phraseSuggestion); assertThrows(suggestBuilder, SearchPhaseExecutionException.class); } } @Test public void testSimple() throws Exception { createIndex("test"); ensureGreen(); index("test", "type1", "1", "text", "abcd"); index("test", "type1", "2", "text", "aacd"); index("test", "type1", "3", "text", "abbd"); index("test", "type1", "4", "text", "abcc"); refresh(); SearchResponse search = client().prepareSearch().setQuery(matchQuery("text", "spellcecker")).get(); assertThat("didn't ask for suggestions but got some", search.getSuggest(), nullValue()); TermSuggestionBuilder termSuggest = termSuggestion("test") .suggestMode("always") // Always, otherwise the results can vary between requests. .text("abcd") .field("text"); Suggest suggest = searchSuggest( termSuggest); assertSuggestion(suggest, 0, "test", "aacd", "abbd", "abcc"); assertThat(suggest.getSuggestion("test").getEntries().get(0).getText().string(), equalTo("abcd")); suggest = searchSuggest( termSuggest); assertSuggestion(suggest, 0, "test", "aacd","abbd", "abcc"); assertThat(suggest.getSuggestion("test").getEntries().get(0).getText().string(), equalTo("abcd")); } @Test public void testEmpty() throws Exception { createIndex("test"); ensureGreen(); index("test", "type1", "1", "foo", "bar"); refresh(); TermSuggestionBuilder termSuggest = termSuggestion("test") .suggestMode("always") // Always, otherwise the results can vary between requests. .text("abcd") .field("text"); Suggest suggest = searchSuggest( termSuggest); assertSuggestionSize(suggest, 0, 0, "test"); assertThat(suggest.getSuggestion("test").getEntries().get(0).getText().string(), equalTo("abcd")); suggest = searchSuggest( termSuggest); assertSuggestionSize(suggest, 0, 0, "test"); assertThat(suggest.getSuggestion("test").getEntries().get(0).getText().string(), equalTo("abcd")); } @Test public void testWithMultipleCommands() throws Exception { createIndex("test"); ensureGreen(); index("test", "typ1", "1", "field1", "prefix_abcd", "field2", "prefix_efgh"); index("test", "typ1", "2", "field1", "prefix_aacd", "field2", "prefix_eeeh"); index("test", "typ1", "3", "field1", "prefix_abbd", "field2", "prefix_efff"); index("test", "typ1", "4", "field1", "prefix_abcc", "field2", "prefix_eggg"); refresh(); Suggest suggest = searchSuggest( termSuggestion("size1") .size(1).text("prefix_abcd").maxTermFreq(10).prefixLength(1).minDocFreq(0) .field("field1").suggestMode("always"), termSuggestion("field2") .field("field2").text("prefix_eeeh prefix_efgh") .maxTermFreq(10).minDocFreq(0).suggestMode("always"), termSuggestion("accuracy") .field("field2").text("prefix_efgh").setAccuracy(1f) .maxTermFreq(10).minDocFreq(0).suggestMode("always")); assertSuggestion(suggest, 0, "size1", "prefix_aacd"); assertThat(suggest.getSuggestion("field2").getEntries().get(0).getText().string(), equalTo("prefix_eeeh")); assertSuggestion(suggest, 0, "field2", "prefix_efgh"); assertThat(suggest.getSuggestion("field2").getEntries().get(1).getText().string(), equalTo("prefix_efgh")); assertSuggestion(suggest, 1, "field2", "prefix_eeeh", "prefix_efff", "prefix_eggg"); assertSuggestionSize(suggest, 0, 0, "accuracy"); } @Test public void testSizeAndSort() throws Exception { createIndex("test"); ensureGreen(); Map<String, Integer> termsAndDocCount = new HashMap<>(); termsAndDocCount.put("prefix_aaad", 20); termsAndDocCount.put("prefix_abbb", 18); termsAndDocCount.put("prefix_aaca", 16); termsAndDocCount.put("prefix_abba", 14); termsAndDocCount.put("prefix_accc", 12); termsAndDocCount.put("prefix_addd", 10); termsAndDocCount.put("prefix_abaa", 8); termsAndDocCount.put("prefix_dbca", 6); termsAndDocCount.put("prefix_cbad", 4); termsAndDocCount.put("prefix_aacd", 1); termsAndDocCount.put("prefix_abcc", 1); termsAndDocCount.put("prefix_accd", 1); for (Map.Entry<String, Integer> entry : termsAndDocCount.entrySet()) { for (int i = 0; i < entry.getValue(); i++) { index("test", "type1", entry.getKey() + i, "field1", entry.getKey()); } } refresh(); Suggest suggest = searchSuggest( "prefix_abcd", termSuggestion("size3SortScoreFirst") .size(3).minDocFreq(0).field("field1").suggestMode("always"), termSuggestion("size10SortScoreFirst") .size(10).minDocFreq(0).field("field1").suggestMode("always").shardSize(50), termSuggestion("size3SortScoreFirstMaxEdits1") .maxEdits(1) .size(10).minDocFreq(0).field("field1").suggestMode("always"), termSuggestion("size10SortFrequencyFirst") .size(10).sort("frequency").shardSize(1000) .minDocFreq(0).field("field1").suggestMode("always")); // The commented out assertions fail sometimes because suggestions are based off of shard frequencies instead of index frequencies. assertSuggestion(suggest, 0, "size3SortScoreFirst", "prefix_aacd", "prefix_abcc", "prefix_accd"); assertSuggestion(suggest, 0, "size10SortScoreFirst", 10, "prefix_aacd", "prefix_abcc", "prefix_accd" /*, "prefix_aaad" */); assertSuggestion(suggest, 0, "size3SortScoreFirstMaxEdits1", "prefix_aacd", "prefix_abcc", "prefix_accd"); assertSuggestion(suggest, 0, "size10SortFrequencyFirst", "prefix_aaad", "prefix_abbb", "prefix_aaca", "prefix_abba", "prefix_accc", "prefix_addd", "prefix_abaa", "prefix_dbca", "prefix_cbad", "prefix_aacd"); // assertThat(suggest.get(3).getSuggestedWords().get("prefix_abcd").get(4).getTerm(), equalTo("prefix_abcc")); // assertThat(suggest.get(3).getSuggestedWords().get("prefix_abcd").get(4).getTerm(), equalTo("prefix_accd")); } @Test // see #2817 public void testStopwordsOnlyPhraseSuggest() throws IOException { assertAcked(prepareCreate("test").addMapping("typ1", "body", "type=string,analyzer=stopwd").setSettings( settingsBuilder() .put("index.analysis.analyzer.stopwd.tokenizer", "whitespace") .putArray("index.analysis.analyzer.stopwd.filter", "stop") )); ensureGreen(); index("test", "typ1", "1", "body", "this is a test"); refresh(); Suggest searchSuggest = searchSuggest( "a an the", phraseSuggestion("simple_phrase").field("body").gramSize(1) .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always")) .size(1)); assertSuggestionSize(searchSuggest, 0, 0, "simple_phrase"); } @Test public void testPrefixLength() throws IOException { // Stopped here CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() .put(SETTING_NUMBER_OF_SHARDS, 1) .put("index.analysis.analyzer.reverse.tokenizer", "standard") .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse") .put("index.analysis.analyzer.body.tokenizer", "standard") .putArray("index.analysis.analyzer.body.filter", "lowercase") .put("index.analysis.analyzer.bigram.tokenizer", "standard") .putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase") .put("index.analysis.filter.my_shingle.type", "shingle") .put("index.analysis.filter.my_shingle.output_unigrams", false) .put("index.analysis.filter.my_shingle.min_shingle_size", 2) .put("index.analysis.filter.my_shingle.max_shingle_size", 2)); XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("_all").field("store", "yes").field("termVector", "with_positions_offsets").endObject() .startObject("properties") .startObject("body").field("type", "string").field("analyzer", "body").endObject() .startObject("body_reverse").field("type", "string").field("analyzer", "reverse").endObject() .startObject("bigram").field("type", "string").field("analyzer", "bigram").endObject() .endObject() .endObject().endObject(); assertAcked(builder.addMapping("type1", mapping)); ensureGreen(); index("test", "type1", "1", "body", "hello world"); index("test", "type1", "2", "body", "hello world"); index("test", "type1", "3", "body", "hello words"); refresh(); Suggest searchSuggest = searchSuggest( "hello word", phraseSuggestion("simple_phrase").field("body") .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").prefixLength(4).minWordLength(1).suggestMode("always")) .size(1).confidence(1.0f)); assertSuggestion(searchSuggest, 0, "simple_phrase", "hello words"); searchSuggest = searchSuggest( "hello word", phraseSuggestion("simple_phrase").field("body") .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").prefixLength(2).minWordLength(1).suggestMode("always")) .size(1).confidence(1.0f)); assertSuggestion(searchSuggest, 0, "simple_phrase", "hello world"); } @Test @Nightly public void testMarvelHerosPhraseSuggest() throws IOException { CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() .put(indexSettings()) .put("index.analysis.analyzer.reverse.tokenizer", "standard") .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse") .put("index.analysis.analyzer.body.tokenizer", "standard") .putArray("index.analysis.analyzer.body.filter", "lowercase") .put("index.analysis.analyzer.bigram.tokenizer", "standard") .putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase") .put("index.analysis.filter.my_shingle.type", "shingle") .put("index.analysis.filter.my_shingle.output_unigrams", false) .put("index.analysis.filter.my_shingle.min_shingle_size", 2) .put("index.analysis.filter.my_shingle.max_shingle_size", 2)); XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1") .startObject("_all") .field("store", "yes") .field("termVector", "with_positions_offsets") .endObject() .startObject("properties") .startObject("body"). field("type", "string"). field("analyzer", "body") .endObject() .startObject("body_reverse"). field("type", "string"). field("analyzer", "reverse") .endObject() .startObject("bigram"). field("type", "string"). field("analyzer", "bigram") .endObject() .endObject() .endObject().endObject(); assertAcked(builder.addMapping("type1", mapping)); ensureGreen(); for (String line: Resources.readLines(SuggestSearchIT.class.getResource("/config/names.txt"), Charsets.UTF_8)) { index("test", "type1", line, "body", line, "body_reverse", line, "bigram", line); } refresh(); PhraseSuggestionBuilder phraseSuggest = phraseSuggestion("simple_phrase") .field("bigram").gramSize(2).analyzer("body") .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always")) .size(1); Suggest searchSuggest = searchSuggest( "american ame", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "american ace"); assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("american ame")); phraseSuggest.realWordErrorLikelihood(0.95f); searchSuggest = searchSuggest( "Xor the Got-Jewel", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel"); // Check the "text" field this one time. assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xor the Got-Jewel")); // Ask for highlighting phraseSuggest.highlight("<em>", "</em>"); searchSuggest = searchSuggest( "Xor the Got-Jewel", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel"); assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getHighlighted().string(), equalTo("<em>xorr</em> the <em>god</em> jewel")); // pass in a correct phrase phraseSuggest.highlight(null, null).confidence(0f).size(1).maxErrors(0.5f); searchSuggest = searchSuggest( "Xorr the God-Jewel", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel"); // pass in a correct phrase - set confidence to 2 phraseSuggest.confidence(2f); searchSuggest = searchSuggest( "Xorr the God-Jewel", phraseSuggest); assertSuggestionSize(searchSuggest, 0, 0, "simple_phrase"); // pass in a correct phrase - set confidence to 0.99 phraseSuggest.confidence(0.99f); searchSuggest = searchSuggest( "Xorr the God-Jewel", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel"); //test reverse suggestions with pre & post filter phraseSuggest .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always")) .addCandidateGenerator(candidateGenerator("body_reverse").minWordLength(1).suggestMode("always").preFilter("reverse").postFilter("reverse")); searchSuggest = searchSuggest( "xor the yod-Jewel", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel"); // set all mass to trigrams (not indexed) phraseSuggest.clearCandidateGenerators() .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always")) .smoothingModel(new PhraseSuggestionBuilder.LinearInterpolation(1,0,0)); searchSuggest = searchSuggest( "Xor the Got-Jewel", phraseSuggest); assertSuggestionSize(searchSuggest, 0, 0, "simple_phrase"); // set all mass to bigrams phraseSuggest.smoothingModel(new PhraseSuggestionBuilder.LinearInterpolation(0,1,0)); searchSuggest = searchSuggest( "Xor the Got-Jewel", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel"); // distribute mass phraseSuggest.smoothingModel(new PhraseSuggestionBuilder.LinearInterpolation(0.4,0.4,0.2)); searchSuggest = searchSuggest( "Xor the Got-Jewel", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel"); searchSuggest = searchSuggest( "american ame", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "american ace"); // try all smoothing methods phraseSuggest.smoothingModel(new PhraseSuggestionBuilder.LinearInterpolation(0.4,0.4,0.2)); searchSuggest = searchSuggest( "Xor the Got-Jewel", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel"); phraseSuggest.smoothingModel(new PhraseSuggestionBuilder.Laplace(0.2)); searchSuggest = searchSuggest( "Xor the Got-Jewel", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel"); phraseSuggest.smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(0.1)); searchSuggest = searchSuggest( "Xor the Got-Jewel", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel"); // check tokenLimit phraseSuggest.smoothingModel(null).tokenLimit(4); searchSuggest = searchSuggest( "Xor the Got-Jewel", phraseSuggest); assertSuggestionSize(searchSuggest, 0, 0, "simple_phrase"); phraseSuggest.tokenLimit(15).smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(0.1)); searchSuggest = searchSuggest( "Xor the Got-Jewel Xor the Got-Jewel Xor the Got-Jewel", phraseSuggest); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel xorr the god jewel xorr the god jewel"); // Check the name this time because we're repeating it which is funky assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Xor the Got-Jewel Xor the Got-Jewel Xor the Got-Jewel")); } @Test public void testSizePararm() throws IOException { CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() .put(SETTING_NUMBER_OF_SHARDS, 1) .put("index.analysis.analyzer.reverse.tokenizer", "standard") .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse") .put("index.analysis.analyzer.body.tokenizer", "standard") .putArray("index.analysis.analyzer.body.filter", "lowercase") .put("index.analysis.analyzer.bigram.tokenizer", "standard") .putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase") .put("index.analysis.filter.my_shingle.type", "shingle") .put("index.analysis.filter.my_shingle.output_unigrams", false) .put("index.analysis.filter.my_shingle.min_shingle_size", 2) .put("index.analysis.filter.my_shingle.max_shingle_size", 2)); XContentBuilder mapping = XContentFactory.jsonBuilder() .startObject() .startObject("type1") .startObject("_all") .field("store", "yes") .field("termVector", "with_positions_offsets") .endObject() .startObject("properties") .startObject("body") .field("type", "string") .field("analyzer", "body") .endObject() .startObject("body_reverse") .field("type", "string") .field("analyzer", "reverse") .endObject() .startObject("bigram") .field("type", "string") .field("analyzer", "bigram") .endObject() .endObject() .endObject() .endObject(); assertAcked(builder.addMapping("type1", mapping)); ensureGreen(); String line = "xorr the god jewel"; index("test", "type1", "1", "body", line, "body_reverse", line, "bigram", line); line = "I got it this time"; index("test", "type1", "2", "body", line, "body_reverse", line, "bigram", line); refresh(); PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("simple_phrase") .realWordErrorLikelihood(0.95f) .field("bigram") .gramSize(2) .analyzer("body") .addCandidateGenerator(candidateGenerator("body").minWordLength(1).prefixLength(1).suggestMode("always").size(1).accuracy(0.1f)) .smoothingModel(new PhraseSuggestionBuilder.StupidBackoff(0.1)) .maxErrors(1.0f) .size(5); Suggest searchSuggest = searchSuggest( "Xorr the Gut-Jewel", phraseSuggestion); assertSuggestionSize(searchSuggest, 0, 0, "simple_phrase"); // we allow a size of 2 now on the shard generator level so "god" will be found since it's LD2 phraseSuggestion.clearCandidateGenerators() .addCandidateGenerator(candidateGenerator("body").minWordLength(1).prefixLength(1).suggestMode("always").size(2).accuracy(0.1f)); searchSuggest = searchSuggest( "Xorr the Gut-Jewel", phraseSuggestion); assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel"); } @Test @Nightly public void testPhraseBoundaryCases() throws IOException { CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() .put(indexSettings()).put(SETTING_NUMBER_OF_SHARDS, 1) // to get reliable statistics we should put this all into one shard .put("index.analysis.analyzer.body.tokenizer", "standard") .putArray("index.analysis.analyzer.body.filter", "lowercase") .put("index.analysis.analyzer.bigram.tokenizer", "standard") .putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase") .put("index.analysis.analyzer.ngram.tokenizer", "standard") .putArray("index.analysis.analyzer.ngram.filter", "my_shingle2", "lowercase") .put("index.analysis.analyzer.myDefAnalyzer.tokenizer", "standard") .putArray("index.analysis.analyzer.myDefAnalyzer.filter", "shingle", "lowercase") .put("index.analysis.filter.my_shingle.type", "shingle") .put("index.analysis.filter.my_shingle.output_unigrams", false) .put("index.analysis.filter.my_shingle.min_shingle_size", 2) .put("index.analysis.filter.my_shingle.max_shingle_size", 2) .put("index.analysis.filter.my_shingle2.type", "shingle") .put("index.analysis.filter.my_shingle2.output_unigrams", true) .put("index.analysis.filter.my_shingle2.min_shingle_size", 2) .put("index.analysis.filter.my_shingle2.max_shingle_size", 2)); XContentBuilder mapping = XContentFactory.jsonBuilder() .startObject().startObject("type1") .startObject("_all").field("store", "yes").field("termVector", "with_positions_offsets").endObject() .startObject("properties") .startObject("body").field("type", "string").field("analyzer", "body").endObject() .startObject("bigram").field("type", "string").field("analyzer", "bigram").endObject() .startObject("ngram").field("type", "string").field("analyzer", "ngram").endObject() .endObject() .endObject().endObject(); assertAcked(builder.addMapping("type1", mapping)); ensureGreen(); for (String line: Resources.readLines(SuggestSearchIT.class.getResource("/config/names.txt"), Charsets.UTF_8)) { index("test", "type1", line, "body", line, "bigram", line, "ngram", line); } refresh(); NumShards numShards = getNumShards("test"); // Lets make sure some things throw exceptions PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("simple_phrase") .field("bigram") .analyzer("body") .addCandidateGenerator(candidateGenerator("does_not_exist").minWordLength(1).suggestMode("always")) .realWordErrorLikelihood(0.95f) .maxErrors(0.5f) .size(1); try { searchSuggest( "Xor the Got-Jewel", numShards.numPrimaries, phraseSuggestion); fail("field does not exists"); } catch (SearchPhaseExecutionException e) {} phraseSuggestion.clearCandidateGenerators().analyzer(null); try { searchSuggest( "Xor the Got-Jewel", numShards.numPrimaries, phraseSuggestion); fail("analyzer does only produce ngrams"); } catch (SearchPhaseExecutionException e) { } phraseSuggestion.analyzer("bigram"); try { searchSuggest( "Xor the Got-Jewel", numShards.numPrimaries, phraseSuggestion); fail("analyzer does only produce ngrams"); } catch (SearchPhaseExecutionException e) { } // Now we'll make sure some things don't phraseSuggestion.forceUnigrams(false); searchSuggest( "Xor the Got-Jewel", phraseSuggestion); // Field doesn't produce unigrams but the analyzer does phraseSuggestion.forceUnigrams(true).field("bigram").analyzer("ngram"); searchSuggest( "Xor the Got-Jewel", phraseSuggestion); phraseSuggestion.field("ngram").analyzer("myDefAnalyzer") .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always")); Suggest suggest = searchSuggest( "Xor the Got-Jewel", phraseSuggestion); // "xorr the god jewel" and and "xorn the god jewel" have identical scores (we are only using unigrams to score), so we tie break by // earlier term (xorn): assertSuggestion(suggest, 0, "simple_phrase", "xorn the god jewel"); phraseSuggestion.analyzer(null); suggest = searchSuggest( "Xor the Got-Jewel", phraseSuggestion); // In this case xorr has a better score than xorn because we set the field back to the default (my_shingle2) analyzer, so the // probability that the term is not in the dictionary but is NOT a misspelling is relatively high in this case compared to the // others that have no n-gram with the other terms in the phrase :) you can set this realWorldErrorLikelyhood assertSuggestion(suggest, 0, "simple_phrase", "xorr the god jewel"); } @Test public void testDifferentShardSize() throws Exception { createIndex("test"); ensureGreen(); indexRandom(true, client().prepareIndex("test", "type1", "1").setSource("field1", "foobar1").setRouting("1"), client().prepareIndex("test", "type1", "2").setSource("field1", "foobar2").setRouting("2"), client().prepareIndex("test", "type1", "3").setSource("field1", "foobar3").setRouting("3")); Suggest suggest = searchSuggest( "foobar", termSuggestion("simple") .size(10).minDocFreq(0).field("field1").suggestMode("always")); ElasticsearchAssertions.assertSuggestionSize(suggest, 0, 3, "simple"); } @Test // see #3469 public void testShardFailures() throws IOException, InterruptedException { CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() .put(indexSettings()) .put("index.analysis.analyzer.suggest.tokenizer", "standard") .putArray("index.analysis.analyzer.suggest.filter", "standard", "lowercase", "shingler") .put("index.analysis.filter.shingler.type", "shingle") .put("index.analysis.filter.shingler.min_shingle_size", 2) .put("index.analysis.filter.shingler.max_shingle_size", 5) .put("index.analysis.filter.shingler.output_unigrams", true)); XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type2") .startObject("properties") .startObject("name") .field("type", "multi_field") .startObject("fields") .startObject("name") .field("type", "string") .field("analyzer", "suggest") .endObject() .endObject() .endObject() .endObject() .endObject().endObject(); assertAcked(builder.addMapping("type2", mapping)); ensureGreen(); index("test", "type2", "1", "foo", "bar"); index("test", "type2", "2", "foo", "bar"); index("test", "type2", "3", "foo", "bar"); index("test", "type2", "4", "foo", "bar"); index("test", "type2", "5", "foo", "bar"); index("test", "type2", "1", "name", "Just testing the suggestions api"); index("test", "type2", "2", "name", "An other title about equal length"); // Note that the last document has to have about the same length as the other or cutoff rechecking will remove the useful suggestion. refresh(); // When searching on a shard with a non existing mapping, we should fail SearchRequestBuilder request = client().prepareSearch().setSize(0) .setSuggestText("tetsting sugestion") .addSuggestion(phraseSuggestion("did_you_mean").field("fielddoesnotexist").maxErrors(5.0f)); assertThrows(request, SearchPhaseExecutionException.class); // When searching on a shard which does not hold yet any document of an existing type, we should not fail SearchResponse searchResponse = client().prepareSearch().setSize(0) .setSuggestText("tetsting sugestion") .addSuggestion(phraseSuggestion("did_you_mean").field("name").maxErrors(5.0f)) .get(); ElasticsearchAssertions.assertNoFailures(searchResponse); ElasticsearchAssertions.assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions"); } @Test // see #3469 public void testEmptyShards() throws IOException, InterruptedException { XContentBuilder mappingBuilder = XContentFactory.jsonBuilder(). startObject(). startObject("type1"). startObject("properties"). startObject("name"). field("type", "multi_field"). startObject("fields"). startObject("name"). field("type", "string"). field("analyzer", "suggest"). endObject(). endObject(). endObject(). endObject(). endObject(). endObject(); assertAcked(prepareCreate("test").setSettings(settingsBuilder() .put(indexSettings()) .put("index.analysis.analyzer.suggest.tokenizer", "standard") .putArray("index.analysis.analyzer.suggest.filter", "standard", "lowercase", "shingler") .put("index.analysis.filter.shingler.type", "shingle") .put("index.analysis.filter.shingler.min_shingle_size", 2) .put("index.analysis.filter.shingler.max_shingle_size", 5) .put("index.analysis.filter.shingler.output_unigrams", true)).addMapping("type1", mappingBuilder)); ensureGreen(); index("test", "type2", "1", "foo", "bar"); index("test", "type2", "2", "foo", "bar"); index("test", "type1", "1", "name", "Just testing the suggestions api"); index("test", "type1", "2", "name", "An other title about equal length"); refresh(); SearchResponse searchResponse = client().prepareSearch() .setSize(0) .setSuggestText("tetsting sugestion") .addSuggestion(phraseSuggestion("did_you_mean").field("name").maxErrors(5.0f)) .get(); assertNoFailures(searchResponse); assertSuggestion(searchResponse.getSuggest(), 0, 0, "did_you_mean", "testing suggestions"); } /** * Searching for a rare phrase shouldn't provide any suggestions if confidence > 1. This was possible before we rechecked the cutoff * score during the reduce phase. Failures don't occur every time - maybe two out of five tries but we don't repeat it to save time. */ @Test public void testSearchForRarePhrase() throws IOException { // If there isn't enough chaf per shard then shards can become unbalanced, making the cutoff recheck this is testing do more harm then good. int chafPerShard = 100; CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() .put(indexSettings()) .put("index.analysis.analyzer.body.tokenizer", "standard") .putArray("index.analysis.analyzer.body.filter", "lowercase", "my_shingle") .put("index.analysis.filter.my_shingle.type", "shingle") .put("index.analysis.filter.my_shingle.output_unigrams", true) .put("index.analysis.filter.my_shingle.min_shingle_size", 2) .put("index.analysis.filter.my_shingle.max_shingle_size", 2)); XContentBuilder mapping = XContentFactory.jsonBuilder() .startObject() .startObject("type1") .startObject("_all") .field("store", "yes") .field("termVector", "with_positions_offsets") .endObject() .startObject("properties") .startObject("body") .field("type", "string") .field("analyzer", "body") .endObject() .endObject() .endObject() .endObject(); assertAcked(builder.addMapping("type1", mapping)); ensureGreen(); NumShards test = getNumShards("test"); List<String> phrases = new ArrayList<>(); Collections.addAll(phrases, "nobel prize", "noble gases", "somethingelse prize", "pride and joy", "notes are fun"); for (int i = 0; i < 8; i++) { phrases.add("noble somethingelse" + i); } for (int i = 0; i < test.numPrimaries * chafPerShard; i++) { phrases.add("chaff" + i); } for (String phrase: phrases) { index("test", "type1", phrase, "body", phrase); } refresh(); Suggest searchSuggest = searchSuggest("nobel prize", phraseSuggestion("simple_phrase") .field("body") .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always").maxTermFreq(.99f)) .confidence(2f) .maxErrors(5f) .size(1)); assertSuggestionSize(searchSuggest, 0, 0, "simple_phrase"); searchSuggest = searchSuggest("noble prize", phraseSuggestion("simple_phrase") .field("body") .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("body").minWordLength(1).suggestMode("always").maxTermFreq(.99f)) .confidence(2f) .maxErrors(5f) .size(1)); assertSuggestion(searchSuggest, 0, 0, "simple_phrase", "nobel prize"); } /** * If the suggester finds tons of options then picking the right one is slow without <<<INSERT SOLUTION HERE>>>. */ @Test @Nightly public void suggestWithManyCandidates() throws InterruptedException, ExecutionException, IOException { CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() .put(indexSettings()) .put(SETTING_NUMBER_OF_SHARDS, 1) // A single shard will help to keep the tests repeatable. .put("index.analysis.analyzer.text.tokenizer", "standard") .putArray("index.analysis.analyzer.text.filter", "lowercase", "my_shingle") .put("index.analysis.filter.my_shingle.type", "shingle") .put("index.analysis.filter.my_shingle.output_unigrams", true) .put("index.analysis.filter.my_shingle.min_shingle_size", 2) .put("index.analysis.filter.my_shingle.max_shingle_size", 3)); XContentBuilder mapping = XContentFactory.jsonBuilder() .startObject() .startObject("type1") .startObject("properties") .startObject("title") .field("type", "string") .field("analyzer", "text") .endObject() .endObject() .endObject() .endObject(); assertAcked(builder.addMapping("type1", mapping)); ensureGreen(); List<String> titles = new ArrayList<>(); // We're going to be searching for: // united states house of representatives elections in washington 2006 // But we need to make sure we generate a ton of suggestions so we add a bunch of candidates. // Many of these candidates are drawn from page names on English Wikipedia. // Tons of different options very near the exact query term titles.add("United States House of Representatives Elections in Washington 1789"); for (int year = 1790; year < 2014; year+= 2) { titles.add("United States House of Representatives Elections in Washington " + year); } // Six of these are near enough to be viable suggestions, just not the top one // But we can't stop there! Titles that are just a year are pretty common so lets just add one per year // since 0. Why not? for (int year = 0; year < 2015; year++) { titles.add(Integer.toString(year)); } // That ought to provide more less good candidates for the last term // Now remove or add plural copies of every term we can titles.add("State"); titles.add("Houses of Parliament"); titles.add("Representative Government"); titles.add("Election"); // Now some possessive titles.add("Washington's Birthday"); // And some conjugation titles.add("Unified Modeling Language"); titles.add("Unite Against Fascism"); titles.add("Stated Income Tax"); titles.add("Media organizations housed within colleges"); // And other stuff titles.add("Untied shoelaces"); titles.add("Unit circle"); titles.add("Untitled"); titles.add("Unicef"); titles.add("Unrated"); titles.add("UniRed"); titles.add("Jalan Uniten–Dengkil"); // Highway in Malaysia titles.add("UNITAS"); titles.add("UNITER"); titles.add("Un-Led-Ed"); titles.add("STATS LLC"); titles.add("Staples"); titles.add("Skates"); titles.add("Statues of the Liberators"); titles.add("Staten Island"); titles.add("Statens Museum for Kunst"); titles.add("Hause"); // The last name or the German word, whichever. titles.add("Hose"); titles.add("Hoses"); titles.add("Howse Peak"); titles.add("The Hoose-Gow"); titles.add("Hooser"); titles.add("Electron"); titles.add("Electors"); titles.add("Evictions"); titles.add("Coronal mass ejection"); titles.add("Wasington"); // A film? titles.add("Warrington"); // A town in England titles.add("Waddington"); // Lots of places have this name titles.add("Watlington"); // Ditto titles.add("Waplington"); // Yup, also a town titles.add("Washing of the Spears"); // Book for (char c = 'A'; c <= 'Z'; c++) { // Can't forget lists, glorious lists! titles.add("List of former members of the United States House of Representatives (" + c + ")"); // Lots of people are named Washington <Middle Initial>. LastName titles.add("Washington " + c + ". Lastname"); // Lets just add some more to be evil titles.add("United " + c); titles.add("States " + c); titles.add("House " + c); titles.add("Elections " + c); titles.add("2006 " + c); titles.add(c + " United"); titles.add(c + " States"); titles.add(c + " House"); titles.add(c + " Elections"); titles.add(c + " 2006"); } List<IndexRequestBuilder> builders = new ArrayList<>(); for (String title: titles) { builders.add(client().prepareIndex("test", "type1").setSource("title", title)); } indexRandom(true, builders); PhraseSuggestionBuilder suggest = phraseSuggestion("title") .field("title") .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("title") .suggestMode("always") .maxTermFreq(.99f) .size(1000) // Setting a silly high size helps of generate a larger list of candidates for testing. .maxInspections(1000) // This too ) .confidence(0f) .maxErrors(2f) .shardSize(30000) .size(30000); Suggest searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", suggest); assertSuggestion(searchSuggest, 0, 0, "title", "united states house of representatives elections in washington 2006"); assertSuggestionSize(searchSuggest, 0, 25480, "title"); // Just to prove that we've run through a ton of options suggest.size(1); long start = System.currentTimeMillis(); searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", suggest); long total = System.currentTimeMillis() - start; assertSuggestion(searchSuggest, 0, 0, "title", "united states house of representatives elections in washington 2006"); // assertThat(total, lessThan(1000L)); // Takes many seconds without fix - just for debugging } @Test public void testPhraseSuggesterCollate() throws InterruptedException, ExecutionException, IOException { CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(settingsBuilder() .put(indexSettings()) .put(SETTING_NUMBER_OF_SHARDS, 1) // A single shard will help to keep the tests repeatable. .put("index.analysis.analyzer.text.tokenizer", "standard") .putArray("index.analysis.analyzer.text.filter", "lowercase", "my_shingle") .put("index.analysis.filter.my_shingle.type", "shingle") .put("index.analysis.filter.my_shingle.output_unigrams", true) .put("index.analysis.filter.my_shingle.min_shingle_size", 2) .put("index.analysis.filter.my_shingle.max_shingle_size", 3)); XContentBuilder mapping = XContentFactory.jsonBuilder() .startObject() .startObject("type1") .startObject("properties") .startObject("title") .field("type", "string") .field("analyzer", "text") .endObject() .endObject() .endObject() .endObject(); assertAcked(builder.addMapping("type1", mapping)); ensureGreen(); List<String> titles = new ArrayList<>(); titles.add("United States House of Representatives Elections in Washington 2006"); titles.add("United States House of Representatives Elections in Washington 2005"); titles.add("State"); titles.add("Houses of Parliament"); titles.add("Representative Government"); titles.add("Election"); List<IndexRequestBuilder> builders = new ArrayList<>(); for (String title: titles) { builders.add(client().prepareIndex("test", "type1").setSource("title", title)); } indexRandom(true, builders); // suggest without collate PhraseSuggestionBuilder suggest = phraseSuggestion("title") .field("title") .addCandidateGenerator(PhraseSuggestionBuilder.candidateGenerator("title") .suggestMode("always") .maxTermFreq(.99f) .size(10) .maxInspections(200) ) .confidence(0f) .maxErrors(2f) .shardSize(30000) .size(10); Suggest searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", suggest); assertSuggestionSize(searchSuggest, 0, 10, "title"); // suggest with collate String filterString = XContentFactory.jsonBuilder() .startObject() .startObject("match_phrase") .field("{{field}}", "{{suggestion}}") .endObject() .endObject() .string(); PhraseSuggestionBuilder filteredQuerySuggest = suggest.collateQuery(filterString); filteredQuerySuggest.collateParams(Collections.singletonMap("field", (Object)"title")); searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", filteredQuerySuggest); assertSuggestionSize(searchSuggest, 0, 2, "title"); // collate suggest with no result (boundary case) searchSuggest = searchSuggest("Elections of Representatives Parliament", filteredQuerySuggest); assertSuggestionSize(searchSuggest, 0, 0, "title"); NumShards numShards = getNumShards("test"); // collate suggest with bad query String incorrectFilterString = XContentFactory.jsonBuilder() .startObject() .startObject("test") .field("title", "{{suggestion}}") .endObject() .endObject() .string(); PhraseSuggestionBuilder incorrectFilteredSuggest = suggest.collateQuery(incorrectFilterString); try { searchSuggest("united states house of representatives elections in washington 2006", numShards.numPrimaries, incorrectFilteredSuggest); fail("Post query error has been swallowed"); } catch(ElasticsearchException e) { // expected } // suggest with collation String filterStringAsFilter = XContentFactory.jsonBuilder() .startObject() .startObject("query") .startObject("match_phrase") .field("title", "{{suggestion}}") .endObject() .endObject() .endObject() .string(); PhraseSuggestionBuilder filteredFilterSuggest = suggest.collateQuery(filterStringAsFilter); searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", filteredFilterSuggest); assertSuggestionSize(searchSuggest, 0, 2, "title"); // collate suggest with bad query String filterStr = XContentFactory.jsonBuilder() .startObject() .startObject("pprefix") .field("title", "{{suggestion}}") .endObject() .endObject() .string(); PhraseSuggestionBuilder in = suggest.collateQuery(filterStr); try { searchSuggest("united states house of representatives elections in washington 2006", numShards.numPrimaries, in); fail("Post filter error has been swallowed"); } catch(ElasticsearchException e) { //expected } // collate script failure due to no additional params String collateWithParams = XContentFactory.jsonBuilder() .startObject() .startObject("{{query_type}}") .field("{{query_field}}", "{{suggestion}}") .endObject() .endObject() .string(); PhraseSuggestionBuilder phraseSuggestWithNoParams = suggest.collateQuery(collateWithParams); try { searchSuggest("united states house of representatives elections in washington 2006", numShards.numPrimaries, phraseSuggestWithNoParams); fail("Malformed query (lack of additional params) should fail"); } catch (ElasticsearchException e) { // expected } // collate script with additional params Map<String, Object> params = new HashMap<>(); params.put("query_type", "match_phrase"); params.put("query_field", "title"); PhraseSuggestionBuilder phraseSuggestWithParams = suggest.collateQuery(collateWithParams).collateParams(params); searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithParams); assertSuggestionSize(searchSuggest, 0, 2, "title"); // collate query request with prune set to true PhraseSuggestionBuilder phraseSuggestWithParamsAndReturn = suggest.collateQuery(collateWithParams).collateParams(params).collatePrune(true); searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", phraseSuggestWithParamsAndReturn); assertSuggestionSize(searchSuggest, 0, 10, "title"); assertSuggestionPhraseCollateMatchExists(searchSuggest, "title", 2); } protected Suggest searchSuggest(SuggestionBuilder<?>... suggestion) { return searchSuggest(null, suggestion); } protected Suggest searchSuggest(String suggestText, SuggestionBuilder<?>... suggestions) { return searchSuggest(suggestText, 0, suggestions); } protected Suggest searchSuggest(String suggestText, int expectShardsFailed, SuggestionBuilder<?>... suggestions) { if (randomBoolean()) { SearchRequestBuilder builder = client().prepareSearch().setSize(0); if (suggestText != null) { builder.setSuggestText(suggestText); } for (SuggestionBuilder<?> suggestion : suggestions) { builder.addSuggestion(suggestion); } SearchResponse actionGet = builder.execute().actionGet(); assertThat(Arrays.toString(actionGet.getShardFailures()), actionGet.getFailedShards(), equalTo(expectShardsFailed)); return actionGet.getSuggest(); } else { SuggestRequestBuilder builder = client().prepareSuggest(); if (suggestText != null) { builder.setSuggestText(suggestText); } for (SuggestionBuilder<?> suggestion : suggestions) { builder.addSuggestion(suggestion); } SuggestResponse actionGet = builder.execute().actionGet(); assertThat(Arrays.toString(actionGet.getShardFailures()), actionGet.getFailedShards(), equalTo(expectShardsFailed)); if (expectShardsFailed > 0) { throw new SearchPhaseExecutionException("suggest", "Suggest execution failed", new ShardSearchFailure[0]); } return actionGet.getSuggest(); } } }