/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.suggest;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.bytes.BytesArray;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.plugins.ScriptPlugin;
import org.elasticsearch.script.CompiledScript;
import org.elasticsearch.script.ExecutableScript;
import org.elasticsearch.script.ScriptEngine;
import org.elasticsearch.script.SearchScript;
import org.elasticsearch.search.lookup.SearchLookup;
import org.elasticsearch.search.suggest.phrase.DirectCandidateGeneratorBuilder;
import org.elasticsearch.search.suggest.phrase.Laplace;
import org.elasticsearch.search.suggest.phrase.LinearInterpolation;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder;
import org.elasticsearch.search.suggest.phrase.StupidBackoff;
import org.elasticsearch.search.suggest.term.TermSuggestionBuilder;
import org.elasticsearch.search.suggest.term.TermSuggestionBuilder.SuggestMode;
import org.elasticsearch.test.ESIntegTestCase;
import org.elasticsearch.test.hamcrest.ElasticsearchAssertions;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.concurrent.ExecutionException;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.search.suggest.SuggestBuilders.phraseSuggestion;
import static org.elasticsearch.search.suggest.SuggestBuilders.termSuggestion;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSuggestion;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSuggestionPhraseCollateMatchExists;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSuggestionSize;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows;
import static org.hamcrest.Matchers.anyOf;
import static org.hamcrest.Matchers.endsWith;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.instanceOf;
import static org.hamcrest.Matchers.nullValue;
/**
 * Integration tests for term and phrase suggestions. Many of these tests make many requests that vary only slightly from one another.
 * Where
* possible these tests should declare for the first request, make the request, modify the configuration for the next request, make that
* request, modify again, request again, etc. This makes it very obvious what changes between requests.
*/
public class SuggestSearchIT extends ESIntegTestCase {
// see #3196
/**
 * Runs the same term suggestion over a growing set of indices. While all indices analyze the
 * {@code text} field the same way the request succeeds; once {@code test_2} (whose field uses
 * the {@code keyword} analyzer) joins in, the per-shard suggestion entries no longer line up
 * and the request is expected to fail with an {@link IllegalStateException} — either directly
 * or wrapped in a {@link SearchPhaseExecutionException}, depending on where the merge fails.
 */
public void testSuggestAcrossMultipleIndices() throws IOException {
    createIndex("test");
    ensureGreen();
    index("test", "type1", "1", "text", "abcd");
    index("test", "type1", "2", "text", "aacd");
    index("test", "type1", "3", "text", "abbd");
    index("test", "type1", "4", "text", "abcc");
    refresh();
    TermSuggestionBuilder termSuggest = termSuggestion("text")
        .suggestMode(SuggestMode.ALWAYS) // Always, otherwise the results can vary between requests.
        .text("abcd");
    logger.info("--> run suggestions with one index");
    searchSuggest("test", termSuggest);
    createIndex("test_1");
    ensureGreen();
    index("test_1", "type1", "1", "text", "ab cd");
    index("test_1", "type1", "2", "text", "aa cd");
    index("test_1", "type1", "3", "text", "ab bd");
    index("test_1", "type1", "4", "text", "ab cc");
    refresh();
    termSuggest = termSuggestion("text")
        .suggestMode(SuggestMode.ALWAYS) // Always, otherwise the results can vary between requests.
        .text("ab cd")
        .minWordLength(1);
    logger.info("--> run suggestions with two indices");
    searchSuggest("test", termSuggest);
    // test_2 maps "text" with the keyword analyzer, so "ab cd" stays a single token there
    // while the other indices split it into two — entry counts will disagree across shards.
    XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
        .startObject("properties")
        .startObject("text").field("type", "text").field("analyzer", "keyword").endObject()
        .endObject()
        .endObject().endObject();
    assertAcked(prepareCreate("test_2").addMapping("type1", mapping));
    ensureGreen();
    index("test_2", "type1", "1", "text", "ab cd");
    index("test_2", "type1", "2", "text", "aa cd");
    index("test_2", "type1", "3", "text", "ab bd");
    index("test_2", "type1", "4", "text", "ab cc");
    index("test_2", "type1", "1", "text", "abcd");
    index("test_2", "type1", "2", "text", "aacd");
    index("test_2", "type1", "3", "text", "abbd");
    index("test_2", "type1", "4", "text", "abcc");
    refresh();
    termSuggest = termSuggestion("text")
        .suggestMode(SuggestMode.ALWAYS) // Always, otherwise the results can vary between requests.
        .text("ab cd")
        .minWordLength(1);
    logger.info("--> run suggestions with three indices");
    try {
        searchSuggest("test", termSuggest);
        fail(" can not suggest across multiple indices with different analysis chains");
    } catch (SearchPhaseExecutionException ex) {
        assertThat(ex.getCause(), instanceOf(IllegalStateException.class));
        assertThat(ex.getCause().getMessage(),
            anyOf(endsWith("Suggest entries have different sizes actual [1] expected [2]"),
                endsWith("Suggest entries have different sizes actual [2] expected [1]")));
    } catch (IllegalStateException ex) {
        assertThat(ex.getMessage(), anyOf(endsWith("Suggest entries have different sizes actual [1] expected [2]"),
            endsWith("Suggest entries have different sizes actual [2] expected [1]")));
    }
    // Same problem with differing case handling: the keyword-analyzed index keeps "ABCD"
    // as-is while the lowercasing indices see "abcd", so the entry texts disagree.
    termSuggest = termSuggestion("text")
        .suggestMode(SuggestMode.ALWAYS) // Always, otherwise the results can vary between requests.
        .text("ABCD")
        .minWordLength(1);
    logger.info("--> run suggestions with four indices");
    try {
        searchSuggest("test", termSuggest);
        fail(" can not suggest across multiple indices with different analysis chains");
    } catch (SearchPhaseExecutionException ex) {
        assertThat(ex.getCause(), instanceOf(IllegalStateException.class));
        assertThat(ex.getCause().getMessage(), anyOf(endsWith("Suggest entries have different text actual [ABCD] expected [abcd]"),
            endsWith("Suggest entries have different text actual [abcd] expected [ABCD]")));
    } catch (IllegalStateException ex) {
        assertThat(ex.getMessage(), anyOf(endsWith("Suggest entries have different text actual [ABCD] expected [abcd]"),
            endsWith("Suggest entries have different text actual [abcd] expected [ABCD]")));
    }
}
// see #3037
/**
 * Exercises the candidate generator's {@code suggest_mode}: with "always" the generator
 * produces corrections for "ice tea", while resetting the mode to {@code null} (the default)
 * yields no suggestions for the same input.
 */
public void testSuggestModes() throws IOException {
    Settings.Builder indexSettings = Settings.builder()
        .put(SETTING_NUMBER_OF_SHARDS, 1)
        .put(SETTING_NUMBER_OF_REPLICAS, 0)
        .put("index.analysis.analyzer.biword.tokenizer", "standard")
        .putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase")
        .put("index.analysis.filter.shingler.type", "shingle")
        .put("index.analysis.filter.shingler.min_shingle_size", 2)
        .put("index.analysis.filter.shingler.max_shingle_size", 3);
    // "name" is indexed plain, with a shingled multi-field used as the phrase suggest target
    XContentBuilder typeMapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
        .startObject("properties")
            .startObject("name")
                .field("type", "text")
                .startObject("fields")
                    .startObject("shingled")
                        .field("type", "text")
                        .field("analyzer", "biword")
                        .field("search_analyzer", "standard")
                    .endObject()
                .endObject()
            .endObject()
        .endObject()
        .endObject().endObject();
    assertAcked(prepareCreate("test").setSettings(indexSettings).addMapping("type1", typeMapping));
    ensureGreen();
    index("test", "type1", "1", "name", "I like iced tea");
    index("test", "type1", "2", "name", "I like tea.");
    index("test", "type1", "3", "name", "I like ice cream.");
    refresh();
    DirectCandidateGeneratorBuilder generator = candidateGenerator("name")
        .prefixLength(0)
        .minWordLength(0)
        .suggestMode("always")
        .maxEdits(2);
    PhraseSuggestionBuilder suggestion = phraseSuggestion("name.shingled")
        .addCandidateGenerator(generator)
        .gramSize(3);
    Suggest result = searchSuggest("ice tea", "did_you_mean", suggestion);
    assertSuggestion(result, 0, "did_you_mean", "iced tea");
    // back to the default suggest mode: the (existing) terms are no longer corrected
    generator.suggestMode(null);
    result = searchSuggest("ice tea", "did_you_mean", suggestion);
    assertSuggestionSize(result, 0, 0, "did_you_mean");
}
/**
 * Convenience factory for a {@link DirectCandidateGeneratorBuilder}.
 *
 * @param field the field the returned candidate generator draws its candidates from
 * @return a fresh candidate generator for {@code field}
 */
private DirectCandidateGeneratorBuilder candidateGenerator(String field) {
    return new DirectCandidateGeneratorBuilder(field);
}
// see #2729
/**
 * Verifies {@code size} and {@code shard_size} on a single-shard index: with 15 close
 * candidates for "abcd", a size of 10 returns 10 options and a shard size of 5 caps
 * the result at 5.
 */
public void testSizeOneShard() throws Exception {
    prepareCreate("test").setSettings(
        SETTING_NUMBER_OF_SHARDS, 1,
        SETTING_NUMBER_OF_REPLICAS, 0).get();
    ensureGreen();
    // index "abc0" .. "abc14" so there are plenty of one-edit candidates for "abcd"
    for (int doc = 0; doc < 15; doc++) {
        index("test", "type1", Integer.toString(doc), "text", "abc" + doc);
    }
    refresh();
    SearchResponse response = client().prepareSearch().setQuery(matchQuery("text", "spellchecker")).get();
    assertThat("didn't ask for suggestions but got some", response.getSuggest(), nullValue());
    TermSuggestionBuilder suggestionBuilder = termSuggestion("text")
        .suggestMode(SuggestMode.ALWAYS) // Always, otherwise the results can vary between requests.
        .text("abcd")
        .size(10);
    Suggest suggest = searchSuggest("test", suggestionBuilder);
    assertSuggestion(suggest, 0, "test", 10, "abc0");
    // shard_size limits candidates collected per shard, so only 5 options survive
    suggestionBuilder.text("abcd").shardSize(5);
    suggest = searchSuggest("test", suggestionBuilder);
    assertSuggestion(suggest, 0, "test", 5, "abc0");
}
/**
 * Phrase-suggesting on an unmapped field must fail the search request. The first suggestion
 * targets the mapped {@code name.shingled} sub-field as a sanity check; the second targets the
 * non-existent {@code nosuchField} and is expected to throw a
 * {@link SearchPhaseExecutionException}.
 *
 * <p>Fix: the original contained two byte-identical {@code assertThrows} blocks — the second
 * was a copy-paste duplicate verifying nothing new and has been removed.
 */
public void testUnmappedField() throws IOException, InterruptedException, ExecutionException {
    CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
        .put(indexSettings())
        .put("index.analysis.analyzer.biword.tokenizer", "standard")
        .putArray("index.analysis.analyzer.biword.filter", "shingler", "lowercase")
        .put("index.analysis.filter.shingler.type", "shingle")
        .put("index.analysis.filter.shingler.min_shingle_size", 2)
        .put("index.analysis.filter.shingler.max_shingle_size", 3));
    XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
        .startObject("properties")
            .startObject("name")
                .field("type", "text")
                .startObject("fields")
                    .startObject("shingled")
                        .field("type", "text")
                        .field("analyzer", "biword")
                        .field("search_analyzer", "standard")
                    .endObject()
                .endObject()
            .endObject()
        .endObject()
        .endObject().endObject();
    assertAcked(builder.addMapping("type1", mapping));
    ensureGreen();
    indexRandom(true, client().prepareIndex("test", "type1").setSource("name", "I like iced tea"),
        client().prepareIndex("test", "type1").setSource("name", "I like tea."),
        client().prepareIndex("test", "type1").setSource("name", "I like ice cream."));
    refresh();
    // sanity check: the same suggestion works against the mapped sub-field
    PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("name.shingled")
        .addCandidateGenerator(candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
        .gramSize(3);
    Suggest searchSuggest = searchSuggest("ice tea", "did_you_mean", phraseSuggestion);
    assertSuggestion(searchSuggest, 0, 0, "did_you_mean", "iced tea");
    // now point the suggestion at a field that has no mapping — the request must fail
    phraseSuggestion = phraseSuggestion("nosuchField")
        .addCandidateGenerator(candidateGenerator("name").prefixLength(0).minWordLength(0).suggestMode("always").maxEdits(2))
        .gramSize(3);
    SearchRequestBuilder searchBuilder = client().prepareSearch().setSize(0);
    searchBuilder.suggest(new SuggestBuilder().setGlobalText("tetsting sugestion").addSuggestion("did_you_mean", phraseSuggestion));
    assertThrows(searchBuilder, SearchPhaseExecutionException.class);
}
/**
 * Basic term-suggest smoke test: a search without a suggest section returns no suggestions,
 * and suggesting "abcd" against three one-edit neighbours returns them in a stable order on
 * repeated requests.
 */
public void testSimple() throws Exception {
    createIndex("test");
    ensureGreen();
    String[] docs = {"abcd", "aacd", "abbd", "abcc"};
    for (int i = 0; i < docs.length; i++) {
        index("test", "type1", Integer.toString(i + 1), "text", docs[i]);
    }
    refresh();
    SearchResponse response = client().prepareSearch().setQuery(matchQuery("text", "spellcecker")).get();
    assertThat("didn't ask for suggestions but got some", response.getSuggest(), nullValue());
    TermSuggestionBuilder suggestionBuilder = termSuggestion("text")
        .suggestMode(SuggestMode.ALWAYS) // Always, otherwise the results can vary between requests.
        .text("abcd");
    // run twice to make sure the result is reproducible
    for (int run = 0; run < 2; run++) {
        Suggest suggest = searchSuggest("test", suggestionBuilder);
        assertSuggestion(suggest, 0, "test", "aacd", "abbd", "abcc");
        assertThat(suggest.getSuggestion("test").getEntries().get(0).getText().string(), equalTo("abcd"));
    }
}
/**
 * Suggesting a term with no close neighbours in the index yields an entry carrying the
 * original text but zero options — consistently across repeated requests.
 */
public void testEmpty() throws Exception {
    createIndex("test");
    ensureGreen();
    index("test", "type1", "1", "text", "bar");
    refresh();
    TermSuggestionBuilder suggestionBuilder = termSuggestion("text")
        .suggestMode(SuggestMode.ALWAYS) // Always, otherwise the results can vary between requests.
        .text("abcd");
    // run twice to make sure the (empty) result is reproducible
    for (int run = 0; run < 2; run++) {
        Suggest suggest = searchSuggest("test", suggestionBuilder);
        assertSuggestionSize(suggest, 0, 0, "test");
        assertThat(suggest.getSuggestion("test").getEntries().get(0).getText().string(), equalTo("abcd"));
    }
}
/**
 * Sends several named term suggestions in a single request and verifies each one is
 * resolved independently: a size-limited one, a multi-term one, and one whose accuracy
 * of 1.0 filters out every candidate.
 */
public void testWithMultipleCommands() throws Exception {
    createIndex("test");
    ensureGreen();
    index("test", "typ1", "1", "field1", "prefix_abcd", "field2", "prefix_efgh");
    index("test", "typ1", "2", "field1", "prefix_aacd", "field2", "prefix_eeeh");
    index("test", "typ1", "3", "field1", "prefix_abbd", "field2", "prefix_efff");
    index("test", "typ1", "4", "field1", "prefix_abcc", "field2", "prefix_eggg");
    refresh();
    Map<String, SuggestionBuilder<?>> namedSuggestions = new HashMap<>();
    namedSuggestions.put("size1",
        termSuggestion("field1").size(1).text("prefix_abcd").maxTermFreq(10).prefixLength(1).minDocFreq(0)
            .suggestMode(SuggestMode.ALWAYS));
    namedSuggestions.put("field2",
        termSuggestion("field2").text("prefix_eeeh prefix_efgh")
            .maxTermFreq(10).minDocFreq(0).suggestMode(SuggestMode.ALWAYS));
    namedSuggestions.put("accuracy",
        termSuggestion("field2").text("prefix_efgh").accuracy(1f)
            .maxTermFreq(10).minDocFreq(0).suggestMode(SuggestMode.ALWAYS));
    Suggest suggest = searchSuggest(null, 0, namedSuggestions);
    assertSuggestion(suggest, 0, "size1", "prefix_aacd");
    // the two-term text produces one entry per input term
    assertThat(suggest.getSuggestion("field2").getEntries().get(0).getText().string(), equalTo("prefix_eeeh"));
    assertSuggestion(suggest, 0, "field2", "prefix_efgh");
    assertThat(suggest.getSuggestion("field2").getEntries().get(1).getText().string(), equalTo("prefix_efgh"));
    assertSuggestion(suggest, 1, "field2", "prefix_eeeh", "prefix_efff", "prefix_eggg");
    // accuracy 1.0 rejects every candidate
    assertSuggestionSize(suggest, 0, 0, "accuracy");
}
/**
 * Verifies the {@code size}, {@code shard_size}, {@code max_edits} and {@code sort} options of
 * the term suggester. Each term is indexed into a controlled number of documents so that
 * score-sorted results (closest edits first) and frequency-sorted results (most frequent
 * first) differ predictably.
 */
public void testSizeAndSort() throws Exception {
    createIndex("test");
    ensureGreen();
    // term -> number of documents containing it; high counts win under SortBy.FREQUENCY
    Map<String, Integer> termsAndDocCount = new HashMap<>();
    termsAndDocCount.put("prefix_aaad", 20);
    termsAndDocCount.put("prefix_abbb", 18);
    termsAndDocCount.put("prefix_aaca", 16);
    termsAndDocCount.put("prefix_abba", 14);
    termsAndDocCount.put("prefix_accc", 12);
    termsAndDocCount.put("prefix_addd", 10);
    termsAndDocCount.put("prefix_abaa", 8);
    termsAndDocCount.put("prefix_dbca", 6);
    termsAndDocCount.put("prefix_cbad", 4);
    termsAndDocCount.put("prefix_aacd", 1);
    termsAndDocCount.put("prefix_abcc", 1);
    termsAndDocCount.put("prefix_accd", 1);
    for (Entry<String, Integer> entry : termsAndDocCount.entrySet()) {
        for (int i = 0; i < entry.getValue(); i++) {
            index("test", "type1", entry.getKey() + i, "field1", entry.getKey());
        }
    }
    refresh();
    Map<String, SuggestionBuilder<?>> suggestions = new HashMap<>();
    suggestions.put("size3SortScoreFirst", termSuggestion("field1")
        .size(3).minDocFreq(0).suggestMode(SuggestMode.ALWAYS));
    suggestions.put("size10SortScoreFirst", termSuggestion("field1")
        .size(10).minDocFreq(0).suggestMode(SuggestMode.ALWAYS).shardSize(50));
    suggestions.put("size3SortScoreFirstMaxEdits1", termSuggestion("field1")
        .maxEdits(1)
        .size(10).minDocFreq(0).suggestMode(SuggestMode.ALWAYS));
    suggestions.put("size10SortFrequencyFirst", termSuggestion("field1")
        .size(10).sort(SortBy.FREQUENCY).shardSize(1000)
        .minDocFreq(0).suggestMode(SuggestMode.ALWAYS));
    Suggest suggest = searchSuggest("prefix_abcd", 0, suggestions);
    // The commented out assertions fail sometimes because suggestions are based off of shard frequencies instead of index frequencies.
    assertSuggestion(suggest, 0, "size3SortScoreFirst", "prefix_aacd", "prefix_abcc", "prefix_accd");
    assertSuggestion(suggest, 0, "size10SortScoreFirst", 10, "prefix_aacd", "prefix_abcc", "prefix_accd" /*, "prefix_aaad" */);
    assertSuggestion(suggest, 0, "size3SortScoreFirstMaxEdits1", "prefix_aacd", "prefix_abcc", "prefix_accd");
    assertSuggestion(suggest, 0, "size10SortFrequencyFirst", "prefix_aaad", "prefix_abbb", "prefix_aaca", "prefix_abba",
        "prefix_accc", "prefix_addd", "prefix_abaa", "prefix_dbca", "prefix_cbad", "prefix_aacd");
    // assertThat(suggest.get(3).getSuggestedWords().get("prefix_abcd").get(4).getTerm(), equalTo("prefix_abcc"));
    // assertThat(suggest.get(3).getSuggestedWords().get("prefix_abcd").get(4).getTerm(), equalTo("prefix_accd"));
}
// see #2817
/**
 * When the suggest text consists solely of stopwords, the {@code stop}-filtered analyzer
 * removes every token and the phrase suggester must return an empty result rather than fail.
 */
public void testStopwordsOnlyPhraseSuggest() throws IOException {
    Settings.Builder analysisSettings = Settings.builder()
        .put("index.analysis.analyzer.stopwd.tokenizer", "whitespace")
        .putArray("index.analysis.analyzer.stopwd.filter", "stop");
    assertAcked(prepareCreate("test")
        .addMapping("typ1", "body", "type=text,analyzer=stopwd")
        .setSettings(analysisSettings));
    ensureGreen();
    index("test", "typ1", "1", "body", "this is a test");
    refresh();
    PhraseSuggestionBuilder suggestion = phraseSuggestion("body").gramSize(1)
        .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always"))
        .size(1);
    Suggest result = searchSuggest("a an the", "simple_phrase", suggestion);
    assertSuggestionSize(result, 0, 0, "simple_phrase");
}
/**
 * Shows that the candidate generator's {@code prefix_length} changes which correction wins
 * for the misspelled "word": with a prefix of 4 the suggester returns "hello words", with a
 * prefix of 2 it returns "hello world".
 */
public void testPrefixLength() throws IOException {
    Settings.Builder indexSettings = Settings.builder()
        .put(SETTING_NUMBER_OF_SHARDS, 1)
        .put("index.analysis.analyzer.reverse.tokenizer", "standard")
        .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse")
        .put("index.analysis.analyzer.body.tokenizer", "standard")
        .putArray("index.analysis.analyzer.body.filter", "lowercase")
        .put("index.analysis.analyzer.bigram.tokenizer", "standard")
        .putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase")
        .put("index.analysis.filter.my_shingle.type", "shingle")
        .put("index.analysis.filter.my_shingle.output_unigrams", false)
        .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
        .put("index.analysis.filter.my_shingle.max_shingle_size", 2);
    XContentBuilder typeMapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
        .startObject("properties")
            .startObject("body").field("type", "text").field("analyzer", "body").endObject()
            .startObject("body_reverse").field("type", "text").field("analyzer", "reverse").endObject()
            .startObject("bigram").field("type", "text").field("analyzer", "bigram").endObject()
        .endObject()
        .endObject().endObject();
    assertAcked(prepareCreate("test").setSettings(indexSettings).addMapping("type1", typeMapping));
    ensureGreen();
    index("test", "type1", "1", "body", "hello world");
    index("test", "type1", "2", "body", "hello world");
    index("test", "type1", "3", "body", "hello words");
    refresh();
    Suggest result = searchSuggest("hello word", "simple_phrase",
        phraseSuggestion("body")
            .addCandidateGenerator(candidateGenerator("body").prefixLength(4).minWordLength(1).suggestMode("always"))
            .size(1).confidence(1.0f));
    assertSuggestion(result, 0, "simple_phrase", "hello words");
    result = searchSuggest("hello word", "simple_phrase",
        phraseSuggestion("body")
            .addCandidateGenerator(candidateGenerator("body").prefixLength(2).minWordLength(1).suggestMode("always"))
            .size(1).confidence(1.0f));
    assertSuggestion(result, 0, "simple_phrase", "hello world");
}
/**
 * Broad phrase-suggester coverage over a bigram-shingled corpus: highlighting, confidence
 * thresholds, real-word error likelihood, multiple candidate generators with pre/post
 * filters, all three smoothing models ({@link LinearInterpolation}, {@link Laplace},
 * {@link StupidBackoff}) and the {@code token_limit} cutoff. The same
 * {@link PhraseSuggestionBuilder} instance is mutated between requests, so statement order
 * matters throughout.
 */
public void testBasicPhraseSuggest() throws IOException, URISyntaxException {
    CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
        .put(indexSettings())
        .put("index.analysis.analyzer.reverse.tokenizer", "standard")
        .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse")
        .put("index.analysis.analyzer.body.tokenizer", "standard")
        .putArray("index.analysis.analyzer.body.filter", "lowercase")
        .put("index.analysis.analyzer.bigram.tokenizer", "standard")
        .putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase")
        .put("index.analysis.filter.my_shingle.type", "shingle")
        .put("index.analysis.filter.my_shingle.output_unigrams", false)
        .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
        .put("index.analysis.filter.my_shingle.max_shingle_size", 2)
        .put("index.number_of_shards", 1));
    XContentBuilder mapping = XContentFactory.jsonBuilder().startObject().startObject("type1")
        .startObject("properties")
            .startObject("body").
                field("type", "text").
                field("analyzer", "body")
            .endObject()
            .startObject("body_reverse").
                field("type", "text").
                field("analyzer", "reverse")
            .endObject()
            .startObject("bigram").
                field("type", "text").
                field("analyzer", "bigram")
            .endObject()
        .endObject()
        .endObject().endObject();
    assertAcked(builder.addMapping("type1", mapping));
    ensureGreen();
    String[] strings = new String[]{
        "Arthur, King of the Britons",
        "Sir Lancelot the Brave",
        "Patsy, Arthur's Servant",
        "Sir Robin the Not-Quite-So-Brave-as-Sir-Lancelot",
        "Sir Bedevere the Wise",
        "Sir Galahad the Pure",
        "Miss Islington, the Witch",
        "Zoot",
        "Leader of Robin's Minstrels",
        "Old Crone",
        "Frank, the Historian",
        "Frank's Wife",
        "Dr. Piglet",
        "Dr. Winston",
        "Sir Robin (Stand-in)",
        "Knight Who Says Ni",
        "Police sergeant who stops the film",
    };
    for (String line : strings) {
        // same content is indexed into all three differently-analyzed fields
        index("test", "type1", line, "body", line, "body_reverse", line, "bigram", line);
    }
    refresh();
    PhraseSuggestionBuilder phraseSuggest = phraseSuggestion("bigram").gramSize(2).analyzer("body")
        .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always"))
        .size(1);
    Suggest searchSuggest = searchSuggest( "Frank's Wise", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "frank's wife");
    assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getText().string(), equalTo("Frank's Wise"));
    phraseSuggest.realWordErrorLikelihood(0.95f);
    searchSuggest = searchSuggest( "Artur, Kinh of the Britons", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
    // Check the "text" field this one time.
    assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getText().string(),
        equalTo("Artur, Kinh of the Britons"));
    // Ask for highlighting
    phraseSuggest.highlight("<em>", "</em>");
    searchSuggest = searchSuggest( "Artur, King of the Britns", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
    assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getOptions().get(0).getHighlighted().string(),
        equalTo("<em>arthur</em> king of the <em>britons</em>"));
    // pass in a correct phrase
    phraseSuggest.highlight(null, null).confidence(0f).size(1).maxErrors(0.5f);
    searchSuggest = searchSuggest( "Arthur, King of the Britons", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
    // pass in a correct phrase - set confidence to 2
    phraseSuggest.confidence(2f);
    searchSuggest = searchSuggest( "Arthur, King of the Britons", "simple_phrase", phraseSuggest);
    assertSuggestionSize(searchSuggest, 0, 0, "simple_phrase");
    // pass in a correct phrase - set confidence to 0.99
    phraseSuggest.confidence(0.99f);
    searchSuggest = searchSuggest( "Arthur, King of the Britons", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
    //test reverse suggestions with pre & post filter
    phraseSuggest
        .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always"))
        .addCandidateGenerator(candidateGenerator("body_reverse").minWordLength(1).suggestMode("always").preFilter("reverse")
            .postFilter("reverse"));
    searchSuggest = searchSuggest( "Artur, Ging of the Britons", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
    // set all mass to trigrams (not indexed)
    phraseSuggest.clearCandidateGenerators()
        .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always"))
        .smoothingModel(new LinearInterpolation(1,0,0));
    searchSuggest = searchSuggest( "Artur, King of the Britns", "simple_phrase", phraseSuggest);
    assertSuggestionSize(searchSuggest, 0, 0, "simple_phrase");
    // set all mass to bigrams
    phraseSuggest.smoothingModel(new LinearInterpolation(0,1,0));
    searchSuggest = searchSuggest( "Artur, King of the Britns", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
    // distribute mass
    phraseSuggest.smoothingModel(new LinearInterpolation(0.4,0.4,0.2));
    searchSuggest = searchSuggest( "Artur, King of the Britns", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
    searchSuggest = searchSuggest( "Frank's Wise", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "frank's wife");
    // try all smoothing methods
    phraseSuggest.smoothingModel(new LinearInterpolation(0.4,0.4,0.2));
    searchSuggest = searchSuggest( "Artur, King of the Britns", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
    phraseSuggest.smoothingModel(new Laplace(0.2));
    searchSuggest = searchSuggest( "Artur, King of the Britns", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
    phraseSuggest.smoothingModel(new StupidBackoff(0.1));
    searchSuggest = searchSuggest( "Artur, King of the Britns", "simple_phrase",phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "arthur king of the britons");
    // check tokenLimit
    phraseSuggest.smoothingModel(null).tokenLimit(4);
    searchSuggest = searchSuggest( "Artur, King of the Britns", "simple_phrase", phraseSuggest);
    assertSuggestionSize(searchSuggest, 0, 0, "simple_phrase");
    phraseSuggest.tokenLimit(15).smoothingModel(new StupidBackoff(0.1));
    searchSuggest = searchSuggest( "Sir Bedever the Wife Sir Bedever the Wife Sir Bedever the Wife", "simple_phrase", phraseSuggest);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "sir bedevere the wise sir bedevere the wise sir bedevere the wise");
    // Check the name this time because we're repeating it which is funky
    assertThat(searchSuggest.getSuggestion("simple_phrase").getEntries().get(0).getText().string(),
        equalTo("Sir Bedever the Wife Sir Bedever the Wife Sir Bedever the Wife"));
}
/**
 * Verifies the candidate generator's {@code size} option: with a shard-level candidate size
 * of 1 the misspelling "Gut" never reaches the phrase scorer, while a size of 2 lets the
 * edit-distance-2 candidate "god" through so "xorr the god jewel" can be suggested.
 */
public void testSizeParam() throws IOException {
    CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
        .put(SETTING_NUMBER_OF_SHARDS, 1)
        .put("index.analysis.analyzer.reverse.tokenizer", "standard")
        .putArray("index.analysis.analyzer.reverse.filter", "lowercase", "reverse")
        .put("index.analysis.analyzer.body.tokenizer", "standard")
        .putArray("index.analysis.analyzer.body.filter", "lowercase")
        .put("index.analysis.analyzer.bigram.tokenizer", "standard")
        .putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase")
        .put("index.analysis.filter.my_shingle.type", "shingle")
        .put("index.analysis.filter.my_shingle.output_unigrams", false)
        .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
        .put("index.analysis.filter.my_shingle.max_shingle_size", 2));
    XContentBuilder mapping = XContentFactory.jsonBuilder()
        .startObject()
            .startObject("type1")
                .startObject("properties")
                    .startObject("body")
                        .field("type", "text")
                        .field("analyzer", "body")
                    .endObject()
                    .startObject("body_reverse")
                        .field("type", "text")
                        .field("analyzer", "reverse")
                    .endObject()
                    .startObject("bigram")
                        .field("type", "text")
                        .field("analyzer", "bigram")
                    .endObject()
                .endObject()
            .endObject()
        .endObject();
    assertAcked(builder.addMapping("type1", mapping));
    ensureGreen();
    String line = "xorr the god jewel";
    index("test", "type1", "1", "body", line, "body_reverse", line, "bigram", line);
    line = "I got it this time";
    index("test", "type1", "2", "body", line, "body_reverse", line, "bigram", line);
    refresh();
    PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("bigram")
        .realWordErrorLikelihood(0.95f)
        .gramSize(2)
        .analyzer("body")
        .addCandidateGenerator(candidateGenerator("body").minWordLength(1).prefixLength(1).suggestMode("always").size(1)
            .accuracy(0.1f))
        .smoothingModel(new StupidBackoff(0.1))
        .maxErrors(1.0f)
        .size(5);
    Suggest searchSuggest = searchSuggest("Xorr the Gut-Jewel", "simple_phrase", phraseSuggestion);
    // with only one candidate per term the needed correction is never generated
    assertSuggestionSize(searchSuggest, 0, 0, "simple_phrase");
    // we allow a size of 2 now on the shard generator level so "god" will be found since it's LD2
    phraseSuggestion.clearCandidateGenerators()
        .addCandidateGenerator(candidateGenerator("body").minWordLength(1).prefixLength(1).suggestMode("always").size(2)
            .accuracy(0.1f));
    searchSuggest = searchSuggest( "Xorr the Gut-Jewel", "simple_phrase", phraseSuggestion);
    assertSuggestion(searchSuggest, 0, "simple_phrase", "xorr the god jewel");
}
/**
 * Boundary cases for the phrase suggester: a field/analyzer combination that
 * only produces n-grams (no unigrams) must be rejected unless
 * {@code forceUnigrams} is disabled, and tie-breaking between equally scored
 * corrections is exercised.
 */
public void testPhraseBoundaryCases() throws IOException, URISyntaxException {
    CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
            .put(indexSettings()).put(SETTING_NUMBER_OF_SHARDS, 1) // to get reliable statistics we should put this all into one shard
            .put("index.analysis.analyzer.body.tokenizer", "standard")
            .putArray("index.analysis.analyzer.body.filter", "lowercase")
            .put("index.analysis.analyzer.bigram.tokenizer", "standard")
            .putArray("index.analysis.analyzer.bigram.filter", "my_shingle", "lowercase")
            .put("index.analysis.analyzer.ngram.tokenizer", "standard")
            .putArray("index.analysis.analyzer.ngram.filter", "my_shingle2", "lowercase")
            .put("index.analysis.analyzer.myDefAnalyzer.tokenizer", "standard")
            .putArray("index.analysis.analyzer.myDefAnalyzer.filter", "shingle", "lowercase")
            // my_shingle: bigrams only, no unigrams — the problematic configuration
            .put("index.analysis.filter.my_shingle.type", "shingle")
            .put("index.analysis.filter.my_shingle.output_unigrams", false)
            .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
            .put("index.analysis.filter.my_shingle.max_shingle_size", 2)
            // my_shingle2: bigrams plus unigrams — the working configuration
            .put("index.analysis.filter.my_shingle2.type", "shingle")
            .put("index.analysis.filter.my_shingle2.output_unigrams", true)
            .put("index.analysis.filter.my_shingle2.min_shingle_size", 2)
            .put("index.analysis.filter.my_shingle2.max_shingle_size", 2));
    XContentBuilder mapping = XContentFactory.jsonBuilder()
            .startObject().startObject("type1")
            .startObject("properties")
            .startObject("body").field("type", "text").field("analyzer", "body").endObject()
            .startObject("bigram").field("type", "text").field("analyzer", "bigram").endObject()
            .startObject("ngram").field("type", "text").field("analyzer", "ngram").endObject()
            .endObject()
            .endObject().endObject();
    assertAcked(builder.addMapping("type1", mapping));
    ensureGreen();
    String[] strings = new String[]{
            "Xorr the God-Jewel",
            "Grog the God-Crusher",
            "Xorn",
            "Walter Newell",
            "Wanda Maximoff",
            "Captain America",
            "American Ace",
            "Wundarr the Aquarian",
            "Will o' the Wisp",
            "Xemnu the Titan"
    };
    for (String line : strings) {
        // The phrase doubles as the document id.
        index("test", "type1", line, "body", line, "bigram", line, "ngram", line);
    }
    refresh();
    NumShards numShards = getNumShards("test");
    // Lets make sure some things throw exceptions
    PhraseSuggestionBuilder phraseSuggestion = phraseSuggestion("bigram")
            .analyzer("body")
            .addCandidateGenerator(candidateGenerator("does_not_exist").minWordLength(1).suggestMode("always"))
            .realWordErrorLikelihood(0.95f)
            .maxErrors(0.5f)
            .size(1);
    // Drop the generator and the explicit analyzer so the field's own "bigram"
    // analyzer (shingles only, no unigrams) is used — this must fail on every shard.
    phraseSuggestion.clearCandidateGenerators().analyzer(null);
    try {
        searchSuggest("xor the got-jewel", numShards.numPrimaries, Collections.singletonMap("simple_phrase", phraseSuggestion));
        fail("analyzer does only produce ngrams");
    } catch (SearchPhaseExecutionException e) {
        // expected: analyzer produces only n-grams
    }
    // An explicit n-gram-only analyzer must fail just the same.
    phraseSuggestion.analyzer("bigram");
    try {
        searchSuggest("xor the got-jewel", numShards.numPrimaries, Collections.singletonMap("simple_phrase", phraseSuggestion));
        fail("analyzer does only produce ngrams");
    } catch (SearchPhaseExecutionException e) {
        // expected: analyzer produces only n-grams
    }
    // Now we'll make sure some things don't
    phraseSuggestion.forceUnigrams(false);
    searchSuggest( "xor the got-jewel", 0, Collections.singletonMap("simple_phrase", phraseSuggestion));
    // Field doesn't produce unigrams but the analyzer does
    phraseSuggestion.forceUnigrams(true).analyzer("ngram");
    searchSuggest( "xor the got-jewel", 0, Collections.singletonMap("simple_phrase", phraseSuggestion));
    phraseSuggestion = phraseSuggestion("ngram")
            .analyzer("myDefAnalyzer")
            .forceUnigrams(true)
            .realWordErrorLikelihood(0.95f)
            .maxErrors(0.5f)
            .size(1)
            .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always"));
    Suggest suggest = searchSuggest( "xor the got-jewel", 0, Collections.singletonMap("simple_phrase", phraseSuggestion));
    // "xorr the god jewel" and "xorn the god jewel" have identical scores (we are only using unigrams to score), so we tie break by
    // earlier term (xorn):
    assertSuggestion(suggest, 0, "simple_phrase", "xorn the god jewel");
    phraseSuggestion.analyzer(null);
    suggest = searchSuggest( "xor the got-jewel", 0, Collections.singletonMap("simple_phrase", phraseSuggestion));
    // In this case xorr has a better score than xorn because we set the field back to the default (my_shingle2) analyzer, so the
    // probability that the term is not in the dictionary but is NOT a misspelling is relatively high in this case compared to the
    // others that have no n-gram with the other terms in the phrase :) you can set this realWorldErrorLikelyhood
    assertSuggestion(suggest, 0, "simple_phrase", "xorr the god jewel");
}
/**
 * Routes three documents to (potentially) different shards and verifies the
 * term suggester still gathers suggestions from all of them.
 */
public void testDifferentShardSize() throws Exception {
    createIndex("test");
    ensureGreen();
    // One document per routing value so the docs can spread across shards.
    List<IndexRequestBuilder> docs = new ArrayList<>();
    for (int i = 1; i <= 3; i++) {
        String id = Integer.toString(i);
        docs.add(client().prepareIndex("test", "type1", id).setSource("field1", "foobar" + i).setRouting(id));
    }
    indexRandom(true, docs);
    Suggest result = searchSuggest("foobar", "simple",
            termSuggestion("field1").size(10).minDocFreq(0).suggestMode(SuggestMode.ALWAYS));
    // All three variants must come back, regardless of which shard holds them.
    ElasticsearchAssertions.assertSuggestionSize(result, 0, 3, "simple");
}
// see #3469
/**
 * Suggesting on a field with no mapping anywhere must fail the request,
 * while shards that merely hold no document of the type must not.
 */
public void testShardFailures() throws IOException, InterruptedException {
    CreateIndexRequestBuilder indexBuilder = prepareCreate("test").setSettings(Settings.builder()
            .put(indexSettings())
            .put("index.analysis.analyzer.suggest.tokenizer", "standard")
            .putArray("index.analysis.analyzer.suggest.filter", "standard", "lowercase", "shingler")
            .put("index.analysis.filter.shingler.type", "shingle")
            .put("index.analysis.filter.shingler.min_shingle_size", 2)
            .put("index.analysis.filter.shingler.max_shingle_size", 5)
            .put("index.analysis.filter.shingler.output_unigrams", true));
    XContentBuilder typeMapping = XContentFactory.jsonBuilder()
            .startObject()
                .startObject("type2")
                    .startObject("properties")
                        .startObject("name")
                            .field("type", "text")
                            .field("analyzer", "suggest")
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();
    assertAcked(indexBuilder.addMapping("type2", typeMapping));
    ensureGreen();
    // Seed five documents, then overwrite ids 1 and 2 with the "name" field
    // the suggester actually targets.
    for (int id = 1; id <= 5; id++) {
        index("test", "type2", Integer.toString(id), "foo", "bar");
    }
    index("test", "type2", "1", "name", "Just testing the suggestions api");
    index("test", "type2", "2", "name", "An other title about equal length");
    // Note that the last document has to have about the same length as the other
    // or cutoff rechecking will remove the useful suggestion.
    refresh();
    // When searching on a shard with a non existing mapping, we should fail.
    SearchRequestBuilder failingRequest = client().prepareSearch().setSize(0)
            .suggest(new SuggestBuilder().setGlobalText("tetsting sugestion")
                    .addSuggestion("did_you_mean", phraseSuggestion("fielddoesnotexist").maxErrors(5.0f)));
    assertThrows(failingRequest, SearchPhaseExecutionException.class);
    // When searching on a shard which does not hold yet any document of an existing type, we should not fail.
    SearchResponse response = client().prepareSearch().setSize(0)
            .suggest(new SuggestBuilder().setGlobalText("tetsting sugestion")
                    .addSuggestion("did_you_mean", phraseSuggestion("name").maxErrors(5.0f)))
            .get();
    ElasticsearchAssertions.assertNoFailures(response);
    ElasticsearchAssertions.assertSuggestion(response.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
}
// see #3469
/**
 * Phrase suggestions must still work when some shards of the index hold no
 * documents at all (see issue #3469).
 */
public void testEmptyShards() throws IOException, InterruptedException {
    XContentBuilder mapping = XContentFactory.jsonBuilder()
            .startObject()
                .startObject("type1")
                    .startObject("properties")
                        .startObject("name")
                            .field("type", "text")
                            .field("analyzer", "suggest")
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();
    Settings.Builder indexAnalysis = Settings.builder()
            .put(indexSettings())
            .put("index.analysis.analyzer.suggest.tokenizer", "standard")
            .putArray("index.analysis.analyzer.suggest.filter", "standard", "lowercase", "shingler")
            .put("index.analysis.filter.shingler.type", "shingle")
            .put("index.analysis.filter.shingler.min_shingle_size", 2)
            .put("index.analysis.filter.shingler.max_shingle_size", 5)
            .put("index.analysis.filter.shingler.output_unigrams", true);
    assertAcked(prepareCreate("test").setSettings(indexAnalysis).addMapping("type1", mapping));
    ensureGreen();
    index("test", "type1", "11", "foo", "bar");
    index("test", "type1", "12", "foo", "bar");
    index("test", "type1", "1", "name", "Just testing the suggestions api");
    index("test", "type1", "2", "name", "An other title about equal length");
    refresh();
    SearchResponse response = client().prepareSearch()
            .setSize(0)
            .suggest(new SuggestBuilder().setGlobalText("tetsting sugestion")
                    .addSuggestion("did_you_mean", phraseSuggestion("name").maxErrors(5.0f)))
            .get();
    assertNoFailures(response);
    assertSuggestion(response.getSuggest(), 0, 0, "did_you_mean", "testing suggestions");
}
/**
* Searching for a rare phrase shouldn't provide any suggestions if confidence > 1. This was possible before we rechecked the cutoff
* score during the reduce phase. Failures don't occur every time - maybe two out of five tries but we don't repeat it to save time.
*/
public void testSearchForRarePhrase() throws IOException {
    // Without enough chaff per shard the shards can become unbalanced, making the
    // cutoff recheck this test exercises do more harm than good.
    final int chaffPerShard = 100;
    CreateIndexRequestBuilder indexBuilder = prepareCreate("test").setSettings(Settings.builder()
            .put(indexSettings())
            .put("index.analysis.analyzer.body.tokenizer", "standard")
            .putArray("index.analysis.analyzer.body.filter", "lowercase", "my_shingle")
            .put("index.analysis.filter.my_shingle.type", "shingle")
            .put("index.analysis.filter.my_shingle.output_unigrams", true)
            .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
            .put("index.analysis.filter.my_shingle.max_shingle_size", 2));
    XContentBuilder mapping = XContentFactory.jsonBuilder()
            .startObject()
                .startObject("type1")
                    .startObject("properties")
                        .startObject("body")
                            .field("type", "text")
                            .field("analyzer", "body")
                        .endObject()
                    .endObject()
                .endObject()
            .endObject();
    assertAcked(indexBuilder.addMapping("type1", mapping));
    ensureGreen();
    NumShards shards = getNumShards("test");
    List<String> phrases = new ArrayList<>(Arrays.asList(
            "nobel prize", "noble gases", "somethingelse prize", "pride and joy", "notes are fun"));
    for (int i = 0; i < 8; i++) {
        phrases.add("noble somethingelse" + i);
    }
    // Pad every shard with filler documents to keep term statistics balanced.
    for (int i = 0; i < shards.numPrimaries * chaffPerShard; i++) {
        phrases.add("chaff" + i);
    }
    for (String phrase : phrases) {
        index("test", "type1", phrase, "body", phrase);
    }
    refresh();
    // With confidence > 1 the rare-but-correct phrase must not be "corrected" ...
    Suggest result = searchSuggest("nobel prize", "simple_phrase", phraseSuggestion("body")
            .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always").maxTermFreq(.99f))
            .confidence(2f)
            .maxErrors(5f)
            .size(1));
    assertSuggestionSize(result, 0, 0, "simple_phrase");
    // ... but an actual misspelling still resolves to the rare phrase.
    result = searchSuggest("noble prize", "simple_phrase", phraseSuggestion("body")
            .addCandidateGenerator(candidateGenerator("body").minWordLength(1).suggestMode("always").maxTermFreq(.99f))
            .confidence(2f)
            .maxErrors(5f)
            .size(1));
    assertSuggestion(result, 0, 0, "simple_phrase", "nobel prize");
}
/**
 * Stresses the phrase suggester with a huge candidate space: one correct
 * title plus thousands of near-miss titles, then checks the correct phrase
 * still wins and that candidate generation completes in reasonable time.
 */
public void testSuggestWithManyCandidates() throws InterruptedException, ExecutionException, IOException {
    CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
            .put(indexSettings())
            .put(SETTING_NUMBER_OF_SHARDS, 1) // A single shard will help to keep the tests repeatable.
            .put("index.analysis.analyzer.text.tokenizer", "standard")
            .putArray("index.analysis.analyzer.text.filter", "lowercase", "my_shingle")
            .put("index.analysis.filter.my_shingle.type", "shingle")
            .put("index.analysis.filter.my_shingle.output_unigrams", true)
            .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
            .put("index.analysis.filter.my_shingle.max_shingle_size", 3));
    XContentBuilder mapping = XContentFactory.jsonBuilder()
            .startObject()
            .startObject("type1")
            .startObject("properties")
            .startObject("title")
            .field("type", "text")
            .field("analyzer", "text")
            .endObject()
            .endObject()
            .endObject()
            .endObject();
    assertAcked(builder.addMapping("type1", mapping));
    ensureGreen();
    List<String> titles = new ArrayList<>();
    // We're going to be searching for:
    //   united states house of representatives elections in washington 2006
    // But we need to make sure we generate a ton of suggestions so we add a bunch of candidates.
    // Many of these candidates are drawn from page names on English Wikipedia.
    // Tons of different options very near the exact query term
    titles.add("United States House of Representatives Elections in Washington 1789");
    for (int year = 1790; year < 2014; year+= 2) {
        titles.add("United States House of Representatives Elections in Washington " + year);
    }
    // Six of these are near enough to be viable suggestions, just not the top one
    // But we can't stop there! Titles that are just a year are pretty common so lets just add one per year
    // since 0. Why not?
    for (int year = 0; year < 2015; year++) {
        titles.add(Integer.toString(year));
    }
    // That ought to provide more, less-good candidates for the last term
    // Now remove or add plural copies of every term we can
    titles.add("State");
    titles.add("Houses of Parliament");
    titles.add("Representative Government");
    titles.add("Election");
    // Now some possessive
    titles.add("Washington's Birthday");
    // And some conjugation
    titles.add("Unified Modeling Language");
    titles.add("Unite Against Fascism");
    titles.add("Stated Income Tax");
    titles.add("Media organizations housed within colleges");
    // And other stuff
    titles.add("Untied shoelaces");
    titles.add("Unit circle");
    titles.add("Untitled");
    titles.add("Unicef");
    titles.add("Unrated");
    titles.add("UniRed");
    titles.add("Jalan Uniten–Dengkil"); // Highway in Malaysia
    titles.add("UNITAS");
    titles.add("UNITER");
    titles.add("Un-Led-Ed");
    titles.add("STATS LLC");
    titles.add("Staples");
    titles.add("Skates");
    titles.add("Statues of the Liberators");
    titles.add("Staten Island");
    titles.add("Statens Museum for Kunst");
    titles.add("Hause"); // The last name or the German word, whichever.
    titles.add("Hose");
    titles.add("Hoses");
    titles.add("Howse Peak");
    titles.add("The Hoose-Gow");
    titles.add("Hooser");
    titles.add("Electron");
    titles.add("Electors");
    titles.add("Evictions");
    titles.add("Coronal mass ejection");
    titles.add("Wasington"); // A film?
    titles.add("Warrington"); // A town in England
    titles.add("Waddington"); // Lots of places have this name
    titles.add("Watlington"); // Ditto
    titles.add("Waplington"); // Yup, also a town
    titles.add("Washing of the Spears"); // Book
    for (char c = 'A'; c <= 'Z'; c++) {
        // Can't forget lists, glorious lists!
        titles.add("List of former members of the United States House of Representatives (" + c + ")");
        // Lots of people are named Washington <Middle Initial>. LastName
        titles.add("Washington " + c + ". Lastname");
        // Lets just add some more to be evil
        titles.add("United " + c);
        titles.add("States " + c);
        titles.add("House " + c);
        titles.add("Elections " + c);
        titles.add("2006 " + c);
        titles.add(c + " United");
        titles.add(c + " States");
        titles.add(c + " House");
        titles.add(c + " Elections");
        titles.add(c + " 2006");
    }
    List<IndexRequestBuilder> builders = new ArrayList<>();
    for (String title: titles) {
        builders.add(client().prepareIndex("test", "type1").setSource("title", title));
    }
    indexRandom(true, builders);
    PhraseSuggestionBuilder suggest = phraseSuggestion("title")
            .addCandidateGenerator(candidateGenerator("title")
                    .suggestMode("always")
                    .maxTermFreq(.99f)
                    .size(1000) // A silly high size helps generate a larger list of candidates for testing.
                    .maxInspections(1000) // This too
            )
            .confidence(0f)
            .maxErrors(2f)
            .shardSize(30000)
            .size(30000);
    Suggest searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", "title", suggest);
    assertSuggestion(searchSuggest, 0, 0, "title", "united states house of representatives elections in washington 2006");
    assertSuggestionSize(searchSuggest, 0, 25480, "title"); // Just to prove that we've run through a ton of options
    suggest.size(1);
    // The timing below only feeds the commented-out assertion; it is kept for manual debugging.
    long start = System.currentTimeMillis();
    searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", "title", suggest);
    long total = System.currentTimeMillis() - start;
    assertSuggestion(searchSuggest, 0, 0, "title", "united states house of representatives elections in washington 2006");
    // assertThat(total, lessThan(1000L)); // Takes many seconds without fix - just for debugging
}
/** Registers the dummy "mustache" script plugin used by the collate tests. */
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
    return Collections.singleton(DummyTemplatePlugin.class);
}
/** Test plugin that exposes {@link DummyTemplateScriptEngine} to the node. */
public static class DummyTemplatePlugin extends Plugin implements ScriptPlugin {
    @Override
    public ScriptEngine getScriptEngine(Settings settings) {
        return new DummyTemplateScriptEngine();
    }
}
/**
 * Minimal script engine that pretends to be "mustache" so the collate
 * machinery (hard coded to mustache) accepts it in tests. "Compilation"
 * just keeps the raw template source; execution performs plain
 * {@code {{key}}} placeholder substitution from the params map.
 */
public static class DummyTemplateScriptEngine implements ScriptEngine {
    // The collate query setter is hard coded to use mustache, so lets lie in this test about the script plugin,
    // which makes the collate code thinks mustache is evaluating the query.
    public static final String NAME = "mustache";

    @Override
    public void close() throws IOException {
        // nothing to release
    }

    @Override
    public String getType() {
        return NAME;
    }

    @Override
    public String getExtension() {
        return NAME;
    }

    @Override
    public Object compile(String scriptName, String scriptSource, Map<String, String> params) {
        // No real compilation: the raw template source is the "compiled" form.
        return scriptSource;
    }

    @Override
    public ExecutableScript executable(CompiledScript compiledScript, Map<String, Object> params) {
        // Substitute every {{key}} placeholder up front; the script has no
        // runtime variables (setNextVar is unsupported).
        String template = (String) compiledScript.compiled();
        for (Entry<String, Object> param : params.entrySet()) {
            template = template.replace("{{" + param.getKey() + "}}", String.valueOf(param.getValue()));
        }
        final String rendered = template;
        return new ExecutableScript() {
            @Override
            public void setNextVar(String name, Object value) {
                throw new UnsupportedOperationException("setNextVar not supported");
            }

            @Override
            public Object run() {
                return new BytesArray(rendered);
            }
        };
    }

    @Override
    public SearchScript search(CompiledScript compiledScript, SearchLookup lookup, Map<String, Object> vars) {
        throw new UnsupportedOperationException("search script not supported");
    }

    @Override
    public boolean isInlineScriptEnabled() {
        return true;
    }
}
/**
 * Exercises the phrase suggester's collate feature: filtering suggestions by
 * a (templated) query, parameterized templates, error propagation for broken
 * templates, and pruning with collate-match flags.
 *
 * NOTE(review): the {@code collateQuery(...)} calls below appear to mutate
 * and return the same underlying {@code suggest} builder, so all the
 * intermediate locals alias one instance and {@code namedSuggestion} always
 * reflects the most recent collate query — confirm against
 * PhraseSuggestionBuilder before refactoring.
 */
public void testPhraseSuggesterCollate() throws InterruptedException, ExecutionException, IOException {
    CreateIndexRequestBuilder builder = prepareCreate("test").setSettings(Settings.builder()
            .put(indexSettings())
            .put(SETTING_NUMBER_OF_SHARDS, 1) // A single shard will help to keep the tests repeatable.
            .put("index.analysis.analyzer.text.tokenizer", "standard")
            .putArray("index.analysis.analyzer.text.filter", "lowercase", "my_shingle")
            .put("index.analysis.filter.my_shingle.type", "shingle")
            .put("index.analysis.filter.my_shingle.output_unigrams", true)
            .put("index.analysis.filter.my_shingle.min_shingle_size", 2)
            .put("index.analysis.filter.my_shingle.max_shingle_size", 3));
    XContentBuilder mapping = XContentFactory.jsonBuilder()
            .startObject()
            .startObject("type1")
            .startObject("properties")
            .startObject("title")
            .field("type", "text")
            .field("analyzer", "text")
            .endObject()
            .endObject()
            .endObject()
            .endObject();
    assertAcked(builder.addMapping("type1", mapping));
    ensureGreen();
    List<String> titles = new ArrayList<>();
    titles.add("United States House of Representatives Elections in Washington 2006");
    titles.add("United States House of Representatives Elections in Washington 2005");
    titles.add("State");
    titles.add("Houses of Parliament");
    titles.add("Representative Government");
    titles.add("Election");
    List<IndexRequestBuilder> builders = new ArrayList<>();
    for (String title: titles) {
        builders.add(client().prepareIndex("test", "type1").setSource("title", title));
    }
    indexRandom(true, builders);
    // suggest without collate
    PhraseSuggestionBuilder suggest = phraseSuggestion("title")
            .addCandidateGenerator(new DirectCandidateGeneratorBuilder("title")
                    .suggestMode("always")
                    .maxTermFreq(.99f)
                    .size(10)
                    .maxInspections(200)
            )
            .confidence(0f)
            .maxErrors(2f)
            .shardSize(30000)
            .size(10);
    Suggest searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", "title", suggest);
    assertSuggestionSize(searchSuggest, 0, 10, "title");
    // suggest with collate: a match_phrase template with both field and suggestion placeholders
    String filterString = XContentFactory.jsonBuilder()
            .startObject()
            .startObject("match_phrase")
            .field("{{field}}", "{{suggestion}}")
            .endObject()
            .endObject()
            .string();
    PhraseSuggestionBuilder filteredQuerySuggest = suggest.collateQuery(filterString);
    filteredQuerySuggest.collateParams(Collections.singletonMap("field", "title"));
    searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", "title", filteredQuerySuggest);
    assertSuggestionSize(searchSuggest, 0, 2, "title");
    // collate suggest with no result (boundary case)
    searchSuggest = searchSuggest("Elections of Representatives Parliament", "title", filteredQuerySuggest);
    assertSuggestionSize(searchSuggest, 0, 0, "title");
    NumShards numShards = getNumShards("test");
    // collate suggest with bad query: "test" is not a query type
    String incorrectFilterString = XContentFactory.jsonBuilder()
            .startObject()
            .startObject("test")
            .field("title", "{{suggestion}}")
            .endObject()
            .endObject()
            .string();
    PhraseSuggestionBuilder incorrectFilteredSuggest = suggest.collateQuery(incorrectFilterString);
    Map<String, SuggestionBuilder<?>> namedSuggestion = new HashMap<>();
    namedSuggestion.put("my_title_suggestion", incorrectFilteredSuggest);
    try {
        searchSuggest("united states house of representatives elections in washington 2006", numShards.numPrimaries, namedSuggestion);
        fail("Post query error has been swallowed");
    } catch(ElasticsearchException e) {
        // expected
    }
    // suggest with collation: a concrete (non-parameterized) match_phrase template
    String filterStringAsFilter = XContentFactory.jsonBuilder()
            .startObject()
            .startObject("match_phrase")
            .field("title", "{{suggestion}}")
            .endObject()
            .endObject()
            .string();
    PhraseSuggestionBuilder filteredFilterSuggest = suggest.collateQuery(filterStringAsFilter);
    searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", "title",
            filteredFilterSuggest);
    assertSuggestionSize(searchSuggest, 0, 2, "title");
    // collate suggest with bad query: "pprefix" is a misspelled query type
    String filterStr = XContentFactory.jsonBuilder()
            .startObject()
            .startObject("pprefix")
            .field("title", "{{suggestion}}")
            .endObject()
            .endObject()
            .string();
    // NOTE(review): `in` is never read; the call seemingly works by mutating
    // `suggest`, which `namedSuggestion` still references — see class note above.
    PhraseSuggestionBuilder in = suggest.collateQuery(filterStr);
    try {
        searchSuggest("united states house of representatives elections in washington 2006", numShards.numPrimaries, namedSuggestion);
        fail("Post filter error has been swallowed");
    } catch(ElasticsearchException e) {
        //expected
    }
    // collate script failure due to no additional params
    String collateWithParams = XContentFactory.jsonBuilder()
            .startObject()
            .startObject("{{query_type}}")
            .field("{{query_field}}", "{{suggestion}}")
            .endObject()
            .endObject()
            .string();
    // NOTE(review): like `in` above, this local is unused; the search below goes
    // through `namedSuggestion`, relying on the shared-builder mutation.
    PhraseSuggestionBuilder phraseSuggestWithNoParams = suggest.collateQuery(collateWithParams);
    try {
        searchSuggest("united states house of representatives elections in washington 2006", numShards.numPrimaries, namedSuggestion);
        fail("Malformed query (lack of additional params) should fail");
    } catch (ElasticsearchException e) {
        // expected
    }
    // collate script with additional params
    Map<String, Object> params = new HashMap<>();
    params.put("query_type", "match_phrase");
    params.put("query_field", "title");
    PhraseSuggestionBuilder phraseSuggestWithParams = suggest.collateQuery(collateWithParams).collateParams(params);
    searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", "title",
            phraseSuggestWithParams);
    assertSuggestionSize(searchSuggest, 0, 2, "title");
    // collate query request with prune set to true
    PhraseSuggestionBuilder phraseSuggestWithParamsAndReturn = suggest.collateQuery(collateWithParams).collateParams(params)
            .collatePrune(true);
    searchSuggest = searchSuggest("united states house of representatives elections in washington 2006", "title",
            phraseSuggestWithParamsAndReturn);
    assertSuggestionSize(searchSuggest, 0, 10, "title");
    assertSuggestionPhraseCollateMatchExists(searchSuggest, "title", 2);
}
/** Runs a single suggestion with no global suggest text, expecting no shard failures. */
protected Suggest searchSuggest(String name, SuggestionBuilder<?> suggestion) {
    return searchSuggest(null, name, suggestion);
}
/** Runs a single named suggestion against {@code suggestText}, expecting no shard failures. */
protected Suggest searchSuggest(String suggestText, String name, SuggestionBuilder<?> suggestion) {
    return searchSuggest(suggestText, 0, Collections.singletonMap(name, suggestion));
}
/**
 * Executes all given suggestions in a single search request and returns the
 * resulting {@link Suggest}, asserting that exactly
 * {@code expectShardsFailed} shards reported failures.
 */
protected Suggest searchSuggest(String suggestText, int expectShardsFailed, Map<String, SuggestionBuilder<?>> suggestions) {
    SuggestBuilder suggestBuilder = new SuggestBuilder();
    if (suggestText != null) {
        suggestBuilder.setGlobalText(suggestText);
    }
    suggestions.forEach(suggestBuilder::addSuggestion);
    SearchRequestBuilder request = client().prepareSearch().setSize(0).suggest(suggestBuilder);
    SearchResponse response = request.execute().actionGet();
    assertThat(Arrays.toString(response.getShardFailures()), response.getFailedShards(), equalTo(expectShardsFailed));
    return response.getSuggest();
}
}