/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.search;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.test.ESIntegTestCase;
import org.junit.Before;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits;
public class MatchQueryIT extends ESIntegTestCase {
private static final String INDEX = "test";
/**
* Test setup.
*/
@Before
public void setUp() throws Exception {
super.setUp();
CreateIndexRequestBuilder builder = prepareCreate(INDEX).setSettings(
Settings.builder()
.put(indexSettings())
.put("index.analysis.filter.syns.type", "synonym")
.putArray("index.analysis.filter.syns.synonyms", "wtf, what the fudge", "foo, bar baz")
.put("index.analysis.analyzer.lower_syns.type", "custom")
.put("index.analysis.analyzer.lower_syns.tokenizer", "standard")
.putArray("index.analysis.analyzer.lower_syns.filter", "lowercase", "syns")
.put("index.analysis.filter.graphsyns.type", "synonym_graph")
.putArray("index.analysis.filter.graphsyns.synonyms", "wtf, what the fudge", "foo, bar baz")
.put("index.analysis.analyzer.lower_graphsyns.type", "custom")
.put("index.analysis.analyzer.lower_graphsyns.tokenizer", "standard")
.putArray("index.analysis.analyzer.lower_graphsyns.filter", "lowercase", "graphsyns")
);
assertAcked(builder.addMapping(INDEX, createMapping()));
ensureGreen();
}
private List<IndexRequestBuilder> getDocs() {
List<IndexRequestBuilder> builders = new ArrayList<>();
builders.add(client().prepareIndex("test", "test", "1").setSource("field", "say wtf happened foo"));
builders.add(client().prepareIndex("test", "test", "2").setSource("field", "bar baz what the fudge man"));
builders.add(client().prepareIndex("test", "test", "3").setSource("field", "wtf"));
builders.add(client().prepareIndex("test", "test", "4").setSource("field", "what is the name for fudge"));
builders.add(client().prepareIndex("test", "test", "5").setSource("field", "bar two three"));
builders.add(client().prepareIndex("test", "test", "6").setSource("field", "bar baz two three"));
return builders;
}
/**
* Setup the index mappings for the test index.
*
* @return the json builder with the index mappings
* @throws IOException on error creating mapping json
*/
private XContentBuilder createMapping() throws IOException {
return XContentFactory.jsonBuilder()
.startObject()
.startObject(INDEX)
.startObject("properties")
.startObject("field")
.field("type", "text")
.endObject()
.endObject()
.endObject()
.endObject();
}
public void testSimpleMultiTermPhrase() throws ExecutionException, InterruptedException {
indexRandom(true, false, getDocs());
// first search using regular synonym field using phrase
SearchResponse searchResponse = client().prepareSearch(INDEX)
.setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three").analyzer("lower_syns")).get();
// because foo -> "bar baz" where "foo" and "bar" at position 0, "baz" and "two" at position 1.
// "bar two three", "bar baz three", "foo two three", "foo baz three"
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "5"); // we should not match this but we do
// same query using graph should find correct result
searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchPhraseQuery("field", "foo two three")
.analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "6");
}
public void testSimpleMultiTermAnd() throws ExecutionException, InterruptedException {
indexRandom(true, false, getDocs());
// first search using regular synonym field using phrase
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge")
.operator(Operator.AND).analyzer("lower_syns")).get();
// Old synonyms work fine in that case, but it is coincidental
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "1");
// same query using graph should find correct result
searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "say what the fudge")
.operator(Operator.AND).analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "1");
}
public void testMinShouldMatch() throws ExecutionException, InterruptedException {
indexRandom(true, false, getDocs());
// no min should match
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo")
.operator(Operator.OR).analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 6L);
assertSearchHits(searchResponse, "1", "2", "3", "4", "5", "6");
// same query, with min_should_match of 2
searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchQuery("field", "three what the fudge foo")
.operator(Operator.OR).analyzer("lower_graphsyns").minimumShouldMatch("80%")).get();
// three wtf foo = 2 terms, match #1
// three wtf bar baz = 3 terms, match #6
// three what the fudge foo = 4 terms, no match
// three what the fudge bar baz = 4 terms, match #2
assertHitCount(searchResponse, 3L);
assertSearchHits(searchResponse, "1", "2", "6");
}
public void testPhrasePrefix() throws ExecutionException, InterruptedException {
List<IndexRequestBuilder> builders = getDocs();
builders.add(client().prepareIndex("test", "test", "7").setSource("field", "WTFD!"));
builders.add(client().prepareIndex("test", "test", "8").setSource("field", "Weird Al's WHAT THE FUDGESICLE"));
indexRandom(true, false, builders);
SearchResponse searchResponse = client().prepareSearch(INDEX).setQuery(QueryBuilders.matchPhrasePrefixQuery("field", "wtf")
.analyzer("lower_graphsyns")).get();
assertHitCount(searchResponse, 5L);
assertSearchHits(searchResponse, "1", "2", "3", "7", "8");
}
public void testCommonTerms() throws ExecutionException, InterruptedException {
String route = "commonTermsTest";
List<IndexRequestBuilder> builders = getDocs();
for (IndexRequestBuilder indexRequet : builders) {
// route all docs to same shard for this test
indexRequet.setRouting(route);
}
indexRandom(true, false, builders);
// do a search with no cutoff frequency to show which docs should match
SearchResponse searchResponse = client().prepareSearch(INDEX)
.setRouting(route)
.setQuery(QueryBuilders.matchQuery("field", "bar three happened")
.operator(Operator.OR)).get();
assertHitCount(searchResponse, 4L);
assertSearchHits(searchResponse, "1", "2", "5", "6");
// do same search with cutoff and see less documents match
// in this case, essentially everything but "happened" gets excluded
searchResponse = client().prepareSearch(INDEX)
.setRouting(route)
.setQuery(QueryBuilders.matchQuery("field", "bar three happened")
.operator(Operator.OR).cutoffFrequency(1f)).get();
assertHitCount(searchResponse, 1L);
assertSearchHits(searchResponse, "1");
}
}