/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.morelikethis;
import org.elasticsearch.cluster.health.ClusterHealthStatus;
import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.action.search.SearchPhaseExecutionException;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.client.Client;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentFactory;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder;
import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.test.ESIntegTestCase;
import org.junit.Test;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutionException;
import static org.elasticsearch.client.Requests.*;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS;
import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.moreLikeThisQuery;
import static org.elasticsearch.index.query.QueryBuilders.termQuery;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.*;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.notNullValue;
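/**
* Integration tests for the {@code more_like_this} query: liking indexed documents by id,
* liking free text, and liking artificial (not indexed) documents.
*/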
public class MoreLikeThisIT extends ESIntegTestCase {
@Test
public void testSimpleMoreLikeThis() throws Exception {
logger.info("Creating index test");
assertAcked(prepareCreate("test").addMapping("type1",
jsonBuilder().startObject().startObject("type1").startObject("properties")
.startObject("text").field("type", "string").endObject()
.endObject().endObject().endObject()));
logger.info("Running Cluster Health");
assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));
logger.info("Indexing...");
client().index(indexRequest("test").type("type1").id("1").source(jsonBuilder().startObject().field("text", "lucene").endObject())).actionGet();
client().index(indexRequest("test").type("type1").id("2").source(jsonBuilder().startObject().field("text", "lucene release").endObject())).actionGet();
client().admin().indices().refresh(refreshRequest()).actionGet();
logger.info("Running moreLikeThis");
SearchResponse response = client().prepareSearch().setQuery(
new MoreLikeThisQueryBuilder().addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1)).get();
assertHitCount(response, 1l);
}
/**
 * Indexes documents that only carry a {@code long} field and verifies that a
 * more-like-this query seeded with document 1 yields no hits.
 */
@Test
public void testSimpleMoreLikeOnLongField() throws Exception {
    logger.info("Creating index test");
    assertAcked(prepareCreate("test").addMapping("type1", "some_long", "type=long"));

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    XContentBuilder firstSource = jsonBuilder().startObject().field("some_long", 1367484649580L).endObject();
    client().index(indexRequest("test").type("type1").id("1").source(firstSource)).actionGet();

    // The second document is indexed under a different type than the other two.
    XContentBuilder secondSource = jsonBuilder().startObject().field("some_long", 0).endObject();
    client().index(indexRequest("test").type("type2").id("2").source(secondSource)).actionGet();

    XContentBuilder thirdSource = jsonBuilder().startObject().field("some_long", -666).endObject();
    client().index(indexRequest("test").type("type1").id("3").source(thirdSource)).actionGet();

    client().admin().indices().refresh(refreshRequest()).actionGet();

    logger.info("Running moreLikeThis");
    MoreLikeThisQueryBuilder likeFirstDoc = new MoreLikeThisQueryBuilder()
            .addLikeItem(new Item("test", "type1", "1"))
            .minTermFreq(1)
            .minDocFreq(1);
    SearchResponse response = client().prepareSearch().setQuery(likeFirstDoc).get();
    assertHitCount(response, 0L);
}
/**
 * Creates two filtered aliases ("release" and "beta") over the same index and verifies
 * that a more-like-this search issued against the index or against either alias only
 * returns the documents visible through that target.
 */
@Test
public void testMoreLikeThisWithAliases() throws Exception {
    logger.info("Creating index test");
    XContentBuilder typeMapping = jsonBuilder()
            .startObject()
                .startObject("type1")
                    .startObject("properties")
                        .startObject("text").field("type", "string").endObject()
                    .endObject()
                .endObject()
            .endObject();
    assertAcked(prepareCreate("test").addMapping("type1", typeMapping));

    logger.info("Creating aliases alias release");
    client().admin().indices()
            .aliases(indexAliasesRequest().addAlias("release", termQuery("text", "release"), "test"))
            .actionGet();
    client().admin().indices()
            .aliases(indexAliasesRequest().addAlias("beta", termQuery("text", "beta"), "test"))
            .actionGet();

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    String[] texts = {"lucene beta", "lucene release", "elasticsearch beta", "elasticsearch release"};
    for (int pos = 0; pos < texts.length; pos++) {
        // Document ids are "1".."4", in the same order as the texts above.
        XContentBuilder source = jsonBuilder().startObject().field("text", texts[pos]).endObject();
        client().index(indexRequest("test").type("type1").id(String.valueOf(pos + 1)).source(source)).actionGet();
    }
    client().admin().indices().refresh(refreshRequest()).actionGet();

    logger.info("Running moreLikeThis on index");
    MoreLikeThisQueryBuilder onIndex = new MoreLikeThisQueryBuilder()
            .addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1);
    SearchResponse response = client().prepareSearch().setQuery(onIndex).get();
    assertHitCount(response, 2L);

    logger.info("Running moreLikeThis on beta shard");
    MoreLikeThisQueryBuilder onBeta = new MoreLikeThisQueryBuilder()
            .addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1);
    response = client().prepareSearch("beta").setQuery(onBeta).get();
    assertHitCount(response, 1L);
    assertThat(response.getHits().getAt(0).id(), equalTo("3"));

    logger.info("Running moreLikeThis on release shard");
    MoreLikeThisQueryBuilder onRelease = new MoreLikeThisQueryBuilder()
            .addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1);
    response = client().prepareSearch("release").setQuery(onRelease).get();
    assertHitCount(response, 1L);
    assertThat(response.getHits().getAt(0).id(), equalTo("2"));

    logger.info("Running moreLikeThis on alias with node client");
    MoreLikeThisQueryBuilder viaNodeClient = new MoreLikeThisQueryBuilder()
            .addLikeItem(new Item("test", "type1", "1")).minTermFreq(1).minDocFreq(1);
    response = internalCluster().clientNodeClient().prepareSearch("beta").setQuery(viaNodeClient).get();
    assertHitCount(response, 1L);
    assertThat(response.getHits().getAt(0).id(), equalTo("3"));
}
/**
 * Verifies that a liked document can be referenced through an index alias instead of the
 * concrete index name (issue #14944).
 * <p>
 * Three documents are indexed into "foo"; the like item points at document 1 via the alias
 * "foo_name". Two hits are expected and the top hit must be document 3, whose text is
 * identical to that of document 1.
 */
@Test
public void testMoreLikeThisWithAliasesInLikeDocuments() throws Exception {
    String indexName = "foo";
    String aliasName = "foo_name";
    String typeName = "bar";

    // Mapping with an empty "properties" object.
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("bar")
            .startObject("properties")
            .endObject()
            .endObject().endObject().string();
    client().admin().indices().prepareCreate(indexName).addMapping(typeName, mapping).execute().actionGet();
    client().admin().indices().aliases(indexAliasesRequest().addAlias(aliasName, indexName)).actionGet();

    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    client().index(indexRequest(indexName).type(typeName).id("1")
            .source(jsonBuilder().startObject().field("text", "elasticsearch index").endObject())).actionGet();
    client().index(indexRequest(indexName).type(typeName).id("2")
            .source(jsonBuilder().startObject().field("text", "lucene index").endObject())).actionGet();
    client().index(indexRequest(indexName).type(typeName).id("3")
            .source(jsonBuilder().startObject().field("text", "elasticsearch index").endObject())).actionGet();
    refresh();

    // The like item names the alias, not the concrete index.
    SearchResponse response = client().prepareSearch().setQuery(
            new MoreLikeThisQueryBuilder().like(new Item(aliasName, typeName, "1")).minTermFreq(1).minDocFreq(1))
            .get();
    assertHitCount(response, 2L);
    assertThat(response.getHits().getAt(0).id(), equalTo("3"));
}
/**
 * Indexes a single document containing a nested object and runs the same more-like-this
 * query against it twice, asserting that neither search reports failures.
 * (Named after issue 2197.)
 */
@Test
public void testMoreLikeThisIssue2197() throws Exception {
    // Mapping with an empty "properties" object.
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("bar")
            .startObject("properties")
            .endObject()
            .endObject().endObject().string();
    client().admin().indices().prepareCreate("foo").addMapping("bar", mapping).execute().actionGet();

    // Close both the inner "foo" object and the root object explicitly instead of relying on
    // the JSON generator to auto-close dangling structures when the builder is serialized.
    client().prepareIndex("foo", "bar", "1")
            .setSource(jsonBuilder().startObject().startObject("foo").field("bar", "boz").endObject().endObject())
            .execute().actionGet();
    client().admin().indices().prepareRefresh("foo").execute().actionGet();
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    SearchResponse response = client().prepareSearch().setQuery(
            new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1"))).get();
    assertNoFailures(response);
    assertThat(response, notNullValue());

    // The identical query is deliberately executed a second time.
    response = client().prepareSearch().setQuery(
            new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1"))).get();
    assertNoFailures(response);
    assertThat(response, notNullValue());
}
/**
 * Indexes a document with a custom routing value and verifies that a more-like-this query
 * whose like item carries the same routing completes without failures.
 * See: https://github.com/elasticsearch/elasticsearch/issues/2489
 */
@Test
public void testMoreLikeWithCustomRouting() throws Exception {
    // Mapping with an empty "properties" object.
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("bar")
            .startObject("properties")
            .endObject()
            .endObject().endObject().string();
    client().admin().indices().prepareCreate("foo").addMapping("bar", mapping).execute().actionGet();
    ensureGreen();

    // Close both the inner "foo" object and the root object explicitly instead of relying on
    // the JSON generator to auto-close dangling structures when the builder is serialized.
    client().prepareIndex("foo", "bar", "1")
            .setSource(jsonBuilder().startObject().startObject("foo").field("bar", "boz").endObject().endObject())
            .setRouting("2")
            .execute().actionGet();
    client().admin().indices().prepareRefresh("foo").execute().actionGet();

    SearchResponse response = client().prepareSearch().setQuery(
            new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1").routing("2"))).get();
    assertNoFailures(response);
    assertThat(response, notNullValue());
}
/**
 * Creates a two-shard, zero-replica index, indexes a document with routing "4000" and
 * verifies that a more-like-this query whose like item carries that routing completes
 * without failures.
 * See issue: https://github.com/elasticsearch/elasticsearch/issues/3039
 */
@Test
public void testMoreLikeThisIssueRoutingNotSerialized() throws Exception {
    // Mapping with an empty "properties" object.
    String mapping = XContentFactory.jsonBuilder().startObject().startObject("bar")
            .startObject("properties")
            .endObject()
            .endObject().endObject().string();
    assertAcked(prepareCreate("foo", 2,
            Settings.builder().put(SETTING_NUMBER_OF_SHARDS, 2).put(SETTING_NUMBER_OF_REPLICAS, 0))
            .addMapping("bar", mapping));
    ensureGreen();

    // Close both the inner "foo" object and the root object explicitly instead of relying on
    // the JSON generator to auto-close dangling structures when the builder is serialized.
    client().prepareIndex("foo", "bar", "1")
            .setSource(jsonBuilder().startObject().startObject("foo").field("bar", "boz").endObject().endObject())
            .setRouting("4000")
            .execute().actionGet();
    client().admin().indices().prepareRefresh("foo").execute().actionGet();

    SearchResponse response = client().prepareSearch().setQuery(
            new MoreLikeThisQueryBuilder().addLikeItem(new Item("foo", "bar", "1").routing("4000"))).get();
    assertNoFailures(response);
    assertThat(response, notNullValue());
}
/**
 * Exercises more-like-this against a mapping that mixes a numeric field with a string
 * field, covering implicit field lists, explicit field lists and both settings of
 * {@code failOnUnsupportedField}.
 * See issue https://github.com/elasticsearch/elasticsearch/issues/3252
 */
@Test
public void testNumericField() throws Exception {
    final String[] integerTypes = new String[]{"byte", "short", "integer", "long"};
    // Create the request builder first so the random draws happen in the same order as before.
    CreateIndexRequestBuilder createIndex = prepareCreate("test");
    XContentBuilder mapping = jsonBuilder()
            .startObject()
                .startObject("type")
                    .startObject("properties")
                        .startObject("int_value").field("type", randomFrom(integerTypes)).endObject()
                        .startObject("string_value").field("type", "string").endObject()
                    .endObject()
                .endObject()
            .endObject();
    createIndex.addMapping("type", mapping).execute().actionGet();
    ensureGreen();

    XContentBuilder firstSource = jsonBuilder().startObject()
            .field("string_value", "lucene index")
            .field("int_value", 1)
            .endObject();
    client().prepareIndex("test", "type", "1").setSource(firstSource).execute().actionGet();

    XContentBuilder secondSource = jsonBuilder().startObject()
            .field("string_value", "elasticsearch index")
            .field("int_value", 42)
            .endObject();
    client().prepareIndex("test", "type", "2").setSource(secondSource).execute().actionGet();

    refresh();

    // No fields given: numeric fields are skipped.
    MoreLikeThisQueryBuilder implicitFields = new MoreLikeThisQueryBuilder()
            .addLikeItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1);
    SearchResponse searchResponse = client().prepareSearch().setQuery(implicitFields).get();
    assertHitCount(searchResponse, 1L);

    // Fields given explicitly, one of them numeric: the search must fail.
    MoreLikeThisQueryBuilder explicitFields = new MoreLikeThisQueryBuilder("string_value", "int_value")
            .addLikeItem(new Item("test", "type", "1")).minTermFreq(1).minDocFreq(1);
    assertThrows(client().prepareSearch().setQuery(explicitFields), SearchPhaseExecutionException.class);

    // Like-text query without any field: succeeds.
    MoreLikeThisQueryBuilder textNoField = moreLikeThisQuery().likeText("index").minTermFreq(1).minDocFreq(1);
    searchResponse = client().prepareSearch().setQuery(textNoField).execute().actionGet();
    assertHitCount(searchResponse, 2L);

    // Like-text query restricted to the string field.
    MoreLikeThisQueryBuilder textStringField = moreLikeThisQuery("string_value")
            .likeText("index").minTermFreq(1).minDocFreq(1);
    searchResponse = client().prepareSearch().setQuery(textStringField).execute().actionGet();
    assertHitCount(searchResponse, 2L);

    // Like-text query that includes a numeric field: fails when nothing is configured.
    MoreLikeThisQueryBuilder mixedDefault = moreLikeThisQuery("string_value", "int_value").likeText("index");
    assertThrows(client().prepareSearch().setQuery(mixedDefault), SearchPhaseExecutionException.class);

    // Same, with failure requested explicitly.
    MoreLikeThisQueryBuilder mixedFailing = moreLikeThisQuery("string_value", "int_value")
            .likeText("index").failOnUnsupportedField(true);
    assertThrows(client().prepareSearch().setQuery(mixedFailing), SearchPhaseExecutionException.class);

    // Same, with fail_on_unsupported_field switched off: succeeds.
    MoreLikeThisQueryBuilder mixedLenient = moreLikeThisQuery("string_value", "int_value")
            .likeText("index").minTermFreq(1).minDocFreq(1).failOnUnsupportedField(false);
    searchResponse = client().prepareSearch().setQuery(mixedLenient).get();
    assertHitCount(searchResponse, 2L);

    // Numeric field only: fails when nothing is configured.
    MoreLikeThisQueryBuilder numericDefault = moreLikeThisQuery("int_value")
            .likeText("42").minTermFreq(1).minDocFreq(1);
    assertThrows(client().prepareSearch().setQuery(numericDefault), SearchPhaseExecutionException.class);

    // Numeric field only, failure requested explicitly.
    MoreLikeThisQueryBuilder numericFailing = moreLikeThisQuery("int_value")
            .likeText("42").minTermFreq(1).minDocFreq(1).failOnUnsupportedField(true);
    assertThrows(client().prepareSearch().setQuery(numericFailing), SearchPhaseExecutionException.class);

    // Numeric field only, fail_on_unsupported_field switched off: succeeds with no hits.
    MoreLikeThisQueryBuilder numericLenient = moreLikeThisQuery("int_value")
            .likeText("42").minTermFreq(1).minDocFreq(1).failOnUnsupportedField(false);
    searchResponse = client().prepareSearch().setQuery(numericLenient).execute().actionGet();
    assertHitCount(searchResponse, 0L);
}
/**
 * Verifies the {@code include} flag: with it set, the liked document itself is returned
 * (ranked first); without it, only the other document is returned.
 */
@Test
public void testSimpleMoreLikeInclude() throws Exception {
    logger.info("Creating index test");
    XContentBuilder typeMapping = jsonBuilder()
            .startObject()
                .startObject("type1")
                    .startObject("properties")
                        .startObject("text").field("type", "string").endObject()
                    .endObject()
                .endObject()
            .endObject();
    assertAcked(prepareCreate("test").addMapping("type1", typeMapping));

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    XContentBuilder firstSource = jsonBuilder().startObject()
            .field("text", "Apache Lucene is a free/open source information retrieval software library")
            .endObject();
    client().index(indexRequest("test").type("type1").id("1").source(firstSource)).actionGet();

    XContentBuilder secondSource = jsonBuilder().startObject()
            .field("text", "Lucene has been ported to other programming languages")
            .endObject();
    client().index(indexRequest("test").type("type1").id("2").source(secondSource)).actionGet();

    client().admin().indices().refresh(refreshRequest()).actionGet();

    logger.info("Running More Like This with include true");
    MoreLikeThisQueryBuilder likeFirstIncluded = new MoreLikeThisQueryBuilder()
            .addLikeItem(new Item("test", "type1", "1"))
            .minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%");
    SearchResponse response = client().prepareSearch().setQuery(likeFirstIncluded).get();
    assertOrderedSearchHits(response, "1", "2");

    MoreLikeThisQueryBuilder likeSecondIncluded = new MoreLikeThisQueryBuilder()
            .addLikeItem(new Item("test", "type1", "2"))
            .minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%");
    response = client().prepareSearch().setQuery(likeSecondIncluded).get();
    assertOrderedSearchHits(response, "2", "1");

    logger.info("Running More Like This with include false");
    MoreLikeThisQueryBuilder likeFirstExcluded = new MoreLikeThisQueryBuilder()
            .addLikeItem(new Item("test", "type1", "1"))
            .minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%");
    response = client().prepareSearch().setQuery(likeFirstExcluded).get();
    assertSearchHits(response, "2");
}
/**
 * Verifies liking a document by id through {@code ids(...)}: with {@code include(true)}
 * all three indexed documents, which share the term "lucene", are expected as hits.
 */
@Test
public void testSimpleMoreLikeThisIds() throws Exception {
    logger.info("Creating index test");
    assertAcked(prepareCreate("test").addMapping("type1",
            jsonBuilder().startObject().startObject("type1").startObject("properties")
                    .startObject("text").field("type", "string").endObject()
                    .endObject().endObject().endObject()));

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    List<IndexRequestBuilder> builders = new ArrayList<>();
    builders.add(client().prepareIndex("test", "type1").setSource("text", "lucene").setId("1"));
    builders.add(client().prepareIndex("test", "type1").setSource("text", "lucene release").setId("2"));
    builders.add(client().prepareIndex("test", "type1").setSource("text", "apache lucene").setId("3"));
    indexRandom(true, builders);

    logger.info("Running MoreLikeThis");
    MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery("text").ids("1").include(true).minTermFreq(1).minDocFreq(1);
    SearchResponse mltResponse = client().prepareSearch().setTypes("type1").setQuery(queryBuilder).execute().actionGet();
    assertHitCount(mltResponse, 3L);
}
/**
 * Indexes one document per type across a random number of types and verifies that a
 * more-like-this query seeded with the document of "type0" (include enabled) matches one
 * document per type when searching all types.
 */
@Test
public void testSimpleMoreLikeThisIdsMultipleTypes() throws Exception {
    logger.info("Creating index test");
    int typeCount = randomIntBetween(2, 10);
    CreateIndexRequestBuilder createIndex = prepareCreate("test");
    String[] typeNames = new String[typeCount];
    for (int t = 0; t < typeCount; t++) {
        typeNames[t] = "type" + t;
        XContentBuilder typeMapping = jsonBuilder()
                .startObject()
                    .startObject("type" + t)
                        .startObject("properties")
                            .startObject("text").field("type", "string").endObject()
                        .endObject()
                    .endObject()
                .endObject();
        createIndex.addMapping("type" + t, typeMapping);
    }
    assertAcked(createIndex);

    logger.info("Running Cluster Health");
    assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN));

    logger.info("Indexing...");
    List<IndexRequestBuilder> indexRequests = new ArrayList<>(typeCount);
    for (int t = 0; t < typeCount; t++) {
        // One document per type; its id equals the type index.
        IndexRequestBuilder request = client().prepareIndex("test", "type" + t)
                .setSource("text", "lucene" + " " + t)
                .setId(String.valueOf(t));
        indexRequests.add(request);
    }
    indexRandom(true, indexRequests);

    logger.info("Running MoreLikeThis");
    MoreLikeThisQueryBuilder likeTypeZeroDoc = QueryBuilders.moreLikeThisQuery("text")
            .include(true)
            .minTermFreq(1)
            .minDocFreq(1)
            .addLikeItem(new Item("test", "type0", "0"));
    SearchResponse mltResponse = client().prepareSearch()
            .setTypes(typeNames)
            .setQuery(likeTypeZeroDoc)
            .execute().actionGet();
    assertHitCount(mltResponse, typeCount);
}
/**
 * Indexes one document holding all values of a multi-valued keyword-analyzed field plus one
 * document per single value, then verifies that the number of hits of a more-like-this
 * query on the multi-valued document equals the configured {@code maxQueryTerms}.
 */
@Test
public void testMoreLikeThisMultiValueFields() throws Exception {
    logger.info("Creating the index ...");
    assertAcked(prepareCreate("test")
            .addMapping("type1", "text", "type=string,analyzer=keyword")
            .setSettings(SETTING_NUMBER_OF_SHARDS, 1));
    ensureGreen();

    logger.info("Indexing ...");
    String[] values = {"aaaa", "bbbb", "cccc", "dddd", "eeee", "ffff", "gggg", "hhhh", "iiii", "jjjj"};
    List<IndexRequestBuilder> builders = new ArrayList<>(values.length + 1);
    // index one document with all the values
    builders.add(client().prepareIndex("test", "type1", "0").setSource("text", values));
    // index each document with only one of the values
    for (int i = 0; i < values.length; i++) {
        builders.add(client().prepareIndex("test", "type1", String.valueOf(i + 1)).setSource("text", values[i]));
    }
    indexRandom(true, builders);

    int maxIters = randomIntBetween(10, 20);
    for (int i = 0; i < maxIters; i++) {
        int maxQueryTerms = randomIntBetween(1, values.length);
        // The logger substitutes "{}" placeholders; "%s" would be printed literally.
        logger.info("Running More Like This with max_query_terms = {}", maxQueryTerms);
        MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery("text").ids("0").minTermFreq(1).minDocFreq(1)
                .maxQueryTerms(maxQueryTerms).minimumShouldMatch("0%");
        SearchResponse response = client().prepareSearch("test").setTypes("type1")
                .setQuery(mltQuery).execute().actionGet();
        assertSearchResponse(response);
        assertHitCount(response, maxQueryTerms);
    }
}
/**
 * Indexes ten documents where document {@code i} contains the numbers 1..(10 - i), then
 * checks the hit count of a like-text query on "1 2 ... 10" for every
 * {@code minimum_should_match} from 0% to 100% in steps of 10%.
 */
@Test
public void testMinimumShouldMatch() throws ExecutionException, InterruptedException {
    logger.info("Creating the index ...");
    assertAcked(prepareCreate("test")
            .addMapping("type1", "text", "type=string,analyzer=whitespace")
            .setSettings(SETTING_NUMBER_OF_SHARDS, 1));
    ensureGreen();

    logger.info("Indexing with each doc having one less term ...");
    List<IndexRequestBuilder> indexRequests = new ArrayList<>();
    for (int docNo = 0; docNo < 10; docNo++) {
        // Builds "1 2 ... k " (trailing space included) with k = 10 - docNo.
        StringBuilder terms = new StringBuilder();
        for (int term = 1; term <= 10 - docNo; term++) {
            terms.append(term).append(" ");
        }
        indexRequests.add(client().prepareIndex("test", "type1", String.valueOf(docNo))
                .setSource("text", terms.toString()));
    }
    indexRandom(true, indexRequests);

    logger.info("Testing each minimum_should_match from 0% - 100% with 10% increment ...");
    for (int step = 0; step <= 10; step++) {
        String minimumShouldMatch = (10 * step) + "%";
        MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery("text")
                .likeText("1 2 3 4 5 6 7 8 9 10")
                .minTermFreq(1)
                .minDocFreq(1)
                .minimumShouldMatch(minimumShouldMatch);
        logger.info("Testing with minimum_should_match = " + minimumShouldMatch);
        SearchResponse response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
        assertSearchResponse(response);
        // "0%" (step 0) matches all ten documents; otherwise 11 - step documents are expected.
        int expectedHits = (step == 0) ? 10 : 11 - step;
        assertHitCount(response, expectedHits);
    }
}
/**
 * Indexes a single document with a random number of random-text fields and verifies that a
 * more-like-this query using that same content as an artificial document, with
 * {@code minimumShouldMatch("100%")}, matches exactly that one indexed document.
 */
@Test
public void testMoreLikeThisArtificialDocs() throws Exception {
    int numFields = randomIntBetween(5, 10);

    createIndex("test");
    ensureGreen();

    logger.info("Indexing a single document ...");
    XContentBuilder doc = jsonBuilder().startObject();
    for (int i = 0; i < numFields; i++) {
        // Join the random strings into one text value. Concatenating the array itself with a
        // string would only embed the array's identity toString(), which is unrelated to its
        // contents and differs between runs even for the same test seed.
        StringBuilder text = new StringBuilder();
        for (String word : generateRandomStringArray(5, 10, false)) {
            text.append(word).append(' ');
        }
        text.append('a'); // make sure they are not all empty
        doc.field("field" + i, text.toString());
    }
    doc.endObject();
    indexRandom(true, client().prepareIndex("test", "type1", "0").setSource(doc));

    logger.info("Checking the document matches ...");
    MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery()
            .like(new Item("test", "type1", doc).routing("0")) // routing to ensure we hit the shard with the doc
            .minTermFreq(0)
            .minDocFreq(0)
            .maxQueryTerms(100)
            .minimumShouldMatch("100%"); // strict all terms must match!
    SearchResponse response = client().prepareSearch("test").setTypes("type1")
            .setQuery(mltQuery).get();
    assertSearchResponse(response);
    assertHitCount(response, 1);
}
/**
 * Runs more-like-this with four artificial documents against an index holding one document:
 * one with an unparsable date value, an empty one, one whose JSON object is left unclosed,
 * and a well-formed one. The first three must succeed with zero hits; the last must match
 * the indexed document.
 */
@Test
public void testMoreLikeThisMalformedArtificialDocs() throws Exception {
    logger.info("Creating the index ...");
    assertAcked(prepareCreate("test")
            .addMapping("type1", "text", "type=string,analyzer=whitespace", "date", "type=date"));
    ensureGreen("test");

    logger.info("Creating an index with a single document ...");
    IndexRequestBuilder indexedDoc = client().prepareIndex("test", "type1", "1").setSource(
            jsonBuilder()
                    .startObject()
                    .field("text", "Hello World!")
                    .field("date", "2009-01-01")
                    .endObject());
    indexRandom(true, indexedDoc);

    logger.info("Checking with a malformed field value ...");
    XContentBuilder badDateDoc = jsonBuilder()
            .startObject()
            .field("text", "Hello World!")
            .field("date", "this is not a date!")
            .endObject();
    MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery()
            .like(new Item("test", "type1", badDateDoc))
            .minTermFreq(0)
            .minDocFreq(0)
            .minimumShouldMatch("0%");
    SearchResponse response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
    assertSearchResponse(response);
    assertHitCount(response, 0);

    logger.info("Checking with an empty document ...");
    XContentBuilder noFieldsDoc = jsonBuilder().startObject().endObject();
    mltQuery = moreLikeThisQuery()
            .like(new Item("test", "type1", noFieldsDoc))
            .minTermFreq(0)
            .minDocFreq(0)
            .minimumShouldMatch("0%");
    response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
    assertSearchResponse(response);
    assertHitCount(response, 0);

    logger.info("Checking when document is malformed ...");
    // The root object is intentionally never closed here.
    XContentBuilder unclosedDoc = jsonBuilder().startObject();
    mltQuery = moreLikeThisQuery()
            .like(new Item("test", "type1", unclosedDoc))
            .minTermFreq(0)
            .minDocFreq(0)
            .minimumShouldMatch("0%");
    response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
    assertSearchResponse(response);
    assertHitCount(response, 0);

    logger.info("Checking the document matches otherwise ...");
    XContentBuilder wellFormedDoc = jsonBuilder()
            .startObject()
            .field("text", "Hello World!")
            .field("date", "1000-01-01") // parses fine, yet is expected to be ignored
            .endObject();
    mltQuery = moreLikeThisQuery()
            .like(new Item("test", "type1", wellFormedDoc))
            .minTermFreq(0)
            .minDocFreq(0)
            .minimumShouldMatch("100%"); // every term has to match; the date is left out
    response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
    assertSearchResponse(response);
    assertHitCount(response, 1);
}
/**
 * Builds an artificial document containing all fields, indexes each field value as its own
 * document, and verifies that adding indexed documents one by one to {@code ignoreLike}
 * reduces the hit count by one each time.
 */
@Test
public void testMoreLikeThisUnlike() throws ExecutionException, InterruptedException, IOException {
    createIndex("test");
    ensureGreen();

    int fieldCount = randomIntBetween(5, 10);

    logger.info("Create a document that has all the fields.");
    XContentBuilder allFieldsDoc = jsonBuilder().startObject();
    for (int f = 0; f < fieldCount; f++) {
        allFieldsDoc.field("field" + f, String.valueOf(f));
    }
    allFieldsDoc.endObject();

    logger.info("Indexing each field value of this document as a single document.");
    List<IndexRequestBuilder> indexRequests = new ArrayList<>();
    for (int f = 0; f < fieldCount; f++) {
        String value = String.valueOf(f);
        indexRequests.add(client().prepareIndex("test", "type1", value).setSource("field" + f, value));
    }
    indexRandom(true, indexRequests);

    logger.info("First check the document matches all indexed docs.");
    MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery()
            .like(new Item("test", "type1", allFieldsDoc))
            .minTermFreq(0)
            .minDocFreq(0)
            .maxQueryTerms(100)
            .minimumShouldMatch("0%");
    SearchResponse response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
    assertSearchResponse(response);
    assertHitCount(response, fieldCount);

    logger.info("Now check like this doc, but ignore one doc in the index, then two and so on...");
    List<Item> ignored = new ArrayList<>();
    for (int f = 0; f < fieldCount; f++) {
        ignored.add(new Item("test", "type1", String.valueOf(f)));
        Item[] ignoredSoFar = ignored.toArray(Item.EMPTY_ARRAY);
        mltQuery = moreLikeThisQuery()
                .like(new Item("test", "type1", allFieldsDoc))
                .ignoreLike(ignoredSoFar)
                .minTermFreq(0)
                .minDocFreq(0)
                .maxQueryTerms(100)
                .include(true)
                .minimumShouldMatch("0%");
        response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
        assertSearchResponse(response);
        // f + 1 documents are ignored at this point.
        assertHitCount(response, fieldCount - (f + 1));
    }
}
/**
 * Verifies field selection: without explicit fields both documents (sharing the "text1"
 * value) are returned; restricted to the "text" field only the liked document is returned.
 */
@Test
public void testSelectFields() throws IOException, ExecutionException, InterruptedException {
    assertAcked(prepareCreate("test")
            .addMapping("type1", "text", "type=string,analyzer=whitespace", "text1", "type=string,analyzer=whitespace"));
    ensureGreen("test");

    IndexRequestBuilder firstDoc = client().prepareIndex("test", "type1", "1").setSource(
            jsonBuilder()
                    .startObject()
                    .field("text", "hello world")
                    .field("text1", "elasticsearch")
                    .endObject());
    IndexRequestBuilder secondDoc = client().prepareIndex("test", "type1", "2").setSource(
            jsonBuilder()
                    .startObject()
                    .field("text", "goodby moon")
                    .field("text1", "elasticsearch")
                    .endObject());
    indexRandom(true, firstDoc, secondDoc);

    // No field restriction.
    MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery()
            .like(new Item("test", "type1", "1"))
            .minTermFreq(0)
            .minDocFreq(0)
            .include(true)
            .minimumShouldMatch("1%");
    SearchResponse response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
    assertSearchResponse(response);
    assertHitCount(response, 2);

    // Restricted to the "text" field.
    mltQuery = moreLikeThisQuery("text")
            .like(new Item("test", "type1", "1"))
            .minTermFreq(0)
            .minDocFreq(0)
            .include(true)
            .minimumShouldMatch("1%");
    response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get();
    assertSearchResponse(response);
    assertHitCount(response, 1);
}
}