/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.search.morelikethis; import org.elasticsearch.action.admin.indices.create.CreateIndexRequestBuilder; import org.elasticsearch.action.index.IndexRequestBuilder; import org.elasticsearch.action.search.SearchPhaseExecutionException; import org.elasticsearch.action.search.SearchResponse; import org.elasticsearch.client.Client; import org.elasticsearch.cluster.health.ClusterHealthStatus; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentFactory; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.query.MoreLikeThisQueryBuilder; import org.elasticsearch.index.query.MoreLikeThisQueryBuilder.Item; import org.elasticsearch.index.query.QueryBuilders; import org.elasticsearch.test.ESIntegTestCase; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ExecutionException; import static org.elasticsearch.client.Requests.indexRequest; import static org.elasticsearch.client.Requests.refreshRequest; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_REPLICAS; import static org.elasticsearch.cluster.metadata.IndexMetaData.SETTING_NUMBER_OF_SHARDS; import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder; import static org.elasticsearch.index.query.MoreLikeThisQueryBuilder.ids; import static org.elasticsearch.index.query.QueryBuilders.moreLikeThisQuery; import static org.elasticsearch.index.query.QueryBuilders.termQuery; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHitCount; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertNoFailures; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertOrderedSearchHits; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchHits; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse; import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertThrows; import static org.hamcrest.Matchers.equalTo; import static org.hamcrest.Matchers.notNullValue; public class MoreLikeThisIT extends ESIntegTestCase { public void testSimpleMoreLikeThis() throws Exception { logger.info("Creating index test"); assertAcked(prepareCreate("test").addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties") .startObject("text").field("type", "text").endObject() .endObject().endObject().endObject())); logger.info("Running Cluster Health"); assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); logger.info("Indexing..."); client().index(indexRequest("test").type("type1").id("1").source(jsonBuilder().startObject().field("text", "lucene").endObject())).actionGet(); client().index(indexRequest("test").type("type1").id("2").source(jsonBuilder().startObject().field("text", "lucene release").endObject())).actionGet(); client().admin().indices().refresh(refreshRequest()).actionGet(); logger.info("Running moreLikeThis"); SearchResponse response = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 1L); } public void testSimpleMoreLikeOnLongField() throws Exception { logger.info("Creating index test"); assertAcked(prepareCreate("test") .setSettings("index.mapping.single_type", false) .addMapping("type1", "some_long", "type=long")); logger.info("Running Cluster Health"); assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); logger.info("Indexing..."); client().index(indexRequest("test").type("type1").id("1").source(jsonBuilder().startObject().field("some_long", 1367484649580L).endObject())).actionGet(); client().index(indexRequest("test").type("type2").id("2").source(jsonBuilder().startObject().field("some_long", 0).endObject())).actionGet(); client().index(indexRequest("test").type("type1").id("3").source(jsonBuilder().startObject().field("some_long", -666).endObject())).actionGet(); client().admin().indices().refresh(refreshRequest()).actionGet(); logger.info("Running moreLikeThis"); SearchResponse response = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 0L); } public void testMoreLikeThisWithAliases() throws Exception { logger.info("Creating index test"); assertAcked(prepareCreate("test").addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties") .startObject("text").field("type", "text").endObject() .endObject().endObject().endObject())); logger.info("Creating aliases alias release"); client().admin().indices().prepareAliases() .addAlias("test", "release", termQuery("text", "release")) .addAlias("test", "beta", termQuery("text", "beta")).get(); logger.info("Running Cluster Health"); assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); logger.info("Indexing..."); client().index(indexRequest("test").type("type1").id("1").source(jsonBuilder().startObject().field("text", "lucene beta").endObject())).actionGet(); client().index(indexRequest("test").type("type1").id("2").source(jsonBuilder().startObject().field("text", "lucene release").endObject())).actionGet(); client().index(indexRequest("test").type("type1").id("3").source(jsonBuilder().startObject().field("text", "elasticsearch beta").endObject())).actionGet(); client().index(indexRequest("test").type("type1").id("4").source(jsonBuilder().startObject().field("text", "elasticsearch release").endObject())).actionGet(); client().admin().indices().refresh(refreshRequest()).actionGet(); logger.info("Running moreLikeThis on index"); SearchResponse response = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 2L); logger.info("Running moreLikeThis on beta shard"); response = client().prepareSearch("beta").setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 1L); assertThat(response.getHits().getAt(0).getId(), equalTo("3")); logger.info("Running moreLikeThis on release shard"); response = client().prepareSearch("release").setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 1L); assertThat(response.getHits().getAt(0).getId(), equalTo("2")); logger.info("Running moreLikeThis on alias with node client"); response = internalCluster().coordOnlyNodeClient().prepareSearch("beta").setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 1L); assertThat(response.getHits().getAt(0).getId(), equalTo("3")); } // Issue #14944 public void testMoreLikeThisWithAliasesInLikeDocuments() throws Exception { String indexName = "foo"; String aliasName = "foo_name"; String typeName = "bar"; String mapping = XContentFactory.jsonBuilder().startObject().startObject("bar") .startObject("properties") .endObject() .endObject().endObject().string(); client().admin().indices().prepareCreate(indexName).addMapping(typeName, mapping, XContentType.JSON).get(); client().admin().indices().prepareAliases().addAlias(indexName, aliasName).get(); assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); client().index(indexRequest(indexName).type(typeName).id("1").source(jsonBuilder().startObject().field("text", "elasticsearch index").endObject())).actionGet(); client().index(indexRequest(indexName).type(typeName).id("2").source(jsonBuilder().startObject().field("text", "lucene index").endObject())).actionGet(); client().index(indexRequest(indexName).type(typeName).id("3").source(jsonBuilder().startObject().field("text", "elasticsearch index").endObject())).actionGet(); refresh(indexName); SearchResponse response = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item(aliasName, typeName, "1")}).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(response, 2L); assertThat(response.getHits().getAt(0).getId(), equalTo("3")); } public void testMoreLikeThisIssue2197() throws Exception { Client client = client(); String mapping = XContentFactory.jsonBuilder().startObject().startObject("bar") .startObject("properties") .endObject() .endObject().endObject().string(); client().admin().indices().prepareCreate("foo").addMapping("bar", mapping, XContentType.JSON).execute().actionGet(); client().prepareIndex("foo", "bar", "1") .setSource(jsonBuilder().startObject().startObject("foo").field("bar", "boz").endObject().endObject()) .execute().actionGet(); client().admin().indices().prepareRefresh("foo").execute().actionGet(); assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); SearchResponse response = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("foo", "bar", "1")})).get(); assertNoFailures(response); assertThat(response, notNullValue()); response = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("foo", "bar", "1")})).get(); assertNoFailures(response); assertThat(response, notNullValue()); } // Issue #2489 public void testMoreLikeWithCustomRouting() throws Exception { String mapping = XContentFactory.jsonBuilder().startObject().startObject("bar") .startObject("properties") .endObject() .endObject().endObject().string(); client().admin().indices().prepareCreate("foo").addMapping("bar", mapping, XContentType.JSON).execute().actionGet(); ensureGreen(); client().prepareIndex("foo", "bar", "1") .setSource(jsonBuilder().startObject().startObject("foo").field("bar", "boz").endObject().endObject()) .setRouting("2") .execute().actionGet(); client().admin().indices().prepareRefresh("foo").execute().actionGet(); SearchResponse response = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("foo", "bar", "1").routing("2")})).get(); assertNoFailures(response); assertThat(response, notNullValue()); } // Issue #3039 public void testMoreLikeThisIssueRoutingNotSerialized() throws Exception { String mapping = XContentFactory.jsonBuilder().startObject().startObject("bar") .startObject("properties") .endObject() .endObject().endObject().string(); assertAcked(prepareCreate("foo", 2, Settings.builder().put(SETTING_NUMBER_OF_SHARDS, 2).put(SETTING_NUMBER_OF_REPLICAS, 0)) .addMapping("bar", mapping, XContentType.JSON)); ensureGreen(); client().prepareIndex("foo", "bar", "1") .setSource(jsonBuilder().startObject().startObject("foo").field("bar", "boz").endObject().endObject()) .setRouting("4000") .execute().actionGet(); client().admin().indices().prepareRefresh("foo").execute().actionGet(); SearchResponse response = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("foo", "bar", "1").routing("4000")})).get(); assertNoFailures(response); assertThat(response, notNullValue()); } // Issue #3252 public void testNumericField() throws Exception { final String[] numericTypes = new String[]{"byte", "short", "integer", "long"}; prepareCreate("test").addMapping("type", jsonBuilder() .startObject().startObject("type") .startObject("properties") .startObject("int_value").field("type", randomFrom(numericTypes)).endObject() .startObject("string_value").field("type", "text").endObject() .endObject() .endObject().endObject()).execute().actionGet(); ensureGreen(); client().prepareIndex("test", "type", "1") .setSource(jsonBuilder().startObject().field("string_value", "lucene index").field("int_value", 1).endObject()) .execute().actionGet(); client().prepareIndex("test", "type", "2") .setSource(jsonBuilder().startObject().field("string_value", "elasticsearch index").field("int_value", 42).endObject()) .execute().actionGet(); refresh(); // Implicit list of fields -> ignore numeric fields SearchResponse searchResponse = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type", "1")}).minTermFreq(1).minDocFreq(1)).get(); assertHitCount(searchResponse, 1L); // Explicit list of fields including numeric fields -> fail assertThrows(client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(new String[] {"string_value", "int_value"}, null, new Item[] {new Item("test", "type", "1")}).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class); // mlt query with no field -> No results (because _all is not enabled) searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"index"}).minTermFreq(1).minDocFreq(1)).execute().actionGet(); assertHitCount(searchResponse, 0L); // mlt query with string fields searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery(new String[]{"string_value"}, new String[] {"index"}, null).minTermFreq(1).minDocFreq(1)).execute().actionGet(); assertHitCount(searchResponse, 2L); // mlt query with at least a numeric field -> fail by default assertThrows(client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"string_value", "int_value"}, new String[] {"index"}, null)), SearchPhaseExecutionException.class); // mlt query with at least a numeric field -> fail by command assertThrows(client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"string_value", "int_value"}, new String[] {"index"}, null).failOnUnsupportedField(true)), SearchPhaseExecutionException.class); // mlt query with at least a numeric field but fail_on_unsupported_field set to false searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"string_value", "int_value"}, new String[] {"index"}, null).minTermFreq(1).minDocFreq(1).failOnUnsupportedField(false)).get(); assertHitCount(searchResponse, 2L); // mlt field query on a numeric field -> failure by default assertThrows(client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"int_value"}, new String[] {"42"}, null).minTermFreq(1).minDocFreq(1)), SearchPhaseExecutionException.class); // mlt field query on a numeric field -> failure by command assertThrows(client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"int_value"}, new String[] {"42"}, null).minTermFreq(1).minDocFreq(1).failOnUnsupportedField(true)), SearchPhaseExecutionException.class); // mlt field query on a numeric field but fail_on_unsupported_field set to false searchResponse = client().prepareSearch().setQuery(moreLikeThisQuery(new String[] {"int_value"}, new String[] {"42"}, null).minTermFreq(1).minDocFreq(1).failOnUnsupportedField(false)).execute().actionGet(); assertHitCount(searchResponse, 0L); } public void testSimpleMoreLikeInclude() throws Exception { logger.info("Creating index test"); assertAcked(prepareCreate("test").addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties") .startObject("text").field("type", "text").endObject() .endObject().endObject().endObject())); logger.info("Running Cluster Health"); assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); logger.info("Indexing..."); client().index(indexRequest("test").type("type1").id("1").source( jsonBuilder().startObject() .field("text", "Apache Lucene is a free/open source information retrieval software library").endObject())) .actionGet(); client().index(indexRequest("test").type("type1").id("2").source( jsonBuilder().startObject() .field("text", "Lucene has been ported to other programming languages").endObject())) .actionGet(); client().admin().indices().refresh(refreshRequest()).actionGet(); logger.info("Running More Like This with include true"); SearchResponse response = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get(); assertOrderedSearchHits(response, "1", "2"); response = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "2")}).minTermFreq(1).minDocFreq(1).include(true).minimumShouldMatch("0%")).get(); assertOrderedSearchHits(response, "2", "1"); logger.info("Running More Like This with include false"); response = client().prepareSearch().setQuery( new MoreLikeThisQueryBuilder(null, new Item[] {new Item("test", "type1", "1")}).minTermFreq(1).minDocFreq(1).minimumShouldMatch("0%")).get(); assertSearchHits(response, "2"); } public void testSimpleMoreLikeThisIds() throws Exception { logger.info("Creating index test"); assertAcked(prepareCreate("test").addMapping("type1", jsonBuilder().startObject().startObject("type1").startObject("properties") .startObject("text").field("type", "text").endObject() .endObject().endObject().endObject())); logger.info("Running Cluster Health"); assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); logger.info("Indexing..."); List<IndexRequestBuilder> builders = new ArrayList<>(); builders.add(client().prepareIndex("test", "type1").setSource("text", "lucene").setId("1")); builders.add(client().prepareIndex("test", "type1").setSource("text", "lucene release").setId("2")); builders.add(client().prepareIndex("test", "type1").setSource("text", "apache lucene").setId("3")); indexRandom(true, builders); logger.info("Running MoreLikeThis"); MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery(new String[] {"text"}, null, ids("1")).include(true).minTermFreq(1).minDocFreq(1); SearchResponse mltResponse = client().prepareSearch().setTypes("type1").setQuery(queryBuilder).execute().actionGet(); assertHitCount(mltResponse, 3L); } public void testSimpleMoreLikeThisIdsMultipleTypes() throws Exception { logger.info("Creating index test"); int numOfTypes = randomIntBetween(2, 10); CreateIndexRequestBuilder createRequestBuilder = prepareCreate("test") .setSettings("index.mapping.single_type", false); for (int i = 0; i < numOfTypes; i++) { createRequestBuilder.addMapping("type" + i, jsonBuilder().startObject().startObject("type" + i).startObject("properties") .startObject("text").field("type", "text").endObject() .endObject().endObject().endObject()); } assertAcked(createRequestBuilder); logger.info("Running Cluster Health"); assertThat(ensureGreen(), equalTo(ClusterHealthStatus.GREEN)); logger.info("Indexing..."); List<IndexRequestBuilder> builders = new ArrayList<>(numOfTypes); for (int i = 0; i < numOfTypes; i++) { builders.add(client().prepareIndex("test", "type" + i).setSource("text", "lucene" + " " + i).setId(String.valueOf(i))); } indexRandom(true, builders); logger.info("Running MoreLikeThis"); MoreLikeThisQueryBuilder queryBuilder = QueryBuilders.moreLikeThisQuery(new String[] {"text"}, null, new Item[] {new Item("test", "type0", "0")}).include(true).minTermFreq(1).minDocFreq(1); String[] types = new String[numOfTypes]; for (int i = 0; i < numOfTypes; i++) { types[i] = "type"+i; } SearchResponse mltResponse = client().prepareSearch().setTypes(types).setQuery(queryBuilder).execute().actionGet(); assertHitCount(mltResponse, numOfTypes); } public void testMoreLikeThisMultiValueFields() throws Exception { logger.info("Creating the index ..."); assertAcked(prepareCreate("test") .addMapping("type1", "text", "type=text,analyzer=keyword") .setSettings(SETTING_NUMBER_OF_SHARDS, 1)); ensureGreen(); logger.info("Indexing ..."); String[] values = {"aaaa", "bbbb", "cccc", "dddd", "eeee", "ffff", "gggg", "hhhh", "iiii", "jjjj"}; List<IndexRequestBuilder> builders = new ArrayList<>(values.length + 1); // index one document with all the values builders.add(client().prepareIndex("test", "type1", "0").setSource("text", values)); // index each document with only one of the values for (int i = 0; i < values.length; i++) { builders.add(client().prepareIndex("test", "type1", String.valueOf(i + 1)).setSource("text", values[i])); } indexRandom(true, builders); int maxIters = randomIntBetween(10, 20); for (int i = 0; i < maxIters; i++) { int max_query_terms = randomIntBetween(1, values.length); logger.info("Running More Like This with max_query_terms = {}", max_query_terms); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery(new String[] {"text"}, null, new Item[] {new Item(null, null, "0")}) .minTermFreq(1).minDocFreq(1) .maxQueryTerms(max_query_terms).minimumShouldMatch("0%"); SearchResponse response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).execute().actionGet(); assertSearchResponse(response); assertHitCount(response, max_query_terms); } } public void testMinimumShouldMatch() throws ExecutionException, InterruptedException { logger.info("Creating the index ..."); assertAcked(prepareCreate("test") .addMapping("type1", "text", "type=text,analyzer=whitespace") .setSettings(SETTING_NUMBER_OF_SHARDS, 1)); ensureGreen(); logger.info("Indexing with each doc having one less term ..."); List<IndexRequestBuilder> builders = new ArrayList<>(); for (int i = 0; i < 10; i++) { String text = ""; for (int j = 1; j <= 10 - i; j++) { text += j + " "; } builders.add(client().prepareIndex("test", "type1", i + "").setSource("text", text)); } indexRandom(true, builders); logger.info("Testing each minimum_should_match from 0% - 100% with 10% increment ..."); for (int i = 0; i <= 10; i++) { String minimumShouldMatch = (10 * i) + "%"; MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery(new String[] {"text"}, new String[] {"1 2 3 4 5 6 7 8 9 10"}, null) .minTermFreq(1) .minDocFreq(1) .minimumShouldMatch(minimumShouldMatch); logger.info("Testing with minimum_should_match = {}", minimumShouldMatch); SearchResponse response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); if (minimumShouldMatch.equals("0%")) { assertHitCount(response, 10); } else { assertHitCount(response, 11 - i); } } } public void testMoreLikeThisArtificialDocs() throws Exception { int numFields = randomIntBetween(5, 10); createIndex("test"); ensureGreen(); logger.info("Indexing a single document ..."); XContentBuilder doc = jsonBuilder().startObject(); for (int i = 0; i < numFields; i++) { doc.field("field" + i, generateRandomStringArray(5, 10, false) + "a"); // make sure they are not all empty } doc.endObject(); indexRandom(true, client().prepareIndex("test", "type1", "0").setSource(doc)); logger.info("Checking the document matches ..."); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery(new Item[] {new Item("test", "type1", doc).routing("0")}) // routing to ensure we hit the shard with the doc .minTermFreq(0) .minDocFreq(0) .maxQueryTerms(100) .minimumShouldMatch("100%"); // strict all terms must match! SearchResponse response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, 1); } public void testMoreLikeThisMalformedArtificialDocs() throws Exception { logger.info("Creating the index ..."); assertAcked(prepareCreate("test") .addMapping("type1", "text", "type=text,analyzer=whitespace", "date", "type=date")); ensureGreen("test"); logger.info("Creating an index with a single document ..."); indexRandom(true, client().prepareIndex("test", "type1", "1").setSource(jsonBuilder() .startObject() .field("text", "Hello World!") .field("date", "2009-01-01") .endObject())); logger.info("Checking with a malformed field value ..."); XContentBuilder malformedFieldDoc = jsonBuilder() .startObject() .field("text", "Hello World!") .field("date", "this is not a date!") .endObject(); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery(new Item[] {new Item("test", "type1", malformedFieldDoc)}) .minTermFreq(0) .minDocFreq(0) .minimumShouldMatch("0%"); SearchResponse response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, 0); logger.info("Checking with an empty document ..."); XContentBuilder emptyDoc = jsonBuilder().startObject().endObject(); mltQuery = moreLikeThisQuery(null, new Item[] {new Item("test", "type1", emptyDoc)}) .minTermFreq(0) .minDocFreq(0) .minimumShouldMatch("0%"); response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, 0); logger.info("Checking the document matches otherwise ..."); XContentBuilder normalDoc = jsonBuilder() .startObject() .field("text", "Hello World!") .field("date", "1000-01-01") // should be properly parsed but ignored ... .endObject(); mltQuery = moreLikeThisQuery(null, new Item[] {new Item("test", "type1", normalDoc)}) .minTermFreq(0) .minDocFreq(0) .minimumShouldMatch("100%"); // strict all terms must match but date is ignored response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, 1); } public void testMoreLikeThisUnlike() throws ExecutionException, InterruptedException, IOException { createIndex("test"); ensureGreen(); int numFields = randomIntBetween(5, 10); logger.info("Create a document that has all the fields."); XContentBuilder doc = jsonBuilder().startObject(); for (int i = 0; i < numFields; i++) { doc.field("field"+i, i+""); } doc.endObject(); logger.info("Indexing each field value of this document as a single document."); List<IndexRequestBuilder> builders = new ArrayList<>(); for (int i = 0; i < numFields; i++) { builders.add(client().prepareIndex("test", "type1", i+"").setSource("field"+i, i+"")); } indexRandom(true, builders); logger.info("First check the document matches all indexed docs."); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery(new Item[] {new Item("test", "type1", doc)}) .minTermFreq(0) .minDocFreq(0) .maxQueryTerms(100) .minimumShouldMatch("0%"); SearchResponse response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, numFields); logger.info("Now check like this doc, but ignore one doc in the index, then two and so on..."); List<Item> docs = new ArrayList<>(numFields); for (int i = 0; i < numFields; i++) { docs.add(new Item("test", "type1", i+"")); mltQuery = moreLikeThisQuery(null, new Item[] {new Item("test", "type1", doc)}) .unlike(docs.toArray(new Item[docs.size()])) .minTermFreq(0) .minDocFreq(0) .maxQueryTerms(100) .include(true) .minimumShouldMatch("0%"); response = client().prepareSearch("test").setTypes("type1").setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, numFields - (i + 1)); } } public void testSelectFields() throws IOException, ExecutionException, InterruptedException { assertAcked(prepareCreate("test") .addMapping("type1", "text", "type=text,analyzer=whitespace", "text1", "type=text,analyzer=whitespace")); ensureGreen("test"); indexRandom(true, client().prepareIndex("test", "type1", "1").setSource(jsonBuilder() .startObject() .field("text", "hello world") .field("text1", "elasticsearch") .endObject()), client().prepareIndex("test", "type1", "2").setSource(jsonBuilder() .startObject() .field("text", "goodby moon") .field("text1", "elasticsearch") .endObject())); MoreLikeThisQueryBuilder mltQuery = moreLikeThisQuery(new Item[] {new Item("test", "type1", "1")}) .minTermFreq(0) .minDocFreq(0) .include(true) .minimumShouldMatch("1%"); SearchResponse response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, 2); mltQuery = moreLikeThisQuery(new String[] {"text"}, null, new Item[] {new Item("test", "type1", "1")}) .minTermFreq(0) .minDocFreq(0) .include(true) .minimumShouldMatch("1%"); response = client().prepareSearch("test").setTypes("type1") .setQuery(mltQuery).get(); assertSearchResponse(response); assertHitCount(response, 1); } }