/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.analysis.common;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.index.query.Operator;
import org.elasticsearch.plugins.Plugin;
import org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder;
import org.elasticsearch.test.ESIntegTestCase;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
import static org.elasticsearch.index.query.QueryBuilders.matchPhraseQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchQuery;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertHighlight;
import static org.hamcrest.Matchers.equalTo;
import static org.hamcrest.Matchers.startsWith;
public class HighlighterWithAnalyzersTests extends ESIntegTestCase {
@Override
protected Collection<Class<? extends Plugin>> nodePlugins() {
return Arrays.asList(CommonAnalysisPlugin.class);
}
public void testNgramHighlightingWithBrokenPositions() throws IOException {
assertAcked(prepareCreate("test")
.addMapping("test", jsonBuilder()
.startObject()
.startObject("test")
.startObject("properties")
.startObject("name")
.field("type", "text")
.startObject("fields")
.startObject("autocomplete")
.field("type", "text")
.field("analyzer", "autocomplete")
.field("search_analyzer", "search_autocomplete")
.field("term_vector", "with_positions_offsets")
.endObject()
.endObject()
.endObject()
.endObject()
.endObject()
.endObject())
.setSettings(Settings.builder()
.put(indexSettings())
.put("analysis.tokenizer.autocomplete.max_gram", 20)
.put("analysis.tokenizer.autocomplete.min_gram", 1)
.put("analysis.tokenizer.autocomplete.token_chars", "letter,digit")
.put("analysis.tokenizer.autocomplete.type", "nGram")
.put("analysis.filter.wordDelimiter.type", "word_delimiter")
.putArray("analysis.filter.wordDelimiter.type_table",
"& => ALPHANUM", "| => ALPHANUM", "! => ALPHANUM",
"? => ALPHANUM", ". => ALPHANUM", "- => ALPHANUM",
"# => ALPHANUM", "% => ALPHANUM", "+ => ALPHANUM",
", => ALPHANUM", "~ => ALPHANUM", ": => ALPHANUM",
"/ => ALPHANUM", "^ => ALPHANUM", "$ => ALPHANUM",
"@ => ALPHANUM", ") => ALPHANUM", "( => ALPHANUM",
"] => ALPHANUM", "[ => ALPHANUM", "} => ALPHANUM",
"{ => ALPHANUM")
.put("analysis.filter.wordDelimiter.type.split_on_numerics", false)
.put("analysis.filter.wordDelimiter.generate_word_parts", true)
.put("analysis.filter.wordDelimiter.generate_number_parts", false)
.put("analysis.filter.wordDelimiter.catenate_words", true)
.put("analysis.filter.wordDelimiter.catenate_numbers", true)
.put("analysis.filter.wordDelimiter.catenate_all", false)
.put("analysis.analyzer.autocomplete.tokenizer", "autocomplete")
.putArray("analysis.analyzer.autocomplete.filter",
"lowercase", "wordDelimiter")
.put("analysis.analyzer.search_autocomplete.tokenizer", "whitespace")
.putArray("analysis.analyzer.search_autocomplete.filter",
"lowercase", "wordDelimiter")));
client().prepareIndex("test", "test", "1")
.setSource("name", "ARCOTEL Hotels Deutschland").get();
refresh();
SearchResponse search = client().prepareSearch("test").setTypes("test")
.setQuery(matchQuery("name.autocomplete", "deut tel").operator(Operator.OR))
.highlighter(new HighlightBuilder().field("name.autocomplete")).get();
assertHighlight(search, 0, "name.autocomplete", 0,
equalTo("ARCO<em>TEL</em> Ho<em>tel</em>s <em>Deut</em>schland"));
}
public void testMultiPhraseCutoff() throws IOException {
/*
* MultiPhraseQuery can literally kill an entire node if there are too many terms in the
* query. We cut off and extract terms if there are more than 16 terms in the query
*/
assertAcked(prepareCreate("test")
.addMapping("test", "body", "type=text,analyzer=custom_analyzer,"
+ "search_analyzer=custom_analyzer,term_vector=with_positions_offsets")
.setSettings(
Settings.builder().put(indexSettings())
.put("analysis.filter.wordDelimiter.type", "word_delimiter")
.put("analysis.filter.wordDelimiter.type.split_on_numerics", false)
.put("analysis.filter.wordDelimiter.generate_word_parts", true)
.put("analysis.filter.wordDelimiter.generate_number_parts", true)
.put("analysis.filter.wordDelimiter.catenate_words", true)
.put("analysis.filter.wordDelimiter.catenate_numbers", true)
.put("analysis.filter.wordDelimiter.catenate_all", false)
.put("analysis.analyzer.custom_analyzer.tokenizer", "whitespace")
.putArray("analysis.analyzer.custom_analyzer.filter",
"lowercase", "wordDelimiter"))
);
ensureGreen();
client().prepareIndex("test", "test", "1")
.setSource("body", "Test: http://www.facebook.com http://elasticsearch.org "
+ "http://xing.com http://cnn.com http://quora.com http://twitter.com this is "
+ "a test for highlighting feature Test: http://www.facebook.com "
+ "http://elasticsearch.org http://xing.com http://cnn.com http://quora.com "
+ "http://twitter.com this is a test for highlighting feature")
.get();
refresh();
SearchResponse search = client().prepareSearch()
.setQuery(matchPhraseQuery("body", "Test: http://www.facebook.com "))
.highlighter(new HighlightBuilder().field("body")).get();
assertHighlight(search, 0, "body", 0, startsWith("<em>Test: http://www.facebook.com</em>"));
search = client()
.prepareSearch()
.setQuery(matchPhraseQuery("body", "Test: http://www.facebook.com "
+ "http://elasticsearch.org http://xing.com http://cnn.com "
+ "http://quora.com http://twitter.com this is a test for highlighting "
+ "feature Test: http://www.facebook.com http://elasticsearch.org "
+ "http://xing.com http://cnn.com http://quora.com http://twitter.com this "
+ "is a test for highlighting feature"))
.highlighter(new HighlightBuilder().field("body")).execute().actionGet();
assertHighlight(search, 0, "body", 0, equalTo("<em>Test</em>: "
+ "<em>http://www.facebook.com</em> <em>http://elasticsearch.org</em> "
+ "<em>http://xing.com</em> <em>http://cnn.com</em> http://quora.com"));
}
}