/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.apache.lucene.search.uhighlight; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.TextField; import org.apache.lucene.index.DirectoryReader; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.index.RandomIndexWriter; import org.apache.lucene.index.Term; import org.apache.lucene.queries.CommonTermsQuery; import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.MatchAllDocsQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.Sort; import org.apache.lucene.search.TermQuery; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.highlight.DefaultEncoder; import org.apache.lucene.search.highlight.Snippet; import org.apache.lucene.store.Directory; import org.elasticsearch.common.Strings; import org.elasticsearch.common.lucene.all.AllTermQuery; import org.elasticsearch.common.lucene.search.MultiPhrasePrefixQuery; import org.elasticsearch.test.ESTestCase; import java.text.BreakIterator; import java.util.Locale; import static org.apache.lucene.search.uhighlight.CustomUnifiedHighlighter.MULTIVAL_SEP_CHAR; import static org.hamcrest.CoreMatchers.equalTo; public class CustomUnifiedHighlighterTests extends ESTestCase { private void assertHighlightOneDoc(String fieldName, String[] inputs, Analyzer analyzer, Query query, Locale locale, BreakIterator breakIterator, int noMatchSize, String[] expectedPassages) throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = newIndexWriterConfig(analyzer); iwc.setMergePolicy(newTieredMergePolicy(random())); RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); FieldType ft = new FieldType(TextField.TYPE_STORED); ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); ft.freeze(); Document doc = new Document(); for (String input : inputs) { Field field = new Field(fieldName, "", ft); field.setStringValue(input); doc.add(field); } iw.addDocument(doc); DirectoryReader reader = iw.getReader(); IndexSearcher searcher = newSearcher(reader); iw.close(); TopDocs topDocs = searcher.search(new MatchAllDocsQuery(), 1, Sort.INDEXORDER); assertThat(topDocs.totalHits, equalTo(1)); String rawValue = Strings.arrayToDelimitedString(inputs, String.valueOf(MULTIVAL_SEP_CHAR)); CustomUnifiedHighlighter highlighter = new CustomUnifiedHighlighter(searcher, analyzer, new CustomPassageFormatter("<b>", "</b>", new DefaultEncoder()), locale, breakIterator, rawValue, noMatchSize); highlighter.setFieldMatcher((name) -> "text".equals(name)); final Snippet[] snippets = highlighter.highlightField("text", query, topDocs.scoreDocs[0].doc, expectedPassages.length); assertEquals(snippets.length, expectedPassages.length); for (int i = 0; i < snippets.length; i++) { assertEquals(snippets[i].getText(), expectedPassages[i]); } reader.close(); dir.close(); } public void testSimple() throws Exception { final String[] inputs = { "This is a test. Just a test1 highlighting from unified highlighter.", "This is the second highlighting value to perform highlighting on a longer text that gets scored lower.", "This is highlighting the third short highlighting value.", "Just a test4 highlighting from unified highlighter." }; String[] expectedPassages = { "Just a test1 <b>highlighting</b> from unified highlighter.", "This is the second <b>highlighting</b> value to perform <b>highlighting</b> on a" + " longer text that gets scored lower.", "This is <b>highlighting</b> the third short <b>highlighting</b> value.", "Just a test4 <b>highlighting</b> from unified highlighter." }; Query query = new TermQuery(new Term("text", "highlighting")); assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, expectedPassages); } public void testNoMatchSize() throws Exception { final String[] inputs = { "This is a test. Just a test highlighting from unified. Feel free to ignore." }; Query query = new TermQuery(new Term("body", "highlighting")); assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 100, inputs); } public void testMultiPhrasePrefixQuery() throws Exception { final String[] inputs = { "The quick brown fox." }; final String[] outputs = { "The <b>quick</b> <b>brown</b> <b>fox</b>." }; MultiPhrasePrefixQuery query = new MultiPhrasePrefixQuery(); query.add(new Term("text", "quick")); query.add(new Term("text", "brown")); query.add(new Term("text", "fo")); assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, outputs); } public void testAllTermQuery() throws Exception { final String[] inputs = { "The quick brown fox." }; final String[] outputs = { "The quick brown <b>fox</b>." }; AllTermQuery query = new AllTermQuery(new Term("text", "fox")); assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, outputs); } public void testCommonTermsQuery() throws Exception { final String[] inputs = { "The quick brown fox." }; final String[] outputs = { "The <b>quick</b> <b>brown</b> <b>fox</b>." }; CommonTermsQuery query = new CommonTermsQuery(BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, 128); query.add(new Term("text", "quick")); query.add(new Term("text", "brown")); query.add(new Term("text", "fox")); assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT, BreakIterator.getSentenceInstance(Locale.ROOT), 0, outputs); } public void testSentenceBoundedBreakIterator() throws Exception { final String[] inputs = { "The quick brown fox in a long sentence with another quick brown fox. " + "Another sentence with brown fox." }; final String[] outputs = { "The <b>quick</b> <b>brown</b>", "<b>fox</b> in a long", "with another <b>quick</b>", "<b>brown</b> <b>fox</b>.", "sentence with <b>brown</b>", "<b>fox</b>.", }; BooleanQuery query = new BooleanQuery.Builder() .add(new TermQuery(new Term("text", "quick")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("text", "brown")), BooleanClause.Occur.SHOULD) .add(new TermQuery(new Term("text", "fox")), BooleanClause.Occur.SHOULD) .build(); assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT, BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10), 0, outputs); } public void testRepeat() throws Exception { final String[] inputs = { "Fun fun fun fun fun fun fun fun fun fun" }; final String[] outputs = { "<b>Fun</b> <b>fun</b> <b>fun</b>", "<b>fun</b> <b>fun</b>", "<b>fun</b> <b>fun</b> <b>fun</b>", "<b>fun</b> <b>fun</b>" }; Query query = new TermQuery(new Term("text", "fun")); assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT, BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10), 0, outputs); query = new PhraseQuery.Builder() .add(new Term("text", "fun")) .add(new Term("text", "fun")) .build(); assertHighlightOneDoc("text", inputs, new StandardAnalyzer(), query, Locale.ROOT, BoundedBreakIteratorScanner.getSentence(Locale.ROOT, 10), 0, outputs); } }