/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.uhighlight;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.MockAnalyzer;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.*;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.LuceneTestCase;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.Map;

/**
 * Tests highlighting for matters *expressly* relating to term vectors.
 * <p>
 * This test DOES NOT represent all testing for highlighting when term vectors are used. Other tests pick the offset
 * source at random (to include term vectors) and in effect test term vectors generally.
 */
@LuceneTestCase.SuppressCodecs({"MockFixedIntBlock", "MockVariableIntBlock", "MockSep", "MockRandom", "Lucene3x"})
@LuceneTestCase.SuppressSysoutChecks(bugUrl = "") // Gradle interferes with this Lucene test rule
public class TestUnifiedHighlighterTermVec extends LuceneTestCase {

  private Analyzer indexAnalyzer;
  private Directory dir;

  @Before
  public void doBefore() throws IOException {
    indexAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true); // whitespace, punctuation, lowercase
    dir = newDirectory();
  }

  @After
  public void doAfter() throws IOException {
    dir.close();
  }

  public void testFetchTermVecsOncePerDoc() throws IOException {
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);

    // Declare some number of fields with random field type; but at least one will have term vectors.
    final int numTvFields = 1 + random().nextInt(3);
    List<String> fields = new ArrayList<>(numTvFields);
    List<FieldType> fieldTypes = new ArrayList<>(numTvFields);
    for (int i = 0; i < numTvFields; i++) {
      fields.add("body" + i);
      fieldTypes.add(UHTestHelper.randomFieldType(random()));
    }
    // ensure at least one has TVs by setting one randomly to it:
    fieldTypes.set(random().nextInt(fieldTypes.size()), UHTestHelper.tvType);

    final int numDocs = 1 + random().nextInt(3);
    for (int i = 0; i < numDocs; i++) {
      Document doc = new Document();
      for (String field : fields) {
        doc.add(new Field(field, "some test text", UHTestHelper.tvType));
      }
      iw.addDocument(doc);
    }

    // Wrap the reader to ensure we only fetch TVs once per doc
    DirectoryReader originalReader = iw.getReader();
    IndexReader ir = new AssertOnceTermVecDirectoryReader(originalReader);
    iw.close();

    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    BooleanQuery.Builder queryBuilder = new BooleanQuery.Builder();
    for (String field : fields) {
      queryBuilder.add(new TermQuery(new Term(field, "test")), BooleanClause.Occur.MUST);
    }
    BooleanQuery query = queryBuilder.build();
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    assertEquals(numDocs, topDocs.totalHits);
    Map<String, String[]> fieldToSnippets =
        highlighter.highlightFields(fields.toArray(new String[numTvFields]), query, topDocs);
    String[] expectedSnippetsByDoc = new String[numDocs];
    Arrays.fill(expectedSnippetsByDoc, "some <b>test</b> text");
    for (String field : fields) {
      assertArrayEquals(expectedSnippetsByDoc, fieldToSnippets.get(field));
    }

    ir.close();
  }

  /** Wraps the leaf readers to assert that term vectors are requested at most once per doc ID. */
  private static class AssertOnceTermVecDirectoryReader extends FilterDirectoryReader {
    static final SubReaderWrapper SUB_READER_WRAPPER = new SubReaderWrapper() {
      @Override
      public LeafReader wrap(LeafReader reader) {
        return new FilterLeafReader(reader) {
          BitSet seenDocIDs = new BitSet();

          @Override
          public Fields getTermVectors(int docID) throws IOException {
            // if we're invoked by ParallelLeafReader then we can't do our assertion. TODO see LUCENE-6868
            if (calledBy(ParallelLeafReader.class) == false
                && calledBy(CheckIndex.class) == false) {
              assertFalse("Should not request TVs for doc more than once.", seenDocIDs.get(docID));
              seenDocIDs.set(docID);
            }
            return super.getTermVectors(docID);
          }

          @Override
          public CacheHelper getCoreCacheHelper() {
            return null;
          }

          @Override
          public CacheHelper getReaderCacheHelper() {
            return null;
          }
        };
      }
    };

    AssertOnceTermVecDirectoryReader(DirectoryReader in) throws IOException {
      super(in, SUB_READER_WRAPPER);
    }

    @Override
    protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
      return new AssertOnceTermVecDirectoryReader(in);
    }

    @Override
    public CacheHelper getReaderCacheHelper() {
      return null;
    }
  }

  /** Returns true if the current thread's stack trace contains a frame from the given class. */
  private static boolean calledBy(Class<?> clazz) {
    for (StackTraceElement stackTraceElement : Thread.currentThread().getStackTrace()) {
      if (stackTraceElement.getClassName().equals(clazz.getName())) return true;
    }
    return false;
  }

  @Test(expected = IllegalArgumentException.class)
  public void testUserFailedToIndexOffsets() throws IOException {
    FieldType fieldType = new FieldType(UHTestHelper.tvType); // note: it's indexed too
    fieldType.setStoreTermVectorPositions(random().nextBoolean());
    fieldType.setStoreTermVectorOffsets(false);

    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
    Document doc = new Document();
    doc.add(new Field("body", "term vectors", fieldType));
    iw.addDocument(doc);
    IndexReader ir = iw.getReader();
    iw.close();

    IndexSearcher searcher = newSearcher(ir);
    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
    TermQuery query = new TermQuery(new Term("body", "vectors"));
    TopDocs topDocs = searcher.search(query, 10, Sort.INDEXORDER);
    try {
      highlighter.highlight("body", query, topDocs, 1); // should throw
    } finally {
      ir.close();
    }
  }
}