/* * Copyright (c) 2011 LinkedIn, Inc * * Licensed under the Apache License, Version 2.0 (the "License"); you may not * use this file except in compliance with the License. You may obtain a copy of * the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the * License for the specific language governing permissions and limitations under * the License. */ package com.flaptor.indextank.search; import static com.flaptor.util.TestInfo.TestType.SYSTEM; import static com.flaptor.util.TestInfo.TestType.UNIT; import java.io.File; import java.io.IOException; import com.flaptor.indextank.DocumentStoringIndexer; import com.flaptor.indextank.IndexTankTestCase; import com.flaptor.indextank.index.Document; import com.flaptor.indextank.index.IndexEngine; import com.flaptor.indextank.query.ParseException; import com.flaptor.indextank.query.PrefixTermQuery; import com.flaptor.indextank.query.AndQuery; import com.flaptor.indextank.query.Query; import com.flaptor.indextank.query.TermQuery; import com.flaptor.util.FileUtil; import com.flaptor.util.TestInfo; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Maps; public class SnippetSearcherTest extends IndexTankTestCase { private File tempDir; private IndexEngine indexEngine; private DocumentStoringIndexer indexer; private SnippetSearcher searcher; @Override protected void setUp() throws Exception { super.setUp(); this.tempDir = FileUtil.createTempDir("indextank","testcase"); this.indexEngine = new IndexEngine(this.tempDir, 11234, 5, false, 5, IndexEngine.SuggestValues.NO, IndexEngine.StorageValues.RAM, 0, null, false, "dummyCode", "TEST-environment"); this.indexer = new DocumentStoringIndexer(indexEngine.getIndexer(), indexEngine.getStorage()); this.searcher = new SnippetSearcher(indexEngine.getSearcher(), indexEngine.getStorage(), indexEngine.getParser()); } @Override protected void tearDown() throws Exception { super.tearDown(); } private void indexVeryBigDoc() { double timestampBoost = System.currentTimeMillis() / 1000.0; String docId = "docid"; Document doc = new Document(ImmutableMap.of("text", largeText())); indexer.add(docId, doc, (int)timestampBoost, Maps.<Integer, Double>newHashMap()); } private String largeText() { StringBuilder sb = new StringBuilder(); for (int i = 0; i < 1000; i++) { for (int j = 0; j < 30; j++) { sb.append(' '); sb.append("term"); sb.append(j + i*30); } sb.append('.'); } return sb.toString(); } private String multipleLines(){ StringBuilder sb = new StringBuilder(); for (int i = 0; i < 1000; i++) { sb.append("linestart"); for (int j = 0; j < 30; j++) { sb.append(' '); sb.append("term"); sb.append(j + i*30); } sb.append('\n'); } return sb.toString(); }; @TestInfo(testType=SYSTEM) public void testSnippet() throws IOException, ParseException, InterruptedException { indexVeryBigDoc(); long t = System.currentTimeMillis(); SearchResults srs = searcher.search(new Query(new TermQuery("text","term29925"),null,null),0,10, 0, ImmutableMap.of("snippet_fields", "text")); srs.getResults().iterator().next(); long dt = System.currentTimeMillis() - t; //this test is ignored since it fails on some machines //assertTrue("Snippetting took too long: " + dt + "ms. and the limit was 300ms.", dt < 300); } @TestInfo(testType=UNIT) public void testCompleteLines() throws IOException, InterruptedException { double timestampBoost = System.currentTimeMillis() / 1000.0; String docid = "docid"; Document doc = new Document(ImmutableMap.of("text", multipleLines(), "title", "a headline!")); indexer.add(docid, doc, (int)timestampBoost, Maps.<Integer, Double>newHashMap()); SearchResults srs = searcher.search(new Query(new TermQuery("text", "term29925"), null, null), 0, 10, 0, ImmutableMap.of("snippet_fields", "text", "snippet_type", "lines")); SearchResult sr = srs.getResults().iterator().next(); assertNotNull("sr is null!", sr); assertTrue("line does not start on linestart!", sr.getField("snippet_text").startsWith("linestart")); assertEquals("line does not end on a newline!", sr.getField("snippet_text").charAt(sr.getField("snippet_text").length()-1), '\n'); } @TestInfo(testType=UNIT) public void testEncodesHTMLonEnd() throws IOException, InterruptedException { double timestampBoost = System.currentTimeMillis() / 1000.0; String docid = "docid"; // have & signs before tokens, > signs between them, and < signs after. Document doc = new Document(ImmutableMap.of("text", "contains &&& signs >>>> and stuff <.", "title", "a headline!")); indexer.add(docid, doc, (int)timestampBoost, Maps.<Integer, Double>newHashMap()); SearchResults srs = searcher.search(new Query(new AndQuery( new TermQuery("text", "signs"), new TermQuery("text", "stuff")), null, null), 0, 10, 0, ImmutableMap.of("snippet_fields", "text","snippet_type", "lines")); SearchResult sr = srs.getResults().iterator().next(); assertNotNull("sr is null!", sr); assertTrue("signs not highlighted!", sr.getField("snippet_text").contains("<b>signs</b>")); assertTrue("ampersands not encoded!", sr.getField("snippet_text").contains("&")); assertTrue("greater-than signs not encoded!", sr.getField("snippet_text").contains(">")); assertTrue("less-than signs not encoded!", sr.getField("snippet_text").contains("<")); } @TestInfo(testType=UNIT) public void testTokenizingChangesTokenLength() throws IOException, InterruptedException, ParseException { double timestampBoost = System.currentTimeMillis() / 1000.0; String docid = "docid"; // \u00df is 'LATIN SMALL LETTER SHARP S' // ASCIIFoldingFilter converts it from 'ß' to 'ss' // see http://www.fileformat.info/info/unicode/char/df/index.htm String text = "Clown Ferdinand und der Fu\u00dfball player"; Document doc = new Document(ImmutableMap.of("text", text)); indexer.add(docid, doc, (int)timestampBoost, Maps.<Integer, Double>newHashMap()); String queryText = "fussball"; Query query = new Query(new TermQuery("text", queryText), queryText, null); SearchResults srs = searcher.search(query, 0, 1, 0, ImmutableMap.of("snippet_fields", "text", "snippet_type", "html")); SearchResult sr = srs.getResults().iterator().next(); String snippet = sr.getField("snippet_text"); assertNotNull("Snippet is null", snippet); assertTrue("Search term not highlighted", snippet.contains("<b>Fußball</b>")); assertTrue("Snippet lost space before highlighted term", snippet.contains("der ")); assertTrue("Snippet lost space after highlighted term: " + snippet, snippet.contains(" player")); query = new Query(new PrefixTermQuery("text", "fu"), "fu*", null); srs = searcher.search(query, 0, 1, 0, ImmutableMap.of("snippet_fields", "text", "snippet_type", "html")); sr = srs.getResults().iterator().next(); snippet = sr.getField("snippet_text"); assertNotNull("Snippet is null", snippet); assertTrue("Search term not highlighted", snippet.contains("<b>Fußball</b>")); assertTrue("Snippet lost space before highlighted term", snippet.contains("der ")); assertTrue("Snippet lost space after highlighted term", snippet.contains(" player")); } @TestInfo(testType=UNIT) public void testFetchAll() throws IOException, InterruptedException { double timestampBoost = System.currentTimeMillis() / 1000.0; String docid = "docid"; Document doc = new Document(ImmutableMap.of("text", "this is a sample text", "title", "a headline!")); indexer.add(docid, doc, (int)timestampBoost, Maps.<Integer, Double>newHashMap()); SearchResults srs = searcher.search(new Query(new TermQuery("text","sample"),null,null),0,10, 0, ImmutableMap.of("fetch_fields", "*")); SearchResult sr = srs.getResults().iterator().next(); assertEquals("document data modified. fetch_fields='*' should retrieve the same data.", sr.getFields(), doc.asMap()); } }