/*
 * Copyright (c) 2011 LinkedIn, Inc
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.flaptor.indextank.search;

import static com.flaptor.util.TestInfo.TestType.SYSTEM;
import static com.flaptor.util.TestInfo.TestType.UNIT;

import java.io.File;
import java.io.IOException;

import com.flaptor.indextank.DocumentStoringIndexer;
import com.flaptor.indextank.IndexTankTestCase;
import com.flaptor.indextank.index.Document;
import com.flaptor.indextank.index.IndexEngine;
import com.flaptor.indextank.query.AndQuery;
import com.flaptor.indextank.query.ParseException;
import com.flaptor.indextank.query.PrefixTermQuery;
import com.flaptor.indextank.query.Query;
import com.flaptor.indextank.query.TermQuery;
import com.flaptor.util.FileUtil;
import com.flaptor.util.TestInfo;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Maps;

public class SnippetSearcherTest extends IndexTankTestCase {

    private File tempDir;
    private IndexEngine indexEngine;
    private DocumentStoringIndexer indexer;
    private SnippetSearcher searcher;

    @Override
    protected void setUp() throws Exception {
        super.setUp();
        this.tempDir = FileUtil.createTempDir("indextank", "testcase");
        this.indexEngine = new IndexEngine(this.tempDir, 11234, 5, false, 5,
                IndexEngine.SuggestValues.NO, IndexEngine.StorageValues.RAM, 0, null,
                false, "dummyCode", "TEST-environment");
        this.indexer = new DocumentStoringIndexer(indexEngine.getIndexer(), indexEngine.getStorage());
        this.searcher = new SnippetSearcher(indexEngine.getSearcher(), indexEngine.getStorage(), indexEngine.getParser());
    }

    @Override
    protected void tearDown() throws Exception {
        super.tearDown();
    }

    // Indexes a single document whose "text" field contains 30,000 unique terms.
    private void indexVeryBigDoc() {
        double timestampBoost = System.currentTimeMillis() / 1000.0;
        String docId = "docid";
        Document doc = new Document(ImmutableMap.of("text", largeText()));
        indexer.add(docId, doc, (int) timestampBoost, Maps.<Integer, Double>newHashMap());
    }

    // 1000 sentences, each made of 30 unique terms ("term0" .. "term29999").
    private String largeText() {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < 1000; i++) {
            for (int j = 0; j < 30; j++) {
                sb.append(' ');
                sb.append("term");
                sb.append(j + i * 30);
            }
            sb.append('.');
        }
        return sb.toString();
    }

    // 1000 lines, each starting with "linestart" and containing 30 unique terms.
    private String multipleLines() {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < 1000; i++) {
            sb.append("linestart");
            for (int j = 0; j < 30; j++) {
                sb.append(' ');
                sb.append("term");
                sb.append(j + i * 30);
            }
            sb.append('\n');
        }
        return sb.toString();
    }

    @TestInfo(testType=SYSTEM)
    public void testSnippet() throws IOException, ParseException, InterruptedException {
        indexVeryBigDoc();
        long t = System.currentTimeMillis();
        SearchResults srs = searcher.search(new Query(new TermQuery("text", "term29925"), null, null), 0, 10, 0,
                ImmutableMap.of("snippet_fields", "text"));
        srs.getResults().iterator().next();
        long dt = System.currentTimeMillis() - t;
        // the timing assertion is disabled since it fails on some machines
        //assertTrue("Snippetting took too long: " + dt + "ms and the limit was 300ms.", dt < 300);
    }
    // With snippet_type=lines, snippets should cover whole lines of the original text.
    @TestInfo(testType=UNIT)
    public void testCompleteLines() throws IOException, InterruptedException {
        double timestampBoost = System.currentTimeMillis() / 1000.0;
        String docid = "docid";
        Document doc = new Document(ImmutableMap.of("text", multipleLines(), "title", "a headline!"));
        indexer.add(docid, doc, (int) timestampBoost, Maps.<Integer, Double>newHashMap());
        SearchResults srs = searcher.search(new Query(new TermQuery("text", "term29925"), null, null), 0, 10, 0,
                ImmutableMap.of("snippet_fields", "text", "snippet_type", "lines"));
        SearchResult sr = srs.getResults().iterator().next();
        assertNotNull("sr is null!", sr);
        assertTrue("line does not start on linestart!", sr.getField("snippet_text").startsWith("linestart"));
        assertEquals("line does not end on a newline!", sr.getField("snippet_text").charAt(sr.getField("snippet_text").length() - 1), '\n');
    }

    // The snippet should HTML-encode the document text while still adding <b> highlight tags.
    @TestInfo(testType=UNIT)
    public void testEncodesHTMLonEnd() throws IOException, InterruptedException {
        double timestampBoost = System.currentTimeMillis() / 1000.0;
        String docid = "docid";
        // have & signs before tokens, > signs between them, and < signs after.
        Document doc = new Document(ImmutableMap.of("text", "contains &&& signs >>>> and stuff <.", "title", "a headline!"));
        indexer.add(docid, doc, (int) timestampBoost, Maps.<Integer, Double>newHashMap());
        SearchResults srs = searcher.search(new Query(new AndQuery(new TermQuery("text", "signs"), new TermQuery("text", "stuff")), null, null), 0, 10, 0,
                ImmutableMap.of("snippet_fields", "text", "snippet_type", "lines"));
        SearchResult sr = srs.getResults().iterator().next();
        assertNotNull("sr is null!", sr);
        assertTrue("signs not highlighted!", sr.getField("snippet_text").contains("<b>signs</b>"));
        assertTrue("ampersands not encoded!", sr.getField("snippet_text").contains("&amp;"));
        assertTrue("greater-than signs not encoded!", sr.getField("snippet_text").contains("&gt;"));
        assertTrue("less-than signs not encoded!", sr.getField("snippet_text").contains("&lt;"));
    }

    @TestInfo(testType=UNIT)
    public void testTokenizingChangesTokenLength() throws IOException, InterruptedException, ParseException {
        double timestampBoost = System.currentTimeMillis() / 1000.0;
        String docid = "docid";
        // \u00df is 'LATIN SMALL LETTER SHARP S'
        // ASCIIFoldingFilter converts it from 'ß' to 'ss'
        // see http://www.fileformat.info/info/unicode/char/df/index.htm
        String text = "Clown Ferdinand und der Fu\u00dfball player";
        Document doc = new Document(ImmutableMap.of("text", text));
        indexer.add(docid, doc, (int) timestampBoost, Maps.<Integer, Double>newHashMap());

        String queryText = "fussball";
        Query query = new Query(new TermQuery("text", queryText), queryText, null);
        SearchResults srs = searcher.search(query, 0, 1, 0, ImmutableMap.of("snippet_fields", "text", "snippet_type", "html"));
        SearchResult sr = srs.getResults().iterator().next();
        String snippet = sr.getField("snippet_text");
        assertNotNull("Snippet is null", snippet);
        assertTrue("Search term not highlighted", snippet.contains("<b>Fußball</b>"));
        assertTrue("Snippet lost space before highlighted term", snippet.contains("der "));
        assertTrue("Snippet lost space after highlighted term: " + snippet, snippet.contains(" player"));

        query = new Query(new PrefixTermQuery("text", "fu"), "fu*", null);
        srs = searcher.search(query, 0, 1, 0, ImmutableMap.of("snippet_fields", "text", "snippet_type", "html"));
        sr = srs.getResults().iterator().next();
        snippet = sr.getField("snippet_text");
        assertNotNull("Snippet is null", snippet);
        assertTrue("Search term not highlighted", snippet.contains("<b>Fußball</b>"));
highlighted", snippet.contains("<b>Fußball</b>")); assertTrue("Snippet lost space before highlighted term", snippet.contains("der ")); assertTrue("Snippet lost space after highlighted term", snippet.contains(" player")); } @TestInfo(testType=UNIT) public void testFetchAll() throws IOException, InterruptedException { double timestampBoost = System.currentTimeMillis() / 1000.0; String docid = "docid"; Document doc = new Document(ImmutableMap.of("text", "this is a sample text", "title", "a headline!")); indexer.add(docid, doc, (int)timestampBoost, Maps.<Integer, Double>newHashMap()); SearchResults srs = searcher.search(new Query(new TermQuery("text","sample"),null,null),0,10, 0, ImmutableMap.of("fetch_fields", "*")); SearchResult sr = srs.getResults().iterator().next(); assertEquals("document data modified. fetch_fields='*' should retrieve the same data.", sr.getFields(), doc.asMap()); } }