GetSearchHitTask.java example

Explorer
documentr-master
- de.blizzy.documentr
  - src
/*
documentr - Edit, maintain, and present software documentation on the web.
Copyright (C) 2012-2013 Maik Schreiber

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
*/
package de.blizzy.documentr.search;

import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLEncoder;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.search.highlight.TokenSources;

import com.google.common.collect.Lists;

import de.blizzy.documentr.util.Util;

/**
 * Loads one document from the search index and turns it into a {@link SearchHit},
 * including an HTML excerpt of the page text in which the query matches are wrapped
 * in {@code <strong>} tags.
 *
 * <p>All work happens in {@link #call()}; the constructor only stores its arguments.</p>
 */
class GetSearchHitTask implements Callable<SearchHit> {
	/** Maximum number of highlighted fragments requested from the highlighter. */
	private static final int NUM_FRAGMENTS = 5;
	/** Fragment size handed to {@link SimpleFragmenter}. */
	private static final int FRAGMENT_SIZE = 50;
	/** Text placed between two consecutive fragments of the excerpt. */
	private static final String FRAGMENT_SEPARATOR = " <strong>...</strong> "; //$NON-NLS-1$
	/**
	 * Matches a run of commas and periods at the very start of a fragment.
	 * Compiled once instead of on every {@code String.replaceAll} invocation.
	 */
	private static final Pattern LEADING_PUNCTUATION = Pattern.compile("^[,\\.]+"); //$NON-NLS-1$

	private final Query query;
	private final IndexReader reader;
	private final int docId;
	private final Analyzer analyzer;

	/**
	 * @param query the query whose matching terms are highlighted in the excerpt
	 * @param reader the index reader the document is loaded from
	 * @param docId the id of the document within {@code reader}
	 * @param analyzer the analyzer passed to {@link TokenSources} for obtaining a token
	 *        stream of the page text
	 */
	GetSearchHitTask(Query query, IndexReader reader, int docId, Analyzer analyzer) {
		this.query = query;
		this.reader = reader;
		this.docId = docId;
		this.analyzer = analyzer;
	}

	/**
	 * Reads the document and builds the search hit.
	 *
	 * @return a hit carrying the project, branch, path and title fields of the document,
	 *         the highlighted excerpt (empty if none could be produced) and the
	 *         document's tags in natural sort order
	 * @throws IOException if reading from the index fails
	 */
	@Override
	public SearchHit call() throws IOException {
		Document doc = reader.document(docId);
		String projectName = doc.get(PageIndex.PROJECT);
		String branchName = doc.get(PageIndex.BRANCH);
		String path = doc.get(PageIndex.PATH);
		String title = doc.get(PageIndex.TITLE);

		List<String> tags = Lists.newArrayList(doc.getValues(PageIndex.TAG));
		Collections.sort(tags);

		String highlightedText = highlight(doc, doc.get(PageIndex.TEXT));
		return new SearchHit(projectName, branchName, path, title, highlightedText, tags);
	}

	/** Creates the highlighter that marks query matches with {@code <strong>} tags. */
	private Highlighter createHighlighter() {
		Formatter formatter = new SimpleHTMLFormatter("<strong>", "</strong>"); //$NON-NLS-1$ //$NON-NLS-2$
		Scorer scorer = new QueryScorer(query);
		Highlighter highlighter = new Highlighter(formatter, scorer);
		highlighter.setTextFragmenter(new SimpleFragmenter(FRAGMENT_SIZE));
		// HTML-encode the page text so that only the formatter's own tags end up as markup
		highlighter.setEncoder(new SimpleHTMLEncoder());
		return highlighter;
	}

	/**
	 * Builds the highlighted excerpt for the given page text.
	 *
	 * @param doc the document the text was read from
	 * @param text the value of the text field, may be {@code null}
	 * @return the cleaned-up best fragments joined by {@link #FRAGMENT_SEPARATOR}, or an
	 *         empty string if there is no text or the highlighter rejects the token offsets
	 * @throws IOException if obtaining or reading the token stream fails
	 */
	private String highlight(Document doc, String text) throws IOException {
		// Document.get() yields null when the field has no value in this document;
		// without any text there is nothing to highlight
		if (text == null) {
			return StringUtils.EMPTY;
		}

		Highlighter highlighter = createHighlighter();
		TokenStream tokenStream = null;
		try {
			tokenStream = TokenSources.getAnyTokenStream(reader, docId, PageIndex.TEXT, doc, analyzer);
			String[] fragments = highlighter.getBestFragments(tokenStream, text, NUM_FRAGMENTS);
			cleanupFragments(fragments);
			return Util.join(fragments, FRAGMENT_SEPARATOR);
		} catch (InvalidTokenOffsetsException ignored) {
			// deliberately best-effort: the hit is still returned, just without an excerpt
			return StringUtils.EMPTY;
		} finally {
			Util.closeQuietly(tokenStream);
		}
	}

	/**
	 * Strips leading commas and periods from every fragment and trims surrounding
	 * whitespace. The array is modified in place.
	 */
	private void cleanupFragments(String[] fragments) {
		for (int i = 0; i < fragments.length; i++) {
			fragments[i] = LEADING_PUNCTUATION.matcher(fragments[i]).replaceAll(StringUtils.EMPTY).trim();
		}
	}
}