package lia.tools;
/**
* Copyright Manning Publications Co.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific lan
*/
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;
import java.io.FileWriter;
import java.io.StringReader;
// From chapter 8
public class HighlightIt {
private static final String text =
"In this section we'll show you how to make the simplest " +
"programmatic query, searching for a single term, and then " +
"we'll see how to use QueryParser to accept textual queries. " +
"In the sections that follow, we’ll take this simple example " +
"further by detailing all the query types built into Lucene. " +
"We begin with the simplest search of all: searching for all " +
"documents that contain a single term.";
public static void main(String[] args) throws Exception {
if (args.length != 1) {
System.err.println("Usage: HighlightIt <filename-out>");
System.exit(-1);
}
String filename = args[0];
String searchText = "term"; // #1
QueryParser parser = new QueryParser(Version.LUCENE_30, // #1
"f", // #1
new StandardAnalyzer(Version.LUCENE_30));// #1
Query query = parser.parse(searchText); // #1
SimpleHTMLFormatter formatter = // #2
new SimpleHTMLFormatter("<span class=\"highlight\">", // #2
"</span>"); // #2
TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30) // #3
.tokenStream("f", new StringReader(text)); // #3
QueryScorer scorer = new QueryScorer(query, "f"); // #4
Highlighter highlighter = new Highlighter(formatter, scorer); // #5
highlighter.setTextFragmenter( // #6
new SimpleSpanFragmenter(scorer)); // #6
String result = // #7
highlighter.getBestFragments(tokens, text, 3, "..."); // #7
FileWriter writer = new FileWriter(filename); // #8
writer.write("<html>"); // #8
writer.write("<style>\n" + // #8
".highlight {\n" + // #8
" background: yellow;\n" + // #8
"}\n" + // #8
"</style>"); // #8
writer.write("<body>"); // #8
writer.write(result); // #8
writer.write("</body></html>"); // #8
writer.close(); // #8
}
}
/*
#1 Create the query
#2 Customize surrounding tags
#3 Tokenize text
#4 Create QueryScorer
#5 Create highlighter
#6 Use SimpleSpanFragmenter to fragment
#7 Highlight best 3 fragments
#8 Write highlighted HTML
*/