HighlightIt.java example

Explorer
l4ia-master
- src
  - lia
package lia.tools;

/**
 * Copyright Manning Publications Co.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
*/

import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.search.Query;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.SimpleSpanFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.util.Version;

import java.io.FileWriter;
import java.io.StringReader;

// From chapter 8
/**
 * Command-line example that highlights occurrences of a query term inside a
 * fixed passage of text and writes the highlighted fragments to an HTML file.
 *
 * <p>Usage: {@code HighlightIt <filename-out>}
 */
public class HighlightIt {
  /** The passage that is tokenized and highlighted. */
  private static final String text =
    "In this section we'll show you how to make the simplest " +
    "programmatic query, searching for a single term, and then " +
    "we'll see how to use QueryParser to accept textual queries. " +
    "In the sections that follow, we’ll take this simple example " +
    "further by detailing all the query types built into Lucene. " +
    "We begin with the simplest search of all: searching for all " +
    "documents that contain a single term.";

  /**
   * Parses the query {@code "term"}, highlights its matches in the sample
   * passage, and writes the result wrapped in a minimal HTML page.
   *
   * @param args must contain exactly one element: the path of the HTML file
   *             to write; otherwise a usage message is printed to standard
   *             error and the JVM exits with status -1
   * @throws Exception if the query cannot be parsed, highlighting fails, or
   *                   the output file cannot be written
   */
  public static void main(String[] args) throws Exception {

    if (args.length != 1) {
      System.err.println("Usage: HighlightIt <filename-out>");
      System.exit(-1);
    }

    String filename = args[0];

    String searchText = "term";                                       // #1
    QueryParser parser = new QueryParser(Version.LUCENE_30,           // #1
                                         "f",                         // #1
                                         new StandardAnalyzer(Version.LUCENE_30)); // #1
    Query query = parser.parse(searchText);                           // #1

    SimpleHTMLFormatter formatter =                                   // #2
      new SimpleHTMLFormatter("<span class=\"highlight\">",           // #2
                              "</span>");                             // #2

    TokenStream tokens = new StandardAnalyzer(Version.LUCENE_30)      // #3
        .tokenStream("f", new StringReader(text));                    // #3

    QueryScorer scorer = new QueryScorer(query, "f");                 // #4

    Highlighter highlighter = new Highlighter(formatter, scorer);     // #5
    highlighter.setTextFragmenter(                                    // #6
                  new SimpleSpanFragmenter(scorer));                  // #6

    String result =                                                   // #7
        highlighter.getBestFragments(tokens, text, 3, "...");         // #7

    FileWriter writer = new FileWriter(filename);                     // #8
    try {
      writer.write("<html>");                                         // #8
      writer.write("<style>\n" +                                      // #8
          ".highlight {\n" +                                          // #8
          " background: yellow;\n" +                                  // #8
          "}\n" +                                                     // #8
          "</style>");                                                // #8
      writer.write("<body>");                                         // #8
      writer.write(result);                                           // #8
      writer.write("</body></html>");                                 // #8
    } finally {
      // Close even when a write fails so the file handle is not leaked.
      writer.close();                                                 // #8
    }
  }
}

/*
#1 Create the query
#2 Customize surrounding tags
#3 Tokenize text
#4 Create QueryScorer
#5 Create highlighter
#6 Use SimpleSpanFragmenter to fragment
#7 Highlight best 3 fragments
#8 Write highlighted HTML
*/