package com.sohospace.lucene.analysis.xanalyzer; import org.apache.lucene.analysis.TokenStream; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.index.IndexReader; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.TermPositionVector; import org.apache.lucene.queryParser.QueryParser; import org.apache.lucene.search.Hits; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Query; import org.apache.lucene.search.Searcher; import org.apache.lucene.search.highlight.Formatter; import org.apache.lucene.search.highlight.Highlighter; import org.apache.lucene.search.highlight.QueryScorer; import org.apache.lucene.search.highlight.SimpleFragmenter; import org.apache.lucene.search.highlight.TokenGroup; import org.apache.lucene.search.highlight.TokenSources; import org.apache.lucene.store.Directory; import org.apache.lucene.store.RAMDirectory; public class TestHighLight { private static String FIELD_NAME = "content"; private static String CONTENT = "Since development first began on " + "Spring in 2003, there's been a constant buzz about it in " + "Java development publications and corporate IT departments." + " The reason is clear: Spring is a lightweight Java framework " + "in a world of complex heavyweight architectures that take " + "forever to implement. Spring is like a breath of fresh air " + "to overworked developers. In Spring, you can make an object " + "secure, remote, or transactional, with a couple of lines of " + "configuration instead of embedded code. The resulting application" + " is simple and clean. In Spring, you can work less and go home " + "early, because you can strip away a whole lot of the redundant " + "code that you tend to see in most J2EE applications. You won't" + " be nearly as burdened with meaningless detail. In Spring, you" + " can change your mind without the consequences bleeding through " + "your entire application. You'll adapt much more quickly than you " + "ever could before. Spring: A Developer's Notebook offers a quick " + "dive into the new Spring framework, designed to let you get " + "hands-on as quickly as you like. If you don't want to bother with" + " a lot of theory, this book is definitely for you. You'll work " + "through one example after another. Along the way, you'll discover " + "the energy and promise of the Spring framework. This practical guide " + "features ten code-intensive labs that'll rapidly get you up to speed." + " You'll learn how to do the following, and more: install the Spring" + " Framework set up the development environment use Spring with other " + "open source Java tools such as Tomcat, Struts, and Hibernate master " + "AOP and transactions utilize ORM solutions As with all titles in the" + " Developer's Notebook series, this no-nonsense book skips all the " + "boring prose and cuts right to the chase. It's an approach that " + "forces you to get your hands dirty by working through one instructional" + " example after another-examples that speak to you instead of at you. "; private static String QUERY = "tomcat"; /** * @param args */ public static void main(String[] args) { Directory ramDir = new RAMDirectory(); try { IndexWriter writer = new IndexWriter(ramDir, /*new StandardAnalyzer()/*/XFactory.getWriterAnalyzer()); Document doc = new Document(); Field fd = new Field(FIELD_NAME, CONTENT, Field.Store.YES, Field.Index.TOKENIZED,Field.TermVector.WITH_POSITIONS_OFFSETS); doc.add(fd); writer.addDocument(doc); writer.optimize(); writer.close(); IndexReader reader = IndexReader.open(ramDir); String queryString = QUERY; QueryParser parser=new QueryParser(FIELD_NAME, /*new StandardAnalyzer()/*/XFactory.getWriterAnalyzer()); Query query = parser.parse(queryString); System.out.println(query); Searcher searcher = new IndexSearcher(ramDir); query = query.rewrite(reader); System.out.println(query); System.out.println("Searching for: " + query.toString(FIELD_NAME)); Hits hits = searcher.search(query); BoldFormatter formatter = new BoldFormatter(); Highlighter highlighter =new Highlighter(formatter,new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(50)); for (int i = 0; i < hits.length(); i++) { String text = hits.doc(i).get(FIELD_NAME); int maxNumFragmentsRequired = 5; String fragmentSeparator = "..."; TermPositionVector tpv = (TermPositionVector)reader.getTermFreqVector(hits.id(i),FIELD_NAME); TokenStream tokenStream=TokenSources.getTokenStream(tpv); /* TokenStream tokenStream2= (new StandardAnalyzer()) //XFactory.getWriterAnalyzer() .tokenStream(FIELD_NAME,new StringReader(text)); do { Token t = tokenStream2.next(); if(t==null)break; System.out.println("\t" + t.startOffset() + "," + t.endOffset() + "\t" + t.termText()); }while(true); */ String result = highlighter.getBestFragments( tokenStream, text, maxNumFragmentsRequired, fragmentSeparator); System.out.println("\n" + result); } reader.close(); }catch(Exception e) { e.printStackTrace(); } } } class BoldFormatter implements Formatter { public String highlightTerm(String originalText , TokenGroup group) { if(group.getTotalScore()<=0) { return originalText; } return "<b>" + originalText + "</b>"; } }