package lia.tools; /** * Copyright Manning Publications Co. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific lan */ import org.apache.lucene.search.Query; import org.apache.lucene.search.TopDocs; import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.store.FSDirectory; import org.apache.lucene.document.Document; import org.apache.lucene.search.similar.MoreLikeThis; import org.apache.lucene.index.IndexReader; import java.io.File; // From chapter 8 public class BooksMoreLikeThis { public static void main(String[] args) throws Throwable { String indexDir = System.getProperty("index.dir"); FSDirectory directory = FSDirectory.open(new File(indexDir)); IndexReader reader = IndexReader.open(directory); IndexSearcher searcher = new IndexSearcher(reader); int numDocs = reader.maxDoc(); MoreLikeThis mlt = new MoreLikeThis(reader); // #A mlt.setFieldNames(new String[] {"title", "author"}); mlt.setMinTermFreq(1); // #B mlt.setMinDocFreq(1); for (int docID = 0; docID < numDocs; docID++) { // #C System.out.println(); Document doc = reader.document(docID); System.out.println(doc.get("title")); Query query = mlt.like(docID); // #D System.out.println(" query=" + query); TopDocs similarDocs = searcher.search(query, 10); if (similarDocs.totalHits == 0) System.out.println(" None like this"); for(int i=0;i<similarDocs.scoreDocs.length;i++) { if (similarDocs.scoreDocs[i].doc != docID) { // #E doc = reader.document(similarDocs.scoreDocs[i].doc); System.out.println(" -> " + doc.getField("title").stringValue()); } } } searcher.close(); reader.close(); directory.close(); } } /* #A Instantiate MoreLikeThis #B Lower default minimums #C Iterate through all docs in the index #D Build query to find similar documents #E Don't show the same document */