LuceneUtils.java example

Explorer
infinispan-master
package org.infinispan.lucene.testutils;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;

/**
 * Static helpers for inspecting the contents of a Lucene index in tests:
 * listing the terms of a field, counting documents and loading stored documents.
 * <p>
 * Every method opens its own {@link DirectoryReader} on the supplied directory and
 * closes it before returning.
 *
 * @author gustavonalle
 * @since 7.0
 */
public final class LuceneUtils {

   // Utility class: static methods only, never instantiated.
   private LuceneUtils() {
   }

   /**
    * Reads all terms indexed for a field, across every segment of the index.
    * Segments that hold no terms for the field are skipped, so asking for a field
    * that is not present in the index yields an empty set.
    *
    * @param field the field in the document to load terms from
    * @param directory Any directory implementation
    * @return the unique terms, decoded from UTF-8, as a set sorted in natural String order
    * @throws IOException if the index cannot be opened or read
    */
   public static Set<String> readTerms(String field, Directory directory) throws IOException {
      try (DirectoryReader reader = DirectoryReader.open(directory)) {
         Set<String> termStrings = new TreeSet<>();
         for (LeafReaderContext leafContext : reader.leaves()) {
            LeafReader leafReader = leafContext.reader();
            // terms() returns null when this segment has no terms for the field;
            // dereferencing it unconditionally would fail with a NullPointerException.
            // The lookup is repeated below instead of being held in a local so that
            // this class needs no import beyond the ones it already has.
            if (leafReader.terms(field) == null) {
               continue;
            }
            TermsEnum termsEnum = leafReader.terms(field).iterator();
            // next() returns null once the enumeration is exhausted
            for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
               termStrings.add(term.utf8ToString());
            }
         }
         return termStrings;
      }
   }

   /**
    * Counts the documents in an index.
    *
    * @param directory Directory
    * @return the number of docs, including all segments, as reported by
    *         {@link DirectoryReader#numDocs()}
    * @throws IOException if the index cannot be opened or read
    */
   public static int numDocs(Directory directory) throws IOException {
      try (DirectoryReader reader = DirectoryReader.open(directory)) {
         return reader.numDocs();
      }
   }

   /**
    * Collects documents from an index by running a match-all query and loading
    * the stored document of each hit.
    *
    * @param directory Directory
    * @param limit maximum number of documents to collect; must be positive
    * @return List of Documents, holding at most {@code limit} entries
    * @throws IllegalArgumentException if {@code limit} is zero or negative
    * @throws IOException if the index cannot be opened or read
    */
   public static List<Document> collect(Directory directory, int limit) throws IOException {
      if (limit <= 0) {
         throw new IllegalArgumentException("limit must be positive, was " + limit);
      }
      try (DirectoryReader reader = DirectoryReader.open(directory)) {
         IndexSearcher indexSearcher = new IndexSearcher(reader);
         TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), limit);
         // Size the list from the actual number of hits rather than from limit:
         // a caller asking for "everything" with a huge limit must not trigger
         // an equally huge up-front allocation.
         List<Document> docs = new ArrayList<>(topDocs.scoreDocs.length);
         for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            docs.add(indexSearcher.doc(scoreDoc.doc));
         }
         return docs;
      }
   }

}