package org.infinispan.lucene.testutils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
/**
*
* Utilities to read Lucene indexes: field terms, document counts and stored documents
*
* @author gustavonalle
* @since 7.0
*/
public class LuceneUtils {

   /** Static helpers only; not meant to be instantiated. */
   private LuceneUtils() {
   }

   /**
    * Reads all terms indexed for a field, across every segment of the index.
    * <p>
    * Segments that hold no terms for the field are skipped, so a field that is
    * absent from the whole index yields an empty set rather than an exception.
    *
    * @param field the field in the document to load terms from
    * @param directory any directory implementation containing an index
    * @return the unique terms decoded as UTF-8 strings, in natural (sorted) order
    * @throws IOException if the index cannot be opened or read
    */
   public static Set<String> readTerms(String field, Directory directory) throws IOException {
      try (DirectoryReader reader = DirectoryReader.open(directory)) {
         Set<String> termStrings = new TreeSet<>();
         for (LeafReaderContext leafContext : reader.leaves()) {
            LeafReader leafReader = leafContext.reader();
            Terms terms = leafReader.terms(field);
            if (terms == null) {
               // This segment has nothing indexed under the field; nothing to collect here.
               continue;
            }
            TermsEnum termsEnum = terms.iterator();
            for (BytesRef term = termsEnum.next(); term != null; term = termsEnum.next()) {
               // Decode right away: the enum's current term is only read before advancing.
               termStrings.add(term.utf8ToString());
            }
         }
         return termStrings;
      }
   }

   /**
    * Counts the documents in an index.
    *
    * @param directory directory containing the index
    * @return the document count reported by {@link DirectoryReader#numDocs()},
    *         which covers all segments
    * @throws IOException if the index cannot be opened or read
    */
   public static int numDocs(Directory directory) throws IOException {
      try (DirectoryReader reader = DirectoryReader.open(directory)) {
         return reader.numDocs();
      }
   }

   /**
    * Collects documents from an index using a match-all query.
    *
    * @param directory directory containing the index
    * @param limit maximum number of documents to collect; handed unchanged to
    *        {@link IndexSearcher#search(org.apache.lucene.search.Query, int)}
    * @return the stored documents of the matching hits, at most {@code limit} of them
    * @throws IOException if the index cannot be opened or read
    */
   public static List<Document> collect(Directory directory, int limit) throws IOException {
      try (DirectoryReader reader = DirectoryReader.open(directory)) {
         IndexSearcher indexSearcher = new IndexSearcher(reader);
         TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), limit);
         // Size the list from the real hit count, not from 'limit': callers may pass a very
         // large cap (e.g. Integer.MAX_VALUE) to mean "everything", which must not be
         // turned into an equally large up-front allocation.
         List<Document> docs = new ArrayList<>(topDocs.scoreDocs.length);
         for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            docs.add(indexSearcher.doc(scoreDoc.doc));
         }
         return docs;
      }
   }
}