package edu.uncc.cs.watsonsim.index;
import java.io.IOException;
import java.nio.file.Path;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.search.similarities.BM25Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import edu.uncc.cs.watsonsim.Passage;
public class Lucene implements Segment {
private final IndexWriter index;
public Lucene(Path path) throws IOException {
/* Setup Lucene */
Directory dir = FSDirectory.open(path);
// here we are using a standard analyzer, there are a lot of analyzers available to our use.
Analyzer analyzer = new StandardAnalyzer();
IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
//this mode by default overwrites the previous index, not a very good option in real usage
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
iwc.setSimilarity(new BM25Similarity());
index = new IndexWriter(dir, iwc);
}
public void accept(Passage p){
// Index with Lucene
Document doc = new Document();
doc.add(new TextField("title", p.title, Field.Store.NO));
doc.add(new TextField("text", p.text, Field.Store.YES));
doc.add(new StoredField("docno", p.reference));
try {
index.addDocument(doc);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
@Override
public void close() throws IOException {
index.close();
}
}