package i5.las2peer.services.ocd.utils; import java.io.*; import java.nio.file.Path; import java.util.HashMap; import java.util.Map; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.DelegatingAnalyzerWrapper; import org.apache.lucene.analysis.core.StopAnalyzer; import org.apache.lucene.analysis.core.WhitespaceAnalyzer; import org.apache.lucene.analysis.en.EnglishAnalyzer; import org.apache.lucene.analysis.en.PorterStemFilter; import org.apache.lucene.analysis.miscellaneous.PerFieldAnalyzerWrapper; import org.apache.lucene.analysis.standard.StandardAnalyzer; import org.apache.lucene.document.Document; import org.apache.lucene.document.Field; import org.apache.lucene.document.FieldType; import org.apache.lucene.document.StringField; import org.apache.lucene.document.TextField; import org.apache.lucene.index.CorruptIndexException; import org.apache.lucene.index.IndexOptions; import org.apache.lucene.index.IndexWriter; import org.apache.lucene.index.IndexWriterConfig; import org.apache.lucene.store.SimpleFSDirectory; import org.apache.lucene.util.Version; public class DocIndexer { private String indexPath; public DocIndexer(String pathIndex){ this.indexPath = pathIndex; } public void indexDoc(String docid, String docContent) throws IOException{ Path f = new File(indexPath).toPath(); try{ SimpleFSDirectory dir = new SimpleFSDirectory(f); //only stopword removal //IndexWriter iW = new IndexWriter(dir, new IndexWriterConfig(new StopAnalyzer())); //stopword removal and stemming using Porter Stemmer IndexWriter iW = new IndexWriter(dir, new IndexWriterConfig(new EnglishAnalyzer())); Document doc = new Document(); final FieldType fieldType = new FieldType(); //fieldType.setIndexed(true); fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); fieldType.setStored(true); fieldType.setStoreTermVectors(true); fieldType.setTokenized(true); doc.add(new Field("doccontent", docContent, fieldType)); doc.add(new Field("docid", docid, fieldType)); iW.addDocument(doc); iW.close(); }catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } public void indexDocPerField(String docid, String docContent) throws IOException{ Path f = new File(indexPath).toPath(); try{ SimpleFSDirectory dir = new SimpleFSDirectory(f); //only stopword removal //IndexWriter iW = new IndexWriter(dir, new IndexWriterConfig(new StopAnalyzer())); //stopword removal and stemming using Porter Stemmer Map<String, Analyzer> analyzerPerField = new HashMap<String,Analyzer>(); analyzerPerField.put("docid", new WhitespaceAnalyzer()); analyzerPerField.put("doccontent", new EnglishAnalyzer()); PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(new WhitespaceAnalyzer(),analyzerPerField); IndexWriter iW = new IndexWriter(dir, new IndexWriterConfig(analyzer)); Document doc = new Document(); final FieldType fieldType = new FieldType(); //fieldType.setIndexed(true); fieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); fieldType.setStored(true); fieldType.setStoreTermVectors(true); fieldType.setTokenized(true); doc.add(new Field("doccontent", docContent, fieldType)); doc.add(new Field("docid", docid, fieldType)); iW.addDocument(doc); iW.close(); }catch (CorruptIndexException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } } }