package com.tistory.devyongsik.analyzer.dictionaryindex;
import java.io.IOException;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class SynonymDictionaryIndex {
private Directory ramDirectory = new RAMDirectory();
private SearcherManager searcherManager = null;
private Logger logger = LoggerFactory.getLogger(SynonymDictionaryIndex.class);
private static SynonymDictionaryIndex indexingModule = new SynonymDictionaryIndex();
private IndexWriter indexWriter = null;
private SynonymDictionaryIndex() {
try {
Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_36); //문서 내용을 분석 할 때 사용 될 Analyzer
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_36, analyzer);
iwc.setOpenMode(OpenMode.CREATE);
indexWriter = new IndexWriter(ramDirectory, iwc);
searcherManager = new SearcherManager(indexWriter,true, new SearcherFactory());
} catch (IOException e) {
e.printStackTrace();
throw new IllegalStateException();
}
}
public static SynonymDictionaryIndex getIndexingModule() {
return indexingModule;
}
public SearcherManager getSearcherManager() {
return searcherManager;
}
public synchronized void indexingDictionary(List<String> synonyms) {
try {
indexWriter.deleteAll();
indexWriter.commit();
int recordCnt = 0;
//동의어들을 ,로 잘라내어 색인합니다.
//하나의 document에 syn이라는 이름의 필드를 여러개 추가합니다.
//나중에 syn=노트북 으로 검색한다면 그때 나온 결과 Document로부터
//모든 동의어 리스트를 얻을 수 있습니다.
for(String syn : synonyms) {
String[] synonymWords = syn.split(",");
Document doc = new Document();
for(int i = 0, size = synonymWords.length; i < size ; i++) {
String fieldValue = synonymWords[i];
Field field = new Field("syn",fieldValue,Store.YES,Index.NOT_ANALYZED_NO_NORMS, TermVector.NO);
doc.add(field);
recordCnt++;
}//end inner for
indexWriter.addDocument(doc);
}//end outer for
indexWriter.commit();
logger.info("동의어 색인 단어 갯수 : {}", recordCnt);
} catch (Exception e) {
throw new IllegalStateException();
}
}
}