package com.tistory.devyongsik.analyzer.dictionaryindex;

import java.io.IOException;
import java.util.List;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.FieldInfo.IndexOptions;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.search.SearcherFactory;
import org.apache.lucene.search.SearcherManager;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Singleton that keeps a synonym dictionary in an in-memory Lucene index.
 *
 * <p>Each synonym group is stored as one {@link Document} carrying several
 * {@code syn} fields, one per word. Looking up a single word on the
 * {@code syn} field therefore yields a document from which every synonym of
 * that word can be read back.
 */
public class SynonymDictionaryIndex {

    // Declared before the singleton instance so that it is initialized first.
    private static final Logger LOGGER = LoggerFactory.getLogger(SynonymDictionaryIndex.class);

    private static final SynonymDictionaryIndex indexingModule = new SynonymDictionaryIndex();

    /** Name of the field that holds one synonym word. */
    private static final String SYNONYM_FIELD = "syn";

    private final Directory ramDirectory = new RAMDirectory();
    private final IndexWriter indexWriter;
    private final SearcherManager searcherManager;

    /**
     * Opens the writer on the in-memory directory and creates the searcher
     * manager on top of that writer.
     *
     * @throws IllegalStateException if the index cannot be opened; the
     *         underlying {@link IOException} is attached as the cause
     */
    private SynonymDictionaryIndex() {
        try {
            // Analyzer used when analyzing document content.
            Analyzer analyzer = new SimpleAnalyzer(Version.LUCENE_44);
            IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);
            iwc.setOpenMode(OpenMode.CREATE);

            indexWriter = new IndexWriter(ramDirectory, iwc);
            searcherManager = new SearcherManager(indexWriter, true, new SearcherFactory());
        } catch (IOException e) {
            // Keep the cause so the real failure is visible to whoever handles it.
            throw new IllegalStateException("Failed to initialize the synonym dictionary index", e);
        }
    }

    /**
     * Returns the shared instance.
     *
     * @return the singleton {@code SynonymDictionaryIndex}
     */
    public static SynonymDictionaryIndex getIndexingModule() {
        return indexingModule;
    }

    /**
     * Returns the searcher manager created on top of this index's writer.
     *
     * @return the searcher manager
     */
    public SearcherManager getSearcherManager() {
        return searcherManager;
    }

    /**
     * Replaces the whole index content with the given synonym groups.
     *
     * <p>The existing documents are deleted and committed first, then every
     * entry is split on {@code ','} and indexed as one document, and finally
     * the additions are committed. Words are indexed verbatim: the field is
     * not tokenized and the words are not trimmed.
     *
     * <p>NOTE(review): this method does not refresh the searcher manager;
     * presumably callers invoke {@code maybeRefresh()} themselves - confirm.
     *
     * @param synonyms synonym groups, each a comma-separated list of words
     * @throws IllegalStateException if anything fails while indexing; the
     *         original exception is attached as the cause
     */
    public synchronized void indexingDictionary(List<String> synonyms) {
        try {
            indexWriter.deleteAll();
            indexWriter.commit();

            int recordCnt = 0;

            // Split each entry on ',' and index the words.
            // Several fields named "syn" are added to a single document, so a
            // later search such as syn=노트북 returns a Document from which the
            // complete synonym list can be obtained.
            FieldType fieldType = new FieldType();
            fieldType.setIndexed(true);
            fieldType.setStored(true);
            fieldType.setIndexOptions(IndexOptions.DOCS_ONLY);
            fieldType.setTokenized(false);

            for (String syn : synonyms) {
                Document doc = new Document();

                for (String fieldValue : syn.split(",")) {
                    doc.add(new Field(SYNONYM_FIELD, fieldValue, fieldType));
                    recordCnt++;
                }

                indexWriter.addDocument(doc);
            }

            indexWriter.commit();

            LOGGER.info("동의어 색인 단어 갯수 : {}", recordCnt);
        } catch (Exception e) {
            // Same exception type as before, now carrying the root cause.
            throw new IllegalStateException("Failed to index the synonym dictionary", e);
        }
    }
}