package org.fastcatsearch.ir; import java.io.File; import java.io.IOException; import java.util.Date; import org.fastcatsearch.datasource.reader.DataSourceReader; import org.fastcatsearch.ir.analysis.AnalyzerPoolManager; import org.fastcatsearch.ir.common.IRException; import org.fastcatsearch.ir.common.SettingException; import org.fastcatsearch.ir.config.CollectionContext; import org.fastcatsearch.ir.config.CollectionIndexStatus.IndexStatus; import org.fastcatsearch.ir.config.DataInfo.RevisionInfo; import org.fastcatsearch.ir.config.DataInfo.SegmentInfo; import org.fastcatsearch.ir.config.IndexConfig; import org.fastcatsearch.ir.document.Document; import org.fastcatsearch.ir.index.DeleteIdSet; import org.fastcatsearch.ir.index.IndexWritable; import org.fastcatsearch.ir.index.IndexWriteInfoList; import org.fastcatsearch.ir.index.SegmentWriter; import org.fastcatsearch.ir.index.SelectedIndexList; import org.fastcatsearch.ir.index.WriteInfoLoggable; import org.fastcatsearch.ir.settings.Schema; import org.fastcatsearch.ir.settings.SchemaSetting; import org.fastcatsearch.ir.util.Formatter; import org.fastcatsearch.job.indexing.IndexingStopException; import org.fastcatsearch.job.state.IndexingTaskState; import org.fastcatsearch.util.CollectionContextUtil; import org.fastcatsearch.util.FilePaths; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public abstract class AbstractCollectionIndexer implements CollectionIndexerable { protected static final Logger logger = LoggerFactory.getLogger(AbstractCollectionIndexer.class); protected CollectionContext collectionContext; protected AnalyzerPoolManager analyzerPoolManager; protected DataSourceReader dataSourceReader; protected long startTime; protected IndexingTaskState indexingTaskState; protected DeleteIdSet deleteIdSet; //삭제문서리스트. 외부에서 source reader를 통해 셋팅된다. protected IndexWriteInfoList indexWriteInfoList; protected IndexWritable indexWriter; protected SegmentInfo workingSegmentInfo; protected int count; protected long lapTime; protected boolean stopRequested; protected SelectedIndexList selectedIndexList;// 색인필드 선택사항. public AbstractCollectionIndexer(CollectionContext collectionContext, AnalyzerPoolManager analyzerPoolManager) { this(collectionContext, analyzerPoolManager, null); } public AbstractCollectionIndexer(CollectionContext collectionContext, AnalyzerPoolManager analyzerPoolManager, SelectedIndexList selectedIndexList) { this.collectionContext = collectionContext; this.analyzerPoolManager = analyzerPoolManager; this.selectedIndexList = selectedIndexList; } protected abstract DataSourceReader createDataSourceReader(File filePath, SchemaSetting schemaSetting) throws IRException; protected abstract void prepare() throws IRException; protected abstract boolean done(RevisionInfo revisionInfo, IndexStatus indexStatus) throws IRException, IndexingStopException; protected IndexWritable createIndexWriter(Schema schema, File segmentDir, RevisionInfo revisionInfo, IndexConfig indexConfig) throws IRException { return new SegmentWriter(schema, segmentDir, revisionInfo, indexConfig, analyzerPoolManager, selectedIndexList); } public void init(Schema schema) throws IRException { prepare(); FilePaths dataFilePaths = collectionContext.collectionFilePaths().dataPaths(); int dataSequence = collectionContext.getIndexSequence(); IndexConfig indexConfig = collectionContext.indexConfig(); logger.debug("WorkingSegmentInfo = {}", workingSegmentInfo); String segmentId = workingSegmentInfo.getId(); RevisionInfo revisionInfo = workingSegmentInfo.getRevisionInfo(); File segmentDir = dataFilePaths.segmentFile(dataSequence, segmentId); logger.info("Segment Dir = {}", segmentDir.getAbsolutePath()); File filePath = collectionContext.collectionFilePaths().file(); dataSourceReader = createDataSourceReader(filePath, schema.schemaSetting()); indexWriter = createIndexWriter(schema, segmentDir, revisionInfo, indexConfig); indexWriteInfoList = new IndexWriteInfoList(); startTime = System.currentTimeMillis(); } public void addDocument(Document document) throws IRException, IOException{ indexWriter.addDocument(document); count++; if (count % 10000 == 0) { logger.info( "{} documents indexed, lap = {} ms, elapsed = {}, mem = {}", count, System.currentTimeMillis() - lapTime, Formatter.getFormatTime(System.currentTimeMillis() - startTime), Formatter.getFormatSize(Runtime.getRuntime().totalMemory())); lapTime = System.currentTimeMillis(); } if(indexingTaskState != null){ indexingTaskState.incrementDocumentCount(); } } @Override public void requestStop(){ logger.info("Collection [{}] Indexer Stop Requested! ", collectionContext.collectionId()); stopRequested = true; } //색인취소(0건)이면 false; @Override public boolean close() throws IRException, SettingException, IndexingStopException { RevisionInfo revisionInfo = workingSegmentInfo.getRevisionInfo(); if (indexWriter != null) { try { indexWriter.close(); if(indexWriter instanceof WriteInfoLoggable) ((WriteInfoLoggable) indexWriter).getIndexWriteInfo(indexWriteInfoList); } catch (IOException e) { throw new IRException(e); } } dataSourceReader.close(); logger.debug("##Indexer close {}", revisionInfo); deleteIdSet = dataSourceReader.getDeleteList(); int deleteCount = 0; if(deleteIdSet != null) { deleteCount = deleteIdSet.size(); } revisionInfo.setDeleteCount(deleteCount); long endTime = System.currentTimeMillis(); IndexStatus indexStatus = new IndexStatus(revisionInfo.getDocumentCount(), revisionInfo.getInsertCount(), revisionInfo.getUpdateCount(), deleteCount, Formatter.formatDate(new Date(startTime)), Formatter.formatDate(new Date(endTime)), Formatter.getFormatTime(endTime - startTime)); if(done(revisionInfo, indexStatus)){ CollectionContextUtil.saveCollectionAfterIndexing(collectionContext); }else{ //저장하지 않음. } return true; } public IndexWriteInfoList indexWriteInfoList() { return indexWriteInfoList; } @Override public void doIndexing() throws IRException, IOException { indexingTaskState.setStep(IndexingTaskState.STEP_INDEXING); lapTime = System.currentTimeMillis(); while (dataSourceReader.hasNext()) { if(stopRequested){ break; } Document document = dataSourceReader.nextDocument(); // logger.debug("doc >> {}", document); addDocument(document); } } public DeleteIdSet deleteIdSet() { return deleteIdSet; } public void setTaskState(IndexingTaskState indexingTaskState) { this.indexingTaskState = indexingTaskState; } }