package org.fastcatsearch.ir;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import org.fastcatsearch.datasource.reader.AbstractDataSourceReader;
import org.fastcatsearch.ir.analysis.AnalyzerPoolManager;
import org.fastcatsearch.ir.common.IRException;
import org.fastcatsearch.ir.common.SettingException;
import org.fastcatsearch.ir.config.CollectionContext;
import org.fastcatsearch.ir.config.CollectionIndexStatus.IndexStatus;
import org.fastcatsearch.ir.config.DataInfo.RevisionInfo;
import org.fastcatsearch.ir.config.DataInfo.SegmentInfo;
import org.fastcatsearch.ir.config.IndexConfig;
import org.fastcatsearch.ir.document.Document;
import org.fastcatsearch.ir.document.DocumentWriter;
import org.fastcatsearch.ir.index.DeleteIdSet;
import org.fastcatsearch.ir.index.IndexWriteInfoList;
import org.fastcatsearch.ir.index.SegmentWriter;
import org.fastcatsearch.ir.settings.Schema;
import org.fastcatsearch.ir.settings.SchemaSetting;
import org.fastcatsearch.ir.util.Formatter;
import org.fastcatsearch.job.state.IndexingTaskState;
import org.fastcatsearch.util.CollectionContextUtil;
import org.fastcatsearch.util.FilePaths;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Base class that pulls documents from an {@link AbstractDataSourceReader} and
 * stores them into the working segment through a {@link DocumentWriter}.
 *
 * <p>Call order as implemented here: {@link #init(Schema)} (which first calls
 * {@link #prepare()}), then {@link #doIndexing()}, then {@link #close()}.
 * Subclasses supply the data source reader, the preparation step and the
 * completion hook.
 */
public abstract class AbstractCollectionDocumentStorer implements CollectionIndexerable {
    protected static final Logger logger = LoggerFactory.getLogger(AbstractCollectionDocumentStorer.class);

    /** Progress is logged once every this many stored documents. */
    private static final int PROGRESS_LOG_INTERVAL = 10000;

    protected CollectionContext collectionContext;
    protected AbstractDataSourceReader dataSourceReader;
    /** Wall-clock time (ms) captured at the end of {@link #init(Schema)}. */
    protected long startTime;
    protected IndexingTaskState indexingTaskState;
    /** Deleted-document id list; populated in {@link #close()} from the data source reader. */
    protected DeleteIdSet deleteIdSet;
    protected DocumentWriter documentWriter;
    /** Not assigned in this class; read after {@link #prepare()}, so presumably set by it — verify in subclasses. */
    protected SegmentInfo workingSegmentInfo;
    /** Number of documents written so far. */
    protected int count;
    /** Wall-clock time (ms) of the last progress log line (or of the start of indexing). */
    protected long lapTime;
    /**
     * Set by {@link #requestStop()} and polled by the {@link #doIndexing()} loop.
     * Declared volatile because a stop request can only usefully arrive from a
     * different thread than the one running the loop.
     */
    protected volatile boolean stopRequested;

    public AbstractCollectionDocumentStorer(CollectionContext collectionContext) {
        this.collectionContext = collectionContext;
    }

    /** Creates the reader that supplies documents for the given collection path and schema setting. */
    protected abstract AbstractDataSourceReader createDataSourceReader(File filePath, SchemaSetting schemaSetting) throws IRException;

    /** Hook invoked at the very start of {@link #init(Schema)}. */
    protected abstract void prepare() throws IRException;

    /** Hook invoked at the end of {@link #close()} with the updated revision info and the index status. */
    protected abstract boolean done(RevisionInfo revisionInfo, IndexStatus indexStatus) throws IRException;

    /**
     * Prepares this storer: runs {@link #prepare()}, resolves the segment directory,
     * creates the data source reader and the document writer, and starts the timers.
     *
     * @param schema schema whose setting is handed to the reader and the writer
     * @throws IRException if preparation fails, the working segment info is missing,
     *                     or the reader/writer cannot be created
     */
    public void init(Schema schema) throws IRException {
        prepare();

        FilePaths dataFilePaths = collectionContext.collectionFilePaths().dataPaths();
        int dataSequence = collectionContext.getIndexSequence();
        IndexConfig indexConfig = collectionContext.indexConfig();

        logger.debug("WorkingSegmentInfo = {}", workingSegmentInfo);
        if (workingSegmentInfo == null) {
            // Fail with a descriptive error instead of a bare NullPointerException.
            throw new IRException(new IllegalStateException("workingSegmentInfo is not set after prepare()"));
        }
        String segmentId = workingSegmentInfo.getId();
        RevisionInfo revisionInfo = workingSegmentInfo.getRevisionInfo();

        File segmentDir = dataFilePaths.segmentFile(dataSequence, segmentId);
        logger.info("Segment Dir = {}", segmentDir.getAbsolutePath());

        File filePath = collectionContext.collectionFilePaths().file();
        dataSourceReader = createDataSourceReader(filePath, schema.schemaSetting());

        try {
            documentWriter = new DocumentWriter(schema.schemaSetting(), segmentDir, revisionInfo, indexConfig);
        } catch (Exception e) {
            // The reader is intentionally left open here: close() tolerates a null
            // writer and is the place where the reader gets released.
            throw new IRException(e);
        }

        startTime = System.currentTimeMillis();
        // Seed the lap timer so the first progress line is meaningful even when
        // addDocument() is driven directly rather than through doIndexing().
        lapTime = startTime;
    }

    /**
     * Writes one document, bumps the counter, logs progress periodically and
     * notifies the task state if one was set.
     *
     * @param document document to store
     * @throws IRException declared for callers; raised by the underlying writer as applicable
     * @throws IOException declared for callers; raised by the underlying writer as applicable
     */
    public void addDocument(Document document) throws IRException, IOException {
        documentWriter.write(document);
        count++;

        if (count % PROGRESS_LOG_INTERVAL == 0) {
            long now = System.currentTimeMillis();
            logger.info(
                    "{} documents stored, lap = {} ms, elapsed = {}, mem = {}",
                    count, now - lapTime,
                    Formatter.getFormatTime(now - startTime),
                    Formatter.getFormatSize(Runtime.getRuntime().totalMemory()));
            lapTime = System.currentTimeMillis();
        }

        if (indexingTaskState != null) {
            indexingTaskState.incrementDocumentCount();
        }
    }

    /** Asks the {@link #doIndexing()} loop to stop before reading the next document. */
    @Override
    public void requestStop() {
        logger.info("Collection [{}] Document Store Stop Requested! ", collectionContext.collectionId());
        stopRequested = true;
    }

    /**
     * Closes the writer and the reader, records the document/insert/delete counts
     * on the working revision, and invokes {@link #done(RevisionInfo, IndexStatus)}.
     *
     * <p>The reader is closed even when closing the writer fails; the writer
     * failure is then rethrown and the revision info is left untouched.
     *
     * <p>NOTE(review): the original comment said the result is {@code false} when
     * indexing was cancelled (0 documents); this implementation always returns
     * {@code true} on normal completion — confirm against callers.
     *
     * @return {@code true} when the close sequence completes
     * @throws IRException if the writer cannot be closed, or if this method is
     *                     called before {@link #init(Schema)} created the reader
     * @throws SettingException declared by the interface contract
     */
    @Override
    public boolean close() throws IRException, SettingException {
        IRException writerFailure = null;
        if (documentWriter != null) {
            try {
                documentWriter.close();
            } catch (IOException e) {
                // Log now: if closing the reader below also throws, this failure
                // would otherwise be lost.
                logger.error("Failed to close document writer", e);
                writerFailure = new IRException(e);
            }
        }

        // Always release the reader, even when the writer failed to close.
        if (dataSourceReader != null) {
            dataSourceReader.close();
        }
        if (writerFailure != null) {
            throw writerFailure;
        }
        if (workingSegmentInfo == null || dataSourceReader == null) {
            throw new IRException(new IllegalStateException("close() called before init() completed"));
        }

        RevisionInfo revisionInfo = workingSegmentInfo.getRevisionInfo();
        logger.debug("##Indexer close {}", revisionInfo);

        deleteIdSet = dataSourceReader.getDeleteList();
        int deleteCount = deleteIdSet.size();

        revisionInfo.setDocumentCount(count);
        revisionInfo.setInsertCount(count);
        revisionInfo.setDeleteCount(deleteCount);
        revisionInfo.setCreateTime(Formatter.formatDate());

        long endTime = System.currentTimeMillis();

        IndexStatus indexStatus = new IndexStatus(revisionInfo.getDocumentCount(), revisionInfo.getInsertCount(), revisionInfo.getUpdateCount(), deleteCount,
                Formatter.formatDate(new Date(startTime)), Formatter.formatDate(new Date(endTime)), Formatter.getFormatTime(endTime - startTime));
        logger.debug("CLOSE >> indexStatus > {}", indexStatus);

        done(revisionInfo, indexStatus);
        return true;
    }

    /**
     * Reads documents from the data source reader and stores each one until the
     * reader is exhausted or a stop has been requested.
     *
     * @throws IRException propagated from the reader or from {@link #addDocument(Document)}
     * @throws IOException propagated from {@link #addDocument(Document)}
     */
    @Override
    public void doIndexing() throws IRException, IOException {
        lapTime = System.currentTimeMillis();
        while (dataSourceReader.hasNext()) {
            if (stopRequested) {
                break;
            }
            Document document = dataSourceReader.nextDocument();
            addDocument(document);
        }
    }

    /** Returns the delete id set captured by {@link #close()}; {@code null} before that. */
    public DeleteIdSet deleteIdSet() {
        return deleteIdSet;
    }

    /** Sets the task state that receives a document-count increment per stored document. */
    public void setState(IndexingTaskState indexingTaskState) {
        this.indexingTaskState = indexingTaskState;
    }
}