package org.fastcatsearch.ir;
import org.apache.commons.io.FileUtils;
import org.fastcatsearch.datasource.reader.DataSourceReader;
import org.fastcatsearch.datasource.reader.DefaultDataSourceReaderFactory;
import org.fastcatsearch.ir.analysis.AnalyzerPoolManager;
import org.fastcatsearch.ir.common.IRException;
import org.fastcatsearch.ir.common.IndexingType;
import org.fastcatsearch.ir.common.SettingException;
import org.fastcatsearch.ir.config.CollectionContext;
import org.fastcatsearch.ir.config.CollectionIndexStatus.IndexStatus;
import org.fastcatsearch.ir.config.DataInfo.RevisionInfo;
import org.fastcatsearch.ir.config.DataInfo.SegmentInfo;
import org.fastcatsearch.ir.config.DataSourceConfig;
import org.fastcatsearch.ir.config.IndexConfig;
import org.fastcatsearch.ir.document.Document;
import org.fastcatsearch.ir.index.*;
import org.fastcatsearch.ir.settings.Schema;
import org.fastcatsearch.ir.settings.SchemaSetting;
import org.fastcatsearch.ir.util.Formatter;
import org.fastcatsearch.job.indexing.IndexingStopException;
import org.fastcatsearch.job.state.IndexingTaskState;
import org.fastcatsearch.util.CollectionContextUtil;
import org.fastcatsearch.util.FilePaths;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedBlockingQueue;
/**
 * Full indexer that builds {@code segmentSize} segments at the same time.
 *
 * <p>{@link #doIndexing()} reads documents from the {@link DataSourceReader} on the calling thread
 * and hands them through a bounded queue to one {@link SegmentIndexWriteConsumer} per segment; each
 * consumer owns its own segment writer. {@link #close()} closes the writers and the reader, merges
 * the per-segment revision info and persists the collection state.
 *
 * <p>NOTE(review): the constructor calls the overridable {@link #init(Schema)} and
 * {@link #prepare()}; subclasses overriding them run before their own fields are initialized.
 */
public class MultiThreadCollectionFullIndexer implements CollectionIndexerable {
    protected static final Logger logger = LoggerFactory.getLogger(MultiThreadCollectionFullIndexer.class);

    /** Capacity of the hand-off queue between the reader loop and the segment consumers. */
    private static final int DOCUMENT_QUEUE_CAPACITY = 10;
    /** A progress line is logged every time this many documents have been queued. */
    private static final int PROGRESS_LOG_INTERVAL = 10000;

    protected CollectionContext collectionContext;
    protected AnalyzerPoolManager analyzerPoolManager;
    protected DataSourceReader dataSourceReader;
    protected long startTime;
    /** Optional progress sink; may be null when {@link #setTaskState} was never called. */
    protected IndexingTaskState indexingTaskState;
    /** Ids of deleted documents; obtained from the source reader in {@link #close()}. */
    protected DeleteIdSet deleteIdSet;
    protected IndexWriteInfoList indexWriteInfoList;
    protected List<SegmentInfo> workingSegmentInfoList;
    protected int count;
    protected long lapTime;
    /**
     * Set by {@link #requestStop()} and polled by the loop in {@link #doIndexing()}. Volatile so
     * that a stop request issued from another thread becomes visible to the indexing loop.
     */
    protected volatile boolean stopRequested;
    /** Selection of index fields to build. */
    protected SelectedIndexList selectedIndexList;
    /** Number of segments that are built concurrently. */
    protected int segmentSize;

    private BlockingQueue<Document> documentQueue;
    private CountDownLatch latch;
    private List<SegmentIndexWriteConsumer> consumerList;

    /**
     * Creates an indexer without an index field selection.
     *
     * @param collectionContext   context of the collection to index
     * @param analyzerPoolManager analyzer pools passed to the segment writers
     * @throws IRException if initialization fails
     */
    public MultiThreadCollectionFullIndexer(CollectionContext collectionContext, AnalyzerPoolManager analyzerPoolManager) throws IRException {
        this(collectionContext, analyzerPoolManager, null);
    }

    /**
     * Creates an indexer; the segment count is read from the collection config and
     * {@link #init(Schema)} is run immediately.
     *
     * @param collectionContext   context of the collection to index
     * @param analyzerPoolManager analyzer pools passed to the segment writers
     * @param selectedIndexList   index field selection, may be null
     * @throws IRException if initialization fails
     */
    public MultiThreadCollectionFullIndexer(CollectionContext collectionContext, AnalyzerPoolManager analyzerPoolManager, SelectedIndexList selectedIndexList) throws IRException {
        this.collectionContext = collectionContext;
        this.analyzerPoolManager = analyzerPoolManager;
        this.selectedIndexList = selectedIndexList;
        this.segmentSize = collectionContext.collectionConfig().getFullIndexingSegmentSize();
        init(collectionContext.schema());
    }

    /**
     * Creates the full-indexing source reader for this collection.
     *
     * @param filePath      collection file path handed to the reader factory
     * @param schemaSetting schema setting handed to the reader factory
     * @return the reader documents are pulled from in {@link #doIndexing()}
     * @throws IRException if the factory fails
     */
    protected DataSourceReader createDataSourceReader(File filePath, SchemaSetting schemaSetting) throws IRException {
        DataSourceConfig dataSourceConfig = collectionContext.dataSourceConfig();
        return DefaultDataSourceReaderFactory.createFullIndexingSourceReader(collectionContext.collectionId(), filePath, schemaSetting, dataSourceConfig);
    }

    /**
     * Registers the built segments and the index status on the collection context.
     *
     * @param revisionInfo merged revision info of all segments
     * @param indexStatus  status to record as the full index status
     * @return {@code true} when the segments were registered
     * @throws IRException           declared for the collection-context calls
     * @throws IndexingStopException if no document was inserted or a stop was requested
     */
    protected boolean done(RevisionInfo revisionInfo, IndexStatus indexStatus) throws IRException, IndexingStopException {
        int insertCount = revisionInfo.getInsertCount();
        if (insertCount > 0 && !stopRequested) {
            // The segments already carry this revision info, so it is not set again here.
            int baseNumber = 0;
            for (int inx = 0; inx < segmentSize; inx++) {
                SegmentInfo workingSegmentInfo = workingSegmentInfoList.get(inx);
                // Guards against the case where there are fewer documents than consumers.
                // NOTE(review): this stops at the first empty segment; if an earlier segment can be
                // empty while a later one holds documents, the later one is skipped - confirm that
                // the consumers cannot produce that distribution.
                if (workingSegmentInfo.getRevisionInfo().getDocumentCount() == 0) {
                    break;
                }
                workingSegmentInfo.setBaseNumber(baseNumber);
                // Updates the index#/info.xml data; called in place of add-indexing's updateCollection.
                collectionContext.addSegmentInfo(workingSegmentInfo);
                logger.debug("Add Segment info = {}", workingSegmentInfo);
                baseNumber = workingSegmentInfo.getNextBaseNumber();
            }
            // Updates the status.xml data.
            collectionContext.updateCollectionStatus(IndexingType.FULL, revisionInfo, startTime, System.currentTimeMillis());
            collectionContext.indexStatus().setFullIndexStatus(indexStatus);
            return true;
        }

        if (!stopRequested) {
            logger.info("[{}] Indexing Canceled due to no documents.", collectionContext.collectionId());
            throw new IndexingStopException(collectionContext.collectionId()+" Indexing Canceled due to no documents.");
        }
        logger.info("[{}] Indexing Canceled due to Stop Requested!", collectionContext.collectionId());
        throw new IndexingStopException(collectionContext.collectionId()+" Indexing Canceled due to Stop Requested");
    }

    /**
     * Creates the writer for one segment directory.
     *
     * @param schema       schema of the collection
     * @param segmentDir   directory the segment is written to
     * @param revisionInfo revision info of the segment
     * @param indexConfig  index configuration
     * @return a new {@link SegmentWriter}
     * @throws IRException if the writer cannot be created
     */
    protected IndexWritable createIndexWriter(Schema schema, File segmentDir, RevisionInfo revisionInfo, IndexConfig indexConfig) throws IRException {
        return new SegmentWriter(schema, segmentDir, revisionInfo, indexConfig, analyzerPoolManager, selectedIndexList);
    }

    /**
     * Prepares the data directory, creates one writer/consumer pair per segment and opens the
     * data source reader. Consumers are created here but only started in {@link #doIndexing()}.
     *
     * @param schema schema used for the writers and the reader
     * @throws IRException if preparation, writer creation or reader creation fails
     */
    public void init(Schema schema) throws IRException {
        prepare();

        documentQueue = new LinkedBlockingQueue<Document>(DOCUMENT_QUEUE_CAPACITY);
        latch = new CountDownLatch(segmentSize);
        FilePaths dataFilePaths = collectionContext.collectionFilePaths().dataPaths();
        int dataSequence = collectionContext.getIndexSequence();
        consumerList = new ArrayList<SegmentIndexWriteConsumer>();
        IndexConfig indexConfig = collectionContext.indexConfig();

        for (int inx = 0; inx < segmentSize; inx++) {
            SegmentInfo workingSegmentInfo = workingSegmentInfoList.get(inx);
            logger.debug("WorkingSegmentInfo-{} = {}", inx, workingSegmentInfo);
            String segmentId = workingSegmentInfo.getId();
            RevisionInfo revisionInfo = workingSegmentInfo.getRevisionInfo();
            File segmentDir = dataFilePaths.segmentFile(dataSequence, segmentId);
            logger.info("Segment Dir = {}", segmentDir.getAbsolutePath());
            IndexWritable indexWriter = createIndexWriter(schema, segmentDir, revisionInfo, indexConfig);
            consumerList.add(new SegmentIndexWriteConsumer(segmentId, indexWriter, documentQueue, latch));
        }

        File filePath = collectionContext.collectionFilePaths().file();
        dataSourceReader = createDataSourceReader(filePath, schema.schemaSetting());
        indexWriteInfoList = new IndexWriteInfoList();
        startTime = System.currentTimeMillis();
    }

    /**
     * Allocates the working segment infos, moves the collection to its next data sequence and
     * recreates the (empty) index directory for that sequence.
     *
     * @throws IRException if the old directory cannot be deleted or the new one cannot be created
     */
    protected void prepare() throws IRException {
        workingSegmentInfoList = new ArrayList<SegmentInfo>(segmentSize);
        SegmentInfo workingSegmentInfo = new SegmentInfo();
        workingSegmentInfoList.add(workingSegmentInfo);
        // Allocate the remaining segment infos sequentially, each derived from the previous one.
        for (int inx = 1; inx < segmentSize; inx++) {
            workingSegmentInfo = workingSegmentInfo.getNextSegmentInfo();
            workingSegmentInfoList.add(workingSegmentInfo);
        }

        // Switch to the next data directory.
        int newDataSequence = collectionContext.nextDataSequence();

        // Reset the directory.
        File indexDataDir = collectionContext.collectionFilePaths().dataPaths().indexDirFile(newDataSequence);
        try {
            if (indexDataDir.exists()) {
                FileUtils.forceDelete(indexDataDir);
            }
        } catch (IOException e) {
            throw new IRException(e);
        }

        collectionContext.clearDataInfoAndStatus();

        // Fail here with a clear cause instead of letting the segment writers fail later.
        if (!indexDataDir.mkdirs() && !indexDataDir.isDirectory()) {
            throw new IRException(new IOException("Cannot create index data directory: " + indexDataDir.getAbsolutePath()));
        }
    }

    /** Asks the loop in {@link #doIndexing()} to stop before reading the next document. */
    @Override
    public void requestStop() {
        logger.info("Collection [{}] Indexer Stop Requested! ", collectionContext.collectionId());
        stopRequested = true;
    }

    /**
     * Closes all segment writers and the data source reader, merges the per-segment revision info
     * and, when {@link #done} accepts the result, saves the collection.
     *
     * <p>Every writer and the reader are closed even if one of them fails; the first failure is
     * rethrown afterwards (an {@link IOException} is wrapped in an {@link IRException}).
     *
     * @return always {@code true} in this implementation; a cancelled run (no inserted documents
     *         or a stop request) is reported by {@link #done} throwing {@link IndexingStopException}
     * @throws IRException           if closing a writer or the reader fails
     * @throws SettingException      declared for the calls made while closing and saving
     * @throws IndexingStopException if indexing was cancelled
     */
    @Override
    public boolean close() throws IRException, SettingException, IndexingStopException {
        RevisionInfo revisionInfo = new RevisionInfo();

        Exception writerFailure = closeWriters(revisionInfo);
        if (writerFailure != null) {
            // Still release the reader, but let the writer failure be the one that is reported.
            try {
                dataSourceReader.close();
            } catch (Exception e) {
                logger.warn("[" + collectionContext.collectionId() + "] Failed to close data source reader after writer failure.", e);
            }
            rethrowCloseFailure(writerFailure);
        }

        dataSourceReader.close();

        logger.debug("##Indexer close {}", revisionInfo);
        deleteIdSet = dataSourceReader.getDeleteList();
        int deleteCount = 0;
        if (deleteIdSet != null) {
            deleteCount = deleteIdSet.size();
        }

        revisionInfo.setDeleteCount(deleteCount);

        long endTime = System.currentTimeMillis();

        IndexStatus indexStatus = new IndexStatus(revisionInfo.getDocumentCount(), revisionInfo.getInsertCount(), revisionInfo.getUpdateCount(), deleteCount,
                Formatter.formatDate(new Date(startTime)), Formatter.formatDate(new Date(endTime)), Formatter.getFormatTime(endTime - startTime));

        if (done(revisionInfo, indexStatus)) {
            CollectionContextUtil.saveCollectionAfterIndexing(collectionContext);
        }
        // When done() returns false nothing is saved.

        return true;
    }

    /**
     * Closes every segment writer and adds each successfully closed segment's revision info to
     * {@code totalRevisionInfo}. Keeps going after a failure so that no writer is left open.
     *
     * @return the first failure, or {@code null} if every writer closed cleanly
     */
    private Exception closeWriters(RevisionInfo totalRevisionInfo) {
        Exception firstFailure = null;
        for (int inx = 0; inx < segmentSize; inx++) {
            try {
                IndexWritable indexWriter = consumerList.get(inx).getWriter();
                indexWriter.close();
                if (indexWriter instanceof WriteInfoLoggable) {
                    ((WriteInfoLoggable) indexWriter).getIndexWriteInfo(indexWriteInfoList);
                }
                RevisionInfo subRevisionInfo = workingSegmentInfoList.get(inx).getRevisionInfo();
                totalRevisionInfo.add(subRevisionInfo);
                logger.debug("revisionInfo#{} > {}", inx, totalRevisionInfo);
            } catch (Exception e) {
                if (firstFailure == null) {
                    firstFailure = e;
                } else {
                    logger.warn("[" + collectionContext.collectionId() + "] Additional failure while closing segment writer #" + inx, e);
                }
            }
        }
        return firstFailure;
    }

    /** Rethrows a failure captured while closing, keeping the exception types {@link #close()} declares. */
    private static void rethrowCloseFailure(Exception failure) throws IRException, SettingException, IndexingStopException {
        if (failure instanceof RuntimeException) {
            throw (RuntimeException) failure;
        }
        if (failure instanceof IRException) {
            throw (IRException) failure;
        }
        if (failure instanceof SettingException) {
            throw (SettingException) failure;
        }
        if (failure instanceof IndexingStopException) {
            throw (IndexingStopException) failure;
        }
        // IOException and anything else is wrapped.
        throw new IRException(failure);
    }

    /** @return the write info collected from the writers in {@link #close()} */
    public IndexWriteInfoList indexWriteInfoList() {
        return indexWriteInfoList;
    }

    /**
     * Starts the consumers, feeds them every document from the reader (until exhausted or a stop is
     * requested), then signals completion and waits on the latch.
     *
     * <p>The consumers are always told to finish, even when reading fails, so they are not left
     * waiting on the queue. On failure this method does not wait for them.
     *
     * @throws IRException wrapping any failure; if the thread was interrupted its interrupt flag is
     *                     restored before throwing
     * @throws IOException declared by the interface; failures raised here are wrapped in IRException
     */
    @Override
    public void doIndexing() throws IRException, IOException {
        // The task state is optional (see the null check inside the loop), so guard it here too.
        if (indexingTaskState != null) {
            indexingTaskState.setStep(IndexingTaskState.STEP_INDEXING);
        }

        for (SegmentIndexWriteConsumer consumer : consumerList) {
            consumer.start();
        }

        try {
            try {
                lapTime = System.currentTimeMillis();
                while (dataSourceReader.hasNext()) {
                    if (stopRequested) {
                        break;
                    }
                    Document document = dataSourceReader.nextDocument();
                    // Blocks while the queue is full, throttling the reader to the writers' pace.
                    documentQueue.put(document);
                    count++;
                    if (count % PROGRESS_LOG_INTERVAL == 0) {
                        logger.info("{} documents indexed, lap = {} ms, elapsed = {}, mem = {}", count, System.currentTimeMillis() - lapTime,
                                Formatter.getFormatTime(System.currentTimeMillis() - startTime), Formatter.getFormatSize(Runtime.getRuntime().totalMemory()));
                        lapTime = System.currentTimeMillis();
                    }
                    if (indexingTaskState != null) {
                        indexingTaskState.incrementDocumentCount();
                    }
                }
            } finally {
                for (SegmentIndexWriteConsumer consumer : consumerList) {
                    consumer.requestDone();
                }
            }
            latch.await();
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers further up the stack.
            Thread.currentThread().interrupt();
            throw new IRException(e);
        } catch (Exception e) {
            throw new IRException(e);
        }
    }

    /** @return the deleted-document ids read in {@link #close()}, or null before that */
    public DeleteIdSet deleteIdSet() {
        return deleteIdSet;
    }

    /** Sets the optional task state that receives step and document-count updates. */
    public void setTaskState(IndexingTaskState indexingTaskState) {
        this.indexingTaskState = indexingTaskState;
    }
}