/* * Copyright (c) 2013 Websquared, Inc. * All rights reserved. This program and the accompanying materials * are made available under the terms of the GNU Public License v2.0 * which accompanies this distribution, and is available at * http://www.gnu.org/licenses/old-licenses/gpl-2.0.html * * Contributors: * swsong - initial API and implementation */ package org.fastcatsearch.task; import java.io.File; import org.apache.commons.io.FileUtils; import org.fastcatsearch.datasource.reader.AbstractDataSourceReader; import org.fastcatsearch.ir.IRService; import org.fastcatsearch.ir.analysis.AnalyzerPoolManager; import org.fastcatsearch.ir.common.IRException; import org.fastcatsearch.ir.config.CollectionConfig; import org.fastcatsearch.ir.config.CollectionContext; import org.fastcatsearch.ir.config.SingleSourceConfig; import org.fastcatsearch.ir.document.Document; import org.fastcatsearch.ir.index.SegmentWriter; import org.fastcatsearch.ir.settings.Schema; import org.fastcatsearch.ir.util.Formatter; import org.fastcatsearch.service.ServiceManager; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @Deprecated public class MakeIndexFileTask extends Task { private static Logger indexingLogger = LoggerFactory.getLogger("INDEXING_LOG"); public int makeIndex(String collectionId, File collectionHomeDir, Schema workSchema, File collectionDataDir, AbstractDataSourceReader sourceReader, File segmentDir) throws Exception { if(workSchema.getFieldSize() == 0){ throw new TaskException("["+collectionId+"] Full Indexing Canceled. Schema field is empty."); } //주키가 없으면 색인실패 // if(workSchema.getIndexID() == -1){ // throw new TaskException("컬렉션 스키마에 주키(Primary Key)를 설정해야합니다."); // } //FileUtils.deleteDirectory(collectionDataDir); FileUtils.forceDelete(collectionDataDir); indexingLogger.info("Segment Dir = "+segmentDir.getAbsolutePath()); SegmentWriter writer = null; int count = 0; try{ IRService irService = ServiceManager.getInstance().getService(IRService.class); CollectionContext collectionContext = irService.collectionContext(collectionId); AnalyzerPoolManager analyzerPoolManager = null; writer = new SegmentWriter(workSchema, segmentDir, collectionContext.indexConfig(), analyzerPoolManager); long startTime = System.currentTimeMillis(); long lapTime = startTime; while(sourceReader.hasNext()){ Document doc = sourceReader.nextDocument(); int lastDocNo = writer.addDocument(doc); if(lastDocNo % 10000 == 0){ logger.info("{} documents indexed, lap = {} ms, elapsed = {}, mem = {}", new Object[]{lastDocNo, System.currentTimeMillis() - lapTime, Formatter.getFormatTime(System.currentTimeMillis() - startTime), Formatter.getFormatSize(Runtime.getRuntime().totalMemory())}); lapTime = System.currentTimeMillis(); } } count = writer.getDocumentCount(); }catch(IRException e){ logger.error("SegmentWriter indexDocument Exception! "+e.getMessage(),e); throw e; }finally{ try{ if(writer != null){ writer.close(); } }catch(Exception e){ logger.error("Error while close segment writer! "+e.getMessage(),e); e.printStackTrace(); } } //FIXME int dupCount = 0;//writer.getDuplicateDocCount();//중복문서 삭제카운트 if(count == 0){ throw new TaskException("["+collectionId+"] Full Indexing Canceled due to no documents."); } return dupCount; } }