package org.fastcatsearch.job.management; import java.io.IOException; import java.util.ArrayList; import java.util.HashSet; import java.util.List; import java.util.Set; import org.fastcatsearch.common.io.Streamable; import org.fastcatsearch.exception.FastcatSearchException; import org.fastcatsearch.ir.IRService; import org.fastcatsearch.ir.config.DataInfo.SegmentInfo; import org.fastcatsearch.ir.document.Document; import org.fastcatsearch.ir.document.DocumentReader; import org.fastcatsearch.ir.field.Field; import org.fastcatsearch.ir.io.BytesDataOutput; import org.fastcatsearch.ir.io.DataInput; import org.fastcatsearch.ir.io.DataOutput; import org.fastcatsearch.ir.search.CollectionHandler; import org.fastcatsearch.ir.search.SegmentReader; import org.fastcatsearch.ir.search.SegmentSearcher; import org.fastcatsearch.ir.settings.FieldSetting; import org.fastcatsearch.ir.settings.IndexSetting; import org.fastcatsearch.ir.settings.Schema; import org.fastcatsearch.ir.settings.SchemaSetting; import org.fastcatsearch.job.Job; import org.fastcatsearch.service.ServiceManager; import org.fastcatsearch.vo.CollectionIndexData; import org.fastcatsearch.vo.CollectionIndexData.RowData; public class GetCollectionIndexDataJob extends Job implements Streamable { private static final long serialVersionUID = 1123665008671820737L; private String collectionId; private int start; private int end; private String pkValue; public GetCollectionIndexDataJob() {} public GetCollectionIndexDataJob(String collectionId, int start, int end, String pkValue) { this.collectionId = collectionId; this.start = start; this.end = end; this.pkValue = pkValue; } @Override public JobResult doRun() throws FastcatSearchException { IRService irService = ServiceManager.getInstance().getService(IRService.class); CollectionHandler collectionHandler = irService.collectionHandler(collectionId); if(collectionHandler == null || !collectionHandler.isLoaded()){ CollectionIndexData data = new CollectionIndexData(collectionId, 0, new ArrayList<String>(), new ArrayList<RowData>(), new ArrayList<Boolean>()); return new JobResult(data); } int segmentSize = collectionHandler.segmentSize(); List<String> fieldList = new ArrayList<String>(); List<RowData> indexDataList = new ArrayList<RowData>(); List<Boolean> isDeletedList = new ArrayList<Boolean>(); int documentSize = 0; try { SchemaSetting schemaSetting = collectionHandler.schema().schemaSetting(); List<FieldSetting> fieldSettingList = schemaSetting.getFieldSettingList(); for (int i = 0; i < fieldSettingList.size(); i++) { FieldSetting fieldSetting = fieldSettingList.get(i); String fieldId = fieldSetting.getId(); fieldList.add(fieldId); } if(pkValue != null && pkValue.length() > 0) { if(schemaSetting.getPrimaryKeySetting().getFieldList() != null && schemaSetting.getPrimaryKeySetting().getFieldList().size() > 0) { String[] pkList = pkValue.split("\\W"); BytesDataOutput tempOutput = new BytesDataOutput(); int count = 0; Set<String> dupSet = new HashSet<String>(); for(String pk : pkList) { pk = pk.trim(); if(pk.length() == 0) { continue; } if(dupSet.contains(pk)){ continue; }else{ dupSet.add(pk); } for (int segmentNumber = segmentSize - 1; segmentNumber >= 0; segmentNumber--) { SegmentReader segmentReader = collectionHandler.segmentReader(segmentNumber); int docNo = segmentReader.newSearchIndexesReader().getPrimaryKeyIndexesReader().getDocNo(pk, tempOutput); if (docNo != -1) { // logger.debug(">>> {} , doc={}~ {}", count, start, end); if(count >= start && count <= end) { Document document = collectionHandler.segmentReader(segmentNumber).segmentSearcher().getDocument(docNo); if(document != null) { isDeletedList.add(segmentReader.deleteSet().isSet(docNo)); add(document, String.valueOf(segmentNumber), indexDataList); } } documentSize++; count++; } } } } }else{ //이 배열의 index번호는 세그먼트번호. int[] segmentEndNumbers = new int[segmentSize]; for (int segmentNumber = 0; segmentNumber < segmentSize; segmentNumber++) { SegmentReader reader = collectionHandler.segmentReader(segmentNumber); DocumentReader documentReader = reader.newDocumentReader(); int count = documentReader.getDocumentCount(); documentSize += count; segmentEndNumbers[segmentNumber] = documentReader.getBaseNumber() + documentReader.getDocumentCount() - 1; logger.debug("segmentEndNumbers[{}]={}", segmentNumber, segmentEndNumbers[segmentNumber]); } //여러세그먼트에 걸쳐있을 경우를 고려한다. int[][] matchSegmentList = matchSegment(segmentEndNumbers, start, end - start + 1); //write data for (int i = 0; i < matchSegmentList.length; i++) { int segmentNumber = matchSegmentList[i][0]; int startNo = matchSegmentList[i][1]; int endNo = matchSegmentList[i][2]; SegmentReader segmentReader = collectionHandler.segmentReader(segmentNumber); if (segmentReader != null) { SegmentInfo segmentInfo = segmentReader.segmentInfo(); String segmentId = segmentInfo.getId(); SegmentSearcher segmentSearcher = segmentReader.segmentSearcher(); for (int docNo = startNo; docNo <= endNo; docNo++) { Document document = segmentSearcher.getDocument(docNo); if(document == null){ //문서의 끝에 다다름. break; } isDeletedList.add(segmentReader.deleteSet().isSet(docNo)); add(document, segmentId, indexDataList); } } else { logger.debug("segmentReader is NULL"); } } } CollectionIndexData data = new CollectionIndexData(collectionId, documentSize, fieldList, indexDataList, isDeletedList); return new JobResult(data); } catch (Throwable t) { logger.error("", t); CollectionIndexData data = new CollectionIndexData(collectionId, 0, null, null, null); return new JobResult(data); } } private void add(Document document, String segmentId, List<RowData> indexDataList) { int fieldSize = document.size(); String[][] fieldData = new String[fieldSize][]; for (int index = 0; index < fieldSize; index++) { Field field = document.get(index); fieldData[index] = new String[] { field.getId(), field.toString() }; } RowData rowData = new RowData(segmentId, fieldData); indexDataList.add(rowData); } private int[][] matchSegment(int[] segEndNums, int start, int rows) { // [][세그먼트번호,시작번호,끝번호] ArrayList<int[]> list = new ArrayList<int[]>(); for (int i = 0; i < segEndNums.length; i++) { if (start > segEndNums[i]) { start = start - segEndNums[i] - 1; } else { int[] res = new int[3]; int emptyCount = segEndNums[i] - start + 1; res[0] = i;// 세그먼트번호 if (emptyCount < rows) { res[1] = start;// 시작번호 res[2] = segEndNums[i]; start = 0; rows = rows - emptyCount; list.add(res); } else { res[1] = start;// 시작번호 res[2] = start + rows - 1;// 끝번호 list.add(res); break; } } } int[][] result = new int[list.size()][3]; for (int i = 0; i < list.size(); i++) { int[] tmp = list.get(i); result[i][0] = tmp[0]; result[i][1] = tmp[1]; result[i][2] = tmp[2]; } return result; } @Override public void readFrom(DataInput input) throws IOException { collectionId = input.readString(); start = input.readInt(); end = input.readInt(); pkValue = input.readString(); } @Override public void writeTo(DataOutput output) throws IOException { output.writeString(collectionId); output.writeInt(start); output.writeInt(end); output.writeString(pkValue); } }