ClusterSearchJob.java example

Explorer
fastcatsearch-master
package org.fastcatsearch.job.search;

import org.fastcatsearch.cluster.Node;
import org.fastcatsearch.cluster.NodeService;
import org.fastcatsearch.control.ResultFuture;
import org.fastcatsearch.error.ServerErrorCode;
import org.fastcatsearch.error.SearchError;
import org.fastcatsearch.exception.FastcatSearchException;
import org.fastcatsearch.ir.IRService;
import org.fastcatsearch.ir.common.IRException;
import org.fastcatsearch.ir.config.CollectionContext;
import org.fastcatsearch.ir.group.GroupResult;
import org.fastcatsearch.ir.group.GroupResults;
import org.fastcatsearch.ir.group.GroupsData;
import org.fastcatsearch.ir.io.FixedHitReader;
import org.fastcatsearch.ir.query.*;
import org.fastcatsearch.ir.search.*;
import org.fastcatsearch.ir.settings.Schema;
import org.fastcatsearch.job.Job;
import org.fastcatsearch.job.internal.InternalDocumentSearchJob;
import org.fastcatsearch.job.internal.InternalSearchJob;
import org.fastcatsearch.query.QueryMap;
import org.fastcatsearch.query.QueryParser;
import org.fastcatsearch.service.ServiceManager;
import org.fastcatsearch.transport.vo.StreamableDocumentResult;
import org.fastcatsearch.transport.vo.StreamableInternalSearchResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.*;

/**
 * 검색을 수행하여 병합까지 마치는 broker 검색작업.
 * */
public class ClusterSearchJob extends Job {

	private static final long serialVersionUID = 2375551165135599911L;
	protected static Logger searchLogger = LoggerFactory.getLogger("SEARCH_LOG");

	@Override
	public JobResult doRun() throws FastcatSearchException {

		long st = System.nanoTime();
		QueryMap queryMap = (QueryMap) getArgs();
		boolean noCache = false;
		String collectionId = null;
		String searchKeyword = null;
		boolean isCache = false;
		Result searchResult = null;
		try {
            Query q = QueryParser.getInstance().parseQuery(queryMap);

			Metadata meta = q.getMeta();
			QueryModifier queryModifier = meta.queryModifier();
			//쿼리모디파이.
			if (queryModifier != null) {
				q = queryModifier.modify(meta.collectionId(), q);
				meta = q.getMeta();
			}
			
			collectionId = meta.collectionId();
			if(collectionId == null) {
//				return new JobResult(new QueryParseException("cn cannot be empty."));
				throw new SearchError(ServerErrorCode.QUERY_SYNTAX_ERROR, "cn cannot be empty.");
			}
			searchKeyword = meta.getUserData("KEYWORD");
			// no cache 옵션이 없으면 캐시를 확인한다.
			if (meta.isSearchOption(Query.SEARCH_OPT_NOCACHE)) {
				noCache = true;
			}
	
			IRService irService = ServiceManager.getInstance().getService(IRService.class);
			if (!noCache) {
				Result result = irService.searchCache().get(queryMap.queryString());
				// logger.debug("CACHE_GET result>>{}, qr >>{}", result, queryMap.queryString());
				if (result != null) {
					isCache = true;
					searchResult = result;
					return new JobResult(result);
				}
			}
			
			NodeService nodeService = ServiceManager.getInstance().getService(NodeService.class);

			Groups groups = q.getGroups();

			String[] collectionIdList = collectionId.split(",");
			if(collectionIdList.length > 1) {
				shuffleCollectionList(collectionIdList);
			}
			
			// CollectionContext collectionContext = irService.collectionContext(collectionId);
			// 무조건 첫번째 context사용. 모든 컬렉션이 동일하다고 가정.
			CollectionContext collectionContext = irService.collectionContext(collectionIdList[0]);

			ResultFuture[] resultFutureList = new ResultFuture[collectionIdList.length];
			Map<String, Integer> collectionNumberMap = new HashMap<String, Integer>();
			Node[] selectedNodeList = new Node[collectionIdList.length];

			boolean forMerging = collectionIdList.length > 1;
			for (int i = 0; i < collectionIdList.length; i++) {
				String id = collectionIdList[i];
				if(irService.collectionHandler(id) == null) {
					throw new SearchError(ServerErrorCode.COLLECTION_NOT_FOUND, id);
				}
				collectionNumberMap.put(id, i);

				Node dataNode = nodeService.getBalancedNode(id);
				if (dataNode == null) {
					// 적합한 살아있는 노드를 찾지못함.
					logger.error("Not Found Node for {}", id);
					continue;
				}
				selectedNodeList[i] = dataNode;

				QueryMap newQueryMap = queryMap.clone();
				newQueryMap.setId(id);
				logger.debug("query-{} {} >> {}", i, id, newQueryMap);
				// collectionId가 하나이상이면 머징을 해야한다.
				InternalSearchJob job = new InternalSearchJob(newQueryMap, forMerging);
				resultFutureList[i] = nodeService.sendRequest(dataNode, job);
                // 노드 접속불가일경우 resultFutureList[i]가 null로 리턴됨.
                if (resultFutureList[i] == null) {
                    throw new SearchError(ServerErrorCode.DATA_NODE_CONNECTION_ERROR, dataNode.toString() );
                }
			}

			List<InternalSearchResult> resultList = new ArrayList<InternalSearchResult>(collectionIdList.length);
			HighlightInfo highlightInfo = null;

			for (int i = 0; i < collectionIdList.length; i++) {
				Object obj = resultFutureList[i].take();
				if (!resultFutureList[i].isSuccess()) {
                    if (obj instanceof SearchError) {
                        throw (SearchError) obj;
                    } else if (obj instanceof Throwable) {
                        throw new FastcatSearchException((Throwable) obj);
                    } else {
                        throw new FastcatSearchException("Error while searching.", obj);
                    }
				}

				StreamableInternalSearchResult obj2 = (StreamableInternalSearchResult) obj;
				InternalSearchResult internalSearchResult = obj2.getInternalSearchResult();
				internalSearchResult.setNodeId(selectedNodeList[i].id());
				resultList.add(internalSearchResult);

				// TODO highlightInfo 들을 머지해야하나?
				highlightInfo = internalSearchResult.getHighlightInfo();

			}

			//
			// collectionIdList 내의 스키마는 동일하다는 가정하에 진행한다. collectionIdList[0] 의 스키마를 가져온다.
			//

			Schema schema = collectionContext.schema();
			SearchResultAggregator aggregator = new SearchResultAggregator(q, schema);
			InternalSearchResult aggregatedSearchResult = aggregator.aggregate(resultList);
			int totalSize = aggregatedSearchResult.getTotalCount();
			List<Explanation> explanations = aggregatedSearchResult.getExplanations();

			// /
			// / 컬렉션별 도큐먼트를 가져와서 완전한 결과객체를 만든다.
			//

			// internalSearchResult의 결과를 보면서 컬렉션 별로 분류한다.
			int realSize = aggregatedSearchResult.getCount();
			DocIdList[] docIdList = new DocIdList[collectionIdList.length];
			int[] collectionTags = new int[realSize]; // 해당 문서가 어느 collection에 속하는지 알려주는 항목.
//			ArrayDeque<Integer>[] eachScores = new ArrayDeque[collectionIdList.length];
            int[] eachScores = new int[realSize];
            int[] eachHits = new int[realSize];
            float[] eachDistance = new float[realSize];
            int[] bundleTotalSizeList = new int[realSize];
			List<RowExplanation>[] rowExplanationsList = null;

			if(explanations != null){
				rowExplanationsList = new List[realSize];
			}
			
			for (int i = 0; i < collectionIdList.length; i++) {
				docIdList[i] = new DocIdList(realSize);
//				eachScores[i] = new ArrayDeque<Integer>(realSize);
			}

			int idx = 0;
			FixedHitReader hitReader = aggregatedSearchResult.getFixedHitReader();
			while (hitReader.next()) {
				HitElement el = hitReader.read();
				int collectionNo = collectionNumberMap.get(el.collectionId());
//				logger.debug("## {}", el.docNo());

//				if(el.getBundleDocIdList() != null) {
//					logger.debug("--bundle---");
//					DocIdList list = el.getBundleDocIdList();
//					for(int i=0;i<list.size(); i++) {
//						logger.debug("  >>> [{}] {}", list.segmentSequence(i), list.docNo(i));
//					}
//				}
				
				//묶음 문서 존재시 같이 넣어준다.
				docIdList[collectionNo].add(el.segmentSequence(), el.docNo(), el.getBundleDocIdList());
//				eachScores[collectionNo].add(el.score());
                eachScores[idx] = el.score();
                eachHits[idx] = el.hit();
                eachDistance[idx] = el.distance();
                bundleTotalSizeList[idx] = el.getTotalBundleSize();

				collectionTags[idx] = collectionNo;
				if(rowExplanationsList != null){
					rowExplanationsList[idx] = el.rowExplanations();
				}
				idx++;
			}

			// document 요청을 보낸다.
			resultFutureList = new ResultFuture[collectionIdList.length];
			ViewContainer views = q.getViews();
			String[] tags = q.getMeta().tags();
			for (int i = 0; i < collectionIdList.length; i++) {
				String cid = collectionIdList[i];
				Node dataNode = selectedNodeList[i];

				logger.debug("collection [{}] search at {}", cid, dataNode);

				InternalDocumentSearchJob job = new InternalDocumentSearchJob(cid, docIdList[i], views, tags, highlightInfo);
				resultFutureList[i] = nodeService.sendRequest(dataNode, job);
                if (resultFutureList[i] == null) {
                    throw new SearchError(ServerErrorCode.DATA_NODE_CONNECTION_ERROR, dataNode.toString() );
                }
			}

			// document 결과를 받는다.
			DocumentResult[] docResultList = new DocumentResult[collectionIdList.length];

			for (int i = 0; i < collectionIdList.length; i++) {
				String cid = collectionIdList[i];
				Object obj = resultFutureList[i].take();
				if (!resultFutureList[i].isSuccess()) {
                    if (obj instanceof SearchError) {
                        throw (SearchError) obj;
                    } else if (obj instanceof Throwable) {
                        throw new FastcatSearchException((Throwable) obj);
                    } else {
                        throw new FastcatSearchException("Error while searching.", obj);
                    }
				}

				StreamableDocumentResult obj2 = (StreamableDocumentResult) obj;
				DocumentResult documentResult = obj2.documentResult();
				if (documentResult != null) {
					docResultList[i] = documentResult;
				} else {
					logger.warn("{}의 documentList가 null입니다.", cid);
				}
			}
			String[] fieldIdList = docResultList[0].fieldIdList();
			Row[] rows = new Row[realSize];
			Row[][] bundleRows = null;
			for (int i = 0; i < realSize; i++) {
				int collectionNo = collectionTags[i];
				DocumentResult documentResult = docResultList[collectionNo];
				rows[i] = documentResult.row();
				Row[] bundleRow = documentResult.bundleRow();
				if(bundleRow != null) {
					if(bundleRows == null) {
						bundleRows = new Row[realSize][];
					}
					bundleRows[i] = bundleRow;
				}
//				int score = eachScores[collectionNo].pop();
                int score = eachScores[i];
				rows[i].setScore(score);
                rows[i].setHit(eachHits[i]);
                rows[i].setDistance(eachDistance[i]);
				
				documentResult.next();
			}

			//TODO row별과 통합 explain결과 포함시킨다.
			
			/*
			 * Group Result
			 */
			GroupsData groupsData = aggregatedSearchResult.getGroupsData();
			GroupResults groupResults = null;
			if (aggregatedSearchResult.getGroupsData() != null) {
				groupResults = groups.getGroupResultsGenerator().generate(groupsData);
			}

			searchResult = new Result(rows, bundleRows, bundleTotalSizeList, groupResults, fieldIdList, realSize, totalSize, meta.start(), explanations, rowExplanationsList);

			ResultModifier resultModifier = meta.resultModifier();
			if(resultModifier != null){
				/*
				* 2016-05-27 전제현
				* 결과모디파이어 modify 사용 시 키워드, 컬렉션명, 하이라이팅 태그를 매개변수로 받도록 수정
				* (별도로 모디파이어를 작성하여 사용하지 않으면 이전과 달라지지 않음)
				* */
				searchResult = resultModifier.modify(searchResult, searchKeyword, queryMap.get("cn"), queryMap.get("ht"));
			}
			
			if(!noCache && realSize > 0){
				irService.searchCache().put(queryMap.queryString(), searchResult);
			}
//			logger.debug("CACHE_PUT result>>{}, qr >>{}", searchResult, queryMap.queryString());

//			logger.debug("ClusterSearchJob 수행시간 : {}", Strings.getHumanReadableTimeInterval((System.nanoTime() - st) / 1000000));
			return new JobResult(searchResult);
		}catch(IRException e){
			throw new FastcatSearchException(e);
		} finally {
			//로깅은 반드시 수행한다.
			writeSearchLog(collectionId, searchKeyword, searchResult, (System.nanoTime() - st) / 1000000, isCache);
		}
	}

	// Fisher-Yates shuffle
	Random random = new Random(System.nanoTime());
	private void shuffleCollectionList(String[] collectionId) {
		for (int i = collectionId.length - 1; i > 0; i--) {
			int index = random.nextInt(i + 1);
			// Simple swap
			String t = collectionId[index];
			collectionId[index] = collectionId[i];
			collectionId[i] = t;
		}
	}
	
	private static String LOG_DELIMITER = "\t";
	private static String CACHE = "[cache]";
	private static String NOCACHE = "[nocache]";
	
	protected void writeSearchLog(String collectionId, String searchKeyword, Object obj, long searchTime, boolean isCache) {
		int count = -1;
		int totalCount = -1;
		GroupResults groupResults = null;

		if (obj instanceof Result) {
			Result result = (Result) obj;
			count = result.getCount();
			totalCount = result.getTotalCount();
			groupResults = result.getGroupResult();
		}

		StringBuffer logBuffer = new StringBuffer();

		logBuffer.append(isCache ? CACHE : NOCACHE);
		logBuffer.append(LOG_DELIMITER);
		
		logBuffer.append(collectionId);
		logBuffer.append(LOG_DELIMITER);

		logBuffer.append(searchKeyword);
		logBuffer.append(LOG_DELIMITER);

		logBuffer.append(searchTime);
		logBuffer.append(LOG_DELIMITER);

		logBuffer.append(count);
		logBuffer.append(LOG_DELIMITER);

		logBuffer.append(totalCount);

		if (groupResults != null) {
			logBuffer.append(LOG_DELIMITER);
			int groupSize = groupResults.groupSize();
			for (int i = 0; i < groupSize; i++) {
				GroupResult groupResult = groupResults.getGroupResult(i);
				if (i > 0) {
					logBuffer.append(";");
				}
				logBuffer.append(groupResult.size());
			}
		}
		searchLogger.info(logBuffer.toString());
	}
}