SequenceProcessor.java example

Explorer
hsearch-master
- src
/*
* Copyright 2010 The Apache Software Foundation
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.outpipe;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.bizosys.oneline.ApplicationFault;
import com.bizosys.oneline.SystemFault;
import com.bizosys.oneline.conf.Configuration;
import com.bizosys.oneline.pipes.PipeOut;
import com.bizosys.oneline.services.async.AsyncProcessor;

import com.bizosys.hsearch.index.TermList;
import com.bizosys.hsearch.query.HQuery;
import com.bizosys.hsearch.query.QueryContext;
import com.bizosys.hsearch.query.QueryPlanner;
import com.bizosys.hsearch.query.QueryTerm;

/**
 * Processes each keyword of the given query in multiple steps.
 * Must terms get processed sequentially, each search restricted to the IDs already found.
 * Multiple optional terms get processed in parallel.
 * 
 * All non existing IDs are marked as -1.
 * @author karan
 *
 */
public class SequenceProcessor implements PipeOut{
	
	public SequenceProcessor() {
	}	

	/**
	 * Executes the steps of the query plan and then reconciles the IDs
	 * found by the individual terms.
	 * <p>
	 * A step holding a single term is executed inline on the calling thread;
	 * a step holding several terms is submitted to the shared thread pool.
	 * Every lookup is given the buckets found by the most recent must term
	 * and, once a must term exists, is filtered by that term.
	 * After all steps ran, earlier must terms are intersected with the last
	 * must term and optional terms are reduced to a subset of it.
	 *
	 * @param objQuery the query to process; must be an {@link HQuery}
	 * @return <code>false</code> when there is no planner or no sequences,
	 * when the thread is interrupted, or when any step fails (the failure is
	 * logged); <code>true</code> otherwise
	 */
	public boolean visit(Object objQuery) throws ApplicationFault, SystemFault {
		HQuery query = (HQuery) objQuery;
		QueryContext ctx = query.ctx;
		QueryPlanner planner = query.planner;
		// Check the planner itself before touching its sequences, so a
		// missing planner yields "nothing to do" rather than an NPE.
		if ( null == planner || null == planner.sequences) return false;
		
		try {
			List<byte[]> findWithinBuckets = null;
			QueryTerm lastMustQuery = null;
			
			for (List<QueryTerm> step : planner.sequences) {
				if ( step.isEmpty()) continue;
				if ( 1 == step.size()) { //Intermediately only 1 Term (Inline call)
					QueryTerm curQuery = step.get(0);
					if ( null != ctx.docTypeCode ) curQuery.docTypeCode = ctx.docTypeCode;
					SequenceProcessorFindHBase bucketId = new SequenceProcessorFindHBase(curQuery,findWithinBuckets);
					if ( null != lastMustQuery ) bucketId.setFilterByIds(lastMustQuery);
					bucketId.call();

					// Only a must term narrows the search space of the following steps.
					if ( ! curQuery.isOptional ) {
						findWithinBuckets = bucketId.foundBuckets;
						lastMustQuery = curQuery;
					}
				
				} else { //Lastly multiple Optional terms processed in parallel
					// NOTE(review): ctx.docTypeCode is applied only in the single term
					// branch above, not to the terms of this step - confirm this is intended.
					List<SequenceProcessorFindHBase> findIdJobs = new ArrayList<SequenceProcessorFindHBase>(step.size()); 
					for(QueryTerm term : step) {
						SequenceProcessorFindHBase bucketId = new SequenceProcessorFindHBase(term,findWithinBuckets);
						if ( null != lastMustQuery ) bucketId.setFilterByIds(lastMustQuery);
						findIdJobs.add(bucketId);
					}
					// NOTE(review): the result of invokeAll is discarded, so a failure
					// inside an individual job is presumably not noticed here - confirm
					// that best-effort handling of these terms is intended.
					AsyncProcessor.getInstance().getThreadPool().invokeAll(findIdJobs);
				}
			}
			
			intersectMustQs(planner, lastMustQuery);
			subsetOptQs(planner, lastMustQuery);
			
		} catch (InterruptedException ex) {
			OutpipeLog.l.fatal("Interrupted @ SequenceProcessor > " + planner, ex);
			// Restore the interrupt status (after logging) so the caller and the
			// owning thread pool can still observe the interruption.
			Thread.currentThread().interrupt();
			return false;
		} catch (Exception ex) {
			OutpipeLog.l.fatal("Failed @ SequenceProcessor > " + planner, ex);
			return false;
		}
		
		return true;
	}

	/**
	 * Reconciles every earlier must term with the last must term.
	 * <p>
	 * Steps are walked from last to first. Only steps with exactly one,
	 * non optional term other than <code>lastMustQuery</code> are touched.
	 * For each bucket of such a term:
	 * <ul>
	 * <li>if the last must term does not have the bucket, it is removed
	 * from the current term;</li>
	 * <li>otherwise the two ID lists are intersected and, when
	 * <code>intersect</code> reports <code>false</code>, the bucket is removed
	 * from both the current term and the last must term.</li>
	 * </ul>
	 * NOTE(review): <code>TermList.intersect</code> presumably narrows the
	 * current list in place and returns whether anything is left - confirm
	 * against TermList.
	 *
	 * @param planner the plan whose sequences are reconciled
	 * @param lastMustQuery the last must term processed; nothing happens when null
	 */
	private void intersectMustQs(QueryPlanner planner, QueryTerm lastMustQuery) {
		if ( null == lastMustQuery) return;
		Map<Long, TermList> lastBuckets = lastMustQuery.foundIds;
		
		for ( int step = planner.sequences.size() - 1; step > -1; step--) {

			/**
			 * More than 1 means optional
			 */
			List<QueryTerm> curStep = planner.sequences.get(step);
			if ( curStep.size() != 1) continue; 
			
			/**
			 * Look for must only
			 */
			QueryTerm curQuery = curStep.get(0);
			if ( curQuery.isOptional) continue;
			
			/**
			 * Last must query - Already processed
			 */
			if ( lastMustQuery == curQuery) continue;

			/**
			 * Remove the buckets which are absent and then IDs
			 */
			Iterator<Map.Entry<Long, TermList>> curBucketsItr =
				curQuery.foundIds.entrySet().iterator();
			
			while ( curBucketsItr.hasNext()) {
				Map.Entry<Long, TermList> curBucket = curBucketsItr.next();
				Long bucketId = curBucket.getKey();
				
				if ( ! lastBuckets.containsKey(bucketId)) {
					curBucketsItr.remove();
					continue;
				}
				
				boolean hasElements = curBucket.getValue().intersect(lastBuckets.get(bucketId));
				if ( ! hasElements) {
					// Nothing in common: the bucket is dropped on both sides.
					curBucketsItr.remove();
					lastBuckets.remove(bucketId);
				}
			}
		}
	}
	
	/**
	 * Reduces every optional term to what the last must term also found.
	 * <p>
	 * All steps are walked from last to first and every optional term in
	 * them is visited. For each bucket of such a term: if the last must term
	 * does not have the bucket, or <code>subset</code> against the last must
	 * term's list reports <code>false</code>, the bucket is removed from the
	 * optional term. The last must term itself is never modified here.
	 * <p>
	 * NOTE(review): <code>TermList.subset</code> presumably trims the optional
	 * list in place and returns whether anything is left - confirm against
	 * TermList.
	 *
	 * @param planner the plan whose sequences are reduced
	 * @param lastMustQuery the last must term processed; nothing happens when null
	 */
	private void subsetOptQs(QueryPlanner planner, QueryTerm lastMustQuery) {
		if ( null == lastMustQuery) return;
		Map<Long, TermList> lastBuckets = lastMustQuery.foundIds;
		
		for ( int step = planner.sequences.size() - 1; step > -1; step--) {
			for (QueryTerm curQuery : planner.sequences.get(step)) {
				if ( !curQuery.isOptional) continue;
				
				/**
				 * Remove the buckets which are absent and then IDs
				 */
				Iterator<Map.Entry<Long, TermList>> curBucketsItr =
					curQuery.foundIds.entrySet().iterator();
				
				while ( curBucketsItr.hasNext()) {
					Map.Entry<Long, TermList> curBucket = curBucketsItr.next();
					Long bucketId = curBucket.getKey();
					
					boolean hasElements = lastBuckets.containsKey(bucketId);
					if ( hasElements) {
						hasElements = curBucket.getValue().subset(lastBuckets.get(bucketId));
					}
					if ( !hasElements) curBucketsItr.remove();
				}
			}
		}
	}	
	
	/**
	 * Nothing is buffered by this pipe, so there is nothing to commit.
	 * @return always <code>true</code>
	 */
	public boolean commit() throws ApplicationFault, SystemFault {
		return true;
	}

	/**
	 * @return this very instance; the processor keeps no fields
	 */
	public PipeOut getInstance() {
		return this;
	}

	/**
	 * No configuration is read by this pipe.
	 * @param conf ignored
	 * @return always <code>true</code>
	 */
	public boolean init(Configuration conf) throws ApplicationFault, SystemFault {
		return true;
	}
	
	/**
	 * @return the name of this pipe, "SequenceProcessor"
	 */
	public String getName() {
		return "SequenceProcessor";
	}		
}