/*
 * Copyright 2010 Bizosys Technologies Limited
 *
 * Licensed to the Bizosys Technologies Limited (Bizosys) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The Bizosys licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.bizosys.hsearch.outpipe;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;

import com.bizosys.oneline.ApplicationFault;
import com.bizosys.oneline.SystemFault;
import com.bizosys.oneline.conf.Configuration;
import com.bizosys.oneline.pipes.PipeOut;
import com.bizosys.oneline.services.async.AsyncProcessor;

import com.bizosys.hsearch.index.TermList;
import com.bizosys.hsearch.query.HQuery;
import com.bizosys.hsearch.query.QueryContext;
import com.bizosys.hsearch.query.QueryPlanner;
import com.bizosys.hsearch.query.QueryTerm;

/**
 * Processes each keyword of the given query in multiple steps.
 * MUST terms are processed sequentially, each searching within the IDs
 * found by the previous one; multiple OPTIONAL terms are processed in parallel.
 *
 * All non-existing IDs are marked as -1.
* @author karan * */ public class SequenceProcessor implements PipeOut{ private boolean isParallel = false; public SequenceProcessor() { } private QueryTerm getSqlTermList() { QueryTerm sqlQuery = new QueryTerm(); TermList tl = new TermList(); tl.docPos = new short[]{18}; tl.totalTerms = tl.docPos.length; sqlQuery.foundIds.put(-9223372036854775807L, tl); return sqlQuery; } public void visit(Object objQuery, boolean multiWriter) throws ApplicationFault, SystemFault { HQuery query = (HQuery) objQuery; QueryContext ctx = query.ctx; QueryPlanner planner = query.planner; if ( null == planner.sequences) throw new ApplicationFault("No Sequencing."); try { List<byte[]> findWithinBuckets = ctx.getBuckets(); QueryTerm lastMustQuery = getSqlTermList(); for (List<QueryTerm> step : planner.sequences) { int totalTasks = step.size(); if ( 0 == totalTasks) continue; if ( 1 == totalTasks) { QueryTerm curQuery = step.get(0); if ( OutpipeLog.l.isDebugEnabled() ) OutpipeLog.l.debug( "Processing a single query on this step." + curQuery.toString()); if ( null != ctx.docTypeCode ) curQuery.docTypeCode = ctx.docTypeCode; SequenceProcessorFindHBase hbaseProxy = new SequenceProcessorFindHBase(curQuery,findWithinBuckets); if ( null != lastMustQuery ) hbaseProxy.setFilterByIds(lastMustQuery); hbaseProxy.call(); if ( ! curQuery.isOptional ) { // No matching term for a must word. if ( hbaseProxy.foundBuckets == null) break; if (hbaseProxy.foundBuckets.size() == 0 ) break; OutpipeLog.l.debug("Must Query found bucket size :" + hbaseProxy.foundBuckets.size()); findWithinBuckets = hbaseProxy.foundBuckets; lastMustQuery = curQuery; } else { if ( OutpipeLog.l.isDebugEnabled() ) OutpipeLog.l.debug("Optional Query.." 
+ curQuery.toString()); } } else { //Lastly multiple Optional terms Process parallely if ( OutpipeLog.l.isDebugEnabled() ) OutpipeLog.l.debug("Processing in a parallel step."); List<SequenceProcessorFindHBase> findIdJobs = new ArrayList<SequenceProcessorFindHBase>(step.size()); for(QueryTerm term : step) { SequenceProcessorFindHBase hbaseProxy = (this.isParallel) ? new SequenceProcessorFindHBaseParallel(term,findWithinBuckets) : new SequenceProcessorFindHBase(term,findWithinBuckets); if ( null != lastMustQuery ) hbaseProxy.setFilterByIds(lastMustQuery); findIdJobs.add(hbaseProxy); } AsyncProcessor.getInstance().getThreadPool().invokeAll(findIdJobs); } } intersectMustQs(planner, lastMustQuery); subsetOptQs(planner, lastMustQuery); } catch (InterruptedException ex) { String msg = ( null == planner) ? "Empty Planner" : planner.toString(); OutpipeLog.l.fatal("Interrupted @ SequenceProcessor > " + msg, ex); throw new SystemFault(ex); } catch (Exception ex) { String msg = ( null == planner) ? "Empty Planner" : planner.toString(); OutpipeLog.l.fatal("Failed @ SequenceProcessor > " + msg, ex); throw new SystemFault(ex); } } /** * This subsets across all MUST queries. * Last 2 must queries are already in sync from the processing. 
* @param planner * @param lastMustQuery */ private void intersectMustQs(QueryPlanner planner, QueryTerm lastMustQuery) { if ( null == lastMustQuery) return; int stepsT = planner.sequences.size(); for ( int step = stepsT - 1; step > -1; step--) { /** * More than 1 means optional */ List<QueryTerm> curStepQueries = planner.sequences.get(step); if ( curStepQueries.size() != 1) continue; /** * Look for must only */ QueryTerm curQuery = curStepQueries.get(0); if ( curQuery.isOptional) continue; /** * Last must query - Already processed */ if ( lastMustQuery == curQuery) continue; /** * Remove the buckets which are absent and then IDs */ Map<Long, TermList> curResultBuckets = curQuery.foundIds; Map<Long, TermList> lastQueryBuckets = lastMustQuery.foundIds; int curBucketsT = curResultBuckets.size(); if ( curBucketsT == 0 ) { if ( OutpipeLog.l.isDebugEnabled() ) { OutpipeLog.l.debug("No found items for the must query."); } lastQueryBuckets.clear(); return; } Iterator<Long> curBucketsItr = curResultBuckets.keySet().iterator(); for ( int i=0; i<curBucketsT; i++ ) { Long bucketId = curBucketsItr.next(); boolean hasElements = lastQueryBuckets.containsKey(bucketId); if ( hasElements) { hasElements = curResultBuckets.get(bucketId). intersect(lastQueryBuckets.get(bucketId)); if ( ! hasElements) { curBucketsItr.remove(); lastQueryBuckets.remove(bucketId); } } else { curBucketsItr.remove(); } } } } /** * This subsets across all MUST queries. * Last 2 must queries are already in sync from the processing. 
* @param planner * @param lastMustQuery */ private void subsetOptQs(QueryPlanner planner, QueryTerm lastMustQuery) { if ( null == lastMustQuery) return; int stepsT = planner.sequences.size(); for ( int step = stepsT -1; step > -1; step--) { List<QueryTerm> curStep = planner.sequences.get(step); for (QueryTerm curQuery : curStep) { if ( !curQuery.isOptional) continue; /** * Remove the buckets which are absent and then IDs */ Map<Long, TermList> curBuckets = curQuery.foundIds; Map<Long, TermList> lastBuckets = lastMustQuery.foundIds; int curBucketsT = curBuckets.size(); Iterator<Long> curBucketsItr = curBuckets.keySet().iterator(); for ( int i=0; i<curBucketsT; i++ ) { Long bucketId = curBucketsItr.next(); boolean hasElements = lastBuckets.containsKey(bucketId); if ( hasElements) { hasElements = curBuckets.get(bucketId).subset(lastBuckets.get(bucketId)); if ( !hasElements) curBucketsItr.remove(); } else { curBucketsItr.remove(); } } } } } public void commit(boolean multiWriter) throws ApplicationFault, SystemFault { } public PipeOut getInstance() { return this; } public void init(Configuration conf) throws ApplicationFault, SystemFault { this.isParallel = conf.getBoolean("parallelization", false); } public String getName() { return "SequenceProcessor"; } }