/*
* Copyright 2010 Bizosys Technologies Limited
*
* Licensed to the Bizosys Technologies Limited (Bizosys) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Bizosys licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.outpipe;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;

import com.bizosys.hsearch.common.AccessControl;
import com.bizosys.hsearch.filter.AMFilterCommon;
import com.bizosys.hsearch.filter.AMFilterMerged;
import com.bizosys.hsearch.filter.Access;
import com.bizosys.hsearch.filter.AccessStorable;
import com.bizosys.hsearch.filter.Storable;
import com.bizosys.hsearch.hbase.HBaseFacade;
import com.bizosys.hsearch.hbase.HTableWrapper;
import com.bizosys.hsearch.query.DocMetaWeight;
import com.bizosys.hsearch.query.DocWeight;
import com.bizosys.hsearch.query.QueryContext;
import com.bizosys.hsearch.query.QueryLog;
import com.bizosys.hsearch.schema.IOConstants;
import com.bizosys.oneline.SystemFault;

/**
 * Executes the meta and ACL filtering against the HBase preview table,
 * fetching the matching document meta records from the merged bytes
 * one page at a time.
 * @author karan
 */
class CheckMetaInfoMerged {

    private static final boolean DEBUG_ENABLED = QueryLog.l.isDebugEnabled();

    private AMFilterMerged pf;

    protected CheckMetaInfoMerged(QueryContext ctx) {
        AccessStorable aclB = null;
        if ( null == ctx.user ) {
            Access access = new Access();
            access.addAnonymous();
            aclB = access.toStorable();
        } else {
            aclB = AccessControl.getAccessControl(ctx.user).toStorable();
        }

        byte[] tagB = ( ctx.matchTags ) ?
            new Storable(ctx.queryString.toLowerCase()).toBytes() : null;
        byte[] stateB = ( null == ctx.state ) ? null : ctx.state.toBytes();
        byte[] teamB = ( null == ctx.team ) ? null : ctx.team.toBytes();

        long ca = ( null == ctx.createdAfter ) ? -1 : ctx.createdAfter.longValue();
        long cb = ( null == ctx.createdBefore ) ? -1 : ctx.createdBefore.longValue();
        long ma = ( null == ctx.modifiedAfter ) ? -1 : ctx.modifiedAfter.longValue();
        long mb = ( null == ctx.modifiedBefore ) ? -1 : ctx.modifiedBefore.longValue();

        AMFilterCommon setting = new AMFilterCommon(aclB, tagB, stateB, teamB, cb, ca, mb, ma);
        this.pf = new AMFilterMerged(setting);
    }
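    /**
     * Fetches the meta information for one page of documents.
     * @param staticL  Statically ranked documents; each element is a {@link DocWeight}
     * @param scroll   0-based offset into staticL from which fetching starts
     * @param pageSize Number of documents wanted in this page
     * @return The matching documents with their meta data and term weights,
     *         or null when nothing is available from the given offset
     * @throws SystemFault Wraps any HBase IO failure
     */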
    protected List<DocMetaWeight> filter(Object[] staticL, int scroll, int pageSize) throws SystemFault {

        QueryLog.l.debug("CheckMetaInfoMerged > Call START");
        if ( null == this.pf ) return null;

        /**
         * Bring the pointer to the beginning from the end.
         */
        int staticT = staticL.length;
        if ( staticT < scroll ) return null;

        /**
         * Step 1: Identify the table, family and columns.
         */
        String tableName = IOConstants.TABLE_PREVIEW;
        byte[] familyName = IOConstants.SEARCH_BYTES;

        /**
         * Step 2: Configure the filtering mechanism.
         */
        HTableWrapper table = null;
        HBaseFacade facade = null;
        List<DocMetaWeight> foundDocs = new ArrayList<DocMetaWeight>();

        try {
            facade = HBaseFacade.getInstance();
            table = facade.getTable(tableName);
            DocWeight dw = null;

            int available = staticT - scroll;
            if ( available <= 0 ) return null;

            int fetchSize = ( pageSize < available ) ? pageSize : available;
            int fetchStart = scroll;               // Starts from 0
            int fetchEnd = scroll + fetchSize - 1; // Counting from 0

            if ( DEBUG_ENABLED ) QueryLog.l.debug(
                "CheckMetaInfoMerged > available=" + available + " , fetchSize=" + fetchSize +
                " , fetchStart=" + fetchStart + " , fetchEnd=" + fetchEnd);

            int founds = 0;
            int totalDocs = 0;
            Map<Long, Integer> buckets = ( fetchSize > 14 ) ?
                new HashMap<Long, Integer>(14) : new HashMap<Long, Integer>(fetchSize);
            long tempBucket = -1;

            while ( fetchSize > 0 ) { // Keep on fetching
                buckets.clear();
                tempBucket = -1;
                founds = 0; // Reset per pass; otherwise earlier hits get subtracted from pageSize twice
                StringBuilder sb = null;
                if ( DEBUG_ENABLED ) sb = new StringBuilder();

                /**
                 * Identify the unique buckets and the number of documents inside each.
                 * This helps to fetch the merged bytes.
                 */
                for ( int i = fetchStart; i <= fetchEnd; i++ ) {
                    dw = (DocWeight) staticL[i];
                    if ( DEBUG_ENABLED ) sb.append(dw.bucketId).append(',');
                    tempBucket = dw.bucketId;
                    if ( buckets.containsKey(tempBucket) ) {
                        buckets.put(tempBucket, buckets.get(tempBucket) + 1);
                    } else {
                        buckets.put(tempBucket, 1);
                    }
                }

                if ( DEBUG_ENABLED ) {
                    QueryLog.l.debug("CheckMetaInfoMerged > Found Doc Ids:" + sb.toString());
                    sb.delete(0, sb.capacity());
                    QueryLog.l.debug("CheckMetaInfoMerged > Distinct Buckets " + buckets.size());
                }

                /**
                 * Now create the document list.
                 */
                for ( long bucket : buckets.keySet() ) {
                    int docs = buckets.get(bucket);
                    if ( 0 == docs ) continue;

                    int[] serials = new int[docs];
                    float[] termWeights = new float[docs];
                    if ( DEBUG_ENABLED ) QueryLog.l.debug(
                        "CheckMetaInfoMerged > Bucket/Documents(#) : " + bucket + '/' + docs);

                    /**
                     * Now iterate through and populate the document serials of this bucket.
                     */
                    int pos = 0;
                    for ( int i = fetchStart; i <= fetchEnd; i++ ) {
                        dw = (DocWeight) staticL[i];
                        if ( dw.bucketId.compareTo(bucket) != 0 ) continue;
                        if ( DEBUG_ENABLED ) sb.append(dw.serialId).append(',');
                        serials[pos] = dw.serialId;
                        termWeights[pos] = dw.wt;
                        pos++;
                        docs--;
                        if ( docs == 0 ) break;
                    }

                    if ( DEBUG_ENABLED ) {
                        QueryLog.l.debug("CheckMetaInfoMerged > Doc SerialIds:" + sb.toString());
                        sb.delete(0, sb.capacity());
                    }

                    /**
                     * Load the document meta data from the merged bytes.
                     */
                    this.pf.setDocSerials(serials);
                    Get getter = new Get(Storable.putLong(bucket));
                    getter = getter.addColumn(familyName, IOConstants.META_HEADER);
                    getter = getter.addColumn(familyName, IOConstants.META_DETAIL);
                    getter = getter.addColumn(familyName, IOConstants.ACL_HEADER);
                    getter = getter.addColumn(familyName, IOConstants.ACL_DETAIL);
                    //getter = getter.addFamily(familyName);
                    getter = getter.setFilter(this.pf);
                    getter = getter.setMaxVersions(1);
                    Result result = table.get(getter);
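                    /*
                     * Merged storage layout, as read by the parsing below:
                     * META_HEADER is a sequence of 2-byte document serial
                     * numbers, and META_DETAIL holds the serialized meta
                     * records concatenated in the same order, so the x-th
                     * serial pairs with the x-th meta record.
                     */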
                    byte[] metaHeader = result.getValue(familyName, IOConstants.META_HEADER);
                    if ( null == metaHeader ) continue;
                    byte[] metaData = result.getValue(familyName, IOConstants.META_DETAIL);
                    if ( null == metaData ) continue;

                    totalDocs = metaHeader.length / 2;
                    short docPos = 0;
                    int beginPos = 0;
                    int serialsT = serials.length;
                    float aTermWeight;

                    for ( int x = 0; x < totalDocs; x++ ) {
                        docPos = Storable.getShort(x * 2, metaHeader);

                        // Find the term weight of this document serial
                        aTermWeight = 0;
                        for ( int i = 0; i < serialsT; i++ ) {
                            if ( serials[i] == docPos ) {
                                //serials[i] = serials[serialsT - 1]; //Bring the last one here
                                //serialsT--;
                                aTermWeight = termWeights[i];
                                break;
                            }
                        }

                        DocMetaWeight docMeta = new DocMetaWeight(bucket, docPos, aTermWeight);
                        beginPos = docMeta.fromBytes(metaData, beginPos);
                        foundDocs.add(docMeta);
                        founds++;
                    }

                    if ( DEBUG_ENABLED ) QueryLog.l.debug(
                        "CheckMetaInfoMerged > " + bucket + " Bucket has total documents " +
                        founds + "/" + totalDocs);
                }

                fetchStart = fetchEnd + 1;             // Next set beginning
                pageSize = pageSize - founds;          // Remaining documents we still need
                available = staticT - fetchStart;
                if ( available == 0 ) break;
                fetchSize = ( pageSize < available ) ? pageSize : available;
                fetchEnd = fetchStart + fetchSize - 1; // Boundary counting

            }
            return foundDocs;

        } catch ( IOException ex ) {
            QueryLog.l.fatal("CheckMetaInfoMerged:", ex);
            throw new SystemFault(ex);
        } finally {
            if ( null != table ) facade.putTable(table);
        }
    }
}
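/*
 * Illustrative usage (a sketch, not part of this source): a caller in this
 * package would page through the ranked DocWeight array roughly as
 *
 *   CheckMetaInfoMerged checker = new CheckMetaInfoMerged(ctx);
 *   List<DocMetaWeight> page = checker.filter(staticL, scroll, pageSize);
 *
 * where ctx is the active QueryContext, staticL holds DocWeight entries,
 * scroll is the 0-based offset, and pageSize the number of documents wanted.
 */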