/*
* Copyright 2010 Bizosys Technologies Limited
*
* Licensed to the Bizosys Technologies Limited (Bizosys) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The Bizosys licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.outpipe;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Row;
import com.bizosys.hsearch.filter.Storable;
import com.bizosys.hsearch.filter.TeaserFilterMerged;
import com.bizosys.hsearch.hbase.HBaseFacade;
import com.bizosys.hsearch.hbase.HTableWrapper;
import com.bizosys.hsearch.query.DocMetaWeight;
import com.bizosys.hsearch.query.DocTeaserWeight;
import com.bizosys.hsearch.query.QueryLog;
import com.bizosys.hsearch.schema.IOConstants;
import com.bizosys.oneline.SystemFault;
/**
 * Builds the teaser previews for a page of ranked documents by reading
 * the HBase preview table with a server-side TeaserFilterMerged filter,
 * then copies each document's static weight onto its teaser.
 * @author karan
 *
 */
class BuildPreviewMerged {
private TeaserFilterMerged pf;
protected BuildPreviewMerged(byte[][] wordsB, short teaserSize) {
this.pf = new TeaserFilterMerged(wordsB, teaserSize);
}
protected List<DocTeaserWeight> filter(Object[] metaL, int pageSize, boolean parallelProcessed ) throws SystemFault {
QueryLog.l.debug("BuildTeaserMerged > Start");
if ( null == this.pf) return null;
/**
* Bring the pointer to beginning from the end
*/
int metaT = metaL.length;
/**
* Step 1 Identify table, family and column
*/
String tableName = IOConstants.TABLE_PREVIEW;
byte[] familyName = IOConstants.TEASER_BYTES;
/**
* Step 2 Configure Filtering mechanism
*/
HTableWrapper table = null;
HBaseFacade facade = null;
List<DocTeaserWeight> teasers = new ArrayList<DocTeaserWeight>();
try {
facade = HBaseFacade.getInstance();
table = facade.getTable(tableName);
DocMetaWeight dw = null;
int fetchSize = pageSize;
if ( fetchSize > metaT ) fetchSize = metaT;
Map<Long, Integer> buckets = null;
buckets = ( fetchSize < 5 ) ? new HashMap<Long, Integer>(fetchSize) : new HashMap<Long, Integer>();
/**
* Compute how many document available per bucket
*/
long tempBucket = -1;
for (int i=0; i< fetchSize; i++ ) {
dw = (DocMetaWeight) metaL[i];
tempBucket = dw.bucketId;
if ( buckets.containsKey(tempBucket)) {
buckets.put(tempBucket, buckets.get(tempBucket) + 1);
} else {
buckets.put(tempBucket, 1);
}
}
boolean DEBUG_MODE = QueryLog.l.isDebugEnabled();
if ( DEBUG_MODE ) QueryLog.l.debug(
"BuildTeaserMerged > Distinct Buckets " + buckets.size());
List<Row> gets = null;
if ( parallelProcessed ) gets = new ArrayList<Row>();
/**
* Now create the document list
*/
for (long bucket : buckets.keySet()) {
int docs = buckets.get(bucket);
if ( 0 == docs) continue;
int[] serials = new int[docs];
/**
* Now iterate through and populate this docSerial
*/
int pos = 0;
for (int i=0; i< fetchSize; i++ ) {
dw = (DocMetaWeight) metaL[i];
if (dw.bucketId.compareTo(bucket) != 0) continue;
serials[pos] = dw.serialId; pos++;
docs--; if ( docs == 0 ) break;
}
Get getter = new Get(Storable.putLong(bucket));
getter = getter.addColumn(familyName,IOConstants.TEASER_HEADER);
getter = getter.addColumn(familyName,IOConstants.TEASER_DETAIL);
this.pf.setDocSerials(serials);
getter = getter.setFilter(this.pf);
if ( parallelProcessed ) {
TeaserFilterMerged another = this.pf.clone();
another.setDocSerials(serials);
getter = getter.setFilter(another);
gets.add(getter);
}
else {
if ( DEBUG_MODE ) QueryLog.l.debug("BuildTeaserMerged > Sequential Mode Processing");
this.pf.setDocSerials(serials);
Result result = table.get(getter);
if ( null == result) continue;
fetch(table,result,familyName,bucket, teasers);
}
}
if ( null != gets) {
if ( DEBUG_MODE ) QueryLog.l.debug("BuildTeaserMerged > Parallel Mode Processing");
Object[] results = table.batch(gets);
if ( null != results) {
for (Object resObj : results) {
if ( null == resObj) continue;
Result result = (Result) resObj;
fetch(table,result,familyName,teasers);
}
}
}
if ( DEBUG_MODE ) {
int size = ( null == teasers) ? 0 : teasers.size();
QueryLog.l.debug("BuildTeaserMerged > Total Teasers:" + size);
}
/**
* Assign the static weights as base to dynamic weights
*/
int lastSeq = 0;
int i=0;
for (DocTeaserWeight dt : teasers) {
i=lastSeq;
for (; i< fetchSize; i++ ) {
dw = (DocMetaWeight) metaL[i];
if ( dt.bucketId.compareTo(dw.bucketId) == 0 ) {
if ( dt.serialId.compareTo(dw.serialId) == 0) {
dt.weight = dw.weight;
break;
}
}
}
if ( lastSeq == i) lastSeq++;
}
return teasers;
} catch ( InterruptedException ex) {
QueryLog.l.fatal("BuildTeaserMerged:", ex);
throw new SystemFault(ex);
} catch ( IOException ex) {
QueryLog.l.fatal("BuildTeaserMerged:", ex);
throw new SystemFault(ex);
} finally {
if ( null != table ) facade.putTable(table);
}
}
public void fetch(HTableWrapper table, Result result,
byte[] familyName, List<DocTeaserWeight> teasers) throws IOException{
long bucket = Storable.getLong(0, result.getRow());
fetch(table, result, familyName, bucket, teasers);
}
public void fetch(HTableWrapper table, Result result,
byte[] familyName, long bucket, List<DocTeaserWeight> teasers) throws IOException{
byte[] teaserHeader = result.getValue(familyName,IOConstants.TEASER_HEADER);
if ( null == teaserHeader) return;
byte[] teaserData = result.getValue(familyName,IOConstants.TEASER_DETAIL);
if ( null == teaserData) return;
int totalDocs = teaserHeader.length / 2; /** Document serial is short */
int beginPos = 0;
short docPos = 0;
for ( short x=0; x< totalDocs; x++) {
docPos = Storable.getShort(x * 2, teaserHeader);
DocTeaserWeight docTeaser = new DocTeaserWeight(bucket, docPos);
beginPos = docTeaser.fromBytes(teaserData, beginPos);
teasers.add(docTeaser);
}
}
}