/* * Copyright 2010 Bizosys Technologies Limited * * Licensed to the Bizosys Technologies Limited (Bizosys) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The Bizosys licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch.filter; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.Iterator; import java.util.List; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.filter.Filter; /** * Sending the complete document over the wire may Jam the network on a * heavy concurrent user base. This filter ensures sending the most * relevant section only. It also uses multiple Region servers to create * the teasers to serve a search request. * @author karan */ public class TeaserFilterSingular implements Filter { private static final byte TEASER_BYTE = "T".getBytes()[0]; /** * Default teaser section length */ short cutLength = 360; /** * Searched words */ byte[][] bWords = null; /** * Default constructor * */ public TeaserFilterSingular(){} /** * Constructor * @param bWords Searched words * @param cutLength Teaser section length */ public TeaserFilterSingular(byte[][] bWords, short cutLength){ this.bWords = bWords; this.cutLength = cutLength; } public boolean filterAllRemaining() { return false; } public boolean filterRow() { return false; } /** * last chance to drop entire row based on the sequence of filterValue() * calls. Eg: filter a row if it doesn't contain a specified column */ public void filterRow(List<KeyValue> kvL) { if ( null == kvL) return; int kvT = kvL.size(); if ( 0 == kvT) return; KeyValue kv = null; Iterator<KeyValue> kvItr = kvL.iterator(); TeaserMarker marker = null; byte[] source = null; for ( int i=0; i< kvT; i++ ) { kv = kvItr.next(); if (TEASER_BYTE == kv.getFamily()[0]) { //Read the skip sections TeaserFilterCommon tfc = new TeaserFilterCommon(bWords); source = kv.getValue(); marker = new TeaserMarker(1,source,0,tfc,cutLength); break; } } byte[] dest = new byte[marker.getNewSize()]; marker.extract(source, dest, 0); kvL.add(new KeyValue(kv.getRow(), kv.getFamily(), kv.getQualifier(), dest) ); } /** * true to drop this row, if false, we will also call */ public boolean filterRowKey(byte[] rowKey, int offset, int length) { return false; } public KeyValue getNextKeyHint(KeyValue arg0) { return null; } public boolean hasFilterRow() { return true; } public void reset() { } @Override public void readFields(DataInput in) throws IOException { this.cutLength = in.readShort(); int len = in.readByte(); int index = 1; this.bWords = new byte[len][]; for ( int i=0; i<len; i++ ) { int wLen = in.readByte() ; index++; this.bWords[i] = new byte[wLen]; in.readFully(this.bWords[i], 0, wLen); index = index + wLen; } } @Override public void write(DataOutput out) throws IOException { out.writeShort(cutLength); out.writeByte(bWords.length); for ( int i=0; i<bWords.length; i++ ) { out.writeByte(bWords[i].length); out.write(bWords[i]); } } public ReturnCode filterKeyValue(KeyValue arg0) { return ReturnCode.INCLUDE; } }