// TermFilter.java example
//
// Explorer
// hsearch-obsolete-master
// - src
/*
* Copyright 2010 Bizosys Technologies Limited
*
* Licensed to the Bizosys Technologies Limited (Bizosys) under one
* or more contributor license agreements.  See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership.  The Bizosys licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License.  You may obtain a copy of the License at
*
*     http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.bizosys.hsearch.filter;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.List;

import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.filter.Filter;

/**
 * HBase scan filter that finds the documents containing a search term.
 * It operates on the hash code of the term.
 *
 * <p>A row is considered only when {@code FilterIds.isMatchingBucket}
 * accepts its row key for the serialized query bytes {@link #B}. Within
 * such a row, only cells whose family and qualifier start with the bytes
 * registered through {@link #addColumn(byte[], byte[])} are examined, and
 * a cell is included only when {@code FilterIds.isMatchingColBytes}
 * returns a non-null result for its value. At the end of the row,
 * {@link #filterRow(List)} replaces the collected cells with a single
 * cell carrying that result.
 *
 * <p>The filter keeps per-row state and relies on {@link #reset()} being
 * called between rows to clear it.
 *
 * @author karan
 */
public class TermFilter implements Filter {
	/**
	 * Number of leading bytes of {@link #B} that are copied into {@link #H}.
	 */
	private static final int HASH_LENGTH = 4;

	/**
	 * All serialized bytes (Document Type, Term Type and others)
	 */
	public byte[]  B;
	
	/**
	 * Hash code bytes: a copy of the first four bytes of {@link #B}.
	 */
	public byte[]  H;
	
	/**
	 * Matched term list bytes found in the row currently being scanned,
	 * or null when nothing has matched yet.
	 */
	private byte[] matchedTLBytes = null;
	
	/** First byte of the column family to inspect. */
	private byte family;

	/** First byte of the column qualifier to inspect. */
	private byte name;

	/**
	 * Default constructor, needed so that the filter can be instantiated
	 * before {@link #readFields(DataInput)} populates it.
	 * Do not use this from application code.
	 */
	public TermFilter(){}
	
	/**
	 * Constructor
	 * @param bytes	Serialized bytes; the first four bytes are the term hash
	 * @throws IllegalArgumentException if bytes is null or shorter than
	 * four bytes
	 */
	public TermFilter( byte[] bytes){
		if ( null == bytes || bytes.length < HASH_LENGTH ) {
			throw new IllegalArgumentException(
				"Serialized term bytes must hold at least " + HASH_LENGTH + " bytes");
		}
		this.B = bytes;
		this.H = hashPrefix(bytes);
	}
	
	/**
	 * Add the family and column name. Only the first byte of each
	 * argument is retained and compared during filtering.
	 * @param family	Column family bytes
	 * @param colName	Column qualifier bytes
	 * @throws IllegalArgumentException if either argument is null or empty
	 */
	public void addColumn(byte[] family, byte[] colName) {
		if ( null == family || 0 == family.length ) {
			throw new IllegalArgumentException("Column family must not be null or empty");
		}
		if ( null == colName || 0 == colName.length ) {
			throw new IllegalArgumentException("Column name must not be null or empty");
		}
		this.family = family[0];
		this.name = colName[0];
	}

	/**
	 * This filter never terminates the scan early.
	 */
	public boolean filterAllRemaining() {
		return false;
	}

	/**
	 * Decides what to do with a single cell.
	 * <ul>
	 * <li>NEXT_ROW when the row key does not belong to a matching bucket.</li>
	 * <li>NEXT_COL when the cell is not in the configured family/column,
	 * or its value does not match the term.</li>
	 * <li>INCLUDE when the value matches; the matched bytes are remembered
	 * for {@link #filterRow(List)}.</li>
	 * </ul>
	 */
	public ReturnCode filterKeyValue(KeyValue kv) {
		if ( ! FilterIds.isMatchingBucket(kv.getRow(), B) ) {
			// Nothing from this row can match; drop any stale state as well.
			matchedTLBytes = null;
			return ReturnCode.NEXT_ROW;
		}

		// Length checks guard against empty family/qualifier arrays, which
		// would otherwise fail with ArrayIndexOutOfBoundsException.
		byte[] kvFamily = kv.getFamily();
		if ( 0 == kvFamily.length || kvFamily[0] != family ) return ReturnCode.NEXT_COL;

		byte[] kvQualifier = kv.getQualifier();
		if ( 0 == kvQualifier.length || kvQualifier[0] != name ) return ReturnCode.NEXT_COL;

		byte[] matched = FilterIds.isMatchingColBytes(kv.getValue(), B);
		if ( null == matched ) return ReturnCode.NEXT_COL;

		// Keep the match until the row ends. It is cleared in reset(), not
		// on the next cell, so trailing non-matching cells cannot erase it.
		matchedTLBytes = matched;
		return ReturnCode.INCLUDE;
	}

	/**
	 * Rows are never dropped as a whole by this filter.
	 */
	public boolean filterRow() {
		return false;
	}

	/**
	 * Last chance to modify the row based on the preceding sequence of
	 * filterKeyValue() calls. When a term matched in this row, the
	 * collected cells are replaced by one cell that reuses the row,
	 * family and qualifier of the first collected cell and carries the
	 * matched term list bytes as its value.
	 */
	public void filterRow(List<KeyValue> kvL) {
		if ( null == matchedTLBytes) return;
		if ( null == kvL) return;
		if ( 0 == kvL.size()) return;
		KeyValue kv = kvL.get(0);
		kvL.clear();
		kvL.add(new KeyValue(kv.getRow(),
			kv.getFamily(), kv.getQualifier(), matchedTLBytes));
	}
	
	/**
	 * Returns true to drop the row based on its key alone. This filter
	 * always returns false; bucket matching happens in filterKeyValue().
	 */
	public boolean filterRowKey(byte[] rowKey, int offset, int length) {
		return false;
	}
	
	/**
	 * No seek hint is provided.
	 */
	public KeyValue getNextKeyHint(KeyValue arg0) {
		return null;
	}
	
	/**
	 * Signals that {@link #filterRow(List)} must be invoked for each row.
	 */
	public boolean hasFilterRow() {
		return true;
	}
	
	/**
	 * Clears the per-row match state so that a match found in one row
	 * cannot be applied to another.
	 */
	public void reset() {
		matchedTLBytes = null;
	}

	/**
	 * Restores the filter from its serialized form: a header read through
	 * FilterIds.readHeader() giving the length of the serialized bytes,
	 * the bytes themselves, then one family byte and one column byte.
	 * The hash bytes are rebuilt when at least four serialized bytes are
	 * present.
	 * @throws IOException if the header reports a negative length or the
	 * input cannot be read completely
	 */
	@Override
	public void readFields(DataInput in) throws IOException {
		int T = FilterIds.readHeader(in);
		if ( T < 0 ) throw new IOException("Invalid serialized term length: " + T);
		byte[] bytes = new byte[T];
		in.readFully(bytes, 0, T);
		this.B = bytes;
		this.H = ( T >= HASH_LENGTH ) ? hashPrefix(bytes) : null;
		this.family = in.readByte();
		this.name = in.readByte();
	}
	
	/**
	 * Serializes the filter in the layout expected by
	 * {@link #readFields(DataInput)}: header, serialized bytes, family
	 * byte, column byte. The serialized bytes must have been set.
	 */
	@Override
	public void write(DataOutput out) throws IOException {
		int BT = B.length;
		FilterIds.writeHeader(out, BT);
		out.write(B);
		out.write(this.family);
		out.write(this.name);
	}

	/**
	 * Copies the leading hash bytes out of the serialized bytes.
	 * The caller guarantees that at least four bytes are available.
	 */
	private static byte[] hashPrefix(byte[] bytes) {
		return new byte[]{bytes[0],bytes[1],bytes[2],bytes[3]};
	}
}