/* * Copyright 2010 Bizosys Technologies Limited * * Licensed to the Bizosys Technologies Limited (Bizosys) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The Bizosys licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch.filter; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.List; import org.apache.hadoop.hbase.KeyValue; import org.apache.hadoop.hbase.filter.Filter; /** * Finds the documents containing the search terms. * It operates on the hash codec of the term. * @author karan * */ public class TermFilter implements Filter { /** * All serialized bytes (Document Type, Term Type and others) */ public byte[] B; /** * Hash code bytes */ public byte[] H; /** * Matched term list bytes */ private byte[] matchedTLBytes = null; private byte family; private byte name; /** * Default constructor * DON't USE THIS */ public TermFilter(){} /** * Constructor * @param bytes Serialized bytes */ public TermFilter( byte[] bytes){ this.B=bytes; this.H = new byte[]{B[0],B[1],B[2],B[3]}; } /** * Add the family and column name * @param family * @param colName */ public void addColumn(byte[] family, byte[] colName) { this.family = family[0]; this.name = colName[0]; } public boolean filterAllRemaining() { return false; } /** * true to drop this key/value */ public ReturnCode filterKeyValue(KeyValue kv) { matchedTLBytes = null; boolean isMatched = FilterIds.isMatchingBucket(kv.getRow(),B); if ( isMatched ) { if ( kv.getFamily()[0] != family) return ReturnCode.NEXT_COL; if ( kv.getQualifier()[0] != name) return ReturnCode.NEXT_COL; matchedTLBytes = FilterIds.isMatchingColBytes(kv.getValue(), B); isMatched = ( null != matchedTLBytes); if (isMatched ) return ReturnCode.INCLUDE; return ReturnCode.NEXT_COL; } return ReturnCode.NEXT_ROW; } public boolean filterRow() { return false; } /** * last chance to drop entire row based on the sequence of filterValue() * calls. Eg: filter a row if it doesn't contain a specified column */ public void filterRow(List<KeyValue> kvL) { if ( null == matchedTLBytes) return; if ( null == kvL) return; if ( 0 == kvL.size()) return; KeyValue kv = kvL.get(0); kvL.clear(); kvL.add(new KeyValue(kv.getRow(), kv.getFamily(), kv.getQualifier(), matchedTLBytes)); } /** * true to drop this row, if false, we will also call */ public boolean filterRowKey(byte[] rowKey, int offset, int length) { return false; } public KeyValue getNextKeyHint(KeyValue arg0) { return null; } public boolean hasFilterRow() { return true; } public void reset() { } @Override public void readFields(DataInput in) throws IOException { int T = FilterIds.readHeader(in); this.B = new byte[T]; in.readFully(this.B, 0, T); this.family = in.readByte(); this.name = in.readByte(); } @Override public void write(DataOutput out) throws IOException { int BT = B.length; FilterIds.writeHeader(out, BT); out.write(B); out.write(this.family); out.write(this.name); } }