/* * Copyright 2010 Bizosys Technologies Limited * * Licensed to the Bizosys Technologies Limited (Bizosys) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The Bizosys licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.bizosys.hsearch.filter; import java.io.DataInput; import java.io.DataOutput; import java.io.IOException; import java.util.Arrays; /** * Finds out whether a document carries the matching * term hash, document type and term type restricting it inside * allowed merged sections (buckets). * @author karan * */ public class FilterIds { private static final int KEYWORD_BYTES = 5; /** * Restrict the findings only inside the filtered buckets * After the Top 4 all others are continuous bucket Ids * @param rowKey The primary key * @param inB Input Bytes * @return Is bucket matched? */ public static final boolean isMatchingBucket(byte[] rowKey, byte[] inB) { int inBLen = inB.length; if ( 6 >= inBLen) return true; //Only Hash + Typecodes for ( int i=6; i<inBLen;) { if ( inB[i] == rowKey[0] && inB[i+1] == rowKey[1] && inB[i+2] == rowKey[2] && inB[i+3] == rowKey[3] && inB[i+4] == rowKey[4] && inB[i+5] == rowKey[5] && inB[i+6] == rowKey[6] && inB[i+7] == rowKey[7] ) return true; i = i+8; } return false; } /** * Match the term hash, doc type and term type * @param storeB Stored bytes * @param inB Input Bytes * @return Matching Term-Lists Byte Array */ public static final byte[] isMatchingColBytes( byte[] storeB, byte[] inB) { if ( null == storeB ) return null; if ( null == inB ) return null; int inT = inB.length; int storeL = storeB.length; int pos = 0, startPos=0; int termsT = 0; int codecPos = 0; int origShift = 0; int newShift = 0; while ( storeL > pos) { //Loop on keyword hashes boolean isMatched = //Match a Keyword hash storeB[pos] == inB[0] && storeB[pos+1] == inB[1] && storeB[pos+2] == inB[2] && storeB[pos+3] == inB[3]; pos = pos + 4; termsT = (byte) storeB[pos++]; if ( -1 == termsT) { termsT = getInt(pos,storeB ); pos = pos + 4; } if ( ! isMatched) { /** Term Hash Not Matched */ pos = pos + (termsT * KEYWORD_BYTES); continue; } boolean[] matchedPositions = new boolean[termsT]; Arrays.fill(matchedPositions, true); codecPos = pos; /** * Term Hash code has matched. */ int matched = 0; if ( inT > 4 && Byte.MIN_VALUE != inB[4]) { /** Doc Type code match needed*/ for (int i=0; i<termsT; i++ ) { if ( storeB[pos+i] == inB[4] ) { //Any one is matched matched++; } else { matchedPositions[i] = false; } } } else { matched = termsT; } if ( ! isMatched) { /** Doc Type Has Not Matched */ pos = pos + (termsT * KEYWORD_BYTES); continue; } if ( inT > 5 && Byte.MIN_VALUE != inB[5]) { /** Term Type code match needed*/ startPos = pos+termsT; for (int i=0; i<termsT; i++ ) { if ( ! matchedPositions[i] ) continue; if ( storeB[startPos+i] != inB[5] ) { matchedPositions[i] = false; matched--; } } } if ( matched == 0) { /** Term Type Has Not Matched */ pos = pos + (termsT * KEYWORD_BYTES); continue; } /** Keyword, Termtype, Doctype all has Matched */ byte[] termLstBytes = new byte[matched * KEYWORD_BYTES]; if ( matched == termsT) { System.arraycopy(storeB,codecPos, termLstBytes, 0, termLstBytes.length); return termLstBytes; } /** * Return selectively */ for ( int j=0,i=0; i< termsT; i++) { if (! matchedPositions[i]) continue; origShift = codecPos + i; newShift = j; termLstBytes[newShift] = storeB[origShift]; //Doc Type newShift = newShift + matched; origShift = origShift + termsT; termLstBytes[newShift] = storeB[origShift]; //term Type newShift = newShift + matched; origShift = origShift + termsT; termLstBytes[newShift] = storeB[origShift]; //Term weight newShift = newShift + matched + j; origShift = origShift + termsT + i; termLstBytes[newShift] = storeB[origShift]; //Doc Pos termLstBytes[newShift+1] = storeB[origShift + 1]; //Doc Pos j++; } return termLstBytes; } return null; } /** * Reads the header section of input data to find total bytes encapsuled * @param in Input data * @return Total bytes to be read * @throws IOException */ public static final int readHeader(DataInput in) throws IOException { int T = (in.readByte() << 24 ) + ( (in.readByte() & 0xff ) << 16 ) + ( ( in.readByte() & 0xff ) << 8 ) + ( in.readByte() & 0xff ); return T; } /** * Write the header seciton of supplied header * @param out Data output * @param BT Total Bytes * @throws IOException */ public static final void writeHeader(DataOutput out, int BT) throws IOException { out.write(new byte[] { (byte)(BT >> 24), (byte)(BT >> 16 ),(byte)(BT >> 8 ), (byte)(BT) }); } /** * Integer - Byte conversion * @param index The reading start position * @param inputBytes Byte Array * @return The Integer data type */ public static final int getInt(int index, byte[] inputBytes) { int intVal = (inputBytes[index] << 24 ) + ( (inputBytes[++index] & 0xff ) << 16 ) + ( ( inputBytes[++index] & 0xff ) << 8 ) + ( inputBytes[++index] & 0xff ); return intVal; } }