package org.xlattice.crypto.filters; /** * Given a key, populates arrays determining word and bit offsets into * a Bloom filter. * * @author <A HREF="mailto:jddixon@users.sourceforge.net">Jim Dixon</A> * * BloomSHA1.java and KeySelector.java are BSD licensed from the xlattice * app - http://xlattice.sourceforge.net/ * * minor tweaks by jrandom, exposing unsynchronized access and * allowing larger M and K. changes released into the public domain. * * As of 0.8.11, bitoffset and wordoffset out parameters moved from fields * to selector arguments, to allow concurrency. * ALl methods are now thread-safe. */ public class KeySelector { private final int m; private final int k; private final BitSelector bitSel; private final WordSelector wordSel; public interface BitSelector { /** * @param bitOffset Out parameter of length k * @since 0.8.11 out parameter added */ public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset); } public interface WordSelector { /** * @param wordOffset Out parameter of length k * @since 0.8.11 out parameter added */ public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset); } /** AND with byte to expose index-many bits */ private final static int[] UNMASK = { // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 0, 1, 3, 7, 15, 31, 63, 127, 255, 511, 1023, 2047, 4095, 8191, 16383, 32767}; /** AND with byte to zero out index-many bits */ private final static int[] MASK = { ~0,~1,~3,~7,~15,~31,~63,~127,~255,~511,~1023,~2047,~4095,~8191,~16383,~32767}; private final static int TWO_UP_15 = 32 * 1024; /** * Creates a key selector for a Bloom filter. When a key is presented * to the getOffsets() method, the k 'hash function' values are * extracted and used to populate bitOffset and wordOffset arrays which * specify the k flags to be set or examined in the filter. * * @param m size of the filter as a power of 2 * @param k number of 'hash functions' * * Note that if k and m are too big, the GenericWordSelector blows up - * The max for 32-byte keys is m=23 and k=11. * The precise restriction appears to be: * ((5k + (k-1)(m-5)) / 8) + 2 < keySizeInBytes * * It isn't clear how to fix this. */ public KeySelector (int m, int k) { //if ( (m < 2) || (m > 20)|| (k < 1) // || (bitOffset == null) || (wordOffset == null)) { // throw new IllegalArgumentException(); //} this.m = m; this.k = k; bitSel = new GenericBitSelector(); wordSel = new GenericWordSelector(); } /** * Extracts the k bit offsets from a key, suitable for general values * of m and k. */ public class GenericBitSelector implements BitSelector { /** Do the extraction */ public void getBitSelectors(byte[] b, int offset, int length, int[] bitOffset) { int curBit = 8 * offset; int curByte; for (int j = 0; j < k; j++) { curByte = curBit / 8; int bitsUnused = ((curByte + 1) * 8) - curBit; // left in byte // // DEBUG // System.out.println ( // "this byte = " + btoh(b[curByte]) // + ", next byte = " + btoh(b[curByte + 1]) // + "; curBit=" + curBit + ", curByte= " + curByte // + ", bitsUnused=" + bitsUnused); // // END if (bitsUnused > 5) { bitOffset[j] = ((0xff & b[curByte]) >> (bitsUnused - 5)) & UNMASK[5]; // // DEBUG // System.out.println( // " before shifting: " + btoh(b[curByte]) // + "\n after shifting: " // + itoh( (0xff & b[curByte]) >> (bitsUnused - 5)) // + "\n mask: " + itoh(UNMASK[5]) ); // // END } else if (bitsUnused == 5) { bitOffset[j] = b[curByte] & UNMASK[5]; } else { bitOffset[j] = (b[curByte] & UNMASK[bitsUnused]) | (((0xff & b[curByte + 1]) >> 3) & MASK[bitsUnused]); // // DEBUG // System.out.println( // " contribution from first byte: " // + itoh(b[curByte] & UNMASK[bitsUnused]) // + "\n second byte: " + btoh(b[curByte + 1]) // + "\n shifted: " + itoh((0xff & b[curByte + 1]) >> 3) // + "\n mask: " + itoh(MASK[bitsUnused]) // + "\n contribution from second byte: " // + itoh((0xff & b[curByte + 1] >> 3) & MASK[bitsUnused])); // // END } // // DEBUG // System.out.println (" bitOffset[j] = " + bitOffset[j]); // // END curBit += 5; } } } /** * Extracts the k word offsets from a key. Suitable for general * values of m and k. See above for formula for max m and k. */ public class GenericWordSelector implements WordSelector { /** Extract the k offsets into the word offset array */ public void getWordSelectors(byte[] b, int offset, int length, int[] wordOffset) { int stride = m - 5; //assert true: stride<16; int curBit = (k * 5) + (offset * 8); int curByte; for (int j = 0; j < k; j++) { curByte = curBit / 8; int bitsUnused = ((curByte + 1) * 8) - curBit; // left in byte // // DEBUG // System.out.println ( // "curr 3 bytes: " + btoh(b[curByte]) // + (curByte < 19 ? // " " + btoh(b[curByte + 1]) : "") // + (curByte < 18 ? // " " + btoh(b[curByte + 2]) : "") // + "; curBit=" + curBit + ", curByte= " + curByte // + ", bitsUnused=" + bitsUnused); // // END if (bitsUnused > stride) { // the value is entirely within the current byte wordOffset[j] = ((0xff & b[curByte]) >> (bitsUnused - stride)) & UNMASK[stride]; } else if (bitsUnused == stride) { // the value fills the current byte wordOffset[j] = b[curByte] & UNMASK[stride]; } else { // bitsUnused < stride // value occupies more than one byte // bits from first byte, right-aligned in result wordOffset[j] = b[curByte] & UNMASK[bitsUnused]; // // DEBUG // System.out.println(" first byte contributes " // + itoh(wordOffset[j])); // // END // bits from second byte int bitsToGet = stride - bitsUnused; if (bitsToGet >= 8) { // 8 bits from second byte wordOffset[j] |= (0xff & b[curByte + 1]) << bitsUnused; // // DEBUG // System.out.println(" second byte contributes " // + itoh( // (0xff & b[curByte + 1]) << bitsUnused // )); // // END // bits from third byte bitsToGet -= 8; if (bitsToGet > 0) { // AIOOBE here if m and k too big (23,11 is the max) // for a 32-byte key - see above wordOffset[j] |= ((0xff & b[curByte + 2]) >> (8 - bitsToGet)) << (stride - bitsToGet) ; // // DEBUG // System.out.println(" third byte contributes " // + itoh( // (((0xff & b[curByte + 2]) >> (8 - bitsToGet)) // << (stride - bitsToGet)) // )); // // END } } else { // all remaining bits are within second byte wordOffset[j] |= ((b[curByte + 1] >> (8 - bitsToGet)) & UNMASK[bitsToGet]) << bitsUnused; // // DEBUG // System.out.println(" second byte contributes " // + itoh( // ((b[curByte + 1] >> (8 - bitsToGet)) // & UNMASK[bitsToGet]) // << bitsUnused // )); // // END } } // // DEBUG // System.out.println ( // " wordOffset[" + j + "] = " + wordOffset[j] // + ", " + itoh(wordOffset[j]) // ); // // END curBit += stride; } } } /** * Given a key, populate the word and bit offset arrays, each * of which has k elements. * * @param key cryptographic key used in populating the arrays * @param bitOffset Out parameter of length k * @param wordOffset Out parameter of length k * @since 0.8.11 out parameters added */ public void getOffsets (byte[] key, int[] bitOffset, int[] wordOffset) { getOffsets(key, 0, key.length, bitOffset, wordOffset); } /** * Given a key, populate the word and bit offset arrays, each * of which has k elements. * * @param key cryptographic key used in populating the arrays * @param bitOffset Out parameter of length k * @param wordOffset Out parameter of length k * @since 0.8.11 out parameters added */ public void getOffsets (byte[] key, int off, int len, int[] bitOffset, int[] wordOffset) { // skip these checks for speed //if (key == null) { // throw new IllegalArgumentException("null key"); //} //if (len < 20) { // throw new IllegalArgumentException( // "key must be at least 20 bytes long"); //} // // DEBUG // System.out.println("KeySelector.getOffsets for " // + BloomSHA1.keyToString(b)); // // END bitSel.getBitSelectors(key, off, len, bitOffset); wordSel.getWordSelectors(key, off, len, wordOffset); } /***** // DEBUG METHODS //////////////////////////////////////////////// String itoh(int i) { return BloomSHA1.itoh(i); } String btoh(byte b) { return BloomSHA1.btoh(b); } *****/ }