/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.sling.resourceresolver.impl.mapping; /** * Bloom filter utilities. */ public class BloomFilterUtils { /** * The number of bits needed per stored element. * Using the formula m = - (n * ln(p)) / (ln(2)^2) as described in * http://en.wikipedia.org/wiki/Bloom_filter * (simplified, as we used a fixed K: 2). */ private static final double BIT_FACTOR = -Math.log(0.02) / Math.pow(Math.log(2), 2); /** * Create a bloom filter array for the given number of elements. * * @param elementCount the number of entries * @param maxBytes the maximum number of bytes * @return the empty bloom filter */ public static byte[] createFilter(int elementCount, int maxBytes) { int bits = (int) (elementCount * BIT_FACTOR) + 7; return new byte[Math.min(maxBytes, bits / 8)]; } /** * Add the key. * * @param bloom the bloom filter * @param key the key */ public static void add(byte[] bloom, Object key) { int len = bloom.length; if (len > 0) { int h1 = hash(key.hashCode()), h2 = hash(h1); bloom[(h1 >>> 3) % len] |= 1 << (h1 & 7); bloom[(h2 >>> 3) % len] |= 1 << (h2 & 7); } } /** * Remove the key. * * @param bloom the bloom filter * @param key the key */ public static void remove(byte[] bloom, Object key){ throw new UnsupportedOperationException(); } /** * Check whether the given key is probably in the set. This method never * returns false if the key is in the set, but possibly returns true even if * it isn't. * * @param bloom the bloom filter * @param key the key * @return true if the given key is probably in the set */ public static boolean probablyContains(byte[] bloom, Object key) { int len = bloom.length; if (len == 0) { return true; } int h1 = hash(key.hashCode()); long x = bloom[(h1 >>> 3) % len] & (1 << (h1 & 7)); if (x != 0) { int h2 = hash(h1); x = bloom[(h2 >>> 3) % len] & (1 << (h2 & 7)); } return x != 0; } /** * Get the hash value for the given key. The returned hash value is * stretched so that it should work well even for relatively bad hashCode * implementations. * * @param key the key * @return the hash value */ private static int hash(int key) { int hash = key; // a supplemental secondary hash function // to protect against hash codes that don't differ much hash = ((hash >>> 16) ^ hash) * 0x45d9f3b; hash = ((hash >>> 16) ^ hash) * 0x45d9f3b; hash = (hash >>> 16) ^ hash; return hash; } }