/*
* Part of the CCNx Java Library.
*
* Copyright (C) 2008, 2009, 2013 Palo Alto Research Center, Inc.
*
* This library is free software; you can redistribute it and/or modify it
* under the terms of the GNU Lesser General Public License version 2.1
* as published by the Free Software Foundation.
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details. You should have received
* a copy of the GNU Lesser General Public License along with this library;
* if not, write to the Free Software Foundation, Inc., 51 Franklin Street,
* Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.ccnx.ccn.protocol;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.util.Arrays;
import java.util.Random;
import org.ccnx.ccn.impl.encoding.CCNProtocolDTags;
import org.ccnx.ccn.impl.encoding.XMLDecoder;
import org.ccnx.ccn.impl.encoding.XMLEncoder;
import org.ccnx.ccn.impl.support.DataUtils;
import org.ccnx.ccn.impl.support.Log;
import org.ccnx.ccn.io.content.ContentDecodingException;
import org.ccnx.ccn.io.content.ContentEncodingException;
/**
 * Bloom filter usable as a filler element of an exclude filter.
 *
 * Keys are inserted with {@link #insert(byte[])} and tested with
 * {@link #match(byte[])}. A filter is described by the base-2 logarithm of its
 * size in bits, the number of hash functions, a 4 byte seed and the bit array
 * itself. The bit array never exceeds 1024 bytes (2^13 bits).
 *
 * @deprecated use named exclude elements instead.
 */
@Deprecated
public class BloomFilter extends Exclude.Filler implements Comparable<BloomFilter> {

    /** Number of seed bytes carried by a filter. */
    private static final int SEED_LENGTH = 4;
    /** Smallest supported lg(bits): 2^3 bits = one byte. */
    private static final int MIN_LG_BITS = 3;
    /** Largest supported lg(bits): 2^13 bits = 1024 bytes. */
    private static final int MAX_LG_BITS = 13;
    /** Capacity of the bit array in bytes. */
    private static final int MAX_BLOOM_BYTES = 1 << (MAX_LG_BITS - 3);
    /** Wire header: lgBits, nHash, method, reserved, then the seed. */
    private static final int WIRE_HEADER_LENGTH = 4 + SEED_LENGTH;
    private static final int MIN_HASHES = 2;
    private static final int MAX_HASHES = 32;

    private int _lgBits;
    private int _nHash;

    // I am using a short for seed internally - even though it's
    // supposed to be a byte array - to get around unsigned arithmetic
    // issues. Every element holds a value in the range 0..255.
    private short [] _seed;
    private byte [] _bloom = new byte[MAX_BLOOM_BYTES];
    // Number of keys inserted locally, or -1 once the filter has been decoded.
    private int _size = 0;

    /**
     * Constructor
     * @param estimatedMembers The performance of the bloom filter can be improved by accurately
     * 	estimating the number of members that will be inserted into it. Too low a number
     * 	will increase the likelihood of false positives. Too high a number will cause the
     * 	filter to be larger than necessary, impacting performance. It is better for this
     * 	number to be too low than too high.
     * @param seed Random seed data must be of length 4
     * @throws IllegalArgumentException if the seed is not exactly 4 bytes long
     */
    public BloomFilter(int estimatedMembers, byte [] seed) {
        if (seed.length != SEED_LENGTH)
            throw new IllegalArgumentException("Bloom seed length must be 4"); // for now
        _seed = new short[seed.length];
        for (int i = 0; i < seed.length; i++)
            _seed[i] = (short)(seed[i] & 0xff);

        // Sizing is done in long arithmetic so that very large estimates cannot
        // overflow and produce a bogus size or a zero divisor.
        long members = estimatedMembers;

        // Michael's comment: try for about m = 12*n (m = bits in Bloom filter)
        _lgBits = MAX_LG_BITS;
        while (_lgBits > MIN_LG_BITS && (1L << _lgBits) > members * 12)
            _lgBits--;

        // Michael's comment: optimum number of hash functions is ln(2)*(m/n); use ln(2) ~= 9/13
        long hashes = (9L << _lgBits) / (13 * members + 1);
        if (hashes < MIN_HASHES)
            hashes = MIN_HASHES;
        if (hashes > MAX_HASHES)
            hashes = MAX_HASHES;
        _nHash = (int)hashes;
    }

    /**
     * Create a seed from random values
     * @return a new 4 byte seed
     */
    public static byte[] createSeed() {
        byte[] seed = new byte[SEED_LENGTH];
        Random rand = new Random();
        rand.nextBytes(seed);
        return seed;
    }

    /**
     * For decoding
     */
    public BloomFilter() {}

    /**
     * Insert a key
     * @param key a key to exclude
     * @throws IllegalArgumentException if this filter was populated by {@link #decode(XMLDecoder)}
     */
    public void insert(byte [] key) {
        if (_size < 0)
            throw new IllegalArgumentException("Can't reuse bloomfilter from the network");
        long s = hashKey(key);
        long mask = bitMask();
        for (int i = 0; i < _nHash; i++) {
            s = nextHash(s, 0);
            long h = s & mask;
            // h >> 3 selects the byte, h & 7 the bit within that byte.
            _bloom[(int)(h >> 3)] |= (1 << (h & 7));
        }
        _size++;
    }

    /**
     * Test if the bloom filter matches a particular key.
     * Note - a negative result means the key was definitely not set, but a positive result only means the
     * key was likely set.
     * @param key key to test
     * @return false if not set
     */
    public boolean match(byte [] key) {
        long s = hashKey(key);
        long mask = bitMask();
        for (int i = 0; i < _nHash; i++) {
            s = nextHash(s, 0);
            long h = s & mask;
            if (0 == (_bloom[(int)(h >> 3)] & (1 << (h & 7))))
                return false;
        }
        return true;
    }

    /**
     * Returns the number of keys inserted into this filter through
     * {@link #insert(byte[])}.
     * @return the number of inserted keys, or -1 if this filter was populated by
     * 	{@link #decode(XMLDecoder)}
     */
    public int size() {
        return _size;
    }

    /**
     * Get a copy of the seed
     * @return copy of seed
     */
    public byte[] seed() {
        // The seed is stored as short[]; System.arraycopy cannot copy between
        // arrays of different primitive types, so narrow element by element.
        byte [] outSeed = new byte[_seed.length];
        for (int i = 0; i < _seed.length; i++)
            outSeed[i] = (byte)_seed[i];
        return outSeed;
    }

    /**
     * Advance the feedback shift register by one step and mix in a value.
     * @param s current register state
     * @param u value added after the shift
     * @return the new state, truncated to 31 bits
     */
    private long nextHash(long s, int u) {
        long k = 13; // Michael's comment: use this many bits of feedback shift output
        long b = s & ((1 << k) - 1);
        // Michael's comment: fsr primitive polynomial (modulo 2) x**31 + x**13 + 1
        s = ((s >> k) ^ (b << (31 - k)) ^ (b << (13 - k))) + u;
        return(s & 0x7FFFFFFF);
    }

    /**
     * Run every byte of the key through the hash, starting from the seed.
     * Each byte is offset by one before being mixed in.
     */
    private long hashKey(byte [] key) {
        long s = computeSeed();
        for (int i = 0; i < key.length; i++)
            s = nextHash(s, key[i] + 1);
        return s;
    }

    /**
     * Mask reducing a hash value to a bit index of this filter.
     */
    private int bitMask() {
        return ((8 * _bloom.length) - 1) & ((1 << _lgBits) - 1);
    }

    /**
     * Number of bytes of the bit array in use for the current lgBits.
     */
    private int usedBits() {
        return 1 << (_lgBits - 3);
    }

    /**
     * Gets the type of element this is within an exclude filter
     */
    @Override
    public long getElementLabel() { return CCNProtocolDTags.Bloom; }

    /**
     * Populate this filter from its binary wire form: lgBits, nHash, a method
     * byte and a reserved byte (both ignored), the 4 byte seed and then the bit
     * array. A decoded filter rejects further calls to {@link #insert(byte[])}.
     * The fields of this object are only modified once the input has been
     * validated.
     * @throws ContentDecodingException if the element is shorter than its header,
     * 	declares an unsupported size or carries more bits than a filter can hold
     */
    @Override
    public void decode(XMLDecoder decoder) throws ContentDecodingException {
        byte [] wire = decoder.readBinaryElement(getElementLabel());
        if (wire == null || wire.length < WIRE_HEADER_LENGTH) {
            throw new ContentDecodingException("BloomFilter element too short: need at least "
                    + WIRE_HEADER_LENGTH + " bytes, got " + ((wire == null) ? 0 : wire.length));
        }
        ByteArrayInputStream bais = new ByteArrayInputStream(wire);
        int lgBits = bais.read();
        int nHash = bais.read();
        bais.skip(2); // method & reserved - ignored for now
        if (lgBits < MIN_LG_BITS || lgBits > MAX_LG_BITS) {
            throw new ContentDecodingException("BloomFilter lgBits out of range ["
                    + MIN_LG_BITS + "," + MAX_LG_BITS + "]: " + lgBits);
        }

        short [] seed = new short[SEED_LENGTH];
        for (int i = 0; i < seed.length; i++)
            seed[i] = (short)(bais.read() & 0xff);

        int payload = bais.available();
        if (payload > MAX_BLOOM_BYTES) {
            throw new ContentDecodingException("BloomFilter too large: at most "
                    + MAX_BLOOM_BYTES + " bytes supported, got " + payload);
        }
        byte [] bloom = new byte[MAX_BLOOM_BYTES];
        if (payload > 0)
            bais.read(bloom, 0, payload);

        _lgBits = lgBits;
        _nHash = nHash;
        _seed = seed;
        _bloom = bloom;

        // DKS decoding check
        if (payload != usedBits()) {
            Log.warning("Unexpected result in decoding BloomFilter: expecting " + usedBits() + " bytes, got " + payload);
        }
        _size = -1;
    }

    /**
     * Write this filter in its binary wire form: lgBits, nHash, the method byte
     * 'A', a zero reserved byte, the seed and the in-use part of the bit array.
     */
    @Override
    public void encode(XMLEncoder encoder) throws ContentEncodingException {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        baos.write((byte)_lgBits);
        baos.write((byte)_nHash);
        baos.write('A');	// "method" - must be 'A' for now
        baos.write(0);		// "reserved" - must be 0 for now
        for (int i = 0; i < _seed.length; i++)
            baos.write((byte)_seed[i]);
        int size = usedBits();
        for (int i = 0; i < size; i++)
            baos.write(_bloom[i]);
        encoder.writeElement(getElementLabel(), baos.toByteArray());
    }

    /**
     * Order filters by their bit arrays only; lgBits, nHash and the seed are
     * not consulted, so this ordering looks at less state than
     * {@link #equals(Object)} does.
     */
    public int compareTo(BloomFilter o) {
        return DataUtils.compare(_bloom, o._bloom);
    }

    /**
     * Pack the four seed values into the 31 bit starting state of the hash.
     */
    private long computeSeed() {
        long u = ((_seed[0]) << 24) |((_seed[1]) << 16) |((_seed[2]) << 8) | (_seed[3]);
        return u & 0x7FFFFFFF;
    }

    /**
     * Copy this filter; the seed and the bit array are duplicated, not shared.
     */
    public BloomFilter clone() throws CloneNotSupportedException {
        BloomFilter result = (BloomFilter)super.clone();
        // A filter created for decoding has no seed yet.
        result._seed = (_seed == null) ? null : _seed.clone();
        result._bloom = _bloom.clone();
        return result;
    }

    @Override
    public boolean validate() {
        return true;
    }

    /**
     * Hash over the same state that {@link #equals(Object)} compares: lgBits,
     * nHash, the seed and only the in-use prefix of the bit array.
     */
    @Override
    public int hashCode() {
        final int prime = 31;
        // Clamp so that a filter which has not been sized yet still hashes safely.
        int used = Math.max(0, Math.min(usedBits(), _bloom.length));
        int bloomHash = 1;
        for (int i = 0; i < used; i++)
            bloomHash = prime * bloomHash + _bloom[i];
        int result = 1;
        result = prime * result + bloomHash;
        result = prime * result + _lgBits;
        result = prime * result + _nHash;
        result = prime * result + Arrays.hashCode(_seed);
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        BloomFilter other = (BloomFilter) obj;
        if (_lgBits != other._lgBits)
            return false;
        if (_nHash != other._nHash)
            return false;
        // Only compare the number of bytes of _bloom in use. Decoder
        // may make _bloom array a different length than was set.
        if (0 != DataUtils.bytencmp(_bloom, other._bloom, usedBits())) {
            return false;
        }
        if (!Arrays.equals(_seed, other._seed))
            return false;
        return true;
    }
}