package edu.isi.karma.kr2rml.writer; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.lang.reflect.Field; import java.util.BitSet; import java.util.zip.DeflaterOutputStream; import java.util.zip.InflaterInputStream; import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.util.bloom.BloomFilter; //import org.apache.hadoop.util.bloom.BloomFilter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; public class KR2RMLBloomFilter extends BloomFilter { private static final Logger LOG = LoggerFactory.getLogger(KR2RMLBloomFilter.class); public static final int defaultVectorSize = 1000000; public static final int defaultnbHash = 8; static Field bitsField; static { try { bitsField = BloomFilter.class.getDeclaredField("bits"); bitsField.setAccessible(true); } catch (SecurityException | NoSuchFieldException e) { LOG.error("Unable to set up KR2RMLBloomFilter: " + e.getMessage()); } } public KR2RMLBloomFilter(int vectorSize, int nbHash, int hashType) { super(vectorSize,nbHash, hashType); } public KR2RMLBloomFilter() { super(); } public int estimateNumberOfHashedValues() { int num = 0; try { BitSet bits = (BitSet)bitsField.get(this); int setBits = bits.cardinality(); double N = this.getVectorSize(); int k = this.nbHash; double tmp = -(N * Math.log(1 - (setBits / N))) / (double)k; num = (int) Math.round(tmp); } catch (IllegalArgumentException | IllegalAccessException e) { LOG.error("Unable to estimate number of hashed values: " + e.getMessage()); } return num; } public String compressAndBase64Encode() throws IOException { ByteArrayOutputStream baos = new ByteArrayOutputStream(getVectorSize() + 1000); String base64EncodedCompressedSerializedBloomFilter = null; ObjectOutputStream dout = new ObjectOutputStream(new DeflaterOutputStream(baos)); write(dout); dout.flush(); dout.close(); base64EncodedCompressedSerializedBloomFilter = Base64.encodeBase64String(baos.toByteArray()); return base64EncodedCompressedSerializedBloomFilter; } public void populateFromCompressedAndBase64EncodedString(String base64EncodedBloomFilter) throws IOException { byte[] serializedBloomFilter = Base64.decodeBase64(base64EncodedBloomFilter); readFields(new ObjectInputStream(new InflaterInputStream(new ByteArrayInputStream(serializedBloomFilter)))); } }