package ivory.bloomir.data;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.io.Writable;
/**
 * A configuration of parameters for an experiment.
 *
 * <p>Instances are serialized through the {@link Writable} methods as five
 * consecutive {@code int} values, in this order: number of documents, number
 * of terms, number of hash functions, bits per element, and identity hash
 * threshold.
 *
 * @author Nima Asadi
 */
public class BloomConfig implements Writable {
  /**
   * Filename where the configuration parameters are stored.
   */
  public static final String CONFIG_FILE = "conf";

  private int nbDocuments;
  private int nbTerms;
  private int nbHash;
  private int bitsPerElement;
  private int identityHashThreshold;

  /**
   * Creates an empty configuration. Used by {@link #readInstance(DataInput)},
   * which fills in the fields through {@link #readFields(DataInput)}.
   */
  private BloomConfig() {
  }

  /**
   * Creates a new configuration object. The identity hash threshold is derived
   * from the arguments as {@code nbDocuments / bitsPerElement} (integer
   * division).
   *
   * @param nbDocuments Number of documents in the collection
   * @param nbTerms Number of unique terms in the collection
   * @param nbHash Number of Hash functions
   * @param bitsPerElement Number of bit positions per element; must not be zero
   * @throws IllegalArgumentException if {@code bitsPerElement} is zero
   */
  public BloomConfig(int nbDocuments, int nbTerms, int nbHash, int bitsPerElement) {
    // Fail with a message that names the offending argument instead of letting
    // the threshold computation die with a bare "/ by zero" ArithmeticException.
    // Only zero is rejected so that every input accepted before still works.
    if (bitsPerElement == 0) {
      throw new IllegalArgumentException(
          "bitsPerElement must not be zero: the identity hash threshold is "
              + "computed as nbDocuments / bitsPerElement");
    }
    this.nbDocuments = nbDocuments;
    this.nbTerms = nbTerms;
    this.nbHash = nbHash;
    this.bitsPerElement = bitsPerElement;
    this.identityHashThreshold = computeIdentityHashThreshold(nbDocuments, bitsPerElement);
  }

  /**
   * Writes the five parameters as consecutive {@code int} values. The order
   * must stay in sync with {@link #readFields(DataInput)}.
   *
   * @param out Output to write the parameters to
   * @throws IOException if writing to {@code out} fails
   */
  @Override
  public void write(DataOutput out) throws IOException {
    out.writeInt(nbDocuments);
    out.writeInt(nbTerms);
    out.writeInt(nbHash);
    out.writeInt(bitsPerElement);
    out.writeInt(identityHashThreshold);
  }

  /**
   * Overwrites all five parameters with {@code int} values read from the
   * input, in the order produced by {@link #write(DataOutput)}. The identity
   * hash threshold is taken from the input as stored; it is not recomputed.
   *
   * @param in Input to read the parameters from
   * @throws IOException if reading from {@code in} fails
   */
  @Override
  public void readFields(DataInput in) throws IOException {
    this.nbDocuments = in.readInt();
    this.nbTerms = in.readInt();
    this.nbHash = in.readInt();
    this.bitsPerElement = in.readInt();
    this.identityHashThreshold = in.readInt();
  }

  /**
   * Reads and returns an instance of the configuration object.
   *
   * @param in DataInput stream
   * @return A BloomConfig object containing all the necessary parameters to
   * run an experiment
   * @throws IOException if reading from {@code in} fails
   */
  public static BloomConfig readInstance(DataInput in) throws IOException {
    BloomConfig bloomConfig = new BloomConfig();
    bloomConfig.readFields(in);
    return bloomConfig;
  }

  /**
   * @return The number of documents in the collection.
   */
  public int getDocumentCount() {
    return nbDocuments;
  }

  /**
   * @return The number of hash functions.
   */
  public int getHashCount() {
    return nbHash;
  }

  /**
   * @return The number of unique terms in the collection.
   */
  public int getTermCount() {
    return nbTerms;
  }

  /**
   * @return The number of bit positions per element.
   */
  public int getBitsPerElement() {
    return bitsPerElement;
  }

  /**
   * @return The identity hash function threshold (\theta_I)
   */
  public int getIdentityHashThreshold() {
    return identityHashThreshold;
  }

  /**
   * Computes the identity hash threshold as the integer quotient of the
   * document count and the bits per element.
   *
   * @param nbDocuments Number of documents in the collection
   * @param bitsPerElement Number of bit positions per element; callers must
   * ensure it is not zero
   * @return {@code nbDocuments / bitsPerElement}
   */
  private static int computeIdentityHashThreshold(int nbDocuments, int bitsPerElement) {
    return nbDocuments / bitsPerElement;
  }
}