/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.criteria;
import java.security.SecureRandom;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import org.apache.commons.math3.analysis.function.Exp;
import org.apache.commons.math3.analysis.function.Log;
import org.apache.commons.math3.distribution.BinomialDistribution;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.DataGeneralizationScheme;
import org.deidentifier.arx.DataSubset;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.framework.check.groupify.HashGroupifyEntry;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.framework.lattice.Transformation;
/**
* (e,d)-Differential Privacy implemented with (k,b)-SDGS as proposed in:
*
* Ninghui Li, Wahbeh H. Qardaji, Dong Su:
* On sampling, anonymization, and differential privacy or, k-anonymization meets differential privacy.
* Proceedings of ASIACCS 2012. pp. 32-33
*
* @author Raffael Bild
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class EDDifferentialPrivacy extends ImplicitPrivacyCriterion {
/** SVUID */
private static final long serialVersionUID = 242579895476272606L;
/** Parameter */
private final double epsilon;
/** Parameter */
private final double delta;
/** Parameter */
private final int k;
/** Parameter */
private final double beta;
/** Parameter */
private DataSubset subset;
/** Parameter */
private transient DataManager manager;
/** Parameter */
private transient boolean deterministic = false;
/** Parameter */
private DataGeneralizationScheme generalization;
/**
* Creates a new instance
* @param epsilon
* @param delta
* @param generalization
*/
public EDDifferentialPrivacy(double epsilon, double delta,
DataGeneralizationScheme generalization) {
super(false, false);
this.epsilon = epsilon;
this.delta = delta;
this.generalization = generalization;
this.beta = calculateBeta(epsilon);
this.k = calculateK(delta, epsilon, this.beta);
this.deterministic = false;
}
/**
* Creates a new instance which may be configured to produce deterministic output.
* Note: *never* use this in production. It is implemented for testing purposes, only.
*
* @param epsilon
* @param delta
* @param generalization
* @param deterministic
*/
public EDDifferentialPrivacy(double epsilon, double delta,
DataGeneralizationScheme generalization,
boolean deterministic) {
super(false, false);
this.epsilon = epsilon;
this.delta = delta;
this.generalization = generalization;
this.beta = calculateBeta(epsilon);
this.k = calculateK(delta, epsilon, this.beta);
this.deterministic = true;
}
@Override
public EDDifferentialPrivacy clone() {
return new EDDifferentialPrivacy(this.getEpsilon(), this.getDelta(), this.getGeneralizationScheme());
}
/**
* Returns the k parameter of (k,b)-SDGS
* @return
*/
public double getBeta() {
return beta;
}
@Override
public DataSubset getDataSubset() {
return subset;
}
/**
* Returns the delta parameter of (e,d)-DP
* @return
*/
public double getDelta() {
return delta;
}
/**
* Returns the epsilon parameter of (e,d)-DP
* @return
*/
public double getEpsilon() {
return epsilon;
}
/**
* Returns the defined generalization scheme
* @return
*/
public DataGeneralizationScheme getGeneralizationScheme() {
return this.generalization;
}
/**
* Returns the k parameter of (k,b)-SDGS
* @return
*/
public int getK() {
return k;
}
@Override
public int getMinimalClassSize() {
return k;
}
@Override
public int getRequirements(){
// Requires two counters
return ARXConfiguration.REQUIREMENT_COUNTER |
ARXConfiguration.REQUIREMENT_SECONDARY_COUNTER;
}
/**
* Creates a random sample based on beta
*
* @param manager
*/
public void initialize(DataManager manager, ARXConfiguration config){
// Needed for consistent de-serialization. We need to call this
// method in the constructor of the class DataManager. The following
// condition should hold, when this constructor is called during
// de-serialization, when we must not change the subset.
if (subset != null && this.manager == null) {
this.manager = manager;
return;
}
// Needed to prevent inconsistencies. We need to call this
// method in the constructor of the class DataManager. It will be called again, when
// ARXConfiguration is initialized(). During the second call we must not change the subset.
if (subset != null && this.manager == manager) {
return;
}
// Create RNG
Random random;
if (deterministic) {
random = new Random(0xDEADBEEF);
} else {
random = new SecureRandom();
}
// Create a data subset via sampling based on beta
Set<Integer> subsetIndices = new HashSet<Integer>();
int records = manager.getDataGeneralized().getDataLength();
for (int i = 0; i < records; ++i) {
if (random.nextDouble() < beta) {
subsetIndices.add(i);
}
}
this.subset = DataSubset.create(records, subsetIndices);
this.manager = manager;
}
@Override
public boolean isAnonymous(Transformation node, HashGroupifyEntry entry) {
return entry.count >= k;
}
@Override
public boolean isLocalRecodingSupported() {
return false;
}
@Override
public boolean isMinimalClassSizeAvailable() {
return true;
}
@Override
public boolean isSubsetAvailable() {
return subset != null;
}
@Override
public ElementData render() {
ElementData result = new ElementData("Differential privacy");
result.addProperty("Epsilon", epsilon);
result.addProperty("Delta", delta);
result.addProperty("Uniqueness threshold (k)", k);
result.addProperty("Sampling probability (beta)", beta);
return result;
}
@Override
public String toString() {
return "("+epsilon+","+delta+")-DP";
}
/**
* Calculates a_n
* @param n
* @param epsilon
* @param beta
* @return
*/
private double calculateA(int n, double epsilon, double beta) {
double gamma = calculateGamma(epsilon, beta);
return calculateBinomialSum((int) Math.floor(n * gamma) + 1, n, beta);
}
/**
* Calculates beta_max
* @param epsilon
* @return
*/
private double calculateBeta(double epsilon) {
return 1.0d - (new Exp()).value(-1.0d * epsilon);
}
/**
* Adds summands of the binomial distribution with probability beta
* @param from
* @param to
* @param beta
* @return
*/
private double calculateBinomialSum(int from, int to, double beta) {
BinomialDistribution binomialDistribution = new BinomialDistribution(to, beta);
double sum = 0.0d;
for (int j = from; j <= to; ++j) {
sum += binomialDistribution.probability(j);
}
return sum;
}
/**
* Calculates c_n
* @param n
* @param epsilon
* @param beta
* @return
*/
private double calculateC(int n, double epsilon, double beta) {
double gamma = calculateGamma(epsilon, beta);
return (new Exp()).value(-1.0d * n * (gamma * (new Log()).value(gamma / beta) - (gamma - beta)));
}
/**
* Calculates delta
* @param k
* @param epsilon
* @param beta
* @return
*/
private double calculateDelta(int k, double epsilon, double beta) {
double gamma = calculateGamma(epsilon, beta);
int n_m = (int) Math.ceil((double) k / gamma - 1.0d);
double delta = Double.MIN_VALUE;
double bound = Double.MAX_VALUE;
for (int n = n_m; delta < bound; ++n) {
delta = Math.max(delta, calculateA(n, epsilon, beta));
bound = calculateC(n, epsilon, beta);
}
return delta;
}
/**
* Calculates gamma
* @param epsilon
* @param beta
* @return
*/
private double calculateGamma(double epsilon, double beta) {
double power = (new Exp()).value(epsilon);
return (power - 1.0d + beta) / power;
}
/**
* Calculates k
* @param delta
* @param epsilon
* @param beta
* @return
*/
private int calculateK(double delta, double epsilon, double beta) {
int k = 1;
for (double delta_k = Double.MAX_VALUE; delta_k > delta; ++k) {
delta_k = calculateDelta(k, epsilon, beta);
}
return k;
}
}