/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.criteria;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.framework.check.groupify.HashGroupifyEntry;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.framework.lattice.Transformation;
/**
* The t-closeness criterion with equal-distance EMD.
*
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class EqualDistanceTCloseness extends TCloseness {
/** SVUID */
private static final long serialVersionUID = -1383357036299011323L;
/** The original distribution. */
private double[] distribution;
/**
* Creates a new instance of the t-closeness criterion with equal earth-movers-distance as proposed in:
* Li N, Li T, Venkatasubramanian S.
* t-Closeness: Privacy beyond k-anonymity and l-diversity.
* 23rd International Conference on Data Engineering. 2007:106-115.
*
* @param attribute
* @param t
*/
public EqualDistanceTCloseness(String attribute, double t) {
super(attribute, t);
}
@Override
public EqualDistanceTCloseness clone() {
return new EqualDistanceTCloseness(this.getAttribute(), this.getT());
}
@Override
public void initialize(DataManager manager, ARXConfiguration config) {
super.initialize(manager, config);
distribution = manager.getDistribution(attribute);
}
@Override
public boolean isAnonymous(Transformation node, HashGroupifyEntry entry) {
// Calculate EMD with equal distance
int[] buckets = entry.distributions[index].getBuckets();
double count = entry.count;
/*
* P = Set of ids of values in local frequency set
* Q = Set of ids of values in global dataset
*
* According to Li et al., EMD with equal ground distance is:
* D[P, Q] = 1/2 SUM_{i \in Q} (|p_i - q_i|)
*
* This can be reformulated as:
* D[P, Q] = 1/2 * (SUM_{i \in Q\P} q_i + SUM_{i \in P}(|p_i - q_i|))
*
* Additionally,
* SUM_{i \in Q\P} q_i = 1 - SUM_{i \in P} q_i = 1 + SUM_{i \in P} - q_i
*
* As a result, we implement the distance measure as follows
*
* D[P, Q] = 1/2 * ( 1 + SUM_{i \in P} (|p_i - q_i| - q_i))
*/
double val = 1.0d;
for (int i = 0; i < buckets.length; i += 2) {
if (buckets[i] != -1) { // bucket not empty
double frequency = distribution[buckets[i]];
val += Math.abs((frequency - ((double) buckets[i + 1] / count))) - frequency;
}
}
val /= 2;
// check
return val <= t;
}
@Override
public boolean isLocalRecodingSupported() {
return true;
}
@Override
public ElementData render() {
ElementData result = new ElementData("t-Closeness");
result.addProperty("Attribute", attribute);
result.addProperty("Threshold (t)", this.t);
result.addProperty("Distance", "Equal");
return result;
}
@Override
public String toString() {
return t+"-closeness with equal ground-distance for attribute '"+attribute+"'";
}
}