/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.criteria;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.framework.check.groupify.HashGroupifyEntry;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.framework.lattice.Transformation;
/**
* The t-closeness criterion with hierarchical-distance EMD.
*
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class HierarchicalDistanceTCloseness extends TCloseness {
/** SVUID */
private static final long serialVersionUID = -2142590190479670706L;
/** The hierarchy used for the EMD. */
private final Hierarchy hierarchy;
/** Internal tree. */
private int[] tree;
/** Internal offset. */
private int start;
/** Internal empty tree. */
private int[] empty;
/**
* Creates a new instance of the t-closeness criterion with hierarchical earth-movers-distance as proposed in:
* Li N, Li T, Venkatasubramanian S.
* t-Closeness: Privacy beyond k-anonymity and l-diversity.
* 23rd International Conference on Data Engineering. 2007:106-115.
*
* @param attribute
* @param t
* @param h
*/
public HierarchicalDistanceTCloseness(String attribute, double t, Hierarchy h) {
super(attribute, t);
this.hierarchy = h;
}
@Override
public HierarchicalDistanceTCloseness clone() {
return new HierarchicalDistanceTCloseness(this.getAttribute(), this.getT(), this.hierarchy);
}
/**
* Returns the hierarchy backing the EMD calculations.
*
* @return
*/
public Hierarchy getHierarchy() {
return hierarchy;
}
@Override
public void initialize(DataManager manager, ARXConfiguration config) {
super.initialize(manager, config);
this.tree = manager.getTree(attribute);
this.start = this.tree[1] + 3;
this.empty = new int[this.tree[1]];
}
@Override
public boolean isAnonymous(Transformation node, HashGroupifyEntry entry) {
// Empty data in tree
System.arraycopy(empty, 0, tree, start, empty.length);
// init parameters
final int totalElementsP = tree[0];
final int numLeafs = tree[1];
final double height = tree[2]; // cast to double as it is used in double
// calculations
final int extraStartPos = numLeafs + 3;
final int extraEndPos = extraStartPos + numLeafs;
// Copy and count
int totalElementsQ = 0;
int[] buckets = entry.distributions[index].getBuckets();
for (int i = 0; i < buckets.length; i += 2) {
if (buckets[i] != -1) { // bucket not empty
final int value = buckets[i];
final int frequency = buckets[i + 1];
tree[value + extraStartPos] = frequency;
totalElementsQ += frequency;
}
}
// Tree data format: #p_count, #leafs, height, freqLeaf_1, ...,
// freqLeaf_n, extra_1,..., extra_n, [#childs, level, child_1, ...
// child_x, pos_e, neg_e], ...
double cost = 0;
// leafs
for (int i = extraStartPos; i < extraEndPos; i++) {
tree[i] = (tree[i - numLeafs] * totalElementsQ) -
(tree[i] * totalElementsP); // p_i - q_i
}
// innerNodes
for (int i = extraEndPos; i < tree.length; i++) {
int pos_e = 0;
int neg_e = 0;
final int numChilds = tree[i++];
final int level = tree[i++];
// iterate over all children
for (int j = 0; j < numChilds; j++) {
// differentiate between first level and rest
int extra = 0;
if (level == 1) {
extra = tree[tree[i + j]];
} else {
final int extra_child_index = tree[i + j] +
tree[tree[i + j]] + 2; // pointer to the pos_e of node
final int pos_child = tree[extra_child_index];
final int neg_child = tree[extra_child_index + 1];
extra = pos_child - neg_child;
}
if (extra > 0) { // positive
pos_e += extra;
} else { // negative
neg_e += (-extra);
}
}
// save extras
i += numChilds; // increment pointer to extra
tree[i++] = pos_e;
tree[i] = neg_e;
// sum
final double cost_n = (level / height) * Math.min(pos_e, neg_e);
cost += cost_n;
}
cost /= ((double) totalElementsP * (double) totalElementsQ);
// check
return cost <= t;
}
@Override
public boolean isLocalRecodingSupported() {
return true;
}
@Override
public ElementData render() {
ElementData result = new ElementData("t-Closeness");
result.addProperty("Attribute", attribute);
result.addProperty("Threshold (t)", this.t);
result.addProperty("Distance", "Hierarchical");
return result;
}
@Override
public String toString() {
return t+"-closeness with hierarchical ground-distance for attribute '"+attribute+"'";
}
}