/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.metric.v2;
import java.util.Arrays;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.DataDefinition;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.framework.check.distribution.DistributionAggregateFunction;
import org.deidentifier.arx.framework.check.groupify.HashGroupify;
import org.deidentifier.arx.framework.check.groupify.HashGroupifyEntry;
import org.deidentifier.arx.framework.data.Data;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.framework.data.GeneralizationHierarchy;
import org.deidentifier.arx.framework.lattice.Transformation;
import org.deidentifier.arx.metric.MetricConfiguration;
/**
* This class provides an implementation of a weighted precision metric as
* proposed in: <br>
* Sweeney, L. (2002). Achieving k-anonymity privacy protection using generalization and suppression.<br>
 * International Journal of Uncertainty, Fuzziness and Knowledge-Based Systems, 10(5), 2002.<br>
* <br>
* This metric will respect attribute weights defined in the configuration.
*
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class MetricMDNMPrecision extends AbstractMetricMultiDimensional {

    /** SVUID. */
    private static final long serialVersionUID = 7972929684708525849L;

    /** Row count of the input dataset; used to normalize each dimension's loss into [0, 1]. */
    private double rowCount;

    /** Per-attribute hierarchy heights, i.e. the maximal generalization level of each quasi-identifier. */
    private int[] heights;

    /**
     * Creates a new instance that aggregates per-dimension losses with the
     * arithmetic mean. The metric is monotonic with generalization but not
     * with suppression.
     */
    protected MetricMDNMPrecision() {
        super(true, false, false, AggregateFunction.ARITHMETIC_MEAN);
    }

    /**
     * Creates a new instance.
     *
     * @param function the aggregate function used to combine per-dimension losses
     */
    protected MetricMDNMPrecision(AggregateFunction function){
        super(true, false, false, function);
    }

    /**
     * For subclasses.
     *
     * @param monotonicWithGeneralization whether the metric is monotonic with generalization
     * @param monotonicWithSuppression whether the metric is monotonic with suppression
     * @param independent whether the metric is independent
     * @param function the aggregate function used to combine per-dimension losses
     */
    protected MetricMDNMPrecision(boolean monotonicWithGeneralization, boolean monotonicWithSuppression, boolean independent, AggregateFunction function){
        super(monotonicWithGeneralization, monotonicWithSuppression, independent, function);
    }

    /**
     * For subclasses.
     *
     * @param monotonicWithGeneralization whether the metric is monotonic with generalization
     * @param monotonicWithSuppression whether the metric is monotonic with suppression
     * @param independent whether the metric is independent
     * @param gsFactor the generalization/suppression factor
     * @param function the aggregate function used to combine per-dimension losses
     */
    protected MetricMDNMPrecision(boolean monotonicWithGeneralization, boolean monotonicWithSuppression, boolean independent, double gsFactor, AggregateFunction function){
        super(monotonicWithGeneralization, monotonicWithSuppression, independent, gsFactor, function);
    }

    /**
     * Creates a new instance with the given generalization/suppression factor,
     * aggregating per-dimension losses with the arithmetic mean.
     *
     * @param gsFactor the generalization/suppression factor
     */
    protected MetricMDNMPrecision(double gsFactor) {
        super(true, false, false, gsFactor, AggregateFunction.ARITHMETIC_MEAN);
    }

    /**
     * Creates a new instance.
     *
     * @param gsFactor the generalization/suppression factor
     * @param function the aggregate function used to combine per-dimension losses
     */
    protected MetricMDNMPrecision(double gsFactor, AggregateFunction function){
        super(true, false, false, gsFactor, function);
    }

    /**
     * Returns the configuration of this metric.
     *
     * @return a configuration describing this metric as non-monotonic and non-precomputed
     */
    public MetricConfiguration getConfiguration() {
        return new MetricConfiguration(false, // monotonic
                                       super.getGeneralizationSuppressionFactor(), // gs-factor
                                       false, // precomputed
                                       0.0d, // precomputation threshold
                                       this.getAggregateFunction() // aggregate function
                                       );
    }

    /** This metric supports microaggregated attributes. */
    @Override
    public boolean isAbleToHandleMicroaggregation() {
        return true;
    }

    /** This metric honors the generalization/suppression factor. */
    @Override
    public boolean isGSFactorSupported() {
        return true;
    }

    /** Renders this metric's properties for inclusion in a certificate. */
    @Override
    public ElementData render(ARXConfiguration config) {
        ElementData result = new ElementData("Precision");
        result.addProperty("Aggregate function", super.getAggregateFunction().toString());
        result.addProperty("Monotonic", this.isMonotonic(config.getMaxOutliers()));
        result.addProperty("Generalization factor", this.getGeneralizationFactor());
        result.addProperty("Suppression factor", this.getSuppressionFactor());
        return result;
    }

    @Override
    public String toString() {
        return "Non-monotonic precision";
    }

    /**
     * Computes the precision-based information loss for the given transformation
     * applied to the grouped data. Generalized dimensions are charged
     * (level / hierarchy height) per kept record, weighted by the generalization
     * factor; suppressed records are charged at the suppression factor.
     * Microaggregated dimensions are charged their aggregation error. Every
     * dimension is normalized by the total row count.
     */
    @Override
    protected ILMultiDimensionalWithBound getInformationLossInternal(final Transformation node, final HashGroupify g) {

        // Prepare
        int dimensions = getDimensions();
        int dimensionsGeneralized = getDimensionsGeneralized();
        int dimensionsAggregated = getDimensionsAggregated();
        int microaggregationStart = getMicroaggregationStartIndex();
        DistributionAggregateFunction[] microaggregationFunctions = getMicroaggregationFunctions();
        int[] transformation = node.getGeneralization();
        double[] result = new double[dimensions];
        double gFactor = super.getGeneralizationFactor();
        double sFactor = super.getSuppressionFactor();

        int suppressedTuples = 0;
        int unsuppressedTuples = 0;

        // For each group
        HashGroupifyEntry m = g.getFirstEquivalenceClass();
        while (m != null) {

            // Calculate number of affected records
            // if (m.count > 0) is given implicitly
            unsuppressedTuples += m.isNotOutlier ? m.count : 0;
            suppressedTuples += m.isNotOutlier ? 0 : m.count;

            // Calculate avg. error for microaggregated dimensions:
            // 'share' is this class's aggregation error scaled by its size.
            // Kept classes are weighted by the generalization factor; suppressed
            // classes are charged the full per-record error (m.count) when
            // sFactor == 1, otherwise the aggregation error plus the remaining
            // error weighted by the suppression factor.
            for (int i = 0; i < dimensionsAggregated; i++) {
                double share = (double) m.count * super.getError(microaggregationFunctions[i],
                                                                 m.distributions[microaggregationStart + i]);
                result[dimensionsGeneralized + i] += m.isNotOutlier ? share * gFactor :
                                                     (sFactor == 1d ? m.count : share + sFactor * ((double) m.count - share));
            }

            // Next group
            m = m.nextOrdered;
        }

        // Calculate precision for generalized dimensions: generalization level
        // relative to hierarchy height for kept records (weighted by gFactor),
        // plus the suppression penalty for suppressed records, normalized by
        // the total number of records.
        for (int i = 0; i<dimensionsGeneralized; i++) {
            double value = heights[i] == 0 ? 0 : (double) transformation[i] / (double) heights[i];
            result[i] += ((double)unsuppressedTuples * value) * gFactor + (double)suppressedTuples * sFactor;
            result[i] /= rowCount;
        }
        // Normalize microaggregated dimensions by the total number of records as well
        for (int i = 0; i<dimensionsAggregated; i++) {
            result[dimensionsGeneralized + i] /= rowCount;
        }

        // Return
        return new ILMultiDimensionalWithBound(createInformationLoss(result),
                                               (AbstractILMultiDimensional)getLowerBoundInternal(node).clone());
    }

    /**
     * Computes the loss for a single equivalence class. Each dimension is
     * charged the class size.
     * NOTE(review): no bound is supplied here, and the per-dimension value is
     * simply the record count — presumably a score used when evaluating
     * individual classes; confirm against the callers of this overload.
     */
    @Override
    protected ILMultiDimensionalWithBound getInformationLossInternal(Transformation node, HashGroupifyEntry entry) {
        double[] result = new double[getDimensions()];
        Arrays.fill(result, entry.count);
        return new ILMultiDimensionalWithBound(super.createInformationLoss(result));
    }

    /**
     * Computes a lower bound on the loss of the given transformation by
     * assuming that no records are suppressed: only the generalization-factor-
     * weighted precision of each generalized dimension is charged.
     */
    @Override
    protected AbstractILMultiDimensional getLowerBoundInternal(Transformation node) {
        double gFactor = super.getGeneralizationFactor();
        double[] result = new double[getDimensions()];
        final int[] transformation = node.getGeneralization();
        // Note: we ignore microaggregation, as we cannot compute a bound for it
        // this means that the according entries in the resulting array are not changed and remain 0d
        // This is not a problem, as it is OK to underestimate information loss when computing lower bounds
        for (int i = 0; i < transformation.length; i++) {
            double level = (double) transformation[i];
            result[i] += (double)(heights[i] == 0 ? 0 : (level / (double) heights[i])) * gFactor;
        }
        return createInformationLoss(result);
    }

    /**
     * Computes a lower bound on the loss; the grouped data is not needed, so
     * this delegates to the transformation-only bound.
     */
    @Override
    protected AbstractILMultiDimensional getLowerBoundInternal(Transformation node,
                                                               HashGroupify groupify) {
        return getLowerBoundInternal(node);
    }

    /**
     * For backwards compatibility only.
     *
     * @param heights per-attribute hierarchy heights
     * @param cells total number of data cells; divided by the number of
     *              attributes to recover the row count — presumably
     *              rows * attributes in legacy serialized state (TODO confirm)
     */
    protected void initialize(int[] heights, double cells){
        // TODO: Get rid of this
        super.initialize(heights.length);
        this.heights = heights;
        this.rowCount = cells / heights.length;
        double gFactor = super.getGeneralizationFactor();
        double sFactor = super.getSuppressionFactor();
        // Min and max: each dimension's loss ranges from 0 to the larger of the two factors
        double[] min = new double[heights.length];
        Arrays.fill(min, 0d);
        double[] max = new double[min.length];
        Arrays.fill(max, 1d * Math.max(gFactor, sFactor));
        setMin(min);
        setMax(max);
    }

    /**
     * Initializes this metric from the data manager: sets per-dimension
     * bounds, stores the row count, and derives each attribute's hierarchy
     * height from its generalization hierarchy.
     */
    @Override
    protected void initializeInternal(final DataManager manager,
                                      final DataDefinition definition,
                                      final Data input,
                                      final GeneralizationHierarchy[] hierarchies,
                                      final ARXConfiguration config) {
        super.initializeInternal(manager, definition, input, hierarchies, config);
        double gFactor = super.getGeneralizationFactor();
        double sFactor = super.getSuppressionFactor();

        // Min and max: each dimension's loss ranges from 0 to the larger of the two factors
        double[] min = new double[super.getDimensions()];
        Arrays.fill(min, 0d);
        double[] max = new double[min.length];
        Arrays.fill(max, 1d * Math.max(gFactor, sFactor));
        setMin(min);
        setMax(max);

        // Store row count
        rowCount = (double)super.getNumRecords(config, input);

        // Store heights: number of levels in each hierarchy minus one (level 0 is the original value)
        this.heights = new int[hierarchies.length];
        for (int j = 0; j < heights.length; j++) {
            heights[j] = hierarchies[j].getArray()[0].length - 1;
        }
    }
}