/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.deidentifier.arx.metric.v2;

import java.util.Arrays;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.DataDefinition;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.framework.check.distribution.DistributionAggregateFunction;
import org.deidentifier.arx.framework.check.groupify.HashGroupify;
import org.deidentifier.arx.framework.check.groupify.HashGroupifyEntry;
import org.deidentifier.arx.framework.data.Data;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.framework.data.GeneralizationHierarchy;
import org.deidentifier.arx.framework.lattice.Transformation;
import org.deidentifier.arx.metric.MetricConfiguration;

/**
* This class implements a variant of the Loss metric.
*
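* Usage sketch (illustrative only; instances are typically obtained via the {@code Metric}
* factory methods rather than constructed directly):
* <pre>
* // Treat generalization and suppression equally and aggregate per-dimension
* // loss with the geometric mean
* MetricMDNMLoss loss = new MetricMDNMLoss(0.5d, AggregateFunction.GEOMETRIC_MEAN);
* </pre>
*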
* @author Fabian Prasser
*/
public class MetricMDNMLoss extends AbstractMetricMultiDimensional {
/** SUID. */
private static final long serialVersionUID = -573670902335136600L;
/** Total number of tuples; depends on the existence of a research subset. */
private double tuples;
/** Domain shares for each dimension. */
private DomainShare[] shares;
/** TODO: Kept for backward compatibility. Remove when re-implemented. */
private final double gFactor;
/** TODO: Kept for backward compatibility. Remove when re-implemented. */
private final double gsFactor;
/** TODO: Kept for backward compatibility. Remove when re-implemented. */
private final double sFactor;
/**
* Default constructor which treats generalization and suppression equally and aggregates
* per-dimension loss with the geometric mean.
*/
public MetricMDNMLoss(){
this(0.5d, AggregateFunction.GEOMETRIC_MEAN);
}
/**
* A constructor that treats generalization and suppression equally and uses the given
* aggregate function.
*
* @param function The aggregate function
*/
public MetricMDNMLoss(AggregateFunction function){
this(0.5d, function);
}
/**
* A constructor that allows a factor weighting generalization against suppression to be specified.
*
* @param gsFactor A factor in [0, 1] weighting generalization and suppression.
* The default value is 0.5, which means that generalization
* and suppression will be treated equally. A factor of 0
* will favor suppression, and a factor of 1 will favor
* generalization. The values in between can be used for
* balancing both methods.
* @param function The aggregate function
*/
public MetricMDNMLoss(double gsFactor, AggregateFunction function){
super(true, false, false, function);
if (gsFactor < 0d || gsFactor > 1d) {
throw new IllegalArgumentException("Parameter must be in [0, 1]");
}
this.gsFactor = gsFactor;
this.sFactor = gsFactor < 0.5d ? 2d * gsFactor : 1d;
this.gFactor = gsFactor <= 0.5d ? 1d : 1d - 2d * (gsFactor - 0.5d);
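// Illustration of the resulting weights: gsFactor = 0.0 yields gFactor = 1, sFactor = 0
// (suppressed records cost no more than generalized ones); gsFactor = 0.5 yields
// gFactor = 1, sFactor = 1 (the default); gsFactor = 1.0 yields gFactor = 0, sFactor = 1
// (generalization incurs no loss).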
}
/**
* Returns the configuration of this metric.
*
* @return The metric configuration
*/
public MetricConfiguration getConfiguration() {
return new MetricConfiguration(false, // monotonic
gsFactor, // gs-factor
false, // precomputed
0.0d, // precomputation threshold
this.getAggregateFunction() // aggregate function
);
}
@Override
// TODO: We must override this for backward compatibility. Remove when re-implemented.
public double getGeneralizationFactor() {
return gFactor;
}
@Override
// TODO: We must override this for backward compatibility. Remove when re-implemented.
public double getGeneralizationSuppressionFactor() {
return gsFactor;
}
@Override
public String getName() {
return "Loss";
}
@Override
// TODO: We must override this for backward compatibility. Remove when re-implemented.
public double getSuppressionFactor() {
return sFactor;
}
@Override
public boolean isAbleToHandleMicroaggregation() {
return true;
}
@Override
public boolean isGSFactorSupported() {
return true;
}
@Override
public ElementData render(ARXConfiguration config) {
ElementData result = new ElementData("Loss");
result.addProperty("Aggregate function", super.getAggregateFunction().toString());
result.addProperty("Monotonic", this.isMonotonic(config.getMaxOutliers()));
result.addProperty("Generalization factor", this.getGeneralizationFactor());
result.addProperty("Suppression factor", this.getSuppressionFactor());
return result;
}
@Override
public String toString() {
return "Loss ("+gsFactor+"/"+gFactor+"/"+sFactor+")";
}
@Override
protected ILMultiDimensionalWithBound getInformationLossInternal(Transformation node, HashGroupify g) {
// Prepare
int dimensions = getDimensions();
int dimensionsGeneralized = getDimensionsGeneralized();
int dimensionsAggregated = getDimensionsAggregated();
int microaggregationStart = getMicroaggregationStartIndex();
DistributionAggregateFunction[] microaggregationFunctions = getMicroaggregationFunctions();
int[] transformation = node.getGeneralization();
double[] result = new double[dimensions];
double[] bound = new double[dimensions];
// Compute information loss and lower bound
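// For each equivalence class, every generalized attribute contributes count * domain share
// (weighted by gFactor) if the class is not suppressed; for suppressed classes the
// contribution is interpolated between that share and the maximal loss (count) via sFactor.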
HashGroupifyEntry m = g.getFirstEquivalenceClass();
while (m != null) {
if (m.count>0) {
for (int dimension=0; dimension<dimensionsGeneralized; dimension++){
int value = m.key[dimension];
int level = transformation[dimension];
double share = (double)m.count * shares[dimension].getShare(value, level);
result[dimension] += m.isNotOutlier ? share * gFactor :
(sFactor == 1d ? m.count : share + sFactor * ((double)m.count - share));
bound[dimension] += share * gFactor;
}
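// Microaggregated attributes are handled analogously, using the error induced by the
// aggregate function instead of a domain share.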
for (int dimension=0; dimension<dimensionsAggregated; dimension++){
double share = (double)m.count * super.getError(microaggregationFunctions[dimension],
m.distributions[microaggregationStart + dimension]);
result[dimensionsGeneralized + dimension] += m.isNotOutlier ? share * gFactor :
(sFactor == 1d ? m.count : share + sFactor * ((double)m.count - share));
// Note: no bound is computed for microaggregation, as we cannot derive one.
// The corresponding entries in the bound array therefore remain 0d, which is
// acceptable, because lower bounds may underestimate the actual information loss.
}
}
m = m.nextOrdered;
}
// Normalize
for (int dimension=0; dimension<dimensionsGeneralized; dimension++){
result[dimension] = normalizeGeneralized(result[dimension], dimension);
bound[dimension] = normalizeGeneralized(bound[dimension], dimension);
}
// Normalize
for (int dimension=dimensionsGeneralized; dimension<dimensionsGeneralized + dimensionsAggregated; dimension++){
result[dimension] = normalizeAggregated(result[dimension]);
}
// Return information loss and lower bound
return new ILMultiDimensionalWithBound(super.createInformationLoss(result),
super.createInformationLoss(bound));
}
@Override
protected ILMultiDimensionalWithBound getInformationLossInternal(Transformation node, HashGroupifyEntry entry) {
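// Note: unlike the groupify-based variant above, this computes the raw, un-normalized loss
// of a single equivalence class and does not apply the generalization/suppression factors.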
// Init
int dimensions = getDimensions();
int dimensionsGeneralized = getDimensionsGeneralized();
int dimensionsAggregated = getDimensionsAggregated();
int microaggregationStart = getMicroaggregationStartIndex();
DistributionAggregateFunction[] microaggregationFunctions = getMicroaggregationFunctions();
double[] result = new double[dimensions];
int[] transformation = node.getGeneralization();
// Compute
for (int dimension = 0; dimension < dimensionsGeneralized; dimension++) {
int value = entry.key[dimension];
int level = transformation[dimension];
result[dimension] = (double) entry.count * shares[dimension].getShare(value, level);
}
// Compute
for (int dimension=0; dimension<dimensionsAggregated; dimension++){
result[dimensionsGeneralized + dimension] = (double)entry.count * super.getError(microaggregationFunctions[dimension],
entry.distributions[microaggregationStart + dimension]);
}
// Return
return new ILMultiDimensionalWithBound(super.createInformationLoss(result));
}
@Override
protected AbstractILMultiDimensional getLowerBoundInternal(Transformation node) {
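// No lower bound is derived from the transformation alone for this metric;
// returning null signals that no such bound is provided.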
return null;
}
@Override
protected AbstractILMultiDimensional getLowerBoundInternal(Transformation node, HashGroupify g) {
// Prepare
int dimensions = getDimensions();
int dimensionsGeneralized = getDimensionsGeneralized();
int[] transformation = node.getGeneralization();
double[] bound = new double[dimensions];
// Compute lower bound
HashGroupifyEntry m = g.getFirstEquivalenceClass();
while (m != null) {
if (m.count>0) {
for (int dimension=0; dimension<dimensionsGeneralized; dimension++){
int value = m.key[dimension];
int level = transformation[dimension];
double share = (double)m.count * shares[dimension].getShare(value, level);
bound[dimension] += share * gFactor;
}
// Note: microaggregation is ignored, as no bound can be computed for it.
// The corresponding entries in the bound array therefore remain 0d, which is
// acceptable, because lower bounds may underestimate the actual information loss.
}
m = m.nextOrdered;
}
// Normalize
for (int dimension=0; dimension<dimensionsGeneralized; dimension++){
bound[dimension] = normalizeGeneralized(bound[dimension], dimension);
}
// Return
return super.createInformationLoss(bound);
}
/**
* Returns the domain shares, for use by subclasses.
*
* @return The domain shares
*/
protected DomainShare[] getShares(){
return this.shares;
}
@Override
protected void initializeInternal(final DataManager manager,
final DataDefinition definition,
final Data input,
final GeneralizationHierarchy[] hierarchies,
final ARXConfiguration config) {
// Prepare weights
super.initializeInternal(manager, definition, input, hierarchies, config);
// Determine total number of tuples
this.tuples = (double)super.getNumRecords(config, input);
// Save domain shares
this.shares = manager.getDomainShares();
// Min and max
double[] min = new double[getDimensions()];
Arrays.fill(min, 0d);
double[] max = new double[getDimensions()];
Arrays.fill(max, 1d);
super.setMin(min);
super.setMax(max);
}
/**
* Normalizes the aggregated loss of a microaggregated dimension.
*
* @param aggregate The aggregated loss
* @return The normalized value
*/
protected double normalizeAggregated(double aggregate) {
double result = aggregate / tuples;
result = result >= 0d ? result : 0d;
return round(result);
}
/**
* Normalizes the aggregated loss of a generalized dimension.
*
* @param aggregate The aggregated loss
* @param dimension The dimension
* @return The normalized value
*/
protected double normalizeGeneralized(double aggregate, int dimension) {
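// Rescale to [0, 1]: the minimum corresponds to all values remaining at level 0 (each record
// contributing roughly gFactor / domain size), the maximum to every record contributing a
// loss of 1 (e.g., complete suppression).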
double min = gFactor * tuples / shares[dimension].getDomainSize();
double max = tuples;
double result = (aggregate - min) / (max - min);
result = result >= 0d ? result : 0d;
return round(result);
}
}