/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.metric.v2;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.DataDefinition;
import org.deidentifier.arx.RowSet;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.framework.check.groupify.HashGroupify;
import org.deidentifier.arx.framework.data.Data;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.framework.data.GeneralizationHierarchy;
import org.deidentifier.arx.framework.lattice.Transformation;
import org.deidentifier.arx.metric.MetricConfiguration;
/**
* This class implements a variant of the Loss metric.
* TODO: Add reference.
*
* @author Fabian Prasser
*/
public class MetricMDNMLossPrecomputed extends MetricMDNMLoss {
/** SUID. */
private static final long serialVersionUID = -7505441444551612996L;
/** Cardinalities. */
private Cardinalities cardinalities;
/** Distinct values: attribute -> level -> values. */
private int[][][] values;
/**
* Creates a new instance.
*/
protected MetricMDNMLossPrecomputed() {
super();
}
/**
* Creates a new instance.
*
* @param function
*/
protected MetricMDNMLossPrecomputed(AggregateFunction function) {
super(function);
}
/**
* Creates a new instance.
*
* @param gsFactor
* @param function
*/
protected MetricMDNMLossPrecomputed(double gsFactor,
AggregateFunction function) {
super(gsFactor, function);
}
/**
* Returns the configuration of this metric.
*
* @return
*/
public MetricConfiguration getConfiguration() {
return new MetricConfiguration(false, // monotonic
super.getGeneralizationSuppressionFactor(), // gs-factor
true, // precomputed
1.0d, // precomputation threshold
this.getAggregateFunction() // aggregate function
);
}
@Override
public boolean isAbleToHandleMicroaggregation() {
return true;
}
@Override
public boolean isGSFactorSupported() {
return true;
}
@Override
public boolean isPrecomputed() {
return true;
}
@Override
public ElementData render(ARXConfiguration config) {
ElementData result = new ElementData("Loss");
result.addProperty("Aggregate function", super.getAggregateFunction().toString());
result.addProperty("Monotonic", this.isMonotonic(config.getMaxOutliers()));
result.addProperty("Generalization factor", this.getGeneralizationFactor());
result.addProperty("Suppression factor", this.getSuppressionFactor());
return result;
}
@Override
protected AbstractILMultiDimensional getLowerBoundInternal(Transformation node) {
// Prepare
int dimensions = getDimensions();
int dimensionsGeneralized = getDimensionsGeneralized();
int[] transformation = node.getGeneralization();
double[] bound = new double[dimensions];
DomainShare[] shares = super.getShares();
double gFactor = super.getGeneralizationFactor();
// Column -> Id -> Level -> Count
int[][][] cardinalities = this.cardinalities.getCardinalities();
// For each column
for (int column = 0; column < dimensionsGeneralized; column++) {
// Check for cached value
int level = transformation[column];
int[][] cardinality = cardinalities[column];
int[] values = this.values[column][level];
for (int value : values) {
double count = cardinality[value][level];
double share = count * shares[column].getShare(value, level);
bound[column] += share * gFactor;
}
}
// Note: we ignore microaggregation, as we cannot compute a bound for it
// this means that the according entries in the resulting array are not changed and remain 0d
// This is not a problem, as it is OK to underestimate information loss when computing lower bounds
// Normalize
for (int column=0; column<dimensionsGeneralized; column++){
bound[column] = normalizeGeneralized(bound[column], column);
}
// Return
return super.createInformationLoss(bound);
}
@Override
protected AbstractILMultiDimensional getLowerBoundInternal(Transformation node, HashGroupify g) {
return this.getLowerBoundInternal(node);
}
@Override
protected void initializeInternal(final DataManager manager,
final DataDefinition definition,
final Data input,
final GeneralizationHierarchy[] hierarchies,
final ARXConfiguration config) {
// Prepare super
super.initializeInternal(manager, definition, input, hierarchies, config);
// Compute cardinalities
RowSet subset = super.getSubset(config);
// Cardinalities
this.cardinalities = new Cardinalities(input, subset, hierarchies);
// Distinct values
this.values = new int[hierarchies.length][][];
for (int i=0; i<values.length; i++) {
values[i] = new int[hierarchies[i].getHeight()][];
for (int j=0; j<values[i].length; j++){
values[i][j] = hierarchies[i].getDistinctValues(j);
}
}
}
}