/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx.metric.v2; import java.util.Arrays; import org.deidentifier.arx.ARXConfiguration; import org.deidentifier.arx.DataDefinition; import org.deidentifier.arx.framework.check.distribution.Distribution; import org.deidentifier.arx.framework.check.distribution.DistributionAggregateFunction; import org.deidentifier.arx.framework.data.Data; import org.deidentifier.arx.framework.data.DataManager; import org.deidentifier.arx.framework.data.GeneralizationHierarchy; import org.deidentifier.arx.metric.InformationLoss; import org.deidentifier.arx.metric.Metric; /** * This class provides an abstract skeleton for the implementation of multi-dimensional metrics. * * @author Fabian Prasser * @author Florian Kohlmayer */ public abstract class AbstractMetricMultiDimensional extends Metric<AbstractILMultiDimensional> { /** SVUID. */ private static final long serialVersionUID = 3909752748519119689L; /** The weights. */ private double[] weights; /** Number of dimensions. */ private int dimensions; /** Number of dimensions with generalization */ private int dimensionsGeneralized; /** Number of dimensions with aggregation */ private int dimensionsAggregated; /** Min. */ private double[] min; /** Max. */ private double[] max; /** The aggregate function. */ private AggregateFunction function; /** The microaggregation functions. */ private DistributionAggregateFunction[] microaggregationFunctions; /** The start index of the attributes with microaggregation in the data array */ private int microaggregationStartIndex; /** Should the mean squared error be used */ private boolean microaggregationUseMeanSquaredError; /** Header of the microaggregated data subset */ private String[] microaggregationHeader; /** * Creates a new instance. * * @param monotonicWithGeneralization * @param monotonicWithSuppression * @param independent * @param function */ AbstractMetricMultiDimensional(final boolean monotonicWithGeneralization, final boolean monotonicWithSuppression, final boolean independent, final AggregateFunction function) { super(monotonicWithGeneralization, monotonicWithSuppression, independent, 0.5d); this.function = function; } /** * Creates a new instance. * * @param monotonicWithGeneralization * @param monotonicWithSuppression * @param independent * @param gsFactor * @param function */ AbstractMetricMultiDimensional(final boolean monotonicWithGeneralization, final boolean monotonicWithSuppression, final boolean independent, final double gsFactor, final AggregateFunction function) { super(monotonicWithGeneralization, monotonicWithSuppression, independent, gsFactor); this.function = function; } @Override public InformationLoss<?> createMaxInformationLoss() { if (max == null) { throw new IllegalStateException("Metric must be initialized first"); } else { return createInformationLoss(max); } } @Override public InformationLoss<?> createMinInformationLoss() { if (min == null) { throw new IllegalStateException("Metric must be intialized first. "+this.getClass().getSimpleName()); } else { return createInformationLoss(min); } } @Override public AggregateFunction getAggregateFunction() { return this.function; } /** * Helper method for creating information loss. * * @param values * @return */ protected AbstractILMultiDimensional createInformationLoss(double[] values){ switch (function){ case ARITHMETIC_MEAN: return new ILMultiDimensionalArithmeticMean(values, weights); case GEOMETRIC_MEAN: return new ILMultiDimensionalGeometricMean(values, weights); case MAXIMUM: return new ILMultiDimensionalMax(values, weights); case RANK: return new ILMultiDimensionalRank(values, weights); case SUM: return new ILMultiDimensionalSum(values, weights); default: throw new IllegalStateException("Unknown aggregate function: "+function); } } /** * Helper method for creating information loss. * * @param values * @param bound * @return */ protected ILMultiDimensionalWithBound createInformationLossWithBound(double[] values, double[] bound){ return new ILMultiDimensionalWithBound(createInformationLoss(values), createInformationLoss(bound)); } /** * Helper method for creating information loss. * * @param values * @return */ protected ILMultiDimensionalWithBound createInformationLossWithoutBound(double[] values){ return new ILMultiDimensionalWithBound(createInformationLoss(values)); } /** * Returns the aggregate functions used for microaggregation * @return */ protected DistributionAggregateFunction[] getAggregateFunctions() { return this.microaggregationFunctions; } /** * Returns the number of dimensions. * * @return */ protected int getDimensions() { return dimensions; } /** * Returns the number of dimensions. * * @return */ protected int getDimensionsAggregated() { return dimensionsAggregated; } /** * Returns the number of dimensions. * * @return */ protected int getDimensionsGeneralized() { return dimensionsGeneralized; } /** * Returns the error induced by aggregating values in the distribution * @param function * @param distribution * @return */ protected double getError(DistributionAggregateFunction function, Distribution distribution) { if (this.microaggregationUseMeanSquaredError) { return function.getError(distribution); } else { return function.getInformationLoss(distribution); } } /** * Needed for microaggregation * @return */ protected DistributionAggregateFunction[] getMicroaggregationFunctions() { return microaggregationFunctions; } /** * Needed for microaggregation * @return */ protected int getMicroaggregationStartIndex() { return microaggregationStartIndex; } /** * For backwards compatibility only. * * @param dimensions */ protected void initialize(int dimensions){ this.weights = new double[dimensions]; Arrays.fill(weights, 1d); this.dimensions = dimensions; } @Override protected void initializeInternal(final DataManager manager, final DataDefinition definition, final Data input, final GeneralizationHierarchy[] hierarchies, final ARXConfiguration config) { // Handle microaggregation this.microaggregationFunctions = manager.getMicroaggregationFunctions(); this.microaggregationStartIndex = manager.getMicroaggregationStartIndex(); this.microaggregationHeader = manager.getMicroaggregationHeader(); this.microaggregationUseMeanSquaredError = config.isUtilityBasedMicroaggregationUseMeanSquaredError(); if (!config.isUtilityBasedMicroaggregation() || !isAbleToHandleMicroaggregation()) { this.microaggregationFunctions = new DistributionAggregateFunction[0]; } // Initialize dimensions this.dimensionsGeneralized = hierarchies.length; this.dimensionsAggregated = this.microaggregationFunctions.length; this.dimensions = this.dimensionsGeneralized + this.dimensionsAggregated; // Initialize weights this.weights = new double[this.dimensions]; double maximum = 0d; for (int i = 0; i < this.dimensionsGeneralized; i++) { String attribute = hierarchies[i].getName(); double weight = config.getAttributeWeight(attribute); this.weights[i] = weight; maximum = Math.max(maximum, weight); } for (int i = 0; i < this.dimensionsAggregated; i++) { String attribute = this.microaggregationHeader[i]; double weight = config.getAttributeWeight(attribute); this.weights[this.dimensionsGeneralized + i] = weight; maximum = Math.max(maximum, weight); } // Normalize: default case if (maximum == 0d) { Arrays.fill(this.weights, 1d); // Weighted case } else { for (int i=0; i<this.weights.length; i++){ this.weights[i] /= maximum; } } // Min and max this.min = new double[this.dimensions]; Arrays.fill(min, 0d); this.max = new double[this.dimensions]; Arrays.fill(max, Double.MAX_VALUE); } /** * Sets the maximal information loss. * * @param max */ protected void setMax(double[] max) { if (max.length != dimensions) { throw new IllegalArgumentException("Invalid number of dimensions"); } this.max = max; } /** * Sets the minimal information loss. * * @param min */ protected void setMin(double[] min) { if (min.length != dimensions) { throw new IllegalArgumentException("Invalid number of dimensions"); } this.min = min; } }