/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx.metric; import java.io.Serializable; import java.util.Arrays; import java.util.List; import java.util.Map; import org.deidentifier.arx.ARXConfiguration; import org.deidentifier.arx.DataDefinition; import org.deidentifier.arx.DataSubset; import org.deidentifier.arx.RowSet; import org.deidentifier.arx.certificate.elements.ElementData; import org.deidentifier.arx.criteria.PrivacyCriterion; import org.deidentifier.arx.framework.check.groupify.HashGroupify; import org.deidentifier.arx.framework.check.groupify.HashGroupifyEntry; import org.deidentifier.arx.framework.data.Data; import org.deidentifier.arx.framework.data.DataManager; import org.deidentifier.arx.framework.data.GeneralizationHierarchy; import org.deidentifier.arx.framework.lattice.Transformation; import org.deidentifier.arx.metric.v2.AbstractILMultiDimensional; import org.deidentifier.arx.metric.v2.AbstractMetricMultiDimensional; import org.deidentifier.arx.metric.v2.ILSingleDimensional; import org.deidentifier.arx.metric.v2.MetricMDHeight; import org.deidentifier.arx.metric.v2.MetricMDNMLoss; import org.deidentifier.arx.metric.v2.MetricMDNMLossPotentiallyPrecomputed; import org.deidentifier.arx.metric.v2.MetricMDNMLossPrecomputed; import org.deidentifier.arx.metric.v2.MetricMDNMPrecision; import org.deidentifier.arx.metric.v2.MetricMDNUEntropy; import org.deidentifier.arx.metric.v2.MetricMDNUEntropyPotentiallyPrecomputed; import org.deidentifier.arx.metric.v2.MetricMDNUEntropyPrecomputed; import org.deidentifier.arx.metric.v2.MetricMDNUNMEntropy; import org.deidentifier.arx.metric.v2.MetricMDNUNMEntropyPotentiallyPrecomputed; import org.deidentifier.arx.metric.v2.MetricMDNUNMEntropyPrecomputed; import org.deidentifier.arx.metric.v2.MetricMDNUNMNormalizedEntropy; import org.deidentifier.arx.metric.v2.MetricMDNUNMNormalizedEntropyPotentiallyPrecomputed; import org.deidentifier.arx.metric.v2.MetricMDNUNMNormalizedEntropyPrecomputed; import org.deidentifier.arx.metric.v2.MetricMDPrecision; import org.deidentifier.arx.metric.v2.MetricSDAECS; import org.deidentifier.arx.metric.v2.MetricSDDiscernability; import org.deidentifier.arx.metric.v2.MetricSDNMAmbiguity; import org.deidentifier.arx.metric.v2.MetricSDNMDiscernability; import org.deidentifier.arx.metric.v2.MetricSDNMEntropyBasedInformationLoss; import org.deidentifier.arx.metric.v2.MetricSDNMKLDivergence; import org.deidentifier.arx.metric.v2.MetricSDNMPublisherPayout; import org.deidentifier.arx.metric.v2.__MetricV2; /** * Abstract base class for metrics. * * @author Fabian Prasser * @author Florian Kohlmayer * @param <T> */ public abstract class Metric<T extends InformationLoss<?>> implements Serializable { /** * Pluggable aggregate functions. 
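* <p>
* An aggregate function determines how the information loss of the individual quasi-identifiers is
* combined into a single value when the results of a multi-dimensional metric are compared. It is
* passed to the factory methods of this class; a minimal, illustrative sketch (using only methods
* declared below):
* <pre>{@code
* Metric<AbstractILMultiDimensional> metric = Metric.createLossMetric(AggregateFunction.GEOMETRIC_MEAN);
* }</pre>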
* * @author Fabian Prasser */ public static enum AggregateFunction implements Serializable{ /** Sum */ SUM("Sum"), /** Maximum */ MAXIMUM("Maximum"), /** Arithmetic mean */ ARITHMETIC_MEAN("Arithmetic mean"), /** Geometric mean: To handle zero values while not violating guarantees required for pruning * based on lower bounds, 1d is added to every individual value and 1d is subtracted from the * final result. */ GEOMETRIC_MEAN("Geometric mean"), /** Rank: Ordered list of values, compared lexicographically. */ RANK("Rank"); /** Name */ private String name; /** * Creates a new instance * @param name */ private AggregateFunction(String name){ this.name = name; } public String toString() { return name; } } /** TODO */ private static final long serialVersionUID = -2657745103125430229L; /** For comparisons. */ private static final double DIGITS = 10d; /** For comparisons. */ private static final double FACTOR = Math.pow(10d, DIGITS); /** * Creates a new instance of the AECS metric. * * @return */ public static Metric<ILSingleDimensional> createAECSMetric() { return __MetricV2.createAECSMetric(); } /** * Creates a new instance of the AECS metric. * * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * * @return */ public static Metric<ILSingleDimensional> createAECSMetric(double gsFactor) { return __MetricV2.createAECSMetric(gsFactor); } /** * Creates an instance of the ambiguity metric. * * @return */ public static Metric<ILSingleDimensional> createAmbiguityMetric() { return __MetricV2.createAmbiguityMetric(); } /** * Creates an instance of the discernability metric. * * @return */ public static Metric<ILSingleDimensional> createDiscernabilityMetric() { return __MetricV2.createDiscernabilityMetric(); } /** * Creates an instance of the discernability metric. The monotonic variant is DM*. * * @param monotonic If set to true, the monotonic variant (DM*) will be created * @return */ public static Metric<ILSingleDimensional> createDiscernabilityMetric(boolean monotonic) { return __MetricV2.createDiscernabilityMetric(monotonic); } /** * Creates an instance of the entropy-based information loss metric, which will treat * generalization and suppression equally. * * @return */ public static MetricSDNMEntropyBasedInformationLoss createEntropyBasedInformationLossMetric() { return __MetricV2.createEntropyBasedInformationLossMetric(0.5d); } /** * Creates an instance of the entropy-based information loss metric. * * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * * @return */ public static MetricSDNMEntropyBasedInformationLoss createEntropyBasedInformationLossMetric(double gsFactor) { return __MetricV2.createEntropyBasedInformationLossMetric(gsFactor); } /** * Creates an instance of the non-monotonic non-uniform entropy metric. The default aggregate function, * which is the sum-function, will be used for comparing results. * This metric will respect attribute weights defined in the configuration.
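* <p>
* Illustrative usage (a sketch relying only on the factory methods declared in this class):
* <pre>{@code
* // Non-monotonic non-uniform entropy with the default (sum) aggregate function
* Metric<AbstractILMultiDimensional> metric = Metric.createEntropyMetric();
*
* // Monotonic variant with an explicitly chosen aggregate function
* Metric<AbstractILMultiDimensional> monotonic = Metric.createEntropyMetric(true, AggregateFunction.MAXIMUM);
* }</pre>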
* * @return */ public static Metric<AbstractILMultiDimensional> createEntropyMetric() { return __MetricV2.createEntropyMetric(); } /** * Creates an instance of the non-uniform entropy metric. The default aggregate function, * which is the sum-function, will be used for comparing results. * This metric will respect attribute weights defined in the configuration. * * @param monotonic If set to true, the monotonic variant of the metric will be created * * @return */ public static Metric<AbstractILMultiDimensional> createEntropyMetric(boolean monotonic) { return __MetricV2.createEntropyMetric(monotonic); } /** * Creates an instance of the non-uniform entropy metric. * This metric will respect attribute weights defined in the configuration. * * @param monotonic If set to true, the monotonic variant of the metric will be created * @param function The aggregate function to be used for comparing results * * @return */ public static Metric<AbstractILMultiDimensional> createEntropyMetric(boolean monotonic, AggregateFunction function) { return __MetricV2.createEntropyMetric(monotonic, function); } /** * Creates an instance of the non-uniform entropy metric. The default aggregate function, * which is the sum-function, will be used for comparing results. * This metric will respect attribute weights defined in the configuration. * * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * * @param monotonic If set to true, the monotonic variant of the metric will be created * * @return */ public static Metric<AbstractILMultiDimensional> createEntropyMetric(boolean monotonic, double gsFactor) { return __MetricV2.createEntropyMetric(monotonic, gsFactor); } /** * Creates an instance of the non-uniform entropy metric. * This metric will respect attribute weights defined in the configuration. * * @param monotonic If set to true, the monotonic variant of the metric will be created * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * @param function The aggregate function to be used for comparing results * * @return */ public static Metric<AbstractILMultiDimensional> createEntropyMetric(boolean monotonic, double gsFactor, AggregateFunction function) { return __MetricV2.createEntropyMetric(monotonic, gsFactor, function); } /** * Creates an instance of the non-monotonic non-uniform entropy metric. The default aggregate function, * which is the sum-function, will be used for comparing results. * This metric will respect attribute weights defined in the configuration. * * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. 
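*            For illustration, based on the conversion performed in the constructor of this class:
*            a factor of 0.3 yields {@code getSuppressionFactor()} = 0.6 and
*            {@code getGeneralizationFactor()} = 1.0, while a factor of 0.8 yields
*            {@code getSuppressionFactor()} = 1.0 and {@code getGeneralizationFactor()} = 0.4.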
* @return */ public static Metric<AbstractILMultiDimensional> createEntropyMetric(double gsFactor) { return __MetricV2.createEntropyMetric(gsFactor); } /** * Creates an instance of the height metric. The default aggregate function, which is the sum-function, * will be used for comparing results. * This metric will respect attribute weights defined in the configuration. * * @return */ public static Metric<AbstractILMultiDimensional> createHeightMetric() { return __MetricV2.createHeightMetric(); } /** * Creates an instance of the height metric. * This metric will respect attribute weights defined in the configuration. * * @param function The aggregate function to use for comparing results * * @return */ public static Metric<AbstractILMultiDimensional> createHeightMetric(AggregateFunction function) { return __MetricV2.createHeightMetric(function); } /** * Creates an instance of the KL Divergence metric. * * @return */ public static Metric<ILSingleDimensional> createKLDivergenceMetric() { return __MetricV2.createKLDivergenceMetric(); } /** * Creates an instance of the loss metric which treats generalization and suppression equally. * The default aggregate function, which is the rank function, will be used. * This metric will respect attribute weights defined in the configuration. * * @return */ public static Metric<AbstractILMultiDimensional> createLossMetric() { return __MetricV2.createLossMetric(); } /** * Creates an instance of the loss metric which treats generalization and suppression equally. * This metric will respect attribute weights defined in the configuration. * * @param function The aggregate function to use for comparing results * @return */ public static Metric<AbstractILMultiDimensional> createLossMetric(AggregateFunction function) { return __MetricV2.createLossMetric(function); } /** * Creates an instance of the loss metric with factors for weighting generalization and suppression. * The default aggregate function, which is the rank function, will be used. * This metric will respect attribute weights defined in the configuration. * * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * @return */ public static Metric<AbstractILMultiDimensional> createLossMetric(double gsFactor) { return __MetricV2.createLossMetric(gsFactor); } /** * Creates an instance of the loss metric with factors for weighting generalization and suppression. * This metric will respect attribute weights defined in the configuration. * * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * * @param function The aggregate function to use for comparing results * @return */ public static Metric<AbstractILMultiDimensional> createLossMetric(double gsFactor, AggregateFunction function) { return __MetricV2.createLossMetric(gsFactor, function); } /** * This method supports backwards compatibility. It will transform implementations from version 1 to * implementations from version 2, if necessary. 
* @param metric * @param minLevel * @param maxLevel * @return */ public static Metric<?> createMetric(Metric<?> metric, int minLevel, int maxLevel) { if (metric instanceof MetricHeight) { return __MetricV2.createHeightMetric(minLevel, maxLevel); } else { return createMetric(metric); } } /** * Creates an instance of the normalized entropy metric. * The default aggregate function, which is the sum function, will be used. * This metric will respect attribute weights defined in the configuration. * * @return */ public static Metric<AbstractILMultiDimensional> createNormalizedEntropyMetric() { return __MetricV2.createNormalizedEntropyMetric(); } /** * Creates an instance of the normalized entropy metric. * This metric will respect attribute weights defined in the configuration. * * @param function The aggregate function to use for comparing results * @return */ public static Metric<AbstractILMultiDimensional> createNormalizedEntropyMetric(AggregateFunction function) { return __MetricV2.createNormalizedEntropyMetric(function); } /** * Creates an instance of the non-monotonic precision metric. * The default aggregate function, which is the arithmetic mean, will be used. * This metric will respect attribute weights defined in the configuration. * * @return */ public static Metric<AbstractILMultiDimensional> createPrecisionMetric() { return __MetricV2.createPrecisionMetric(); } /** * Creates an instance of the non-monotonic precision metric. * This metric will respect attribute weights defined in the configuration. * * @param function The aggregate function to use for comparing results * * @return */ public static Metric<AbstractILMultiDimensional> createPrecisionMetric(AggregateFunction function) { return __MetricV2.createPrecisionMetric(function); } /** * Creates an instance of the precision metric. * The default aggregate function, which is the arithmetic mean, will be used. * This metric will respect attribute weights defined in the configuration. * * @param monotonic If set to true, the monotonic variant of the metric will be created * * @return */ public static Metric<AbstractILMultiDimensional> createPrecisionMetric(boolean monotonic) { return __MetricV2.createPrecisionMetric(monotonic); } /** * Creates an instance of the precision metric. * This metric will respect attribute weights defined in the configuration. * * @param monotonic If set to true, the monotonic variant of the metric will be created * @param function * @return */ public static Metric<AbstractILMultiDimensional> createPrecisionMetric(boolean monotonic, AggregateFunction function) { return __MetricV2.createPrecisionMetric(monotonic, function); } /** * Creates an instance of the precision metric. * The default aggregate function, which is the arithmetic mean, will be used. * This metric will respect attribute weights defined in the configuration. * * @param monotonic If set to true, the monotonic variant of the metric will be created * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * * @return */ public static Metric<AbstractILMultiDimensional> createPrecisionMetric(boolean monotonic, double gsFactor) { return __MetricV2.createPrecisionMetric(monotonic, gsFactor); } /** * Creates an instance of the precision metric. 
* This metric will respect attribute weights defined in the configuration. * * @param monotonic If set to true, the monotonic variant of the metric will be created * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * @param function * @return */ public static Metric<AbstractILMultiDimensional> createPrecisionMetric(boolean monotonic, double gsFactor, AggregateFunction function) { return __MetricV2.createPrecisionMetric(monotonic, gsFactor, function); } /** * Creates an instance of the non-monotonic precision metric. * The default aggregate function, which is the arithmetic mean, will be used. * This metric will respect attribute weights defined in the configuration. * * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * @return */ public static Metric<AbstractILMultiDimensional> createPrecisionMetric(double gsFactor) { return __MetricV2.createPrecisionMetric(gsFactor); } /** * Creates an instance of the non-monotonic precision metric. * This metric will respect attribute weights defined in the configuration. * * @param function The aggregate function to use for comparing results * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * * @return */ public static Metric<AbstractILMultiDimensional> createPrecisionMetric(double gsFactor, AggregateFunction function) { return __MetricV2.createPrecisionMetric(gsFactor, function); } /** * Creates a potentially precomputed instance of the non-monotonic non-uniform entropy metric. The default aggregate function, * which is the sum-function, will be used for comparing results. * This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedEntropyMetric(double threshold) { return __MetricV2.createPrecomputedEntropyMetric(threshold); } /** * Creates a potentially precomputed instance of the non-uniform entropy metric. The default aggregate function, * which is the sum-function, will be used for comparing results. * This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * @param monotonic If set to true, the monotonic variant of the metric will be created * * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedEntropyMetric(double threshold, boolean monotonic) { return __MetricV2.createPrecomputedEntropyMetric(threshold, monotonic); } /** * Creates a potentially precomputed instance of the non-uniform entropy metric. 
* This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * @param monotonic If set to true, the monotonic variant of the metric will be created * @param function The aggregate function to be used for comparing results * * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedEntropyMetric(double threshold, boolean monotonic, AggregateFunction function) { return __MetricV2.createPrecomputedEntropyMetric(threshold, monotonic, function); } /** * Creates a potentially precomputed instance of the non-uniform entropy metric. The default aggregate function, * which is the sum-function, will be used for comparing results. * This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * @param monotonic If set to true, the monotonic variant of the metric will be created * * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedEntropyMetric(double threshold, boolean monotonic, double gsFactor) { return __MetricV2.createPrecomputedEntropyMetric(threshold, monotonic, gsFactor); } /** * Creates a potentially precomputed instance of the non-uniform entropy metric. * This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * @param monotonic If set to true, the monotonic variant of the metric will be created * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * @param function The aggregate function to be used for comparing results * * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedEntropyMetric(double threshold, boolean monotonic, double gsFactor, AggregateFunction function) { return __MetricV2.createPrecomputedEntropyMetric(threshold, monotonic, gsFactor, function); } /** * Creates a potentially precomputed instance of the non-monotonic non-uniform entropy metric. The default aggregate function, * which is the sum-function, will be used for comparing results. * This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods.
* * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedEntropyMetric(double threshold, double gsFactor) { return __MetricV2.createPrecomputedEntropyMetric(threshold, gsFactor); } /** * Creates a potentially precomputed instance of the loss metric which treats generalization * and suppression equally. * The default aggregate function, which is the rank function, will be used. * This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedLossMetric(double threshold) { return __MetricV2.createPrecomputedLossMetric(threshold); } /** * Creates a potentially precomputed instance of the loss metric which treats generalization and suppression equally. * This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * @param function The aggregate function to use for comparing results * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedLossMetric(double threshold, AggregateFunction function) { return __MetricV2.createPrecomputedLossMetric(threshold, function); } /** * Creates a potentially precomputed instance of the loss metric with factors for weighting generalization and suppression. * The default aggregate function, which is the rank function, will be used. * This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedLossMetric(double threshold, double gsFactor) { return __MetricV2.createPrecomputedLossMetric(threshold, gsFactor); } /** * Creates a potentially precomputed instance of the loss metric with factors for weighting generalization and suppression. * This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods.
* * @param function The aggregate function to use for comparing results * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedLossMetric(double threshold, double gsFactor, AggregateFunction function) { return __MetricV2.createPrecomputedLossMetric(threshold, gsFactor, function); } /** * Creates a potentially precomputed instance of the normalized entropy metric. * The default aggregate function, which is the sum function, will be used. * This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedNormalizedEntropyMetric(double threshold) { return __MetricV2.createPrecomputedNormalizedEntropyMetric(threshold); } /** * Creates a potentially precomputed instance of the normalized entropy metric. * This metric will respect attribute weights defined in the configuration. * * @param threshold The precomputed variant of the metric will be used if * #distinctValues / #rows <= threshold for all quasi-identifiers. * * @param function The aggregate function to use for comparing results * @return */ public static Metric<AbstractILMultiDimensional> createPrecomputedNormalizedEntropyMetric(double threshold, AggregateFunction function) { return __MetricV2.createPrecomputedNormalizedEntropyMetric(threshold, function); } /** * Creates an instance of the model for maximizing publisher benefit in the game-theoretic privacy * model based on a cost/benefit analysis. The model treats generalization and suppression equally. * * @param journalistAttackerModel If set to true, the journalist attacker model will be assumed, * otherwise the prosecutor model will be assumed * @return */ public static MetricSDNMPublisherPayout createPublisherPayoutMetric(boolean journalistAttackerModel) { return __MetricV2.createPublisherBenefitMetric(journalistAttackerModel, 0.5d); } /** * Creates an instance of the model for maximizing publisher benefit in the game-theoretic privacy * model based on a cost/benefit analysis. * * @param journalistAttackerModel If set to true, the journalist attacker model will be assumed, * otherwise the prosecutor model will be assumed * * @param gsFactor A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. * @return */ public static MetricSDNMPublisherPayout createPublisherPayoutMetric(boolean journalistAttackerModel, double gsFactor) { return __MetricV2.createPublisherBenefitMetric(journalistAttackerModel, gsFactor); } /** * Creates an instance of a metric with statically defined information loss. * The default aggregate function, which is the sum-function, will be used for comparing results. * This metric will respect attribute weights defined in the configuration. * * @param loss User defined information loss per attribute * * @return */ public static Metric<AbstractILMultiDimensional> createStaticMetric(Map<String, List<Double>> loss) { return __MetricV2.createStaticMetric(loss); } /** * Creates an instance of a metric with statically defined information loss. * This metric will respect attribute weights defined in the configuration.
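* <p>
* A minimal, illustrative sketch. The attribute names and loss values are made up, and the lists
* are assumed here to hold one value per generalization level (the parameter documentation below
* only states "per attribute"):
* <pre>{@code
* Map<String, List<Double>> loss = new HashMap<>();
* loss.put("age",     Arrays.asList(0.0d, 0.5d, 1.0d));
* loss.put("zipcode", Arrays.asList(0.0d, 0.25d, 0.5d, 1.0d));
* Metric<AbstractILMultiDimensional> metric =
*         Metric.createStaticMetric(loss, AggregateFunction.GEOMETRIC_MEAN);
* }</pre>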
* * @param loss User defined information loss per attribute * @param function The aggregate function to use for comparing results * * @return */ public static Metric<AbstractILMultiDimensional> createStaticMetric(Map<String, List<Double>> loss, AggregateFunction function) { return __MetricV2.createStaticMetric(loss, function); } /** * Returns a list of all available metrics for information loss. * * @return */ public static List<MetricDescription> list(){ return Arrays.asList(new MetricDescription[]{ new MetricDescription("Average equivalence class size", false, // monotonic variant supported false, // attribute weights supported true, // configurable coding model supported false, // pre-computation supported false, // aggregate function supported false){ // attacker model supported private static final long serialVersionUID = 5194477380451716051L; @Override public Metric<?> createInstance(MetricConfiguration config) { return createAECSMetric(config.getGsFactor()); } @Override public boolean isInstance(Metric<?> metric) { return (metric instanceof MetricSDAECS); } }, new MetricDescription("Discernability", true, // monotonic variant supported false, // attribute weights supported false, // configurable coding model supported false, // pre-computation supported false, // aggregate function supported false){ // attacker model supported private static final long serialVersionUID = 183842500322023095L; @Override public Metric<?> createInstance(MetricConfiguration config) { return createDiscernabilityMetric(config.isMonotonic()); } @Override public boolean isInstance(Metric<?> metric) { return (metric instanceof MetricSDDiscernability) || (metric instanceof MetricSDNMDiscernability); } }, new MetricDescription("Height", false, // monotonic variant supported true, // attribute weights supported false, // configurable coding model supported false, // pre-computation supported true, // aggregate function supported false){ // attacker model supported private static final long serialVersionUID = 9125639204133496116L; @Override public Metric<?> createInstance(MetricConfiguration config) { return createHeightMetric(config.getAggregateFunction()); } @Override public boolean isInstance(Metric<?> metric) { return (metric instanceof MetricMDHeight); } }, new MetricDescription("Loss", false, // monotonic variant supported true, // attribute weights supported true, // configurable coding model supported true, // pre-computation supported true, // aggregate function supported false){ // attacker model supported private static final long serialVersionUID = 4274885123814166707L; @Override public Metric<?> createInstance(MetricConfiguration config) { if (config.isPrecomputed()) { return createPrecomputedLossMetric(config.getPrecomputationThreshold(), config.getGsFactor(), config.getAggregateFunction()); } else { return createLossMetric(config.getGsFactor(), config.getAggregateFunction()); } } @Override public boolean isInstance(Metric<?> metric) { return (metric instanceof MetricMDNMLoss) || (metric instanceof MetricMDNMLossPrecomputed) || (metric instanceof MetricMDNMLossPotentiallyPrecomputed); } }, new MetricDescription("Non-uniform entropy", true, // monotonic variant supported true, // attribute weights supported true, // configurable coding model supported true, // pre-computation supported true, // aggregate function supported false){ // attacker model supported private static final long serialVersionUID = 2578476174209277258L; @Override public Metric<?> createInstance(MetricConfiguration config) { if 
(config.isPrecomputed()) { return createPrecomputedEntropyMetric(config.getPrecomputationThreshold(), config.isMonotonic(), config.getGsFactor(), config.getAggregateFunction()); } else { return createEntropyMetric(config.isMonotonic(), config.getGsFactor(), config.getAggregateFunction()); } } @Override public boolean isInstance(Metric<?> metric) { return ((metric instanceof MetricMDNUEntropy) || (metric instanceof MetricMDNUEntropyPrecomputed) || (metric instanceof MetricMDNUEntropyPotentiallyPrecomputed) || (metric instanceof MetricMDNUNMEntropy) || (metric instanceof MetricMDNUNMEntropyPrecomputed) || (metric instanceof MetricMDNUNMEntropyPotentiallyPrecomputed)) &&! ((metric instanceof MetricMDNUNMNormalizedEntropy) || (metric instanceof MetricMDNUNMNormalizedEntropyPrecomputed) || (metric instanceof MetricMDNUNMNormalizedEntropyPotentiallyPrecomputed)); } }, new MetricDescription("Precision", true, // monotonic variant supported true, // attribute weights supported true, // configurable coding model supported false, // pre-computation supported true, // aggregate function supported false){ // attacker model supported private static final long serialVersionUID = 2992096817427174514L; @Override public Metric<?> createInstance(MetricConfiguration config) { return createPrecisionMetric(config.isMonotonic(), config.getGsFactor(), config.getAggregateFunction()); } @Override public boolean isInstance(Metric<?> metric) { return (metric instanceof MetricMDPrecision) || (metric instanceof MetricMDNMPrecision); } }, new MetricDescription("Ambiguity", false, // monotonic variant supported false, // attribute weights supported false, // configurable coding model supported false, // pre-computation supported false, // aggregate function supported false){ // attacker model supported /** SVUID */ private static final long serialVersionUID = 3549715700376537750L; @Override public Metric<?> createInstance(MetricConfiguration config) { return createAmbiguityMetric(); } @Override public boolean isInstance(Metric<?> metric) { return (metric instanceof MetricSDNMAmbiguity); } }, new MetricDescription("Normalized non-uniform entropy", false, // monotonic variant supported true, // attribute weights supported false, // configurable coding model supported true, // pre-computation supported true, // aggregate function supported false){ // attacker model supported /** SVUID*/ private static final long serialVersionUID = 8536219303137546137L; @Override public Metric<?> createInstance(MetricConfiguration config) { if (config.isPrecomputed()) { return createPrecomputedNormalizedEntropyMetric(config.getPrecomputationThreshold(), config.getAggregateFunction()); } else { return createNormalizedEntropyMetric(config.getAggregateFunction()); } } @Override public boolean isInstance(Metric<?> metric) { return (metric instanceof MetricMDNUNMNormalizedEntropy) || (metric instanceof MetricMDNUNMNormalizedEntropyPrecomputed) || (metric instanceof MetricMDNUNMNormalizedEntropyPotentiallyPrecomputed); } }, new MetricDescription("KL-Divergence", false, // monotonic variant supported false, // attribute weights supported false, // configurable coding model supported false, // pre-computation supported false, // aggregate function supported false){ // attacker model supported /** SVUID */ private static final long serialVersionUID = 6152052294903443361L; @Override public Metric<?> createInstance(MetricConfiguration config) { return createKLDivergenceMetric(); } @Override public boolean isInstance(Metric<?> metric) { return (metric 
instanceof MetricSDNMKLDivergence); } }, new MetricDescription("Publisher payout (prosecutor)", false, // monotonic variant supported false, // attribute weights supported true, // configurable coding model supported false, // pre-computation supported false, // aggregate function supported false){ // attacker model supported /** SVUID */ private static final long serialVersionUID = 5297850895808449665L; @Override public Metric<?> createInstance(MetricConfiguration config) { return createPublisherPayoutMetric(false, config.getGsFactor()); } @Override public boolean isInstance(Metric<?> metric) { return (metric instanceof MetricSDNMPublisherPayout) && ((MetricSDNMPublisherPayout)metric).isProsecutorAttackerModel(); } }, new MetricDescription("Publisher payout (journalist)", false, // monotonic variant supported false, // attribute weights supported true, // configurable coding model supported false, // pre-computation supported false, // aggregate function supported false){ // attacker model supported /** SVUID */ private static final long serialVersionUID = -6985377052003037099L; @Override public Metric<?> createInstance(MetricConfiguration config) { return createPublisherPayoutMetric(true, config.getGsFactor()); } @Override public boolean isInstance(Metric<?> metric) { return (metric instanceof MetricSDNMPublisherPayout) && ((MetricSDNMPublisherPayout)metric).isJournalistAttackerModel(); } }, new MetricDescription("Entropy-based information loss", false, // monotonic variant supported false, // attribute weights supported true, // configurable coding model supported false, // pre-computation supported false, // aggregate function supported false){ // attacker model supported /** SVUID */ private static final long serialVersionUID = -6985377052003037099L; @Override public Metric<?> createInstance(MetricConfiguration config) { return createEntropyBasedInformationLossMetric(config.getGsFactor()); } @Override public boolean isInstance(Metric<?> metric) { return (metric instanceof MetricSDNMEntropyBasedInformationLoss); } } }); } /** * This method supports backwards compatibility. It will transform implementations from version 1 to * implementations from version 2, if necessary. 
* @param metric * @return */ private static Metric<?> createMetric(Metric<?> metric) { if (metric instanceof MetricAECS) { return __MetricV2.createAECSMetric((int)((MetricAECS)metric).getRowCount()); } else if (metric instanceof MetricDM) { return __MetricV2.createDiscernabilityMetric(false, ((MetricDM)metric).getRowCount()); } else if (metric instanceof MetricDMStar) { return __MetricV2.createDiscernabilityMetric(true, ((MetricDMStar)metric).getRowCount()); } else if (metric instanceof MetricEntropy) { return __MetricV2.createEntropyMetric(true, ((MetricEntropy)metric).getCache(), ((MetricEntropy)metric).getCardinalities(), ((MetricEntropy)metric).getHierarchies()); } else if (metric instanceof MetricNMEntropy) { return __MetricV2.createEntropyMetric(false, ((MetricEntropy)metric).getCache(), ((MetricEntropy)metric).getCardinalities(), ((MetricEntropy)metric).getHierarchies()); } else if (metric instanceof MetricNMPrecision) { return __MetricV2.createPrecisionMetric(false, ((MetricNMPrecision)metric).getHeights(), ((MetricNMPrecision)metric).getCells()); } else if (metric instanceof MetricPrecision) { return __MetricV2.createPrecisionMetric(true, ((MetricPrecision)metric).getHeights(), ((MetricPrecision)metric).getCells()); } else if (metric instanceof MetricStatic) { return __MetricV2.createStaticMetric(((MetricStatic)metric)._infoloss); } else { return metric; } } /** * Returns a description for the given metric, if there is any, null otherwise. * * @param metric * @return */ protected static MetricDescription getDescription(Metric<?> metric) { for (MetricDescription description : Metric.list()){ if (description.isInstance(metric)) { return description; } } return null; } /** Is the metric independent?. */ private boolean independent = false; /** Is the metric monotonic with generalization?. */ private Boolean monotonicWithGeneralization = true; /** Is the metric monotonic with suppression?. */ private boolean monotonic = false; /** Configuration factor. */ private final Double gFactor; /** Configuration factor. */ private final Double gsFactor; /** Configuration factor. */ private final Double sFactor; /** * Create a new metric. * * @param monotonicWithGeneralization * @param monotonicWithSuppression * @param independent * @param gsFactor */ protected Metric(final boolean monotonicWithGeneralization, final boolean monotonicWithSuppression, final boolean independent, final double gsFactor) { this.monotonicWithGeneralization = monotonicWithGeneralization; this.monotonic = monotonicWithSuppression; this.independent = independent; if (gsFactor < 0d || gsFactor > 1d) { throw new IllegalArgumentException("Parameter must be in [0, 1]"); } // A factor [0,1] weighting generalization and suppression. // The default value is 0.5, which means that generalization // and suppression will be treated equally. A factor of 0 // will favor suppression, and a factor of 1 will favor // generalization. The values in between can be used for // balancing both methods. this.gsFactor = gsFactor; // sFactor = 0 will only calculate the information loss through generalization this.sFactor = gsFactor < 0.5d ? 2d * gsFactor : 1d; // gFactor = 0 will only calculate the information loss through suppression this.gFactor = gsFactor <= 0.5d ? 1d : 1d - 2d * (gsFactor - 0.5d); } /** * Returns an instance of the highest possible score. Lower is better. * @return */ public InformationLoss<?> createInstanceOfHighestScore() { return createMaxInformationLoss(); } /** * Returns an instance of the lowest possible score. 
Lower is better. * @return */ public InformationLoss<?> createInstanceOfLowestScore() { return createMinInformationLoss(); } /** * Returns an instance of the maximal value. * * @return */ @Deprecated public abstract InformationLoss<?> createMaxInformationLoss(); /** * Returns an instance of the minimal value. * * @return */ @Deprecated public abstract InformationLoss<?> createMinInformationLoss(); /** * Returns the aggregate function of a multi-dimensional metric, or null if this metric is not multi-dimensional. * * @return */ public AggregateFunction getAggregateFunction(){ return null; } /** * Returns the configuration of this metric. * * @return */ public MetricConfiguration getConfiguration() { throw new UnsupportedOperationException(); } /** * Returns a description of this metric. * * @return */ public MetricDescription getDescription() { return Metric.getDescription(this); } /** * Returns the factor used to weight generalized values. * * @return */ public double getGeneralizationFactor() { return gFactor != null ? gFactor : 1d; } /** * Returns the factor weighting generalization and suppression. * * @return A factor [0,1] weighting generalization and suppression. * The default value is 0.5, which means that generalization * and suppression will be treated equally. A factor of 0 * will favor suppression, and a factor of 1 will favor * generalization. The values in between can be used for * balancing both methods. */ public double getGeneralizationSuppressionFactor() { return gsFactor != null ? gsFactor : 0.5d; } /** * Evaluates the metric for the given node. * * @param node The node for which to compute the information loss * @param groupify The groupify operator of the previous check * @return the information loss */ public final InformationLossWithBound<T> getInformationLoss(final Transformation node, final HashGroupify groupify) { return this.getInformationLossInternal(node, groupify); } /** * Returns the information loss that would be induced by suppressing the given entry. The loss * is not necessarily consistent with the loss that is computed by * <code>getInformationLoss(node, groupify)</code> but is guaranteed to be comparable for * different entries from the same groupify operator. * * @param entry * @return */ public final InformationLossWithBound<T> getInformationLoss(final Transformation node, final HashGroupifyEntry entry) { return this.getInformationLossInternal(node, entry); } /** * Returns a lower bound for the information loss for the given node. * This can be used to expose the results of monotonic shares of a metric, * which can significantly speed up the anonymization process. If no * such metric exists, the method returns <code>null</code>. * * @param node * @return */ @SuppressWarnings("unchecked") public T getLowerBound(final Transformation node) { if (node.getLowerBound() != null) { return (T)node.getLowerBound(); } else { return getLowerBoundInternal(node); } } /** * Returns a lower bound for the information loss for the given node. * This can be used to expose the results of monotonic shares of a metric, * which can significantly speed up the anonymization process. If no * such metric exists, the method returns <code>null</code>. * * @param node * @param groupify * @return */ @SuppressWarnings("unchecked") public T getLowerBound(final Transformation node, final HashGroupify groupify) { if (node.getLowerBound() != null) { return (T)node.getLowerBound(); } else { return getLowerBoundInternal(node, groupify); } } /** * Returns the name of the metric.
* * @return */ public String getName() { return this.toString(); } /** * Returns the factor used to weight suppressed values. * * @return */ public double getSuppressionFactor() { return sFactor != null ? sFactor : 1d; } /** * Initializes the metric. * * @param manager * @param definition * @param input * @param hierarchies * @param config */ public final void initialize(final DataManager manager, final DataDefinition definition, final Data input, final GeneralizationHierarchy[] hierarchies, final ARXConfiguration config) { initializeInternal(manager, definition, input, hierarchies, config); } /** * Returns whether this metric handles microaggregation. * @return */ public boolean isAbleToHandleMicroaggregation() { return false; } /** * Returns whether a generalization/suppression factor is supported. * @return */ public boolean isGSFactorSupported() { // TODO: This information is redundant to data in MetricConfiguration return false; } /** * Returns whether this metric is independent, i.e., whether it does not require the transformed * data or groups to determine information loss. * * @return */ public boolean isIndependent() { return independent; } /** * Returns whether this model is monotonic under the given suppression limit. * Note: The suppression limit may be relative or absolute. * * @param suppressionLimit * @return */ public final boolean isMonotonic(double suppressionLimit) { // The suppression limit may be relative or absolute, so we check against 0 to cover both call conventions. if (suppressionLimit == 0d) { return this.isMonotonicWithGeneralization(); } else { return this.isMonotonicWithSuppression(); } } /** * Returns whether the metric is monotonic when using generalization. * * @return */ public final boolean isMonotonicWithGeneralization(){ if (monotonicWithGeneralization == null) { monotonicWithGeneralization = true; } return monotonicWithGeneralization; } /** * Returns whether the metric is monotonic when using suppression. * * @return */ public final boolean isMonotonicWithSuppression() { return monotonic; } /** * Returns true if the metric is multi-dimensional. * * @return */ public final boolean isMultiDimensional(){ return (this instanceof AbstractMetricMultiDimensional); } /** * Returns whether the metric is precomputed. * @return */ public boolean isPrecomputed() { return false; } /** * Returns true if the metric is weighted. * * @return */ public final boolean isWeighted() { return (this instanceof MetricWeighted) || this.isMultiDimensional(); } /** * Renders the metric. * @return */ public abstract ElementData render(ARXConfiguration config); /** * Returns the name of the metric. * * @return */ public String toString() { return this.getClass().getSimpleName(); } /** * Evaluates the metric for the given node. * * @param node The node for which to compute the information loss * @param groupify The groupify operator of the previous check * @return the information loss */ protected abstract InformationLossWithBound<T> getInformationLossInternal(final Transformation node, final HashGroupify groupify); /** * Returns the information loss that would be induced by suppressing the given entry. The loss * is not necessarily consistent with the loss that is computed by * <code>getInformationLoss(node, groupify)</code> but is guaranteed to be comparable for * different entries from the same groupify operator.
* * @param entry * @return */ protected abstract InformationLossWithBound<T> getInformationLossInternal(final Transformation node, HashGroupifyEntry entry); /** * Returns a lower bound for the information loss for the given node. * This can be used to expose the results of monotonic shares of a metric, * which can significantly speed up the anonymization process. If no * such metric exists, simply return <code>null</code>. * * @param node * @return */ protected abstract T getLowerBoundInternal(Transformation node); /** * Returns a lower bound for the information loss for the given node. * This can be used to expose the results of monotonic shares of a metric, * which can significantly speed up the anonymization process. If no * such metric exists, simply return <code>null</code>. <br> * <br> * This variant of the method allows computing a monotonic share based on * a groupified data representation. IMPORTANT NOTE: The groups may not have * been classified correctly when the method is called, i.e., * HashGroupifyEntry.isNotOutlier may not be set correctly! * * @param node * @param groupify * @return */ protected abstract T getLowerBoundInternal(final Transformation node, final HashGroupify groupify); /** * Returns the number of records. * @param config * @param input * @return */ protected int getNumRecords(ARXConfiguration config, Data input) { if (getSubset(config) != null) { return getSubset(config).size(); } else { return input.getDataLength(); } } /** * Returns the subset. * @param config * @return */ protected RowSet getSubset(ARXConfiguration config) { for (PrivacyCriterion c : config.getPrivacyModels()) { if (c.isSubsetAvailable()) { DataSubset subset = c.getDataSubset(); if (subset != null) { return subset.getSet(); } } } return null; } /** * Implement this to initialize the metric. * * @param manager * @param definition * @param input * @param hierarchies * @param config */ protected abstract void initializeInternal(final DataManager manager, final DataDefinition definition, final Data input, final GeneralizationHierarchy[] hierarchies, final ARXConfiguration config); /** * Ignore anything but the first DIGITS digits. * * @param value * @return */ protected double round(double value) { return Math.floor(value * FACTOR) / FACTOR; } }
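/*
 * Usage sketch (editorial illustration, not part of the class): a metric created via the factory
 * methods above is registered with the anonymization configuration before running ARX. Names
 * outside of this file (ARXConfiguration.create(), setMetric(), ARXAnonymizer) are assumptions
 * about the surrounding ARX API of this version and may differ in other releases:
 *
 *   ARXConfiguration config = ARXConfiguration.create();
 *   config.setMetric(Metric.createLossMetric(0.5d, Metric.AggregateFunction.GEOMETRIC_MEAN));
 *   // config is then passed to ARXAnonymizer#anonymize(...)
 */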