/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.criteria.BasicBLikeness;
import org.deidentifier.arx.criteria.DDisclosurePrivacy;
import org.deidentifier.arx.criteria.DPresence;
import org.deidentifier.arx.criteria.EDDifferentialPrivacy;
import org.deidentifier.arx.criteria.EnhancedBLikeness;
import org.deidentifier.arx.criteria.KAnonymity;
import org.deidentifier.arx.criteria.KMap;
import org.deidentifier.arx.criteria.LDiversity;
import org.deidentifier.arx.criteria.PrivacyCriterion;
import org.deidentifier.arx.criteria.ProfitabilityJournalist;
import org.deidentifier.arx.criteria.ProfitabilityJournalistNoAttack;
import org.deidentifier.arx.criteria.ProfitabilityProsecutor;
import org.deidentifier.arx.criteria.ProfitabilityProsecutorNoAttack;
import org.deidentifier.arx.criteria.SampleBasedCriterion;
import org.deidentifier.arx.criteria.TCloseness;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.metric.Metric;
import org.deidentifier.arx.metric.MetricConfiguration;
/**
* A generic configuration for the ARX anonymizer.
*
* @author Fabian Prasser
*/
public class ARXConfiguration implements Serializable, Cloneable {
// TODO: While in use, this configuration object should be locked, similar to, e.g., DataDefinition
/**
* Class for internal use that provides access to more parameters and functionality.
* Every method forwards to the wrapped {@link ARXConfiguration}; several of the
* forwarded methods are only declared with protected visibility on that class.
*/
public static class ARXConfigurationInternal {
/** The wrapped object. */
private final ARXConfiguration config;
/**
* Creates a new instance.
*
* @param config the configuration to which all calls are forwarded
*/
protected ARXConfigurationInternal(ARXConfiguration config){
this.config = config;
}
/**
* Returns whether the wrapped configuration contains a privacy model of the given class.
*
* @param clazz the class to look for
* @return the result of the wrapped configuration's method of the same name
* @see org.deidentifier.arx.ARXConfiguration#isPrivacyModelSpecified(java.lang.Class)
*/
public boolean isPrivacyModelSpecified(Class<? extends PrivacyCriterion> clazz) {
return config.isPrivacyModelSpecified(clazz);
}
/**
* Returns the maximum number of allowed outliers.
*
* @return the absolute outlier limit stored in the wrapped configuration
*/
public final int getAbsoluteMaxOutliers() {
return config.getAbsoluteMaxOutliers();
}
/**
* Returns all class-based criteria (except k-anonymity) as an array.
* Only used internally. If k-anonymity is included the minimal
* group size should be obtained and enforced
* @return the array returned by the wrapped configuration's getPrivacyModelsAsArray()
*/
public PrivacyCriterion[] getClassBasedPrivacyModelsAsArray() {
return config.getPrivacyModelsAsArray();
}
/**
* Returns all criteria.
* @return the set returned by the wrapped configuration's getPrivacyModels()
*/
public Set<PrivacyCriterion> getPrivacyModels() {
return config.getPrivacyModels();
}
/**
* Returns the privacy model of the given class, as determined by the wrapped configuration.
*
* @param <T> the type of the privacy model
* @param clazz the class to look for
* @return the result of the wrapped configuration's method of the same name
* @see org.deidentifier.arx.ARXConfiguration#getPrivacyModel(java.lang.Class)
*/
public <T extends PrivacyCriterion> T getPrivacyModel(Class<T> clazz) {
return config.getPrivacyModel(clazz);
}
/**
* Returns the max relative number of outliers.
*
* @return the relative outlier limit of the wrapped configuration
*/
public double getMaxOutliers() {
return config.getMaxOutliers();
}
/**
* Returns the quality model to be used for optimizing output data.
*
* @return the metric of the wrapped configuration
*/
public Metric<?> getQualityModel() {
return config.getQualityModel();
}
/**
* Returns the minimal size of an equivalence class induced by the contained criteria.
* @return the value computed by the wrapped configuration's getMinimalGroupSize(), which
* is Integer.MAX_VALUE if no contained criterion provides a minimal class size
*/
public int getMinimalGroupSize() {
return config.getMinimalGroupSize();
}
/**
* Returns the monotonicity of the configured privacy models.
* @return the result of the wrapped configuration's getMonotonicityOfPrivacy()
*/
public Monotonicity getMonotonicityOfPrivacy() {
return config.getMonotonicityOfPrivacy();
}
/**
* Returns the monotonicity of the configured utility measure.
* @return the result of the wrapped configuration's getMonotonicityOfUtility()
*/
public Monotonicity getMonotonicityOfUtility() {
return config.getMonotonicityOfUtility();
}
/**
* Returns the criteria's requirements.
*
* @return the requirements value of the wrapped configuration
*/
public int getRequirements() {
return config.getRequirements();
}
/**
* Returns all sample-based criteria as an array.
* @return the array held by the wrapped configuration
*/
public SampleBasedCriterion[] getSampleBasedPrivacyModelsAsArray() {
return config.getSampleBasedPrivacyModelsAsArray();
}
/**
* Returns the specific length of each entry in a snapshot.
*
* @return the snapshot length of the wrapped configuration
*/
public int getSnapshotLength() {
return config.getSnapshotLength();
}
/**
* Returns the data subset, if any
* @return the subset reported by the wrapped configuration, or null if it reports none
*/
public DataSubset getSubset() {
return config.getSubset();
}
/**
* Returns an integer representing all attribute types that must be suppressed.
*
* @return the value returned by the wrapped configuration's getSuppressedAttributeTypes()
*/
public int getSuppressedAttributeTypes() {
return config.getSuppressedAttributeTypes();
}
/**
* Is practical monotonicity assumed.
*
* @return the flag of the wrapped configuration
*/
public boolean isPracticalMonotonicity() {
return config.isPracticalMonotonicity();
}
/**
* Returns whether suppression is applied to the output of anonymous as
* well as non-anonymous transformations. If this flag is set to true,
* suppression will be applied to the output of non-anonymous transformations
* to make them anonymous (if possible). Default is true.
* @return the flag of the wrapped configuration
*/
public boolean isSuppressionAlwaysEnabled() {
return config.isSuppressionAlwaysEnabled();
}
/**
* Returns whether a heuristic search is used for sample-based criteria.
* Note that the wrapped method is named isUseHeuristicSearchForSampleBasedCriteria().
* @return the flag of the wrapped configuration
*/
public boolean isUseHeuristicForSampleBasedCriteria() {
return config.isUseHeuristicSearchForSampleBasedCriteria();
}
/**
* Convenience method for checking the requirements.
*
* @param requirement the requirement to check, passed through unchanged
* @return the result of the wrapped configuration's requires(int)
*/
public boolean requires(int requirement) {
return config.requires(requirement);
}
}
/**
* Monotonicity. The values are returned by getMonotonicityOfPrivacy() and
* getMonotonicityOfUtility() of the enclosing configuration.
*/
public static enum Monotonicity {
/** Fully monotonic */
FULL,
/** Partially monotonic */
PARTIAL,
/** Non-monotonic */
NONE
}
/** Do the criteria require a counter per equivalence class. Uses the single bit 0x1. */
public static final int REQUIREMENT_COUNTER = 0x1;
/** Do the criteria require distributions of sensitive values in the equivalence classes. Uses the single bit 0x4. */
public static final int REQUIREMENT_DISTRIBUTION = 0x4;
/** Do the criteria require a second counter. Uses the single bit 0x2. */
public static final int REQUIREMENT_SECONDARY_COUNTER = 0x2;
/** For serialization. */
private static final long serialVersionUID = -6713510386735241964L;
/**
* Creates a new configuration without tuple suppression.
*
* @return a new configuration created with the no-argument constructor
*/
public static ARXConfiguration create() {
return new ARXConfiguration();
}
/**
* Creates a new configuration that allows the given percentage of outliers and
* thus implements tuple suppression.
*
* @param suppressionLimit the relative suppression limit; the constructor called here
* rejects values below 0 and values of 1 or more
* @return a new configuration with the given suppression limit
*/
public static ARXConfiguration create(double suppressionLimit) {
return new ARXConfiguration(suppressionLimit);
}
/**
* Creates a new configuration that allows the given percentage of outliers and
* thus implements tuple suppression. Defines the metric for measuring information loss.
*
* @param suppressionLimit the relative suppression limit; the constructor called here
* rejects values below 0 and values above 1
* @param metric the metric to use; the constructor called here rejects null
* @return a new configuration with the given suppression limit and metric
*/
public static ARXConfiguration create(double suppressionLimit, Metric<?> metric) {
return new ARXConfiguration(suppressionLimit, metric);
}
/**
* Creates a new configuration that allows to define the metric for measuring information loss.
*
* @param metric the metric to use; the constructor called here rejects null
* @return a new configuration with the given metric
*/
public static ARXConfiguration create(Metric<?> metric) {
return new ARXConfiguration(metric);
}
/** Absolute tuple outliers. */
private int absMaxOutliers = 0;
/** Criteria as an array, exposed via getPrivacyModelsAsArray(). */
private PrivacyCriterion[] aCriteria = new PrivacyCriterion[0];
/** Sample-based criteria as an array, exposed via getSampleBasedPrivacyModelsAsArray(). */
private SampleBasedCriterion[] bCriteria = new SampleBasedCriterion[0];
/** A map of weights per attribute. Initially null; the accessors create the map on demand. */
private Map<String, Double> attributeWeights = null;
/** The criteria. */
private Set<PrivacyCriterion> criteria = new HashSet<PrivacyCriterion>();
/** The metric. */
private Metric<?> metric = Metric.createLossMetric();
/** Do we assume practical monotonicity. */
private boolean practicalMonotonicity = false;
/** Relative tuple outliers. Initialized to -1; three of the four constructors overwrite this value. */
private double relMaxOutliers = -1;
/** The requirements per equivalence class. */
private int requirements = 0x0;
/** The snapshot length. */
private int snapshotLength;
/** Defines values of which attribute type are to be replaced by the suppression string in suppressed tuples. */
private Integer suppressedAttributeTypes = 1 << AttributeType.ATTR_TYPE_QI;
/** Determines whether suppression is applied to the output of anonymous as well as non-anonymous transformations. */
private Boolean suppressionAlwaysEnabled = true;
/** Should microaggregation be based on data utility measurements */
private boolean utilityBasedMicroaggregation = false;
/** Should the mean-squared error be used to measure the impact of microaggregation */
private boolean utilityBasedMicroaggregationUseMeanSquaredError = false;
/** Internal variant of the class providing a broader interface. Transient and created lazily. */
private transient ARXConfigurationInternal accessibleInstance = null;
/** Should a heuristic search be used for sample-based criteria? */
private boolean heuristicSearchForSampleBasedCriteria = false;
/** Should we use the heuristic search algorithm? */
private boolean heuristicSearchEnabled = false;
/**
* We will use the heuristic algorithm, if the size of the search space
* exceeds this threshold
*/
private Integer heuristicSearchThreshold = 100000;
/** The heuristic algorithm will terminate after the given time limit */
private Integer heuristicSearchTimeLimit = 30000;
/** Cost/benefit configuration */
private ARXCostBenefitConfiguration costBenefitConfiguration = ARXCostBenefitConfiguration.create();
/**
* Creates a new configuration without tuple suppression, i.e. with a
* relative outlier limit of 0.
*/
private ARXConfiguration() {
this.relMaxOutliers = 0d;
}
/**
* Creates a new config that allows the given percentage of outliers and
* thus implements tuple suppression.
*
* @param supp the relative outlier limit, must be >= 0 and < 1
* @throws NullPointerException if supp is below 0 or not below 1
*/
private ARXConfiguration(double supp) {
// NOTE(review): a NullPointerException is thrown for a range violation, and the upper bound is
// exclusive here but inclusive in the (double, Metric) constructor - confirm whether this is intended
if (supp < 0d || supp >= 1d) { throw new NullPointerException("Suppression must be >=0 and <1"); }
this.relMaxOutliers = supp;
}
/**
* Creates a new config that allows the given percentage of outliers and
* thus implements tuple suppression. Defines the metric for measuring information loss.
* @param supp the relative outlier limit, must be >= 0 and <= 1
* @param metric the metric, must not be null
* @throws NullPointerException if supp is outside of [0, 1] or if metric is null
*/
private ARXConfiguration(double supp, Metric<?> metric) {
// NOTE(review): a NullPointerException is also used to report the range violation
if (supp < 0d || supp > 1d) { throw new NullPointerException("Suppression must be >=0 and <=1"); }
this.relMaxOutliers = supp;
if (metric == null) { throw new NullPointerException("Metric must not be null"); }
this.metric = metric;
}
/**
* Creates a new config that allows to define the metric for measuring information loss.
* @param metric the metric, must not be null
* @throws NullPointerException if metric is null
*/
private ARXConfiguration(Metric<?> metric) {
// NOTE(review): relMaxOutliers is not assigned here and keeps its field initializer (-1),
// whereas the no-argument constructor sets it to 0 - confirm whether this is intended
if (metric == null) { throw new NullPointerException("Metric must not be null"); }
this.metric = metric;
}
/**
* Adds a privacy model to the configuration. This method is deprecated.
* Please use addPrivacyModel(...) instead.
*
* @param c the privacy model to add
* @return the result of addPrivacyModel(c)
*/
@Deprecated
public ARXConfiguration addCriterion(PrivacyCriterion c) {
return addPrivacyModel(c);
}
/**
 * Adds a privacy model to the configuration.
 * <p>
 * The model is rejected if it is null, if it is a second instance of d-presence, k-map,
 * k-anonymity or differential privacy, or if it defines a data subset that differs from
 * the subset of an already registered model. After insertion, the combination of
 * differential privacy with any other model that defines a subset is rejected as well;
 * in that case the model is removed again before the exception is thrown.
 *
 * @param c the privacy model to add
 * @return this configuration
 * @throws IllegalArgumentException if the argument is null, duplicates a single-instance
 *         model, or uses a different research subset
 * @throws RuntimeException if differential privacy would be combined with a research subset
 */
public ARXConfiguration addPrivacyModel(PrivacyCriterion c) {

    // Null is not allowed
    checkArgument(c);

    // Models that may be specified at most once
    if (c instanceof DPresence && isPrivacyModelSpecified(DPresence.class)) {
        throw new IllegalArgumentException("You must not add more than one instance of the d-presence model");
    }
    if (c instanceof KMap && isPrivacyModelSpecified(KMap.class)) {
        throw new IllegalArgumentException("You must not add more than one instance of the k-map model");
    }
    if (c instanceof KAnonymity && isPrivacyModelSpecified(KAnonymity.class)) {
        throw new IllegalArgumentException("You must not add more than one instance of the k-anonymity model");
    }
    if (c instanceof EDDifferentialPrivacy && isPrivacyModelSpecified(EDDifferentialPrivacy.class)) {
        throw new IllegalArgumentException("You must not add more than one instance of the differential privacy model");
    }

    // All subsets (the new one first) must be identical
    if (c.isSubsetAvailable()) {
        List<int[]> rows = new ArrayList<int[]>();
        rows.add(c.getDataSubset().getArray());
        for (PrivacyCriterion existing : getPrivacyModels()) {
            if (existing.isSubsetAvailable()) {
                rows.add(existing.getDataSubset().getArray());
            }
        }

        // Compare each subset with its predecessor
        Iterator<int[]> iterator = rows.iterator();
        int[] previous = iterator.next();
        while (iterator.hasNext()) {
            int[] current = iterator.next();
            if (!Arrays.equals(previous, current)) {
                throw new IllegalArgumentException("Using different research subsets is not supported");
            }
            previous = current;
        }
    }

    // Register the model
    criteria.add(c);

    // Differential privacy must not be combined with a model that defines a subset
    if (isPrivacyModelSpecified(EDDifferentialPrivacy.class)) {
        for (PrivacyCriterion other : getPrivacyModels()) {
            if (other instanceof EDDifferentialPrivacy || !other.isSubsetAvailable()) {
                continue;
            }
            // Undo the insertion and complain
            criteria.remove(c);
            throw new RuntimeException("Combining differential privacy with a research subset is not supported");
        }
    }

    // Done
    return this;
}
/**
 * Clones this config.
 * <p>
 * The arrays of privacy models, the set of privacy models, the attribute weights and the
 * cost/benefit configuration are copied; the contained privacy model instances and the
 * metric are shared with this object. The lazily created internal view is not copied.
 *
 * @return a new configuration carrying the same settings
 */
@Override
public ARXConfiguration clone() {
    ARXConfiguration result = new ARXConfiguration();
    result.practicalMonotonicity = this.practicalMonotonicity;
    result.relMaxOutliers = this.relMaxOutliers;
    result.absMaxOutliers = this.absMaxOutliers;
    // The arrays may be null, e.g. aCriteria is reset by getInstanceForLocalRecoding()
    result.aCriteria = this.aCriteria == null ? null : this.aCriteria.clone();
    result.bCriteria = this.bCriteria == null ? null : this.bCriteria.clone();
    result.criteria = new HashSet<PrivacyCriterion>(this.criteria);
    result.requirements = this.requirements;
    result.metric = this.metric;
    result.snapshotLength = this.snapshotLength;
    result.suppressionAlwaysEnabled = this.suppressionAlwaysEnabled;
    result.suppressedAttributeTypes = this.suppressedAttributeTypes;
    result.heuristicSearchForSampleBasedCriteria = this.heuristicSearchForSampleBasedCriteria;
    result.heuristicSearchEnabled = this.heuristicSearchEnabled;
    result.heuristicSearchThreshold = this.heuristicSearchThreshold;
    result.heuristicSearchTimeLimit = this.heuristicSearchTimeLimit;
    result.utilityBasedMicroaggregation = this.utilityBasedMicroaggregation;
    // Previously missing: without this the clone silently fell back to the default (false)
    result.utilityBasedMicroaggregationUseMeanSquaredError = this.utilityBasedMicroaggregationUseMeanSquaredError;
    result.costBenefitConfiguration = this.getCostBenefitConfiguration().clone();
    if (this.attributeWeights != null) {
        result.attributeWeights = new HashMap<String, Double>(this.attributeWeights);
    } else {
        result.attributeWeights = null;
    }
    return result;
}
/**
* Returns whether the configuration contains a privacy model which is an instance of the given class.
* This method is deprecated. Please use isPrivacyModelSpecified(...) instead.
*
* @param clazz the class to look for
* @return the result of isPrivacyModelSpecified(clazz)
*/
@Deprecated
public boolean containsCriterion(Class<? extends PrivacyCriterion> clazz) {
return this.isPrivacyModelSpecified(clazz);
}
/**
 * Returns whether the configuration contains a privacy model which is an instance of the given class.
 *
 * @param clazz the class to look for, must not be null
 * @return true if at least one registered privacy model is an instance of clazz
 * @throws IllegalArgumentException if clazz is null
 */
public boolean isPrivacyModelSpecified(Class<? extends PrivacyCriterion> clazz) {
    checkArgument(clazz);
    Iterator<PrivacyCriterion> iterator = criteria.iterator();
    while (iterator.hasNext()) {
        if (clazz.isInstance(iterator.next())) {
            return true;
        }
    }
    return false;
}
/**
 * Returns the weight for the given attribute.
 *
 * @param attribute the name of the attribute
 * @return the stored weight, or 0.5 if no (non-null) weight is stored for the attribute
 */
public double getAttributeWeight(String attribute) {
    // The map may be missing (backwards compatibility), create it on demand
    if (this.attributeWeights == null) {
        this.attributeWeights = new HashMap<String, Double>();
    }
    Double stored = this.attributeWeights.get(attribute);
    return stored != null ? stored : 0.5d;
}
/**
 * Returns all configured attribute weights. For attributes which are not a key in this
 * map, getAttributeWeight(...) reports the default weight of 0.5.
 *
 * @return a copy of the configured weights; changes to it do not affect this configuration
 */
public Map<String, Double> getAttributeWeights() {
    // The map may be missing (backwards compatibility), create it on demand
    if (this.attributeWeights == null) {
        this.attributeWeights = new HashMap<String, Double>();
    }
    Map<String, Double> copy = new HashMap<String, Double>();
    copy.putAll(this.attributeWeights);
    return copy;
}
/**
 * Returns the cost/benefit configuration. If none is stored, a new one is created,
 * stored and returned.
 *
 * @return the cost/benefit configuration, never null
 */
public ARXCostBenefitConfiguration getCostBenefitConfiguration() {
    ARXCostBenefitConfiguration current = this.costBenefitConfiguration;
    if (current == null) {
        current = ARXCostBenefitConfiguration.create();
        this.costBenefitConfiguration = current;
    }
    return current;
}
/**
* Returns all criteria. This method is deprecated.
* Please use getPrivacyModels() instead.
* @return the result of getPrivacyModels()
*/
@Deprecated
public Set<PrivacyCriterion> getCriteria() {
return getPrivacyModels();
}
/**
* Returns all privacy models which are instances of the given class.
* This method is deprecated. Please use getPrivacyModels(...) instead.
*
* @param <T> the type of the privacy models
* @param clazz the class to look for
* @return the result of getPrivacyModels(clazz)
*/
@Deprecated
public <T extends PrivacyCriterion> Set<T> getCriteria(Class<T> clazz) {
return this.getPrivacyModels(clazz);
}
/**
* Returns an instance of the class, if any. Throws an exception if more than one such model exists.
* This method is deprecated. Please use getPrivacyModel(...) instead.
*
* @param <T> the type of the privacy model
* @param clazz the class to look for
* @return the result of getPrivacyModel(clazz)
*/
@Deprecated
public <T extends PrivacyCriterion> T getCriterion(Class<T> clazz) {
return this.getPrivacyModel(clazz);
}
/**
 * When the size of the solution space exceeds the returned number of transformations,
 * ARX will use a heuristic search strategy. The default is 100.000.
 *
 * @return the threshold; if none is stored, the default is stored and returned
 */
public int getHeuristicSearchThreshold() {
    Integer threshold = this.heuristicSearchThreshold;
    if (threshold == null) {
        threshold = 100000;
        this.heuristicSearchThreshold = threshold;
    }
    return threshold;
}
/**
 * The heuristic search algorithm will terminate after the returned number of milliseconds.
 * The default is 30 seconds.
 *
 * @return the time limit in milliseconds; if none is stored, the default is stored and returned
 */
public int getHeuristicSearchTimeLimit() {
    Integer limit = this.heuristicSearchTimeLimit;
    if (limit == null) {
        limit = 30000;
        this.heuristicSearchTimeLimit = limit;
    }
    return limit;
}
/**
* Returns the relative limit of allowed outliers.
*
* @return the current value of the relative outlier limit
*/
public final double getMaxOutliers() {
return relMaxOutliers;
}
/**
* Returns the quality model to be used for optimizing output data.
* This method is deprecated. Please use getQualityModel() instead.
*
* @return the result of getQualityModel()
*/
@Deprecated
public Metric<?> getMetric() {
return getQualityModel();
}
/**
 * Returns whether the privacy model is monotonic.
 * <p>
 * If practical monotonicity is assumed, the result is FULL. Otherwise every privacy model is
 * inspected: with a suppression limit of exactly 0 a model must be monotonic with
 * generalization, with any other limit it must be monotonic with suppression and with
 * generalization. For the first model violating this, PARTIAL is returned if a minimal group
 * size is available and NONE otherwise. If no model violates it, the result is FULL.
 *
 * @return the monotonicity of the configured privacy models
 */
public Monotonicity getMonotonicityOfPrivacy() {

    // Practical monotonicity overrides everything
    if (this.isPracticalMonotonicity()) {
        return Monotonicity.FULL;
    }

    // Is tuple suppression in play?
    final boolean withSuppression = this.getMaxOutliers() != 0d;

    for (PrivacyCriterion criterion : this.getPrivacyModels()) {
        boolean monotonic = withSuppression
                ? (criterion.isMonotonicWithSuppression() && criterion.isMonotonicWithGeneralization())
                : criterion.isMonotonicWithGeneralization();
        if (!monotonic) {
            return this.getMinimalGroupSize() != Integer.MAX_VALUE ? Monotonicity.PARTIAL
                                                                   : Monotonicity.NONE;
        }
    }

    // No model violates monotonicity
    return Monotonicity.FULL;
}
/**
 * Returns whether the utility measure is monotonic.
 *
 * @return FULL if the metric reports monotonicity for the current suppression limit or if
 *         practical monotonicity is assumed, NONE otherwise
 */
public Monotonicity getMonotonicityOfUtility() {
    boolean monotonic = metric.isMonotonic(this.getMaxOutliers()) || this.isPracticalMonotonicity();
    return monotonic ? Monotonicity.FULL : Monotonicity.NONE;
}
/**
 * Returns an instance of the class, if any. Throws an exception if more than one such model exists.
 *
 * @param <T> the type of the privacy model
 * @param clazz the class to look for, must not be null
 * @return the single matching privacy model, or null if there is none
 * @throws IllegalArgumentException if clazz is null
 * @throws RuntimeException if more than one privacy model matches
 */
public <T extends PrivacyCriterion> T getPrivacyModel(Class<T> clazz) {
    // Performs the null check and collects all matching models
    Set<T> matches = this.getPrivacyModels(clazz);
    if (matches.isEmpty()) {
        return null;
    }
    if (matches.size() > 1) {
        throw new RuntimeException("More than one matches the query!");
    }
    return matches.iterator().next();
}
/**
* Returns all privacy models.
* @return the internal set of privacy models itself (not a copy)
*/
public Set<PrivacyCriterion> getPrivacyModels() {
return this.criteria;
}
/**
 * Returns all privacy models which are instances of the given class.
 *
 * @param <T> the type of the privacy models
 * @param clazz the class to look for, must not be null
 * @return a new set containing all matching privacy models, possibly empty
 * @throws IllegalArgumentException if clazz is null
 */
public <T extends PrivacyCriterion> Set<T> getPrivacyModels(Class<T> clazz) {
    checkArgument(clazz);
    Set<T> matches = new HashSet<T>();
    for (PrivacyCriterion candidate : criteria) {
        if (!clazz.isInstance(candidate)) {
            continue;
        }
        matches.add(clazz.cast(candidate));
    }
    return matches;
}
/**
* Returns the quality model to be used for optimizing output data.
*
* @return the currently configured metric
*/
public Metric<?> getQualityModel() {
return this.metric;
}
/**
 * Return journalist risk threshold, 1 if there is none.
 *
 * @return the minimum of 1 and the journalist risk thresholds of all privacy models
 */
public double getRiskThresholdJournalist() {
    double lowest = 1d;
    for (PrivacyCriterion model : this.criteria) {
        double threshold = model.getRiskThresholdJournalist();
        lowest = Math.min(lowest, threshold);
    }
    return lowest;
}
/**
 * Return marketer risk threshold, 1 if there is none.
 *
 * @return the minimum of 1 and the marketer risk thresholds of all privacy models
 */
public double getRiskThresholdMarketer() {
    double lowest = 1d;
    for (PrivacyCriterion model : this.criteria) {
        double threshold = model.getRiskThresholdMarketer();
        lowest = Math.min(lowest, threshold);
    }
    return lowest;
}
/**
 * Return prosecutor risk threshold, 1 if there is none.
 *
 * @return the minimum of 1 and the prosecutor risk thresholds of all privacy models
 */
public double getRiskThresholdProsecutor() {
    double lowest = 1d;
    for (PrivacyCriterion model : this.criteria) {
        double threshold = model.getRiskThresholdProsecutor();
        lowest = Math.min(lowest, threshold);
    }
    return lowest;
}
/**
 * Returns whether values of the given attribute type will be replaced by the suppression
 * string in suppressed tuples.
 *
 * @param type the attribute type, must not be null
 * @return true if the bit belonging to the given type is set
 * @throws IllegalArgumentException if type is null
 */
public boolean isAttributeTypeSuppressed(final AttributeType type){
    checkArgument(type);
    // The field may be missing (backwards compatibility): fall back to quasi-identifiers only
    if (suppressedAttributeTypes == null) {
        suppressedAttributeTypes = 1 << AttributeType.ATTR_TYPE_QI;
    }
    final int mask = 1 << type.getType();
    return (suppressedAttributeTypes & mask) == mask;
}
/**
* Returns whether ARX will use a heuristic search strategy. The default is false.
* @return the current value of the flag
*/
public boolean isHeuristicSearchEnabled() {
return this.heuristicSearchEnabled;
}
/**
* Is practical monotonicity assumed.
*
* @return the current value of the flag
*/
public boolean isPracticalMonotonicity() {
return practicalMonotonicity;
}
/**
 * Returns whether suppression is applied to the output of anonymous as well as non-anonymous transformations. If
 * this flag is set to <code>true</code>, suppression will be applied to the output of non-anonymous
 * transformations to make them anonymous (if possible). Default is <code>true</code>.
 *
 * @return the flag; if none is stored, the default is stored and returned
 */
public boolean isSuppressionAlwaysEnabled(){
    Boolean flag = this.suppressionAlwaysEnabled;
    // The field may be missing (backwards compatibility)
    if (flag == null) {
        flag = true;
        this.suppressionAlwaysEnabled = flag;
    }
    return flag;
}
/**
* Returns whether a heuristic search is used for sample-based criteria.
* @return the current value of the flag
*/
public boolean isUseHeuristicSearchForSampleBasedCriteria() {
return heuristicSearchForSampleBasedCriteria;
}
/**
* Returns whether the impact of microaggregation on data utility should be considered
* @return the current value of the flag
*/
public boolean isUtilityBasedMicroaggregation() {
return this.utilityBasedMicroaggregation;
}
/**
* If set to true, mean squared error will be used to measure the impact of microaggregation
* on data quality. If set to false, a more simple measure of information loss will be used.
* Default is <code>false</code>.
* @return the current value of the flag
*/
public boolean isUtilityBasedMicroaggregationUseMeanSquaredError() {
return this.utilityBasedMicroaggregationUseMeanSquaredError;
}
/**
* Removes the given criterion.
*
* @param <T> not used by the signature or the body
* @param arg the criterion to remove, must not be null
* @return the result of removing the criterion from the set of privacy models
*/
public <T extends PrivacyCriterion> boolean removeCriterion(PrivacyCriterion arg) {
checkArgument(arg);
return criteria.remove(arg);
}
/**
 * Renders this object.
 *
 * @return a list with three elements, in this order: weights, settings and risk thresholds
 */
public List<ElementData> render() {
    ElementData weights = renderWeights();
    ElementData settings = renderSettings();
    ElementData thresholds = renderReidentificationThresholds();
    return new ArrayList<>(Arrays.asList(weights, settings, thresholds));
}
/**
 * Defines values of which attribute type are to be replaced by the suppression string in suppressed tuples.
 * With default settings, only quasi-identifiers will be suppressed.
 *
 * @param type the attribute type, must not be null
 * @param enabled whether suppression should be performed or not
 * @throws IllegalArgumentException if type is null
 */
public void setAttributeTypeSuppressed(final AttributeType type, boolean enabled) {
    checkArgument(type);
    // The field may be missing (backwards compatibility): start from quasi-identifiers only
    if (suppressedAttributeTypes == null) {
        suppressedAttributeTypes = 1 << AttributeType.ATTR_TYPE_QI;
    }
    final int bit = 1 << type.getType();
    final int current = suppressedAttributeTypes;
    // Set or clear the bit that belongs to the given type
    suppressedAttributeTypes = enabled ? (current | bit) : (current & ~bit);
}
/**
* Sets the weight for the given attribute. Boxes the weight and forwards
* to the overload taking a Double.
*
* @param attribute the name of the attribute, must not be null
* @param weight the weight to store
*/
public void setAttributeWeight(String attribute, double weight){
checkArgument(attribute);
setAttributeWeight(attribute, Double.valueOf(weight));
}
/**
 * Sets the weight for the given attribute.
 *
 * @param attribute the name of the attribute, must not be null
 * @param weight the weight to store; the value is stored as given
 * @throws IllegalArgumentException if attribute is null
 */
public void setAttributeWeight(String attribute, Double weight){
    checkArgument(attribute);
    Map<String, Double> weights = this.attributeWeights;
    // The map may be missing (backwards compatibility), create it on demand
    if (weights == null) {
        weights = new HashMap<String, Double>();
        this.attributeWeights = weights;
    }
    weights.put(attribute, weight);
}
/**
 * Sets the cost/benefit configuration.
 *
 * @param config the configuration to use, must not be null
 * @return this configuration
 * @throws NullPointerException if config is null
 */
public ARXConfiguration setCostBenefitConfiguration(ARXCostBenefitConfiguration config) {
    if (config != null) {
        this.costBenefitConfiguration = config;
        return this;
    }
    throw new NullPointerException("Argument must not be null");
}
/**
* Sets whether ARX will use a heuristic search strategy. The default is false.
* @param heuristicSearchEnabled the new value of the flag
*/
public void setHeuristicSearchEnabled(boolean heuristicSearchEnabled) {
this.heuristicSearchEnabled = heuristicSearchEnabled;
}
/**
 * When the size of the solution space exceeds the given number of transformations,
 * ARX will use a heuristic search strategy. The default is 100.000.
 *
 * @param numberOfTransformations the threshold, must be greater than zero
 * @throws IllegalArgumentException if the threshold is zero or negative
 */
public void setHeuristicSearchThreshold(int numberOfTransformations) {
    // Zero is rejected as well, so the message must say "> 0"
    if (numberOfTransformations <= 0) { throw new IllegalArgumentException("Parameter must be > 0"); }
    this.heuristicSearchThreshold = numberOfTransformations;
}
/**
 * The heuristic search algorithm will terminate after the given number of milliseconds.
 * The default is 30 seconds.
 *
 * @param timeInMillis the time limit in milliseconds, must be greater than zero
 * @throws IllegalArgumentException if the time limit is zero or negative
 */
public void setHeuristicSearchTimeLimit(int timeInMillis) {
    // Zero is rejected as well, so the message must say "> 0"
    if (timeInMillis <= 0) { throw new IllegalArgumentException("Parameter must be > 0"); }
    this.heuristicSearchTimeLimit = timeInMillis;
}
/**
* Allows for a certain percentage of outliers and thus
* triggers tuple suppression.
*
* @param max the new relative outlier limit
*/
public void setMaxOutliers(double max) {
// NOTE(review): unlike the constructors, this setter performs no range check - confirm whether this is intended
this.relMaxOutliers = max;
}
/**
* Sets the quality model to be used for optimizing output data.
* This method is deprecated. Please use setQualityModel(...) instead.
*
* @param model the quality model, forwarded to setQualityModel(...)
*/
@Deprecated
public void setMetric(Metric<?> model) {
this.setQualityModel(model);
}
/**
* Sets whether practical monotonicity is assumed.
*
* @param assumeMonotonicity the new value of the flag
*/
public void setPracticalMonotonicity(final boolean assumeMonotonicity) {
this.practicalMonotonicity = assumeMonotonicity;
}
/**
 * Sets the quality model to be used for optimizing output data.
 *
 * @param model the quality model, must not be null
 * @throws NullPointerException if model is null
 */
public void setQualityModel(Metric<?> model) {
    if (model != null) {
        this.metric = model;
        return;
    }
    throw new NullPointerException("Quality model must not be null");
}
/**
* Sets whether suppression is applied to the output of anonymous as well as non-anonymous transformations. If
* this flag is set to <code>true</code>, suppression will be applied to the output of non-anonymous
* transformations to make them anonymous (if possible). Default is <code>true</code>.
* @param enabled the new value of the flag
*/
public void setSuppressionAlwaysEnabled(boolean enabled){
this.suppressionAlwaysEnabled = enabled;
}
/**
* Sets the suppression limit. This is an alias for setMaxOutliers(): both
* assign the given value to the relative outlier limit without validation.
* @param limit the new relative outlier limit
*/
public void setSuppressionLimit(double limit) {
this.relMaxOutliers = limit;
}
/**
* Sets whether a heuristic search is used for sample-based criteria.
* @param value the new value of the flag
*/
public void setUseHeuristicSearchForSampleBasedCriteria(boolean value) {
this.heuristicSearchForSampleBasedCriteria = value;
}
/**
* Sets whether the impact of microaggregation on data utility should be considered
* @param value the new value of the flag
*/
public void setUtilityBasedMicroaggregation(boolean value) {
this.utilityBasedMicroaggregation = value;
}
/**
* If set to true, mean squared error will be used to measure the impact of microaggregation
* on data quality. If set to false, a more simple measure of information loss will be used.
* Default is <code>false</code>.
* @param useMSE the new value of the flag
*/
public void setUtilityBasedMicroaggregationUseMeanSquaredError(boolean useMSE) {
this.utilityBasedMicroaggregationUseMeanSquaredError = useMSE;
}
/**
 * Checks that an argument is not null.
 *
 * @param argument the value to check
 * @throws IllegalArgumentException if the argument is null
 */
private void checkArgument(Object argument){
    if (argument != null) {
        return;
    }
    throw new IllegalArgumentException("Argument must not be null");
}
/**
* Renders the risk thresholds: an element titled "Risk thresholds" with the
* prosecutor, journalist and marketer thresholds as properties, in this order.
* @return the rendered element
*/
private ElementData renderReidentificationThresholds() {
ElementData result = new ElementData("Risk thresholds");
result.addProperty("Prosecutor risk", this.getRiskThresholdProsecutor());
result.addProperty("Journalist risk", this.getRiskThresholdJournalist());
result.addProperty("Marketer risk", this.getRiskThresholdMarketer());
return result;
}
/**
* Renders the settings: an element titled "Settings" with the practical monotonicity flag,
* the suppression limit and the utility-based microaggregation flag as properties.
* @return the rendered element
*/
private ElementData renderSettings() {
ElementData result = new ElementData("Settings");
result.addProperty("Assume monotonicity", this.practicalMonotonicity);
result.addProperty("Suppression limit", this.relMaxOutliers);
// NOTE(review): the label mentions mean squared error, but the value shown is utilityBasedMicroaggregation
// and not utilityBasedMicroaggregationUseMeanSquaredError - confirm which flag is meant
result.addProperty("Consider mean squared error", this.utilityBasedMicroaggregation);
return result;
}
/**
 * Renders the weights: an element titled "Weights" containing one property per configured
 * attribute weight, or the item "None specified" if no weights are configured.
 *
 * @return the rendered element
 */
private ElementData renderWeights() {
    ElementData result = new ElementData("Weights");
    // The map is null until a weight is set or queried, so guard against it
    Map<String, Double> weights = this.attributeWeights;
    if (weights == null || weights.isEmpty()) {
        result.addItem("None specified");
    } else {
        for (Entry<String, Double> entry : weights.entrySet()) {
            result.addProperty(entry.getKey(), entry.getValue());
        }
    }
    return result;
}
/**
* Returns the maximum number of allowed outliers.
*
* @return the current absolute outlier limit
*/
protected final int getAbsoluteMaxOutliers() {
return this.absMaxOutliers;
}
/**
* Returns all criteria (except k-anonymity) as an array. Only used internally. If k-anonymity is included the minimal
* group size should be obtained and enforced
* @return the internal array itself (not a copy); it is null for instances created by getInstanceForLocalRecoding(...)
*/
protected PrivacyCriterion[] getPrivacyModelsAsArray() {
return this.aCriteria;
}
/**
* Clones this config and projects everything onto the given subset.<br>
* - All privacy models will be cloned<br>
* - Subsets in d-presence will be projected accordingly<br>
* - Utility measures will be cloned<br>
* - Replaces estimated k-map with according k-anonymity<br>
*
* @param rowset the rows onto which an existing data subset is projected
* @param gsFactor the factor that is set on the configuration of the cloned quality model
* @return the projected clone of this configuration
* @throws IllegalStateException if a privacy model does not support local recoding
*/
protected ARXConfiguration getInstanceForLocalRecoding(RowSet rowset, double gsFactor) {
// Check, if we can do this
for (PrivacyCriterion criterion : this.getPrivacyModels()) {
if (!criterion.isLocalRecodingSupported()) {
throw new IllegalStateException("Local recoding not supported.");
}
}
// Prepare a subset
DataSubset subset = this.getSubset();
if (subset != null) {
subset = subset.getSubsetInstance(rowset);
}
// Clone all criteria
HashSet<PrivacyCriterion> criteria = new HashSet<PrivacyCriterion>();
for (PrivacyCriterion criterion : this.getPrivacyModels()) {
// Clone and store
PrivacyCriterion clone = criterion.clone(subset);
// We need to make sure that we don't add multiple instances of k-anonymity
// because k-map can be converted into this model
if (clone instanceof KAnonymity) {
// Keep only the instance with the larger k: an already collected instance with a
// smaller or equal k is removed, otherwise the new clone is dropped
Iterator<PrivacyCriterion> iter = criteria.iterator();
while (iter.hasNext()) {
PrivacyCriterion other = iter.next();
if (other instanceof KAnonymity) {
if (((KAnonymity)other).getK() <= ((KAnonymity)clone).getK()) {
iter.remove();
} else {
clone = null;
}
}
}
if (clone != null) {
criteria.add(clone);
}
} else {
criteria.add(clone);
}
}
// Clone the config
ARXConfiguration result = this.clone();
// The array no longer matches the new set of criteria
// NOTE(review): presumably rebuilt when the configuration is initialized - confirm
result.aCriteria = null;
result.criteria = criteria;
// Create a new instance of the quality model that uses the given gsFactor
MetricConfiguration utilityConfig = result.getQualityModel().getConfiguration();
utilityConfig.setGsFactor(gsFactor);
result.metric = result.getQualityModel().getDescription().createInstance(utilityConfig);
// Return
return result;
}
/**
 * Returns an internal variant of the class which provides a broader interface.
 * The variant is created on first access and the same instance is returned afterwards.
 *
 * @return the internal configuration wrapping this instance
 */
protected ARXConfigurationInternal getInternalConfiguration(){
    // Already created: hand out the cached instance
    if (this.accessibleInstance != null) {
        return this.accessibleInstance;
    }
    // First access: create, remember and return
    this.accessibleInstance = new ARXConfigurationInternal(this);
    return this.accessibleInstance;
}
/**
 * Returns the minimal size of an equivalence class induced by the defined privacy models.
 *
 * @return the largest value of {@code getMinimalClassSize()} over all privacy models for
 *         which {@code isMinimalClassSizeAvailable()} holds (e.g. max(k,l) if k-anonymity and
 *         l-diversity are contained). If no model contributes a size,
 *         {@code Integer.MAX_VALUE} is returned.
 */
protected int getMinimalGroupSize() {

    // -1 marks "no model has reported a size so far"
    int largest = -1;

    for (PrivacyCriterion model : this.getPrivacyModels()) {
        if (!model.isMinimalClassSizeAvailable()) {
            continue;
        }
        int size = model.getMinimalClassSize();
        if (size > largest) {
            largest = size;
        }
    }

    return (largest == -1) ? Integer.MAX_VALUE : largest;
}
/**
 * Returns the criteria's requirements.
 *
 * @return the requirement flags of this configuration, as combined in {@code initialize}
 */
protected int getRequirements() {
    final int flags = this.requirements;
    return flags;
}
/**
 * Returns all sample-based criteria as an array. Only used internally.
 *
 * @return the internal array of sample-based criteria (not a copy)
 */
protected SampleBasedCriterion[] getSampleBasedPrivacyModelsAsArray() {
    final SampleBasedCriterion[] sampleBased = this.bCriteria;
    return sampleBased;
}
/**
 * Returns the specific length of each entry in a snapshot.
 *
 * @return the snapshot entry length, as computed in {@code initialize}
 */
protected int getSnapshotLength() {
    final int length = this.snapshotLength;
    return length;
}
/**
 * Returns the data subset, if any subset is defined.
 * You may only call this after the configuration has been initialized.
 *
 * @return the first non-null subset provided by a privacy model that reports a subset
 *         as available, or {@code null} if there is none
 */
protected DataSubset getSubset() {
    for (PrivacyCriterion model : this.criteria) {
        // Models without a subset are of no interest here
        if (!model.isSubsetAvailable()) {
            continue;
        }
        DataSubset candidate = model.getDataSubset();
        if (candidate != null) {
            return candidate;
        }
    }
    // No model provided a subset
    return null;
}
/**
 * Returns an integer representing all attribute types that must be suppressed.
 *
 * @return the stored value, or {@code 1 << AttributeType.ATTR_TYPE_QI} if no value is stored
 */
protected int getSuppressedAttributeTypes() {
    if (this.suppressedAttributeTypes != null) {
        return this.suppressedAttributeTypes;
    }
    // Ensure backwards compatibility: fall back to the bit for quasi-identifiers
    return 1 << AttributeType.ATTR_TYPE_QI;
}
/**
* Initializes the configuration. In this order, the method:<br>
* - combines the requirements of all privacy models<br>
* - initializes every privacy model with the given manager and this configuration<br>
* - derives the absolute number of outliers from the data length<br>
* - builds the internal arrays of privacy models and of sample-based models<br>
* - computes the length of a snapshot entry<br>
*
* @param manager the data manager providing the analyzed and the generalized data
* @throws RuntimeException if no privacy model has been specified
*/
protected void initialize(DataManager manager) {
// Check
if (criteria.isEmpty()) {
throw new RuntimeException("At least one privacy model must be specified!");
}
// Compute requirements: bitwise OR over the requirements of all models
this.requirements = 0x0;
for (PrivacyCriterion c : criteria) {
this.requirements |= c.getRequirements();
}
// Requirements for microaggregation: a non-null array of analyzed data
// additionally requires distributions
if (manager.getDataAnalyzed().getArray() != null) {
this.requirements |= ARXConfiguration.REQUIREMENT_DISTRIBUTION;
}
// Initialize (done after the requirements have been computed, as each model
// receives this configuration)
for (PrivacyCriterion c : criteria) {
c.initialize(manager, this);
}
// Number of records: size of the subset if one is defined, otherwise the
// length of the generalized data
int dataLength = 0;
if (this.getSubset() != null) {
dataLength = getSubset().getArray().length;
} else {
dataLength = manager.getDataGeneralized().getDataLength();
}
// Compute max outliers: with differential privacy all records may be outliers,
// otherwise the relative limit is converted into an absolute number (rounded down)
if (this.isPrivacyModelSpecified(EDDifferentialPrivacy.class)) {
absMaxOutliers = (int) dataLength;
} else {
absMaxOutliers = (int) Math.floor(this.relMaxOutliers * (double) dataLength);
}
// Compute optimized array with criteria, assuming complexities
// dPresence <= dDisclosurePrivacy <= lDiversity <= tCloseness and ignoring kAnonymity.
// The order of the following blocks determines the order within the array.
List<PrivacyCriterion> list = new ArrayList<PrivacyCriterion>();
// d-presence and k-map are added as single instances, all other models as collections
if (this.isPrivacyModelSpecified(DPresence.class)) {
list.add(this.getPrivacyModel(DPresence.class));
}
if (this.isPrivacyModelSpecified(KMap.class)) {
list.add(this.getPrivacyModel(KMap.class));
}
if (this.isPrivacyModelSpecified(DDisclosurePrivacy.class)) {
list.addAll(this.getPrivacyModels(DDisclosurePrivacy.class));
}
if (this.isPrivacyModelSpecified(BasicBLikeness.class)) {
list.addAll(this.getPrivacyModels(BasicBLikeness.class));
}
if (this.isPrivacyModelSpecified(EnhancedBLikeness.class)) {
list.addAll(this.getPrivacyModels(EnhancedBLikeness.class));
}
if (this.isPrivacyModelSpecified(LDiversity.class)) {
list.addAll(this.getPrivacyModels(LDiversity.class));
}
if (this.isPrivacyModelSpecified(TCloseness.class)) {
list.addAll(this.getPrivacyModels(TCloseness.class));
}
if (this.isPrivacyModelSpecified(ProfitabilityProsecutor.class)) {
list.addAll(this.getPrivacyModels(ProfitabilityProsecutor.class));
}
if (this.isPrivacyModelSpecified(ProfitabilityProsecutorNoAttack.class)) {
list.addAll(this.getPrivacyModels(ProfitabilityProsecutorNoAttack.class));
}
if (this.isPrivacyModelSpecified(ProfitabilityJournalist.class)) {
list.addAll(this.getPrivacyModels(ProfitabilityJournalist.class));
}
if (this.isPrivacyModelSpecified(ProfitabilityJournalistNoAttack.class)) {
list.addAll(this.getPrivacyModels(ProfitabilityJournalistNoAttack.class));
}
this.aCriteria = list.toArray(new PrivacyCriterion[0]);
// Compute array of sample-based criteria (empty array if none is specified)
this.bCriteria = new SampleBasedCriterion[0];
if (this.isPrivacyModelSpecified(SampleBasedCriterion.class)) {
this.bCriteria = this.getPrivacyModels(SampleBasedCriterion.class).toArray(new SampleBasedCriterion[0]);
}
// Compute snapshot length: two base fields, plus two fields per analyzed attribute
// if distributions are required, plus one field if a secondary counter is required
this.snapshotLength = 2;
if (this.requires(REQUIREMENT_DISTRIBUTION)) {
this.snapshotLength += 2 * manager.getDataAnalyzed().getHeader().length;
}
if (this.requires(REQUIREMENT_SECONDARY_COUNTER)) {
this.snapshotLength += 1;
}
}
/**
 * Convenience method for checking the requirements.
 *
 * @param requirement the requirement flag(s) to test for
 * @return {@code true} if at least one bit of {@code requirement} is set in the
 *         requirements of this configuration
 */
protected boolean requires(int requirement) {
    final int matching = this.requirements & requirement;
    return matching != 0;
}
}