/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx;
import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.deidentifier.arx.AttributeType.MicroAggregationFunction;
import org.deidentifier.arx.algorithm.AbstractAlgorithm;
import org.deidentifier.arx.algorithm.FLASHAlgorithm;
import org.deidentifier.arx.algorithm.FLASHAlgorithmImpl;
import org.deidentifier.arx.algorithm.FLASHStrategy;
import org.deidentifier.arx.algorithm.LIGHTNINGAlgorithm;
import org.deidentifier.arx.criteria.BasicBLikeness;
import org.deidentifier.arx.criteria.DDisclosurePrivacy;
import org.deidentifier.arx.criteria.EDDifferentialPrivacy;
import org.deidentifier.arx.criteria.EnhancedBLikeness;
import org.deidentifier.arx.criteria.KAnonymity;
import org.deidentifier.arx.criteria.LDiversity;
import org.deidentifier.arx.criteria.TCloseness;
import org.deidentifier.arx.framework.check.NodeChecker;
import org.deidentifier.arx.framework.check.distribution.DistributionAggregateFunction;
import org.deidentifier.arx.framework.check.distribution.DistributionAggregateFunction.DistributionAggregateFunctionGeneralization;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.framework.data.Dictionary;
import org.deidentifier.arx.framework.data.GeneralizationHierarchy;
import org.deidentifier.arx.framework.lattice.SolutionSpace;
import org.deidentifier.arx.framework.lattice.Transformation;
import org.deidentifier.arx.metric.Metric;
/**
* This class offers several methods to define parameters and execute the ARX
* algorithm.
*
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class ARXAnonymizer {
/**
 * Temporary result of the ARX algorithm.
 *
 * @author Fabian Prasser
 * @author Florian Kohlmayer
 */
class Result {

    /** The algorithm that was executed. */
    final AbstractAlgorithm algorithm;
    /** The node checker that was used. */
    final NodeChecker checker;
    /** The solution space that was searched. */
    final SolutionSpace solutionSpace;
    /** The manager holding the encoded data. */
    final DataManager manager;
    /** The quality model. */
    final Metric<?> metric;
    /** Timestamp (ms) at which the search was started. */
    final long time;
    /** The global optimum as reported by the algorithm. */
    final Transformation optimum;

    /**
     * Creates a new instance.
     *
     * @param metric the quality model
     * @param checker the node checker
     * @param solutionSpace the solution space
     * @param manager the data manager
     * @param algorithm the executed algorithm
     * @param time timestamp (ms) at which the search was started
     */
    Result(final Metric<?> metric,
           final NodeChecker checker,
           final SolutionSpace solutionSpace,
           final DataManager manager,
           final AbstractAlgorithm algorithm,
           final long time) {
        this.metric = metric;
        this.checker = checker;
        this.solutionSpace = solutionSpace;
        this.manager = manager;
        this.algorithm = algorithm;
        this.time = time;
        this.optimum = algorithm.getGlobalOptimum();
    }

    /**
     * Converts this temporary result into the final result.
     *
     * @param config the configuration that was used
     * @param handle the input data handle, which will be locked
     * @return the final result
     */
    public ARXResult asResult(ARXConfiguration config, DataHandle handle) {

        // Materialize the lattice from the searched solution space
        final boolean optimalSearch = algorithm instanceof FLASHAlgorithmImpl;
        final ARXLattice arxLattice = new ARXLattice(solutionSpace,
                                                     optimalSearch,
                                                     optimum,
                                                     manager.getDataGeneralized().getHeader(),
                                                     config.getInternalConfiguration());

        // Lock the input handle before exposing the output
        ((DataHandleInput) handle).setLocked(true);

        // Wrap everything up
        return new ARXResult(handle.getRegistry(),
                             this.manager,
                             this.checker,
                             handle.getDefinition(),
                             config,
                             arxLattice,
                             System.currentTimeMillis() - time,
                             solutionSpace);
    }
}
/** History size: maximum number of snapshots kept by the node checker [default=200]. */
private int historySize = 200;
/** The listener that progress is forwarded to, if any. */
private ARXListener listener = null;
/** Maximum relative size of a snapshot compared to the dataset [default=0.2]. */
private double snapshotSizeDataset = 0.2d;
/** Maximum relative size of a snapshot compared to its predecessor [default=0.8]. */
private double snapshotSizeSnapshot = 0.8d;
/** The maximal number of QIs that can be processed. Integer.MAX_VALUE disables the restriction. */
private int maxQuasiIdentifiers = Integer.MAX_VALUE;
/**
 * Creates a new anonymizer with the default configuration.
 */
public ARXAnonymizer() {
    // Empty by design
}
/**
 * Creates a new anonymizer with the given configuration.
 *
 * @param historySize The maximum number of snapshots stored in the buffer [default=200]
 * @param snapshotSizeDataset The maximum relative size of a snapshot compared to the dataset [default=0.2]
 * @param snapshotSizeSnapshot The maximum relative size of a snapshot compared to its predecessor [default=0.8]
 * @throws IllegalArgumentException if any argument is outside its valid range
 */
public ARXAnonymizer(final int historySize, final double snapshotSizeDataset, final double snapshotSizeSnapshot) {
    // Use IllegalArgumentException (a RuntimeException) for argument validation,
    // consistent with the setters of this class
    if (historySize < 0) {
        throw new IllegalArgumentException("History size must be >=0");
    }
    this.historySize = historySize;
    if (snapshotSizeDataset <= 0d || snapshotSizeDataset >= 1d) {
        throw new IllegalArgumentException("SnapshotSizeDataset must be >0 and <1");
    }
    this.snapshotSizeDataset = snapshotSizeDataset;
    if (snapshotSizeSnapshot <= 0d || snapshotSizeSnapshot >= 1d) {
        throw new IllegalArgumentException("snapshotSizeSnapshot must be >0 and <1");
    }
    this.snapshotSizeSnapshot = snapshotSizeSnapshot;
}
/**
 * Performs data anonymization.
 *
 * @param data The data
 * @param config The privacy config
 * @return ARXResult
 * @throws IOException
 */
public ARXResult anonymize(final Data data, ARXConfiguration config) throws IOException {

    // A locked handle indicates the data is still attached to a previous result;
    // it must be released before it can be anonymized again
    if (((DataHandleInput)data.getHandle()).isLocked()){
        throw new RuntimeException("This data handle is locked. Please release it first");
    }

    // Materialize hierarchies and validate the definition, then reset the registry.
    // Note: checkBeforeEncoding must run before the registry is reset.
    DataHandle handle = data.getHandle();
    handle.getDefinition().materializeHierarchies(handle);
    checkBeforeEncoding(handle, config);
    handle.getRegistry().reset();

    // Create manager holding the encoded data
    DataManager manager = getDataManager(handle, handle.getDefinition(), config);

    // Attach subset to handle
    handle.getRegistry().createInputSubset(config);

    // Attach the encoded arrays to the input data handle
    ((DataHandleInput)handle).update(manager.getDataGeneralized().getArray(),
                                     manager.getDataAnalyzed().getArray(),
                                     manager.getDataStatic().getArray());

    // Execute the search and convert the temporary result into the final one
    return anonymize(manager, handle.getDefinition(), config).asResult(config, handle);
}
/**
 * Returns the maximum number of snapshots allowed to store in the history.
 *
 * @return the history size
 */
public int getHistorySize() {
    return this.historySize;
}
/**
 * Returns the maximum size of a snapshot relative to the dataset size.
 *
 * @return the relative snapshot size
 */
public double getMaximumSnapshotSizeDataset() {
    return this.snapshotSizeDataset;
}
/**
 * Returns the maximum size of a snapshot relative to the previous snapshot size.
 *
 * @return the relative snapshot size
 */
public double getMaximumSnapshotSizeSnapshot() {
    return this.snapshotSizeSnapshot;
}
/**
 * Returns the maximal number of quasi-identifiers that can be processed.
 *
 * @return the maximal number of quasi-identifiers; Integer.MAX_VALUE if unrestricted
 */
public int getMaxQuasiIdentifiers() {
    return this.maxQuasiIdentifiers;
}
/**
 * Sets the maximum number of snapshots allowed to store in the history.
 *
 * @param historySize the size; must be zero or positive
 * @throws IllegalArgumentException if the given size is negative
 */
public void setHistorySize(final int historySize) {
    if (historySize < 0) {
        throw new IllegalArgumentException("Max. number of snapshots must be positive or 0");
    }
    this.historySize = historySize;
}
/**
 * Sets a listener that receives progress updates.
 *
 * @param listener the new listener, if any
 */
public void setListener(final ARXListener listener) {
    this.listener = listener;
}
/**
 * Sets the maximum size of a snapshot relative to the dataset size.
 *
 * @param snapshotSize the relative size; must be in (0, 1]
 * @throws IllegalArgumentException if the given size is out of range
 */
public void setMaximumSnapshotSizeDataset(final double snapshotSize) {
    // Perform sanity checks. Note: 0 is rejected, 1 is allowed.
    // Message fixed: missing space and wrong range (the check excludes 0).
    if ((snapshotSize <= 0d) || (snapshotSize > 1d)) {
        throw new IllegalArgumentException("Snapshot size " + snapshotSize + " must be in (0, 1]");
    }
    snapshotSizeDataset = snapshotSize;
}
/**
 * Sets the maximum size of a snapshot relative to the previous snapshot.
 *
 * @param snapshotSizeSnapshot the relative size; must be in (0, 1]
 * @throws IllegalArgumentException if the given size is out of range
 */
public void setMaximumSnapshotSizeSnapshot(final double snapshotSizeSnapshot) {
    // Perform sanity checks. Note: 0 is rejected, 1 is allowed.
    // Message fixed: missing space and wrong range (the check excludes 0).
    if ((snapshotSizeSnapshot <= 0d) || (snapshotSizeSnapshot > 1d)) {
        throw new IllegalArgumentException("Snapshot size " + snapshotSizeSnapshot + " must be in (0, 1]");
    }
    this.snapshotSizeSnapshot = snapshotSizeSnapshot;
}
/**
 * Sets the maximal number of quasi-identifiers. Set to Integer.MAX_VALUE to disable the
 * restriction. By default, the restriction is disabled.
 *
 * @param maxQuasiIdentifiers the maximal number of quasi-identifiers; must be zero or positive
 * @throws IllegalArgumentException if the given value is negative
 */
public void setMaxQuasiIdentifiers(int maxQuasiIdentifiers) {
    // Validate, for consistency with the other setters of this class
    if (maxQuasiIdentifiers < 0) {
        throw new IllegalArgumentException("Max. number of quasi-identifiers must be positive or 0");
    }
    this.maxQuasiIdentifiers = maxQuasiIdentifiers;
}
/**
 * Performs sanity checks on the configuration after the data has been encoded.
 *
 * @param config the configuration
 * @param manager the manager holding the encoded data
 */
private void checkAfterEncoding(final ARXConfiguration config, final DataManager manager) {

    // k must lie in [1, number of rows]
    if (config.isPrivacyModelSpecified(KAnonymity.class)){
        KAnonymity c = config.getPrivacyModel(KAnonymity.class);
        // TODO: getDataGeneralized().getDataLength() does not consider data subsets
        if ((c.getK() > manager.getDataGeneralized().getDataLength()) || (c.getK() < 1)) {
            throw new IllegalArgumentException("Parameter k (" + c.getK() + ") must be >=1 and less or equal than the number of rows (" + manager.getDataGeneralized().getDataLength()+")");
        }
    }

    // l must lie in [1, number of rows] for each l-diversity model
    if (config.isPrivacyModelSpecified(LDiversity.class)){
        for (LDiversity c : config.getPrivacyModels(LDiversity.class)){
            // TODO: getDataGeneralized().getDataLength() does not consider data subsets
            if ((c.getL() > manager.getDataGeneralized().getDataLength()) || (c.getL() < 1)) {
                throw new IllegalArgumentException("Parameter l (" + c.getL() + ") must be >=1 and less or equal than the number of rows (" + manager.getDataGeneralized().getDataLength()+")");
            }
        }
    }

    // d must be strictly positive for each d-disclosure privacy model
    if (config.isPrivacyModelSpecified(DDisclosurePrivacy.class)){
        for (DDisclosurePrivacy c : config.getPrivacyModels(DDisclosurePrivacy.class)){
            if (c.getD() <= 0) {
                throw new IllegalArgumentException("Parameter d (" + c.getD() + ") must be positive and larger than 0");
            }
        }
    }

    // b must be strictly positive for each basic-b-likeness model
    if (config.isPrivacyModelSpecified(BasicBLikeness.class)){
        for (BasicBLikeness c : config.getPrivacyModels(BasicBLikeness.class)){
            if (c.getB() <= 0) {
                throw new IllegalArgumentException("Parameter b (" + c.getB() + ") must be positive and larger than 0");
            }
        }
    }

    // b must be strictly positive for each enhanced-b-likeness model
    if (config.isPrivacyModelSpecified(EnhancedBLikeness.class)){
        for (EnhancedBLikeness c : config.getPrivacyModels(EnhancedBLikeness.class)){
            if (c.getB() <= 0) {
                throw new IllegalArgumentException("Parameter b (" + c.getB() + ") must be positive and larger than 0");
            }
        }
    }

    // Check whether all hierarchies are monotonic
    for (final GeneralizationHierarchy hierarchy : manager.getHierarchies()) {
        hierarchy.checkMonotonicity(manager);
    }

    // Check that the configured min/max generalization levels are consistent
    // with the heights of the corresponding hierarchies
    final int[] hierarchyHeights = manager.getHierachiesHeights();
    final int[] minLevels = manager.getHierarchiesMinLevels();
    final int[] maxLevels = manager.getHierarchiesMaxLevels();
    for (int i = 0; i < hierarchyHeights.length; i++) {
        if (minLevels[i] > (hierarchyHeights[i] - 1)) { throw new IllegalArgumentException("Invalid minimum generalization for attribute '" + manager.getHierarchies()[i].getName() + "': " +
                                                                                           minLevels[i] + " > " + (hierarchyHeights[i] - 1)); }
        if (minLevels[i] < 0) { throw new IllegalArgumentException("The minimum generalization for attribute '" + manager.getHierarchies()[i].getName() + "' has to be positive!"); }
        if (maxLevels[i] > (hierarchyHeights[i] - 1)) { throw new IllegalArgumentException("Invalid maximum generalization for attribute '" + manager.getHierarchies()[i].getName() + "': " +
                                                                                           maxLevels[i] + " > " + (hierarchyHeights[i] - 1)); }
        if (maxLevels[i] < minLevels[i]) { throw new IllegalArgumentException("The minimum generalization for attribute '" + manager.getHierarchies()[i].getName() +
                                                                              "' has to be lower than or equal to the defined maximum!"); }
    }
}
/**
 * Performs sanity checks that can be done before the data is encoded.
 *
 * @param handle the data handle
 * @param config the configuration
 */
private void checkBeforeEncoding(final DataHandle handle, final ARXConfiguration config) {

    // Basic checks
    if (handle == null) { throw new NullPointerException("Data must not be null!"); }
    if (config.isPrivacyModelSpecified(LDiversity.class) ||
        config.isPrivacyModelSpecified(TCloseness.class)){
        if (handle.getDefinition().getSensitiveAttributes().size() == 0) { throw new IllegalArgumentException("You need to specify a sensitive attribute!"); }
    }

    // Each sensitive attribute must be addressed by at least one of the
    // privacy models that target sensitive attributes
    for (String attr : handle.getDefinition().getSensitiveAttributes()){
        boolean found = false;
        for (LDiversity c : config.getPrivacyModels(LDiversity.class)) {
            if (c.getAttribute().equals(attr)) {
                found = true;
                break;
            }
        }
        if (!found) {
            for (TCloseness c : config.getPrivacyModels(TCloseness.class)) {
                if (c.getAttribute().equals(attr)) {
                    found = true;
                    break;
                }
            }
        }
        if (!found) {
            for (DDisclosurePrivacy c : config.getPrivacyModels(DDisclosurePrivacy.class)) {
                if (c.getAttribute().equals(attr)) {
                    found = true;
                    break;
                }
            }
        }
        if (!found) {
            for (BasicBLikeness c : config.getPrivacyModels(BasicBLikeness.class)) {
                if (c.getAttribute().equals(attr)) {
                    found = true;
                    break;
                }
            }
        }
        if (!found) {
            for (EnhancedBLikeness c : config.getPrivacyModels(EnhancedBLikeness.class)) {
                if (c.getAttribute().equals(attr)) {
                    found = true;
                    break;
                }
            }
        }
        if (!found) {
            throw new IllegalArgumentException("No privacy model specified for sensitive attribute: '"+attr+"'!");
        }
    }

    // Conversely, these privacy models may only be defined for sensitive attributes
    for (LDiversity c : config.getPrivacyModels(LDiversity.class)) {
        if (handle.getDefinition().getAttributeType(c.getAttribute()) != AttributeType.SENSITIVE_ATTRIBUTE) {
            throw new RuntimeException("L-Diversity model defined for non-sensitive attribute '"+c.getAttribute()+"'!");
        }
    }
    for (TCloseness c : config.getPrivacyModels(TCloseness.class)) {
        if (handle.getDefinition().getAttributeType(c.getAttribute()) != AttributeType.SENSITIVE_ATTRIBUTE) {
            throw new RuntimeException("T-Closeness model defined for non-sensitive attribute '"+c.getAttribute()+"'!");
        }
    }
    for (DDisclosurePrivacy c : config.getPrivacyModels(DDisclosurePrivacy.class)) {
        if (handle.getDefinition().getAttributeType(c.getAttribute()) != AttributeType.SENSITIVE_ATTRIBUTE) {
            throw new RuntimeException("D-Disclosure privacy model defined for non-sensitive attribute '"+c.getAttribute()+"'!");
        }
    }
    for (BasicBLikeness c : config.getPrivacyModels(BasicBLikeness.class)) {
        if (handle.getDefinition().getAttributeType(c.getAttribute()) != AttributeType.SENSITIVE_ATTRIBUTE) {
            throw new RuntimeException("Basic-b-likeness model defined for non-sensitive attribute '"+c.getAttribute()+"'!");
        }
    }
    for (EnhancedBLikeness c : config.getPrivacyModels(EnhancedBLikeness.class)) {
        if (handle.getDefinition().getAttributeType(c.getAttribute()) != AttributeType.SENSITIVE_ATTRIBUTE) {
            throw new RuntimeException("Enhanced-b-likeness model defined for non-sensitive attribute '"+c.getAttribute()+"'!");
        }
    }

    // Check handle
    if (!(handle instanceof DataHandleInput)) { throw new IllegalArgumentException("Invalid data handle provided!"); }

    // Check that every attribute referenced by the definition exists in the dataset
    DataDefinition definition = handle.getDefinition();
    Set<String> attributes = new HashSet<String>();
    for (int i=0; i<handle.getNumColumns(); i++){
        attributes.add(handle.getAttributeName(i));
    }
    checkAttributesContained(attributes, definition.getSensitiveAttributes(), "Sensitive");
    checkAttributesContained(attributes, definition.getInsensitiveAttributes(), "Insensitive");
    checkAttributesContained(attributes, definition.getIdentifyingAttributes(), "Identifying");
    checkAttributesContained(attributes, definition.getQuasiIdentifyingAttributes(), "Quasi-identifying");

    // Check micro-aggregation functions: the data type must provide the required
    // scale of measure, and generalization-based functions need a hierarchy
    for (String attribute : definition.getQuasiIdentifiersWithMicroaggregation()) {
        MicroAggregationFunction f = (MicroAggregationFunction) definition.getMicroAggregationFunction(attribute);
        DataType<?> t = definition.getDataType(attribute);
        if (!t.getDescription().getScale().provides(f.getRequiredScale())) {
            throw new IllegalArgumentException("Attribute '" + attribute + "' has an aggregation function specified which needs a datatype with a scale of measure of at least " + f.getRequiredScale());
        }
        if (f.getFunction() instanceof DistributionAggregateFunctionGeneralization) {
            if (definition.getHierarchy(attribute) == null) {
                throw new IllegalArgumentException("Attribute '" + attribute + "' has an aggregation function specified which needs a generalization hierarchy");
            }
        }
    }

    // Check constraints for (e,d)-DP
    if (config.isPrivacyModelSpecified(EDDifferentialPrivacy.class)) {
        if (!definition.getQuasiIdentifiersWithMicroaggregation().isEmpty()) {
            throw new IllegalArgumentException("Differential privacy must not be combined with micro-aggregation");
        }
    }

    // Perform sanity checks
    Set<String> genQis = definition.getQuasiIdentifiersWithGeneralization();
    if ((config.getMaxOutliers() < 0d) || (config.getMaxOutliers() > 1d)) { throw new IllegalArgumentException("Suppression rate " + config.getMaxOutliers() + " must be in [0, 1]"); }
    if (genQis.size() == 0) { throw new IllegalArgumentException("You need to specify at least one quasi-identifier with generalization"); }
    if (genQis.size() > maxQuasiIdentifiers) {
        throw new IllegalArgumentException("Too many quasi-identifiers (" + genQis.size()+"). This restriction is configurable.");
    }
}

/**
 * Checks that each of the given attributes is contained in the dataset.
 *
 * @param attributes all attribute names present in the dataset
 * @param toCheck the attribute names defined in the data definition
 * @param type human-readable attribute type, used in the error message
 * @throws IllegalArgumentException if an attribute is not contained in the dataset
 */
private void checkAttributesContained(final Set<String> attributes, final Iterable<String> toCheck, final String type) {
    for (String attribute : toCheck) {
        if (!attributes.contains(attribute)) {
            throw new IllegalArgumentException(type + " attribute '" + attribute + "' is not contained in the dataset");
        }
    }
}
/**
 * Returns a map from attribute name to its configured micro-aggregation function.
 *
 * @param definition the data definition
 * @return the map of aggregate functions
 */
private Map<String, DistributionAggregateFunction> getAggregateFunctions(DataDefinition definition) {
    final Map<String, DistributionAggregateFunction> functions = new HashMap<String, DistributionAggregateFunction>();
    for (final String attribute : definition.getQuasiIdentifiersWithMicroaggregation()) {
        functions.put(attribute, definition.getMicroAggregationFunction(attribute).getFunction());
    }
    return functions;
}
/**
 * Returns an algorithm for the given problem instance.
 *
 * @param config the configuration
 * @param manager the data manager
 * @param solutionSpace the solution space
 * @param checker the node checker
 * @return the algorithm to execute
 */
private AbstractAlgorithm getAlgorithm(final ARXConfiguration config,
                                       final DataManager manager,
                                       final SolutionSpace solutionSpace,
                                       final NodeChecker checker) {

    // Use the heuristic LIGHTNING search when explicitly enabled or when the
    // solution space exceeds the configured threshold
    final boolean useHeuristic = config.isHeuristicSearchEnabled() ||
                                 solutionSpace.getSize() > config.getHeuristicSearchThreshold();
    if (useHeuristic) {
        return LIGHTNINGAlgorithm.create(solutionSpace, checker, config.getHeuristicSearchTimeLimit());
    }

    // Otherwise, use the FLASH algorithm
    final FLASHStrategy strategy = new FLASHStrategy(solutionSpace, manager.getHierarchies());
    return FLASHAlgorithm.create(solutionSpace, checker, strategy);
}
/**
 * Prepares the data manager.
 *
 * @param handle the handle
 * @param definition the definition
 * @param config the config
 * @return the data manager
 * @throws IOException Signals that an I/O exception has occurred.
 */
private DataManager getDataManager(final DataHandle handle, final DataDefinition definition, final ARXConfiguration config) throws IOException {

    // Extract the raw encoded data from the input handle
    final DataHandleInput input = (DataHandleInput) handle;

    // Wrap it into a manager together with the privacy models and the
    // configured micro-aggregation functions
    return new DataManager(input.header,
                           input.data,
                           input.dictionary,
                           definition,
                           config.getPrivacyModels(),
                           getAggregateFunctions(definition));
}
/**
 * Builds a fresh solution space and runs the anonymization algorithm.
 *
 * @param manager the manager holding the encoded data
 * @param definition the data definition
 * @param config the privacy configuration
 * @return a temporary result; convert via Result#asResult(ARXConfiguration, DataHandle)
 * @throws IOException
 */
protected Result anonymize(final DataManager manager,
                           final DataDefinition definition,
                           final ARXConfiguration config) throws IOException {

    // Initialize the configuration for the encoded data
    config.initialize(manager);

    // Check that the configuration is consistent with the encoded data
    checkAfterEncoding(config, manager);

    // Build the solution space from the hierarchies' min/max levels
    SolutionSpace solutionSpace = new SolutionSpace(manager.getHierarchiesMinLevels(), manager.getHierarchiesMaxLevels());

    // Initialize the metric
    config.getQualityModel().initialize(manager, definition, manager.getDataGeneralized(), manager.getHierarchies(), config);

    // Build a node checker, parameterized with this anonymizer's snapshot settings
    final NodeChecker checker = new NodeChecker(manager,
                                                config.getQualityModel(),
                                                config.getInternalConfiguration(),
                                                historySize,
                                                snapshotSizeDataset,
                                                snapshotSizeSnapshot,
                                                solutionSpace);

    // Create an algorithm instance
    AbstractAlgorithm algorithm = getAlgorithm(config,
                                               manager,
                                               solutionSpace,
                                               checker);
    algorithm.setListener(listener);

    // Execute; time is recorded before the traversal so asResult can report the duration
    final long time = System.currentTimeMillis();
    algorithm.traverse();

    // Deactivate history to prevent bugs when sorting data
    checker.getHistory().reset();
    checker.getHistory().setSize(0);

    // Return the result
    return new Result(config.getQualityModel(), checker, solutionSpace, manager, algorithm, time);
}
}