/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.framework.check;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXConfiguration.ARXConfigurationInternal;
import org.deidentifier.arx.framework.check.StateMachine.Transition;
import org.deidentifier.arx.framework.check.distribution.DistributionAggregateFunction;
import org.deidentifier.arx.framework.check.distribution.IntArrayDictionary;
import org.deidentifier.arx.framework.check.groupify.HashGroupify;
import org.deidentifier.arx.framework.check.history.History;
import org.deidentifier.arx.framework.data.Data;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.framework.data.Dictionary;
import org.deidentifier.arx.framework.lattice.SolutionSpace;
import org.deidentifier.arx.framework.lattice.Transformation;
import org.deidentifier.arx.metric.InformationLoss;
import org.deidentifier.arx.metric.InformationLossWithBound;
import org.deidentifier.arx.metric.Metric;
/**
* This class orchestrates the process of transforming and analyzing a dataset.
*
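* <p>
* A minimal usage sketch (hypothetical setup; the data manager, metric, configuration,
* transformation and solution space are assumed to have been created elsewhere):
* <pre>{@code
* NodeChecker checker = new NodeChecker(manager, metric, config,
*                                       200,   // historyMaxSize
*                                       0.2d,  // snapshotSizeDataset
*                                       0.8d,  // snapshotSizeSnapshot
*                                       solutionSpace);
* NodeChecker.Result result = checker.check(transformation);
* if (result.privacyModelFulfilled) {
*     TransformedData output = checker.applyTransformation(transformation);
* }
* }</pre>
*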
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class NodeChecker {
/**
* The result of a check.
*/
public static class Result {
/** Whether the privacy model is fulfilled. */
public final Boolean privacyModelFulfilled;
/** Whether the minimal class size (k-anonymity sub-criterion) is fulfilled. Null if not applicable. */
public final Boolean minimalClassSizeFulfilled;
/** Information loss. */
public final InformationLoss<?> informationLoss;
/** Lower bound. */
public final InformationLoss<?> lowerBound;
/**
* Creates a new instance.
*
* @param privacyModelFulfilled Whether the privacy model is fulfilled
* @param minimalClassSizeFulfilled Whether the minimal class size requirement is fulfilled
* @param infoLoss The information loss
* @param lowerBound A lower bound on the information loss
*/
Result(Boolean privacyModelFulfilled,
Boolean minimalClassSizeFulfilled,
InformationLoss<?> infoLoss,
InformationLoss<?> lowerBound) {
this.privacyModelFulfilled = privacyModelFulfilled;
this.minimalClassSizeFulfilled = minimalClassSizeFulfilled;
this.informationLoss = infoLoss;
this.lowerBound = lowerBound;
}
}
/** The config. */
private final ARXConfigurationInternal config;
/** The data. */
private final Data dataGeneralized;
/** The microaggregation functions. */
private final DistributionAggregateFunction[] microaggregationFunctions;
/** The start index of the attributes with microaggregation in the data array */
private final int microaggregationStartIndex;
/** The number of attributes with microaggregation in the data array */
private final int microaggregationNumAttributes;
/** Map for the microaggregated data subset */
private final int[] microaggregationMap;
/** Header of the microaggregated data subset */
private final String[] microaggregationHeader;
/** The current hash groupify. */
private HashGroupify currentGroupify;
/** The last hash groupify. */
private HashGroupify lastGroupify;
/** The history. */
private final History history;
/** The metric. */
private final Metric<?> metric;
/** The state machine. */
private final StateMachine stateMachine;
/** The data transformer. */
private final Transformer transformer;
/** The solution space */
private final SolutionSpace solutionSpace;
/** Is a minimal class size required */
private final boolean minimalClassSizeRequired;
/**
* Creates a new NodeChecker instance.
*
* @param manager The manager
* @param metric The metric
* @param config The configuration
* @param historyMaxSize The history max size
* @param snapshotSizeDataset The maximum size of a snapshot relative to the dataset
* @param snapshotSizeSnapshot The maximum size of a snapshot relative to the snapshot it is derived from
* @param solutionSpace The solution space
*/
public NodeChecker(final DataManager manager,
final Metric<?> metric,
final ARXConfigurationInternal config,
final int historyMaxSize,
final double snapshotSizeDataset,
final double snapshotSizeSnapshot,
final SolutionSpace solutionSpace) {
// Initialize all operators
this.metric = metric;
this.config = config;
this.dataGeneralized = manager.getDataGeneralized();
this.microaggregationFunctions = manager.getMicroaggregationFunctions();
this.microaggregationStartIndex = manager.getMicroaggregationStartIndex();
this.microaggregationNumAttributes = manager.getMicroaggregationNumAttributes();
this.microaggregationMap = manager.getMicroaggregationMap();
this.microaggregationHeader = manager.getMicroaggregationHeader();
this.solutionSpace = solutionSpace;
this.minimalClassSizeRequired = config.getMinimalGroupSize() != Integer.MAX_VALUE;
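// Initial capacity for the hash tables and dictionaries: roughly one percent of the number of records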
int initialSize = (int) (manager.getDataGeneralized().getDataLength() * 0.01d);
IntArrayDictionary dictionarySensValue;
IntArrayDictionary dictionarySensFreq;
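// Dictionaries for sensitive values and their frequencies are only needed if the
// privacy model requires tracking the distribution of sensitive attribute values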
if ((config.getRequirements() & ARXConfiguration.REQUIREMENT_DISTRIBUTION) != 0) {
dictionarySensValue = new IntArrayDictionary(initialSize);
dictionarySensFreq = new IntArrayDictionary(initialSize);
} else {
// Just to allow byte code instrumentation
dictionarySensValue = new IntArrayDictionary(0);
dictionarySensFreq = new IntArrayDictionary(0);
}
this.history = new History(manager.getDataGeneralized().getArray().length,
historyMaxSize,
snapshotSizeDataset,
snapshotSizeSnapshot,
config,
dictionarySensValue,
dictionarySensFreq,
solutionSpace);
this.stateMachine = new StateMachine(history);
this.currentGroupify = new HashGroupify(initialSize, config);
this.lastGroupify = new HashGroupify(initialSize, config);
this.transformer = new Transformer(manager.getDataGeneralized().getArray(),
manager.getDataAnalyzed().getArray(),
manager.getHierarchies(),
config,
dictionarySensValue,
dictionarySensFreq);
}
/**
* Applies the given transformation and returns the transformed dataset.
*
* @param transformation The transformation to apply
* @return The transformed data
*/
public TransformedData applyTransformation(final Transformation transformation) {
return applyTransformation(transformation,
new Dictionary(microaggregationNumAttributes));
}
/**
* Applies the given transformation and returns the transformed dataset.
*
* @param transformation The transformation to apply
* @param microaggregationDictionary A dictionary for microaggregated values
* @return The transformed data
*/
public TransformedData applyTransformation(final Transformation transformation,
final Dictionary microaggregationDictionary) {
// Prepare
microaggregationDictionary.definalizeAll();
// Apply transition and groupify
currentGroupify = transformer.apply(0L, transformation.getGeneralization(), currentGroupify);
currentGroupify.stateAnalyze(transformation, true);
if (!currentGroupify.isPrivacyModelFulfilled() && !config.isSuppressionAlwaysEnabled()) {
currentGroupify.stateResetSuppression();
}
// Determine information loss
InformationLoss<?> loss = transformation.getInformationLoss();
if (loss == null) {
loss = metric.getInformationLoss(transformation, currentGroupify).getInformationLoss();
}
// Prepare buffers
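// The microaggregated output starts out empty and is only filled if microaggregation is configured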
Data microaggregatedOutput = new Data(new int[0][0], new String[0], new int[0], new Dictionary(0));
Data generalizedOutput = new Data(transformer.getBuffer(), dataGeneralized.getHeader(), dataGeneralized.getMap(), dataGeneralized.getDictionary());
// Perform microaggregation. This has to be done before suppression.
if (microaggregationFunctions.length > 0) {
microaggregatedOutput = currentGroupify.performMicroaggregation(transformer.getBuffer(),
microaggregationStartIndex,
microaggregationNumAttributes,
microaggregationFunctions,
microaggregationMap,
microaggregationHeader,
microaggregationDictionary);
}
// Perform suppression
if (config.getAbsoluteMaxOutliers() != 0 || !currentGroupify.isPrivacyModelFulfilled()) {
currentGroupify.performSuppression(transformer.getBuffer());
}
// Return the buffer
return new TransformedData(generalizedOutput, microaggregatedOutput,
new Result(currentGroupify.isPrivacyModelFulfilled(),
minimalClassSizeRequired ? currentGroupify.isMinimalClassSizeFulfilled() : null,
loss, null));
}
/**
* Checks the given transformation and computes its utility if it fulfills the privacy model.
*
* @param node The transformation to check
* @return The result of the check
*/
public NodeChecker.Result check(final Transformation node) {
return check(node, false);
}
/**
* Checks the given transformation.
*
* @param node The transformation to check
* @param forceMeasureInfoLoss Whether to measure information loss even if the privacy model is not fulfilled
* @return The result of the check
*/
public NodeChecker.Result check(final Transformation node, final boolean forceMeasureInfoLoss) {
// If the result is already known, simply return it
if (node.getData() != null && node.getData() instanceof NodeChecker.Result) {
return (NodeChecker.Result) node.getData();
}
// Store snapshot from last check
if (stateMachine.getLastNode() != null) {
history.store(solutionSpace.getTransformation(stateMachine.getLastNode()), currentGroupify, stateMachine.getLastTransition().snapshot);
}
// Transition
final Transition transition = stateMachine.transition(node.getGeneralization());
// Switch groupifies
final HashGroupify temp = lastGroupify;
lastGroupify = currentGroupify;
currentGroupify = temp;
// Apply transition
switch (transition.type) {
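// No optimization possible: transform all records from scratch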
case UNOPTIMIZED:
currentGroupify = transformer.apply(transition.projection, node.getGeneralization(), currentGroupify);
break;
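// The new transformation generalizes the previous one: the existing equivalence
// classes can be rolled up instead of re-transforming every record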
case ROLLUP:
currentGroupify = transformer.applyRollup(transition.projection, node.getGeneralization(), lastGroupify, currentGroupify);
break;
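// A snapshot stored in the history serves as the starting point, so only the
// records captured in the snapshot need to be processed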
case SNAPSHOT:
currentGroupify = transformer.applySnapshot(transition.projection, node.getGeneralization(), currentGroupify, transition.snapshot);
break;
}
// We are done with transforming and adding
currentGroupify.stateAnalyze(node, forceMeasureInfoLoss);
if (forceMeasureInfoLoss && !currentGroupify.isPrivacyModelFulfilled() && !config.isSuppressionAlwaysEnabled()) {
currentGroupify.stateResetSuppression();
}
// Compute information loss and lower bound
InformationLossWithBound<?> result = (currentGroupify.isPrivacyModelFulfilled() || forceMeasureInfoLoss) ?
metric.getInformationLoss(node, currentGroupify) : null;
InformationLoss<?> loss = result != null ? result.getInformationLoss() : null;
InformationLoss<?> bound = result != null ? result.getLowerBound() : metric.getLowerBound(node, currentGroupify);
// Return the result
return new NodeChecker.Result(currentGroupify.isPrivacyModelFulfilled(),
minimalClassSizeRequired ? currentGroupify.isMinimalClassSizeFulfilled() : null,
loss,
bound);
}
/**
* Returns the configuration.
*
* @return The configuration
*/
public ARXConfigurationInternal getConfiguration() {
return config;
}
/**
* Returns the checker's history, if any.
*
* @return The history
*/
public History getHistory() {
return history;
}
/**
* Returns the input buffer.
*
* @return The input buffer
*/
public int[][] getInputBuffer() {
return this.dataGeneralized.getArray();
}
/**
* Returns the utility metric.
*
* @return The metric
*/
public Metric<?> getMetric() {
return metric;
}
}