/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.framework.check.groupify;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.deidentifier.arx.framework.check.groupify.HashGroupifyDistribution.PrivacyCondition.State;
import org.deidentifier.arx.framework.lattice.Transformation;
import org.deidentifier.arx.metric.InformationLossWithBound;
import org.deidentifier.arx.metric.Metric;
import org.deidentifier.arx.metric.v2.MetricMDHeight;
import org.deidentifier.arx.metric.v2.MetricMDNMPrecision;
import org.deidentifier.arx.metric.v2.MetricMDNUEntropy;
import org.deidentifier.arx.metric.v2.MetricMDNUEntropyPotentiallyPrecomputed;
import org.deidentifier.arx.metric.v2.MetricMDNUEntropyPrecomputed;
import org.deidentifier.arx.metric.v2.MetricMDNUNMEntropy;
import org.deidentifier.arx.metric.v2.MetricMDNUNMEntropyPotentiallyPrecomputed;
import org.deidentifier.arx.metric.v2.MetricMDNUNMEntropyPrecomputed;
import org.deidentifier.arx.metric.v2.MetricMDPrecision;
import org.deidentifier.arx.metric.v2.MetricMDStatic;
import org.deidentifier.arx.metric.v2.MetricSDAECS;
import org.deidentifier.arx.metric.v2.MetricSDDiscernability;
import org.deidentifier.arx.metric.v2.MetricSDNMDiscernability;
import org.deidentifier.arx.risk.RiskModelHistogram;
import com.carrotsearch.hppc.IntIntOpenHashMap;
/**
 * A distribution of equivalence classes: for every class size it tracks how many
 * non-suppressed classes of that size exist, together with the total number of
 * non-suppressed records, non-suppressed classes and suppressed records.
 * <p>
 * The distribution additionally keeps a sorted array of the entries that can be
 * suppressed, and offers a linear and a binary strategy for suppressing them until
 * a {@link PrivacyCondition} is fulfilled. Suppressing or unsuppressing an entry
 * flips the entry's {@code isNotOutlier} flag, i.e. the entries passed to the
 * constructor are modified in place.
 *
 * @author Fabian Prasser
 */
public class HashGroupifyDistribution {

    /**
     * A condition that may or may not be fulfilled for the distribution.
     *
     * @author Fabian Prasser
     */
    public static interface PrivacyCondition {

        /**
         * The current state of the search condition.
         *
         * @author Fabian Prasser
         */
        public static enum State {
            /** The condition holds for the current distribution */
            FULFILLED,
            /** The condition does not hold for the current distribution */
            NOT_FULFILLED,
            /** The suppression process must stop immediately */
            ABORT
        }

        /**
         * Evaluates the condition on the given distribution.
         *
         * @param distribution the distribution in its current state of suppression
         * @return the state of the condition
         */
        public State isFulfilled(HashGroupifyDistribution distribution);
    }

    /** The backing map: class size -> number of non-suppressed classes of that size */
    private final IntIntOpenHashMap   distribution  = new IntIntOpenHashMap();

    /** The number of suppressed tuples */
    private int                       numSuppressed = 0;

    /** Entries that can be suppressed, in the order in which they are suppressed */
    private final HashGroupifyEntry[] entries;

    /** Number of (non-suppressed) tuples in the data set */
    private double                    numRecords    = 0;

    /** Number of (non-suppressed) classes in the data set */
    private double                    numClasses    = 0;

    /**
     * Creates a new instance.
     * <p>
     * Walks the list of entries via {@code nextOrdered}. Every entry that is not an
     * outlier and contains at least one record is added to the distribution and to
     * the set of suppressible entries; the record count of every other entry is
     * added to the number of suppressed records. The suppressible entries are then
     * sorted by class size (ascending). Ties are broken by information loss
     * (ascending), unless ordering by the metric is disabled, and finally by the
     * entries' {@code representative}.
     *
     * @param metric the metric used to order classes of equal size, null if ordering
     *               by information loss should not be applied
     * @param transformation the transformation passed to the metric when computing
     *                       the information loss of an entry
     * @param entry the first entry of the list of entries, may be null
     */
    HashGroupifyDistribution(final Metric<?> metric,
                             final Transformation transformation,
                             HashGroupifyEntry entry) {

        // Initialize: split entries into suppressible ones and already suppressed ones
        List<HashGroupifyEntry> list = new ArrayList<HashGroupifyEntry>();
        while (entry != null) {
            if (entry.isNotOutlier && entry.count > 0) {
                addToDistribution(entry.count);
                list.add(entry);
            } else {
                this.numSuppressed += entry.count;
            }
            entry = entry.nextOrdered;
        }

        Comparator<HashGroupifyEntry> comparator;
        if (isOrderedBySizeOnly(metric)) {

            // Create comparator: class size, then representative
            comparator = new Comparator<HashGroupifyEntry>() {
                @Override
                public int compare(HashGroupifyEntry o1, HashGroupifyEntry o2) {
                    int cmp = Integer.compare(o1.count, o2.count);
                    return cmp != 0 ? cmp : Integer.compare(o1.representative, o2.representative);
                }
            };
        } else {

            // Cache for information loss, so that the metric is evaluated
            // at most once per entry while sorting
            final Map<HashGroupifyEntry, InformationLossWithBound<?>> cache =
                    new HashMap<HashGroupifyEntry, InformationLossWithBound<?>>();

            // Create comparator: class size, then information loss, then representative
            comparator = new Comparator<HashGroupifyEntry>() {
                @Override
                public int compare(HashGroupifyEntry o1, HashGroupifyEntry o2) {
                    int cmp = Integer.compare(o1.count, o2.count);
                    if (cmp != 0) {
                        return cmp;
                    }
                    // The metric is only consulted for classes of equal size
                    InformationLossWithBound<?> loss1 = cache.get(o1);
                    InformationLossWithBound<?> loss2 = cache.get(o2);
                    if (loss1 == null) {
                        loss1 = metric.getInformationLoss(transformation, o1);
                        cache.put(o1, loss1);
                    }
                    if (loss2 == null) {
                        loss2 = metric.getInformationLoss(transformation, o2);
                        cache.put(o2, loss2);
                    }
                    cmp = loss1.getInformationLoss().compareTo(loss2.getInformationLoss());
                    return cmp != 0 ? cmp : Integer.compare(o1.representative, o2.representative);
                }
            };
        }

        // Sort & store suppressible entries
        Collections.sort(list, comparator);
        this.entries = list.toArray(new HashGroupifyEntry[list.size()]);
    }

    /**
     * Returns the average class size, i.e. the number of non-suppressed records
     * divided by the number of non-suppressed classes.
     *
     * @return the average class size; NaN if there are no non-suppressed classes
     *         (floating-point division of zero by zero)
     */
    public double getAverageClassSize() {
        return numRecords / numClasses;
    }

    /**
     * Returns the fraction of tuples that are in classes of the given size.
     *
     * @param size the class size
     * @return (number of classes of the given size * size) / number of non-suppressed
     *         records. NOTE(review): presumably NaN if there are no non-suppressed
     *         records, assuming the backing map returns 0 for absent sizes
     */
    public double getFractionOfRecordsInClassesOfSize(int size) {
        return (double) distribution.get(size) * (double) size / numRecords;
    }

    /**
     * Returns a set of classes as an input for the risk model.
     *
     * @return a histogram created from the backing map of this distribution.
     *         NOTE(review): the backing map itself is handed over; whether the
     *         histogram copies it is not visible here
     */
    public RiskModelHistogram getHistogram() {
        return new RiskModelHistogram(this.distribution);
    }

    /**
     * Returns the number of records that are currently not suppressed.
     *
     * @return the number of non-suppressed records
     */
    public int getNumRecords() {
        return (int) this.numRecords;
    }

    /**
     * Returns the number of suppressed records.
     *
     * @return the total number of suppressed records, including the ones that were
     *         already suppressed when this distribution was created
     */
    public int getNumSuppressedRecords() {
        return this.numSuppressed;
    }

    /**
     * Suppresses entries until the condition is fulfilled, using a binary search
     * over the sorted array of suppressible entries. During the search, a prefix
     * of the array is suppressed and the condition is evaluated on the resulting
     * distribution; the prefix is grown when the condition is not fulfilled and
     * shrunk when it is fulfilled. If the condition returns {@link State#ABORT},
     * the search stops immediately and the current suppression is left in place.
     * <p>
     * NOTE(review): the search presumably relies on the condition being monotonic
     * in the suppressed prefix; this cannot be verified from this class alone.
     *
     * @param condition the condition to fulfill
     * @return the number of tuples that have been suppressed by this call,
     *         0 if there are no suppressible entries
     */
    public int suppressWhileNotFulfilledBinary(PrivacyCondition condition) {

        // Nothing to suppress, hence nothing has been suppressed by this call.
        // (Consistent with the linear strategy and with the delta returned below.)
        if (entries.length == 0) {
            return 0;
        }

        // Start parameters
        int low = 0;
        int high = entries.length - 1;
        // NOTE: the truncating division is relied upon below when high == low - 1,
        // so the midpoint computation must not be replaced by a different formula
        int mid = (low + high) / 2;
        int initiallySuppressed = this.numSuppressed;
        State state = State.ABORT;

        // Initially suppress from low to mid
        for (int i = low; i <= mid; i++) {
            suppressEntry(entries[i]);
        }

        // While not done
        while (low <= high) {

            // Binary search
            state = condition.isFulfilled(this);
            if (state == State.ABORT) {
                break;
            } else if (state == State.FULFILLED) {
                high = mid - 1;
                mid = (low + high) / 2;

                // Clear suppression from mid: unsuppress the run of suppressed
                // entries that directly follows the new mid
                for (int i = mid + 1; i < entries.length && !entries[i].isNotOutlier; i++) {
                    unSuppressEntry(entries[i]);
                }
            } else { // state == State.NOT_FULFILLED
                low = mid + 1;
                mid = (low + high) / 2;

                // Suppress from low to mid
                for (int i = low; i <= mid; i++) {
                    suppressEntry(entries[i]);
                }
            }
        }

        // Finally check mid+1: the last step of the search may have left the
        // distribution one entry short of fulfilling the condition
        if (state != State.ABORT) {
            state = condition.isFulfilled(this);
            if (state == State.NOT_FULFILLED && mid + 1 < entries.length && entries[mid + 1].isNotOutlier) {
                suppressEntry(entries[mid + 1]);
            }
        }

        return this.numSuppressed - initiallySuppressed;
    }

    /**
     * Suppresses entries until the condition is fulfilled, one entry at a time in
     * the order of the sorted array of suppressible entries. The condition is
     * evaluated before each suppression; the process stops as soon as it returns
     * anything but {@link State#NOT_FULFILLED} or when all entries have been
     * suppressed. The condition is not evaluated again after the last entry has
     * been suppressed.
     *
     * @param condition the condition to fulfill
     * @return the number of tuples that have been suppressed by this call
     */
    public int suppressWhileNotFulfilledLinear(PrivacyCondition condition) {
        int initiallySuppressed = this.numSuppressed;
        for (int i = 0; i < entries.length; i++) {
            State state = condition.isFulfilled(this);
            if (state == State.NOT_FULFILLED) {
                suppressEntry(entries[i]);
            } else {
                // State.FULFILLED || State.ABORT
                break;
            }
        }
        return this.numSuppressed - initiallySuppressed;
    }

    /**
     * Returns whether suppressible entries are ordered by class size only, i.e.
     * without consulting the metric for classes of equal size. This is the case if
     * no metric is given, or for metrics for which (as stated by the original
     * implementation) the information loss of individual entries is equal to the
     * size of the class.
     *
     * @param metric the metric, may be null
     * @return true if the metric must not be used for ordering
     */
    private static boolean isOrderedBySizeOnly(Metric<?> metric) {
        return (metric == null) ||
               (metric instanceof MetricMDHeight) ||
               (metric instanceof MetricMDNMPrecision) ||
               (metric instanceof MetricMDNUEntropy) ||
               (metric instanceof MetricMDNUEntropyPotentiallyPrecomputed) ||
               (metric instanceof MetricMDNUEntropyPrecomputed) ||
               (metric instanceof MetricMDNUNMEntropy) ||
               (metric instanceof MetricMDNUNMEntropyPotentiallyPrecomputed) ||
               (metric instanceof MetricMDNUNMEntropyPrecomputed) ||
               (metric instanceof MetricMDPrecision) ||
               (metric instanceof MetricMDStatic) ||
               (metric instanceof MetricSDAECS) ||
               (metric instanceof MetricSDDiscernability) ||
               (metric instanceof MetricSDNMDiscernability);
    }

    /**
     * Adds a class of the given size to the distribution and updates the number
     * of classes and records accordingly.
     *
     * @param size the size of the class
     */
    private void addToDistribution(int size) {
        this.numClasses++;
        this.numRecords += size;
        this.distribution.putOrAdd(size, 1, 1);
    }

    /**
     * Removes a class of the given size from the distribution and updates the
     * number of classes and records accordingly.
     *
     * @param size the size of the class
     */
    private void removeFromDistribution(int size) {
        this.numClasses--;
        this.numRecords -= size;
        // Drop the key and only re-insert it if classes of this size remain
        int previous = distribution.remove(size);
        if (previous != 1) {
            distribution.put(size, previous - 1);
        }
    }

    /**
     * Suppresses the given entry: marks it as an outlier, removes its class from
     * the distribution and adds its records to the number of suppressed records.
     *
     * @param entry the entry to suppress
     */
    private void suppressEntry(HashGroupifyEntry entry) {
        entry.isNotOutlier = false;
        removeFromDistribution(entry.count);
        this.numSuppressed += entry.count;
        // No need to adjust "numRecords", because this is done in "removeFromDistribution"
    }

    /**
     * Unsuppresses the given entry: marks it as not being an outlier, adds its
     * class back to the distribution and subtracts its records from the number of
     * suppressed records.
     *
     * @param entry the entry to unsuppress
     * @throws IllegalStateException if no records are suppressed at all or if the
     *                               given entry is not suppressed
     */
    private void unSuppressEntry(HashGroupifyEntry entry) {
        if (this.numSuppressed == 0 || entry.isNotOutlier) {
            throw new IllegalStateException("Internal error. There are not suppressed entries.");
        }
        entry.isNotOutlier = true;
        this.numSuppressed -= entry.count;
        addToDistribution(entry.count);
        // No need to adjust "numRecords", because this is done in "addToDistribution"
    }
}