/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.metric.v2;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.framework.check.groupify.HashGroupify;
import org.deidentifier.arx.framework.check.groupify.HashGroupifyEntry;
import org.deidentifier.arx.framework.lattice.Transformation;
import org.deidentifier.arx.metric.MetricConfiguration;
/**
* This class provides an implementation of the (normalized) average equivalence class size metric.
* We dont normailze the metric as proposed in the original publication [1], as this would only be possible for k-anonymity.
* [1] LeFevre K, DeWitt DJ, Ramakrishnan R. Mondrian Multidimensional K-Anonymity. IEEE; 2006:25-25.
*
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class MetricSDAECS extends AbstractMetricSingleDimensional {
/** SVUID. */
private static final long serialVersionUID = 8076459507565472479L;
/**
* Creates a new instance.
*/
protected MetricSDAECS() {
super(true, false, false);
}
/**
* Creates a new instance.
*
* @param gsFactor
*/
protected MetricSDAECS(double gsFactor) {
super(true, false, false, gsFactor);
}
/**
* Creates a new instance. Preinitialized
*
* @param rowCount
*/
protected MetricSDAECS(int rowCount) {
super(true, false, false);
super.setNumTuples((double)rowCount);
}
@Override
public ILSingleDimensional createMaxInformationLoss() {
Double rows = getNumTuples();
if (rows == null) {
throw new IllegalStateException("Metric must be initialized first");
} else {
return new ILSingleDimensional(rows);
}
}
@Override
public ILSingleDimensional createMinInformationLoss() {
return new ILSingleDimensional(1d);
}
/**
* Returns the configuration of this metric.
*
* @return
*/
public MetricConfiguration getConfiguration() {
return new MetricConfiguration(false, // monotonic
super.getGeneralizationSuppressionFactor(), // gs-factor
false, // precomputed
0.0d, // precomputation threshold
AggregateFunction.SUM // aggregate function
);
}
@Override
public boolean isGSFactorSupported() {
return true;
}
@Override
public ElementData render(ARXConfiguration config) {
ElementData result = new ElementData("Average equivalence class size");
result.addProperty("Monotonic", this.isMonotonic(config.getMaxOutliers()));
result.addProperty("Generalization factor", this.getGeneralizationFactor());
result.addProperty("Suppression factor", this.getSuppressionFactor());
return result;
}
@Override
public String toString() {
return "Average equivalence class size";
}
@Override
protected ILSingleDimensionalWithBound getInformationLossInternal(final Transformation node, final HashGroupify g) {
// The total number of groups with and without suppression
double groupsWithSuppression = 0;
double groupsWithoutSuppression = 0;
double gFactor = super.getSuppressionFactor(); // Note: factors are switched on purpose
double sFactor = super.getGeneralizationFactor(); // Note: factors are switched on purpose
HashGroupifyEntry m = g.getFirstEquivalenceClass();
while (m != null) {
if (m.count > 0) {
groupsWithSuppression += m.isNotOutlier ? 1 : 0;
groupsWithoutSuppression++;
}
m = m.nextOrdered;
}
// If there are suppressed tuples, they form one additional group
boolean someRecordsSuppressed = (groupsWithSuppression != groupsWithoutSuppression);
groupsWithSuppression *= gFactor;
groupsWithSuppression = !someRecordsSuppressed ? groupsWithSuppression : groupsWithSuppression + 1 * sFactor;
// Compute AECS
return new ILSingleDimensionalWithBound(getNumTuples() / groupsWithSuppression,
getNumTuples() / (groupsWithoutSuppression * gFactor));
}
@Override
protected ILSingleDimensionalWithBound getInformationLossInternal(Transformation node, HashGroupifyEntry entry) {
return new ILSingleDimensionalWithBound(entry.count);
}
@Override
protected ILSingleDimensional getLowerBoundInternal(Transformation node) {
return null;
}
@Override
protected ILSingleDimensional getLowerBoundInternal(Transformation node,
HashGroupify groupify) {
// Ignore suppression for the lower bound
int groups = 0;
HashGroupifyEntry m = groupify.getFirstEquivalenceClass();
while (m != null) {
groups += (m.count > 0) ? 1 : 0;
m = m.nextOrdered;
}
// Compute AECS
double gFactor = super.getSuppressionFactor(); // Note: factors are switched on purpose
return new ILSingleDimensional(getNumTuples() / ((double)groups * gFactor));
}
}