/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.metric.v2;
import java.util.Arrays;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.DataDefinition;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.framework.check.groupify.HashGroupify;
import org.deidentifier.arx.framework.check.groupify.HashGroupifyEntry;
import org.deidentifier.arx.framework.data.Data;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.framework.data.GeneralizationHierarchy;
import org.deidentifier.arx.framework.lattice.Transformation;
import org.deidentifier.arx.metric.MetricConfiguration;
import com.carrotsearch.hppc.IntIntOpenHashMap;
/**
 * This class provides an implementation of the non-monotonic variant of the
 * non-uniform entropy metric, which additionally accounts for the
 * information loss induced by suppressed records. TODO: Add reference
*
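 * <p>
 * As a rough sketch (the helper below is illustrative only and not part of
 * the ARX API), the per-attribute non-uniform entropy sums
 * {@code count(v) * log2(count(v) / count(generalization(v)))} over all
 * records and switches the sign to obtain a non-negative loss:
 * <pre>
 * // Illustrative only (assumes java.util.Map and java.util.HashMap):
 * // non-uniform entropy of a single column, where value[r] holds the
 * // original and generalized[r] the generalized value of record r
 * static double nonUniformEntropy(int[] value, int[] generalized) {
 *     Map&lt;Integer, Integer&gt; cv = new HashMap&lt;&gt;();
 *     Map&lt;Integer, Integer&gt; cg = new HashMap&lt;&gt;();
 *     for (int v : value) cv.merge(v, 1, Integer::sum);
 *     for (int v : generalized) cg.merge(v, 1, Integer::sum);
 *     double sum = 0d;
 *     for (int r = 0; r &lt; value.length; r++) {
 *         sum += Math.log((double) cv.get(value[r]) / cg.get(generalized[r])) / Math.log(2d);
 *     }
 *     return -sum; // non-negative; 0 when generalization merges no values
 * }
 * </pre>
 *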
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class MetricMDNUNMEntropyPrecomputed extends MetricMDNUEntropyPrecomputed {
/** SVUID. */
private static final long serialVersionUID = -7428794463838685004L;
/**
* Creates a new instance.
*/
protected MetricMDNUNMEntropyPrecomputed() {
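        // Assumed parameter order, following the parent metric: monotonic with
        // generalization, non-monotonic with suppression, not independent,
        // default gs-factor of 0.5, aggregation via sum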
super(true, false, false, 0.5d, AggregateFunction.SUM);
}
/**
* Creates a new instance.
*
     * @param gsFactor A factor in [0,1] weighting generalization against
     *                 suppression (0.5 penalizes both equally)
     * @param function The aggregate function used to combine per-attribute losses
*/
    protected MetricMDNUNMEntropyPrecomputed(double gsFactor, AggregateFunction function) {
super(true, false, false, gsFactor, function);
}
/**
* Returns the configuration of this metric.
*
     * @return The metric configuration
*/
public MetricConfiguration getConfiguration() {
return new MetricConfiguration(false, // monotonic
super.getGeneralizationSuppressionFactor(), // gs-factor
true, // precomputed
1.0d, // precomputation threshold
this.getAggregateFunction() // aggregate function
);
}
@Override
public boolean isGSFactorSupported() {
return true;
}
@Override
public boolean isPrecomputed() {
return true;
}
@Override
public ElementData render(ARXConfiguration config) {
ElementData result = new ElementData("Non-uniform entropy");
result.addProperty("Aggregate function", super.getAggregateFunction().toString());
result.addProperty("Monotonic", this.isMonotonic(config.getMaxOutliers()));
result.addProperty("Generalization factor", this.getGeneralizationFactor());
result.addProperty("Suppression factor", this.getSuppressionFactor());
return result;
}
@Override
public String toString() {
return "Non-monotonic non-uniform entropy";
}
@Override
protected ILMultiDimensionalWithBound getInformationLossInternal(final Transformation node, final HashGroupify g) {
// Prepare
double sFactor = super.getSuppressionFactor();
// Compute non-uniform entropy
double[] result = super.getInformationLossInternalRaw(node, g);
double[] bound = new double[result.length];
System.arraycopy(result, 0, bound, 0, result.length);
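        // 'bound' keeps a copy of the generalization-only entropy: suppression can
        // only add information loss, so this copy acts as a lower bound on the result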
// Compute loss induced by suppression
double suppressed = 0;
final IntIntOpenHashMap[] original = new IntIntOpenHashMap[node.getGeneralization().length];
for (int i = 0; i < original.length; i++) {
original[i] = new IntIntOpenHashMap();
}
// Compute counts for suppressed values in each column
// m.count only counts tuples from the research subset
HashGroupifyEntry m = g.getFirstEquivalenceClass();
while (m != null) {
if (!m.isNotOutlier && m.count > 0) {
suppressed += m.count;
for (int i = 0; i < original.length; i++) {
original[i].putOrAdd(m.key[i], m.count, m.count);
}
}
m = m.nextOrdered;
}
// Evaluate non-uniform entropy for suppressed tuples
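        // Per attribute, each distinct generalized value among the suppressed
        // records is treated as if it were further generalized into a single
        // group of all suppressed records, weighted by the suppression factor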
        if (suppressed != 0) {
for (int i = 0; i < original.length; i++) {
IntIntOpenHashMap map = original[i];
for (int j = 0; j < map.allocated.length; j++) {
if (map.allocated[j]) {
double count = map.values[j];
result[i] += count * log2(count / suppressed) * sFactor;
}
}
}
}
        // Switch sign bit and round. The bound must be negated as well, so that
        // it remains comparable to the non-negative result
        for (int column = 0; column < result.length; column++) {
            result[column] = round(result[column] == 0.0d ? result[column] : -result[column]);
            bound[column] = round(bound[column] == 0.0d ? bound[column] : -bound[column]);
        }
// Return
return new ILMultiDimensionalWithBound(createInformationLoss(result),
createInformationLoss(bound));
}
@Override
protected AbstractILMultiDimensional getLowerBoundInternal(Transformation node) {
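        // Delegates to the parent (monotonic) metric: the lower bound ignores
        // suppression, and the precomputed entropy does not inspect the
        // equivalence classes, so passing a null groupify is expected to be safe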
return super.getInformationLossInternal(node, (HashGroupify)null).getLowerBound();
}
@Override
protected AbstractILMultiDimensional getLowerBoundInternal(Transformation node,
HashGroupify groupify) {
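        // The bound is independent of the actual equivalence classes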
return super.getInformationLossInternal(node, (HashGroupify)null).getLowerBound();
}
@Override
protected void initializeInternal(final DataManager manager,
final DataDefinition definition,
final Data input,
final GeneralizationHierarchy[] hierarchies,
final ARXConfiguration config) {
super.initializeInternal(manager, definition, input, hierarchies, config);
// Prepare
double gFactor = super.getGeneralizationFactor();
double sFactor = super.getSuppressionFactor();
// Compute a reasonable minimum & maximum
double[] min = new double[hierarchies.length];
Arrays.fill(min, 0d);
double[] max = new double[hierarchies.length];
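        // Assumed worst case: generalization and suppression can each contribute
        // up to dataLength * log2(dataLength) per attribute, scaled conservatively
        // by the larger of the two factors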
for (int i=0; i<max.length; i++) {
max[i] = (2d * input.getDataLength() * log2(input.getDataLength())) * Math.max(gFactor, sFactor);
}
super.setMax(max);
super.setMin(min);
}
}