/*
 * ARX: Powerful Data Anonymization
 * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.deidentifier.arx.metric.v2;

import java.util.Arrays;

import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.DataDefinition;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.framework.check.distribution.DistributionAggregateFunction;
import org.deidentifier.arx.framework.check.groupify.HashGroupify;
import org.deidentifier.arx.framework.check.groupify.HashGroupifyEntry;
import org.deidentifier.arx.framework.data.Data;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.framework.data.GeneralizationHierarchy;
import org.deidentifier.arx.framework.lattice.Transformation;
import org.deidentifier.arx.metric.MetricConfiguration;

/**
 * This class implements a variant of the Loss metric.
 *
 * @author Fabian Prasser
 */
public class MetricMDNMLoss extends AbstractMetricMultiDimensional {

    /** SUID. */
    private static final long serialVersionUID = -573670902335136600L;

    /** Total number of tuples, depends on existence of research subset. */
    private double tuples;

    /** Domain shares for each dimension. */
    private DomainShare[] shares;

    /** TODO: We must override this for backward compatibility. Remove when re-implemented. */
    private final double gFactor;

    /** TODO: We must override this for backward compatibility. Remove when re-implemented. */
    private final double gsFactor;

    /** TODO: We must override this for backward compatibility. Remove when re-implemented. */
    private final double sFactor;

    /**
     * Default constructor which treats all transformation methods equally.
     */
    public MetricMDNMLoss() {
        this(0.5d, AggregateFunction.GEOMETRIC_MEAN);
    }

    /**
     * Default constructor which treats all transformation methods equally.
     *
     * @param function
     */
    public MetricMDNMLoss(AggregateFunction function) {
        this(0.5d, function);
    }

    /**
     * A constructor that allows defining a factor weighting generalization and suppression.
     *
     * @param gsFactor A factor [0,1] weighting generalization and suppression.
     *                 The default value is 0.5, which means that generalization
     *                 and suppression will be treated equally. A factor of 0
     *                 will favor suppression, and a factor of 1 will favor
     *                 generalization. The values in between can be used for
     *                 balancing both methods.
     * @param function
     */
    public MetricMDNMLoss(double gsFactor, AggregateFunction function) {
        super(true, false, false, function);
        if (gsFactor < 0d || gsFactor > 1d) {
            throw new IllegalArgumentException("Parameter must be in [0, 1]");
        }
        this.gsFactor = gsFactor;
        this.sFactor = gsFactor < 0.5d ? 2d * gsFactor : 1d;
        this.gFactor = gsFactor <= 0.5d ? 1d : 1d - 2d * (gsFactor - 0.5d);
    }
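
    // Illustration (values computed directly from the constructor above): how
    // gsFactor maps to the internal generalization and suppression factors:
    //
    //   gsFactor = 0.0  ->  gFactor = 1.0, sFactor = 0.0  (suppression is free, i.e. favored)
    //   gsFactor = 0.5  ->  gFactor = 1.0, sFactor = 1.0  (both methods treated equally)
    //   gsFactor = 1.0  ->  gFactor = 0.0, sFactor = 1.0  (generalization is free, i.e. favored)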

    /**
     * Returns the configuration of this metric.
     *
     * @return
     */
    public MetricConfiguration getConfiguration() {
        return new MetricConfiguration(false,                       // monotonic
                                       gsFactor,                    // gs-factor
                                       false,                       // precomputed
                                       0.0d,                        // precomputation threshold
                                       this.getAggregateFunction()  // aggregate function
        );
    }

    @Override
    // TODO: We must override this for backward compatibility. Remove when re-implemented.
    public double getGeneralizationFactor() {
        return gFactor;
    }

    @Override
    // TODO: We must override this for backward compatibility. Remove when re-implemented.
    public double getGeneralizationSuppressionFactor() {
        return gsFactor;
    }

    @Override
    public String getName() {
        return "Loss";
    }

    @Override
    // TODO: We must override this for backward compatibility. Remove when re-implemented.
    public double getSuppressionFactor() {
        return sFactor;
    }

    @Override
    public boolean isAbleToHandleMicroaggregation() {
        return true;
    }

    @Override
    public boolean isGSFactorSupported() {
        return true;
    }

    @Override
    public ElementData render(ARXConfiguration config) {
        ElementData result = new ElementData("Loss");
        result.addProperty("Aggregate function", super.getAggregateFunction().toString());
        result.addProperty("Monotonic", this.isMonotonic(config.getMaxOutliers()));
        result.addProperty("Generalization factor", this.getGeneralizationFactor());
        result.addProperty("Suppression factor", this.getSuppressionFactor());
        return result;
    }

    @Override
    public String toString() {
        return "Loss (" + gsFactor + "/" + gFactor + "/" + sFactor + ")";
    }

    @Override
    protected ILMultiDimensionalWithBound getInformationLossInternal(Transformation node, HashGroupify g) {

        // Prepare
        int dimensions = getDimensions();
        int dimensionsGeneralized = getDimensionsGeneralized();
        int dimensionsAggregated = getDimensionsAggregated();
        int microaggregationStart = getMicroaggregationStartIndex();
        DistributionAggregateFunction[] microaggregationFunctions = getMicroaggregationFunctions();
        int[] transformation = node.getGeneralization();
        double[] result = new double[dimensions];
        double[] bound = new double[dimensions];

        // Compute information loss and lower bound
        HashGroupifyEntry m = g.getFirstEquivalenceClass();
        while (m != null) {
            if (m.count > 0) {
                for (int dimension = 0; dimension < dimensionsGeneralized; dimension++) {
                    int value = m.key[dimension];
                    int level = transformation[dimension];
                    double share = (double) m.count * shares[dimension].getShare(value, level);
                    result[dimension] += m.isNotOutlier ? share * gFactor :
                                         (sFactor == 1d ? m.count : share + sFactor * ((double) m.count - share));
                    bound[dimension] += share * gFactor;
                }
                for (int dimension = 0; dimension < dimensionsAggregated; dimension++) {
                    double share = (double) m.count *
                                   super.getError(microaggregationFunctions[dimension],
                                                  m.distributions[microaggregationStart + dimension]);
                    result[dimensionsGeneralized + dimension] += m.isNotOutlier ? share * gFactor :
                                         (sFactor == 1d ? m.count : share + sFactor * ((double) m.count - share));
                    // Note: we ignore a bound for microaggregation, as we cannot compute it.
                    // This means that the corresponding entries in the resulting array are
                    // not changed and remain 0d. This is not a problem, as it is OK to
                    // underestimate information loss when computing lower bounds.
                }
            }
            m = m.nextOrdered;
        }

        // Normalize
        for (int dimension = 0; dimension < dimensionsGeneralized; dimension++) {
            result[dimension] = normalizeGeneralized(result[dimension], dimension);
            bound[dimension] = normalizeGeneralized(bound[dimension], dimension);
        }

        // Normalize
        for (int dimension = dimensionsGeneralized; dimension < dimensionsGeneralized + dimensionsAggregated; dimension++) {
            result[dimension] = normalizeAggregated(result[dimension]);
        }

        // Return information loss and lower bound
        return new ILMultiDimensionalWithBound(super.createInformationLoss(result),
                                               super.createInformationLoss(bound));
    }
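
    // Worked example (illustrative only, numbers chosen for exposition) of the
    // per-class contribution computed above for a single generalized dimension:
    // suppose an equivalence class has m.count = 10 records and
    // shares[dimension].getShare(value, level) = 0.2, so share = 10 * 0.2 = 2.0.
    //
    //   - Not suppressed: contribution = share * gFactor = 2.0 * gFactor
    //   - Suppressed:     contribution = share + sFactor * (m.count - share)
    //                                  = 2.0 + sFactor * 8.0
    //
    // The suppressed case interpolates between the pure generalization loss
    // (sFactor = 0) and the full record count, i.e. maximal loss (sFactor = 1).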

    @Override
    protected ILMultiDimensionalWithBound getInformationLossInternal(Transformation node, HashGroupifyEntry entry) {

        // Init
        int dimensions = getDimensions();
        int dimensionsGeneralized = getDimensionsGeneralized();
        int dimensionsAggregated = getDimensionsAggregated();
        int microaggregationStart = getMicroaggregationStartIndex();
        DistributionAggregateFunction[] microaggregationFunctions = getMicroaggregationFunctions();
        double[] result = new double[dimensions];
        int[] transformation = node.getGeneralization();

        // Compute
        for (int dimension = 0; dimension < dimensionsGeneralized; dimension++) {
            int value = entry.key[dimension];
            int level = transformation[dimension];
            result[dimension] = (double) entry.count * shares[dimension].getShare(value, level);
        }

        // Compute
        for (int dimension = 0; dimension < dimensionsAggregated; dimension++) {
            result[dimensionsGeneralized + dimension] = (double) entry.count *
                    super.getError(microaggregationFunctions[dimension],
                                   entry.distributions[microaggregationStart + dimension]);
        }

        // Return
        return new ILMultiDimensionalWithBound(super.createInformationLoss(result));
    }

    @Override
    protected AbstractILMultiDimensional getLowerBoundInternal(Transformation node) {
        return null;
    }

    @Override
    protected AbstractILMultiDimensional getLowerBoundInternal(Transformation node, HashGroupify g) {

        // Prepare
        int dimensions = getDimensions();
        int dimensionsGeneralized = getDimensionsGeneralized();
        int[] transformation = node.getGeneralization();
        double[] bound = new double[dimensions];

        // Compute lower bound
        HashGroupifyEntry m = g.getFirstEquivalenceClass();
        while (m != null) {
            if (m.count > 0) {
                for (int dimension = 0; dimension < dimensionsGeneralized; dimension++) {
                    int value = m.key[dimension];
                    int level = transformation[dimension];
                    double share = (double) m.count * shares[dimension].getShare(value, level);
                    bound[dimension] += share * gFactor;
                }
                // Note: we ignore microaggregation, as we cannot compute a bound for it.
                // This means that the corresponding entries in the resulting array are
                // not changed and remain 0d. This is not a problem, as it is OK to
                // underestimate information loss when computing lower bounds.
            }
            m = m.nextOrdered;
        }

        // Normalize
        for (int dimension = 0; dimension < dimensionsGeneralized; dimension++) {
            bound[dimension] = normalizeGeneralized(bound[dimension], dimension);
        }

        // Return
        return super.createInformationLoss(bound);
    }
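
    // Why the computation above yields a valid lower bound (sketch, assuming
    // each domain share lies in [0, 1], so that share <= m.count): the bound
    // charges every equivalence class as if it were not suppressed, i.e.
    // share * gFactor per generalized dimension. Since gFactor <= 1 and
    // sFactor >= 0, a suppressed class actually contributes
    //   share + sFactor * (m.count - share) >= share >= share * gFactor,
    // so the bound never overestimates the true information loss.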

    /**
     * For subclasses.
     *
     * @return
     */
    protected DomainShare[] getShares() {
        return this.shares;
    }

    @Override
    protected void initializeInternal(final DataManager manager,
                                      final DataDefinition definition,
                                      final Data input,
                                      final GeneralizationHierarchy[] hierarchies,
                                      final ARXConfiguration config) {

        // Prepare weights
        super.initializeInternal(manager, definition, input, hierarchies, config);

        // Determine total number of tuples
        this.tuples = (double) super.getNumRecords(config, input);

        // Save domain shares
        this.shares = manager.getDomainShares();

        // Min and max
        double[] min = new double[getDimensions()];
        Arrays.fill(min, 0d);
        double[] max = new double[getDimensions()];
        Arrays.fill(max, 1d);
        super.setMin(min);
        super.setMax(max);
    }

    /**
     * Normalizes the aggregate.
     *
     * @param aggregate
     * @return
     */
    protected double normalizeAggregated(double aggregate) {
        double result = aggregate / tuples;
        result = result >= 0d ? result : 0d;
        return round(result);
    }

    /**
     * Normalizes the aggregate.
     *
     * @param aggregate
     * @param dimension
     * @return
     */
    protected double normalizeGeneralized(double aggregate, int dimension) {
        double min = gFactor * tuples / shares[dimension].getDomainSize();
        double max = tuples;
        double result = (aggregate - min) / (max - min);
        result = result >= 0d ? result : 0d;
        return round(result);
    }
}