/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.criteria;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXPopulationModel;
import org.deidentifier.arx.ARXSolverConfiguration;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.framework.check.groupify.HashGroupifyDistribution;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.risk.RiskModelPopulationUniqueness;
import org.deidentifier.arx.risk.RiskModelPopulationUniqueness.PopulationUniquenessModel;
/**
 * Privacy criterion that is fulfilled when the estimated fraction of population-unique
 * records does not exceed a given risk threshold. The estimate is obtained from a
 * {@link RiskModelPopulationUniqueness} using the configured statistical model.
 *
 * @author Fabian Prasser
 */
public class PopulationUniqueness extends RiskBasedCriterion {

    /** SVUID */
    private static final long         serialVersionUID = 618039085843721351L;

    /** The statistical model used for estimating population uniqueness */
    private PopulationUniquenessModel statisticalModel;

    /** The population model (a private clone of the model passed to the constructor) */
    private ARXPopulationModel        populationModel;

    /** The configuration handed to the risk model's solver */
    private ARXSolverConfiguration    solverConfig;

    /**
     * Creates a new instance of this criterion. Uses Dankar's method for estimating population uniqueness
     * and a default solver configuration (default accuracy 10e-6, default maximal number of iterations 1000).
     * This constructor will clone the population model, making further changes to it will not influence
     * the results.
     *
     * @param riskThreshold the threshold the estimated population uniqueness must not exceed
     * @param populationModel the population model; must not be null
     */
    public PopulationUniqueness(double riskThreshold, ARXPopulationModel populationModel) {
        this(riskThreshold, PopulationUniquenessModel.DANKAR, populationModel);
    }

    /**
     * Creates a new instance of this criterion. Uses Dankar's method for estimating population uniqueness
     * and the given solver configuration.
     * This constructor will clone the population model, making further changes to it will not influence
     * the results.
     *
     * @param riskThreshold the threshold the estimated population uniqueness must not exceed
     * @param populationModel the population model; must not be null
     * @param config the solver configuration
     */
    public PopulationUniqueness(double riskThreshold,
                                ARXPopulationModel populationModel,
                                ARXSolverConfiguration config) {
        this(riskThreshold, PopulationUniquenessModel.DANKAR, populationModel, config);
    }

    /**
     * Creates a new instance of this criterion. Uses the specified method for estimating population uniqueness
     * and a default solver configuration as returned by {@link ARXSolverConfiguration#create()}
     * (default accuracy 10e-6, default maximal number of iterations 1000).
     * This constructor will clone the population model, making further changes to it will not influence
     * the results.
     *
     * @param riskThreshold the threshold the estimated population uniqueness must not exceed
     * @param statisticalModel the estimator to use
     * @param populationModel the population model; must not be null
     */
    public PopulationUniqueness(double riskThreshold,
                                PopulationUniquenessModel statisticalModel,
                                ARXPopulationModel populationModel) {
        this(riskThreshold, statisticalModel, populationModel, ARXSolverConfiguration.create());
    }

    /**
     * Creates a new instance of this criterion. Uses the specified method for estimating population uniqueness
     * and the given solver configuration.
     * This constructor will clone the population model, making further changes to it will not influence
     * the results. The solver configuration is stored as is (it is not cloned).
     *
     * @param riskThreshold the threshold the estimated population uniqueness must not exceed
     * @param statisticalModel the estimator to use
     * @param populationModel the population model; must not be null
     * @param config the solver configuration
     */
    public PopulationUniqueness(double riskThreshold,
                                PopulationUniquenessModel statisticalModel,
                                ARXPopulationModel populationModel,
                                ARXSolverConfiguration config) {
        // NOTE(review): the two boolean flags are defined by RiskBasedCriterion; only the
        // ZAYATZ model sets the second one to true — confirm their meaning against the superclass
        super(false, statisticalModel == PopulationUniquenessModel.ZAYATZ, riskThreshold);
        this.statisticalModel = statisticalModel;
        this.populationModel = populationModel.clone();
        this.solverConfig = config;
    }

    /**
     * Creates a copy of this criterion with the same threshold, statistical model and
     * solver configuration. The population model is cloned again by the constructor,
     * the solver configuration instance is shared with the copy.
     */
    @Override
    public PopulationUniqueness clone() {
        return new PopulationUniqueness(this.getRiskThreshold(),
                                        this.getStatisticalModel(),
                                        this.getPopulationModel(),
                                        this.solverConfig);
    }

    /**
     * @return the population model held by this criterion (the internal instance, not a copy)
     */
    @Override
    public ARXPopulationModel getPopulationModel() {
        return populationModel;
    }

    /**
     * Returns the marketer risk threshold. For this criterion, this is simply the
     * configured risk threshold.
     *
     * @return the risk threshold of this criterion
     */
    public double getRiskThresholdMarketer() {
        // TODO: Risk is estimated different from the other models, here
        return getRiskThreshold();
    }

    /**
     * @return the statisticalModel
     */
    public PopulationUniquenessModel getStatisticalModel() {
        return statisticalModel;
    }

    /**
     * Initializes the criterion via the superclass and then migrates the population model,
     * if present, using the length of the generalized data.
     *
     * @param manager the data manager
     * @param config the ARX configuration
     */
    @Override
    @SuppressWarnings("deprecation")
    public void initialize(DataManager manager, ARXConfiguration config) {
        super.initialize(manager, config);

        // TODO: Needed for backwards compatibility of ARX 3.4.0 with previous versions
        if (this.populationModel != null) {
            this.populationModel.makeBackwardsCompatible(manager.getDataGeneralized().getDataLength());
        }
    }

    /**
     * @return always false, local recoding is not supported by this criterion
     */
    @Override
    public boolean isLocalRecodingSupported() {
        return false;
    }

    /**
     * Renders this criterion for a certificate: threshold, population size and estimator.
     *
     * @return the rendered element
     */
    @Override
    public ElementData render() {
        ElementData result = new ElementData("Population uniqueness");
        result.addProperty("Threshold", super.getRiskThreshold());
        result.addProperty("Population", this.populationModel.getPopulationSize());
        result.addProperty("Estimator", this.statisticalModel.toString());
        return result;
    }

    /**
     * @return a string of the form {@code (threshold)-population-uniqueness (model)}
     */
    @Override
    public String toString() {
        // Locale.ROOT keeps the lower-cased model name stable irrespective of the default
        // locale (e.g. a Turkish locale would otherwise map 'I' to a dotless 'ı')
        String model = statisticalModel.toString().toLowerCase(java.util.Locale.ROOT);
        return "(" + getRiskThreshold() + ")-population-uniqueness (" + model + ")";
    }

    /**
     * We currently assume that at any time, at least one statistical model converges.
     * This might not be the case, and 0 may be returned instead. That's why we only
     * accept estimates of 0, if the number of equivalence classes of size 1 in the sample is also zero.
     *
     * @param distribution the distribution of equivalence class sizes to check
     * @return true, if the estimate is positive and does not exceed the threshold, or if the
     *         estimate is zero and the sample contains no records in classes of size one
     */
    protected boolean isFulfilled(HashGroupifyDistribution distribution) {

        RiskModelPopulationUniqueness riskModel = new RiskModelPopulationUniqueness(this.populationModel,
                                                                                    distribution.getHistogram(),
                                                                                    this.solverConfig);

        // Dankar's method has a dedicated entry point, all other models share a generic one
        final double populationUniques;
        if (this.statisticalModel == PopulationUniquenessModel.DANKAR) {
            populationUniques = riskModel.getFractionOfUniqueTuplesDankar(false);
        } else {
            populationUniques = riskModel.getFractionOfUniqueTuples(this.statisticalModel);
        }

        // A positive estimate is compared against the threshold
        if (populationUniques > 0d) {
            return populationUniques <= getRiskThreshold();
        }

        // An estimate of zero is only trusted if the sample contains no uniques either;
        // anything else (negative, NaN) is rejected
        return populationUniques == 0d && distribution.getFractionOfRecordsInClassesOfSize(1) == 0d;
    }
}