/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.risk;
import java.io.Serializable;
import org.deidentifier.arx.ARXPopulationModel;
import org.deidentifier.arx.ARXSolverConfiguration;
import org.deidentifier.arx.common.WrappedBoolean;
import org.deidentifier.arx.common.WrappedInteger;
/**
* Class for risks based on population uniqueness. It implements Dankar et al.'s
* decision rule.
*
* @author Fabian Prasser
*/
public class RiskModelPopulationUniqueness extends RiskModelPopulation {
/**
* The statistical model used for computing Dankar's estimate.
*
* @author Fabian Prasser
*/
public static enum PopulationUniquenessModel implements Serializable {
PITMAN,
ZAYATZ,
SNB,
DANKAR,
}
/** Estimate */
private double numUniquesZayatz = -1d;
/** Estimate */
private double numUniquesSNB = -1d;
/** Estimate */
private double numUniquesPitman = -1d;
/** Estimate */
private double numUniquesDankar = -1d;
/** Model */
private PopulationUniquenessModel dankarModel = null;
/** Parameter */
private int numClassesOfSize1;
/** Parameter */
private double samplingFraction;
/** Parameter */
private ARXPopulationModel model;
/** Parameter */
private RiskModelHistogram histogram;
/** Parameter */
private ARXSolverConfiguration config;
/** Parameter */
private WrappedBoolean stop;
/**
* Creates a new instance
*
* @param model
* @param classes
* @param config
*/
public RiskModelPopulationUniqueness(ARXPopulationModel model,
RiskModelHistogram classes,
ARXSolverConfiguration config) {
this(model,
classes,
new WrappedBoolean(),
new WrappedInteger(),
config,
false);
}
/**
* Creates a new instance
*
* @param model
* @param histogram
* @param stop
* @param progress
* @param config
* @param precompute
*/
RiskModelPopulationUniqueness(ARXPopulationModel model,
RiskModelHistogram histogram,
WrappedBoolean stop,
WrappedInteger progress,
ARXSolverConfiguration config,
boolean precompute) {
super(histogram, model, stop, progress);
// Init
this.numClassesOfSize1 = (int) super.getNumClassesOfSize(1);
this.samplingFraction = super.getSamplingFraction();
this.model = model;
this.histogram = histogram;
this.config = config;
this.stop = stop;
// Handle cases where there are no sample uniques
if (numClassesOfSize1 == 0) {
numUniquesZayatz = 0d;
numUniquesSNB = 0d;
numUniquesPitman = 0d;
numUniquesDankar = 0d;
dankarModel = PopulationUniquenessModel.DANKAR;
progress.value = 100;
return;
}
// If precomputation (for interruptible builders)
if (precompute) {
// Estimate with Zayatz's model
getNumUniqueTuplesZayatz();
progress.value = 50;
// Estimate with Pitman's model
getNumUniqueTuplesPitman();
progress.value = 75;
// Estimate with SNB model
getNumUniqueTuplesSNB();
// Decision rule by Dankar et al.
getNumUniqueTuplesDankar();
progress.value = 100;
}
}
/**
* Estimated number of unique tuples in the population according to the
* given model
*/
public double getFractionOfUniqueTuples(PopulationUniquenessModel model) {
return getNumUniqueTuples(model) / super.getPopulationSize();
}
/**
* Estimated number of unique tuples in the population according to Dankar's
* decision rule
*/
public double getFractionOfUniqueTuplesDankar() {
return getFractionOfUniqueTuplesDankar(true);
}
/**
* Estimated number of unique tuples in the population according to Dankar's
* decision rule
* @param useZayatzAsFallback
*/
public double getFractionOfUniqueTuplesDankar(boolean useZayatzAsFallback) {
return getNumUniqueTuplesDankar(useZayatzAsFallback) / super.getPopulationSize();
}
/**
* Estimated number of unique tuples in the population according to Pitman's
* statistical model
*/
public double getFractionOfUniqueTuplesPitman() {
return getNumUniqueTuplesPitman() / super.getPopulationSize();
}
/**
* Estimated number of unique tuples in the population according to the SNB
* statistical model
*/
public double getFractionOfUniqueTuplesSNB() {
return getNumUniqueTuplesSNB() / super.getPopulationSize();
}
/**
* Estimated number of unique tuples in the population according to Zayatz's
* statistical model
*/
public double getFractionOfUniqueTuplesZayatz() {
return getNumUniqueTuplesZayatz() / super.getPopulationSize();
}
/**
* Estimated number of unique tuples in the population according to the
* given model
*/
public double getNumUniqueTuples(PopulationUniquenessModel model) {
switch (model) {
case ZAYATZ:
return getNumUniqueTuplesZayatz();
case PITMAN:
return getNumUniqueTuplesPitman();
case SNB:
return getNumUniqueTuplesSNB();
case DANKAR:
return getNumUniqueTuplesDankar();
}
throw new IllegalArgumentException("Unknown model");
}
/**
* Estimated number of unique tuples in the population according to Dankar's
* decision rule.
*/
public double getNumUniqueTuplesDankar() {
return getNumUniqueTuplesDankar(true);
}
/**
* Estimated number of unique tuples in the population according to Dankar's
* decision rule
*
* @param useZayatzAsFallback
*/
public double getNumUniqueTuplesDankar(boolean useZayatzAsFallback) {
if (numUniquesDankar == -1) {
if (this.numClassesOfSize1 == 0) {
numUniquesDankar = 0;
dankarModel = PopulationUniquenessModel.DANKAR;
} else {
// Decision rule by Dankar et al.
if (samplingFraction <= 0.1) {
getNumUniqueTuplesPitman();
if (isValid(numUniquesPitman)) {
numUniquesDankar = numUniquesPitman;
dankarModel = PopulationUniquenessModel.PITMAN;
} else if (useZayatzAsFallback) {
getNumUniqueTuplesZayatz();
numUniquesDankar = numUniquesZayatz;
dankarModel = PopulationUniquenessModel.ZAYATZ;
}
} else {
getNumUniqueTuplesSNB();
getNumUniqueTuplesZayatz();
if (isValid(numUniquesSNB)) {
if (numUniquesZayatz < numUniquesSNB) {
numUniquesDankar = numUniquesZayatz;
dankarModel = PopulationUniquenessModel.ZAYATZ;
} else {
numUniquesDankar = numUniquesSNB;
dankarModel = PopulationUniquenessModel.SNB;
}
} else {
numUniquesDankar = numUniquesZayatz;
dankarModel = PopulationUniquenessModel.ZAYATZ;
}
}
}
}
return isValid(numUniquesDankar) ? numUniquesDankar : 0d;
}
/**
* Estimated number of unique tuples in the population according to Pitman's
* statistical model
*/
public double getNumUniqueTuplesPitman() {
if (numUniquesPitman == -1) {
if (this.numClassesOfSize1 == 0) {
numUniquesPitman = 0;
} else {
numUniquesPitman = new ModelPitman(model,
histogram,
config,
stop).getNumUniques();
}
}
return isValid(numUniquesPitman) ? numUniquesPitman : 0d;
}
/**
* Estimated number of unique tuples in the population according to the SNB
* model
*/
public double getNumUniqueTuplesSNB() {
if (numUniquesSNB == -1) {
if (this.numClassesOfSize1 == 0) {
numUniquesSNB = 0;
} else {
numUniquesSNB = new ModelSNB(model,
histogram,
config,
stop).getNumUniques();
}
}
return isValid(numUniquesSNB) ? numUniquesSNB : 0d;
}
/**
* Estimated number of unique tuples in the population according to Zayatz's
* statistical model
*/
public double getNumUniqueTuplesZayatz() {
if (numUniquesZayatz == -1) {
if (this.numClassesOfSize1 == 0) {
numUniquesZayatz = 0;
} else {
numUniquesZayatz = new ModelZayatz(model,
histogram,
stop).getNumUniques();
}
}
return isValid(numUniquesZayatz) ? numUniquesZayatz : 0d;
}
/**
* Returns the statistical model, used by Dankar et al.'s decision rule for
* estimating population uniqueness
*/
public PopulationUniquenessModel getPopulationUniquenessModel() {
getNumUniqueTuplesDankar();
return dankarModel;
}
/**
* Returns whether the according estimate is available
*
* @return
*/
public boolean isAvailableEstimate(PopulationUniquenessModel model) {
return getNumUniqueTuples(model) != 0d || numClassesOfSize1 == 0;
}
/**
* Returns whether the according estimate is available
*
* @return
*/
public boolean isAvailableEstimateDankar() {
return getNumUniqueTuplesDankar() != 0d || numClassesOfSize1 == 0;
}
/**
* Returns whether the according estimate is available
*
* @return
*/
public boolean isAvailableEstimatePitman() {
return getNumUniqueTuplesPitman() != 0d || numClassesOfSize1 == 0;
}
/**
* Returns whether the according estimate is available
*
* @return
*/
public boolean isAvailableEstimateSNB() {
return getNumUniqueTuplesSNB() != 0d || numClassesOfSize1 == 0;
}
/**
* Returns whether the according estimate is available
*
* @return
*/
public boolean isAvailableEstimateZayatz() {
return getNumUniqueTuplesZayatz() != 0d || numClassesOfSize1 == 0;
}
/**
* Is an estimate valid?
*
* @param value
* @return
*/
private boolean isValid(double value) {
return !Double.isNaN(value) && value != 0d;
}
}