/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.risk;
import java.util.Set;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXPopulationModel;
import org.deidentifier.arx.ARXSolverConfiguration;
import org.deidentifier.arx.DataHandleInternal;
import org.deidentifier.arx.common.WrappedBoolean;
import org.deidentifier.arx.common.WrappedInteger;
/**
* A builder for risk estimates
*
* @author Fabian Prasser
* @author Maximilian Zitzmann
*/
public class RiskEstimateBuilder {
/** Fields */
private final ARXPopulationModel population;
/** Fields */
private final DataHandleInternal handle;
/** Fields */
private final Set<String> identifiers;
/** Equivalence classes */
private RiskModelHistogram classes;
/** Asynchronous computation */
private final WrappedBoolean stop;
/** Model */
private final ARXSolverConfiguration solverconfig;
/** Model */
private final WrappedInteger progress = new WrappedInteger();
/** Model */
private final ARXConfiguration arxconfig;
/**
* Creates a new instance
*
* @param population
* @param handle
* @param classes
*/
public RiskEstimateBuilder(ARXPopulationModel population,
DataHandleInternal handle,
RiskModelHistogram classes,
ARXConfiguration arxconfig) {
this(population, handle, null, classes, ARXSolverConfiguration.create(), arxconfig);
}
/**
* Creates a new instance
*
* @param population
* @param handle
* @param classes
* @param solverconfig
*/
public RiskEstimateBuilder(ARXPopulationModel population,
DataHandleInternal handle,
RiskModelHistogram classes,
ARXSolverConfiguration solverconfig,
ARXConfiguration arxconfig) {
this(population, handle, null, classes, solverconfig, arxconfig);
}
/**
* Creates a new instance
*
* @param population
* @param handle
* @param identifiers
*/
public RiskEstimateBuilder(ARXPopulationModel population,
DataHandleInternal handle,
Set<String> identifiers,
ARXConfiguration arxconfig) {
this(population,
handle,
identifiers,
(RiskModelHistogram) null,
ARXSolverConfiguration.create(), arxconfig);
}
/**
* Creates a new instance
*
* @param population
* @param handle
* @param identifiers
* @param solverconfig
*/
public RiskEstimateBuilder(ARXPopulationModel population,
DataHandleInternal handle,
Set<String> identifiers,
ARXSolverConfiguration solverconfig,
ARXConfiguration arxconfig) {
this(population, handle, identifiers, (RiskModelHistogram) null, solverconfig, arxconfig);
}
/**
* Creates a new instance
*
* @param population
* @param handle
* @param identifiers
* @param classes
* @param solverconfig
*/
private RiskEstimateBuilder(ARXPopulationModel population,
DataHandleInternal handle,
Set<String> identifiers,
RiskModelHistogram classes,
ARXSolverConfiguration solverconfig,
ARXConfiguration arxconfig) {
this.population = population;
this.handle = handle;
this.identifiers = identifiers;
this.classes = classes;
this.solverconfig = solverconfig;
this.arxconfig = arxconfig;
synchronized (this) {
stop = new WrappedBoolean();
}
}
/**
* Creates a new instance
*
* @param population
* @param handle
* @param identifiers
* @param solverconfig
*/
RiskEstimateBuilder(ARXPopulationModel population,
DataHandleInternal handle,
Set<String> identifiers,
WrappedBoolean stop,
ARXSolverConfiguration solverconfig,
ARXConfiguration arxconfig) {
this.population = population;
this.handle = handle;
this.identifiers = identifiers;
this.classes = null;
this.solverconfig = solverconfig;
this.arxconfig = arxconfig;
synchronized (this) {
this.stop = stop;
}
}
/**
* Returns a class providing access to an analysis of potential quasi-identifiers using
* the concepts of alpha distinction and alpha separation.
*
* @return the RiskModelAttributes data from risk analysis
*/
public RiskModelAttributes getAttributeRisks() {
progress.value = 0;
return new RiskModelAttributes(this.population, this.handle, this.identifiers, this.stop, progress, this.solverconfig, this.arxconfig);
}
/**
* Returns a model of the equivalence classes in this data set
*
* @return
*/
public RiskModelHistogram getEquivalenceClassModel() {
return getHistogram(1.0d);
}
/**
* Returns the a set of potential HIPAA identifiers. Values are matched with a
* confidence threshold of 50%
*
* @return
*/
public HIPAAIdentifierMatch[] getHIPAAIdentifiers() {
return new RiskModelHIPAASafeHarbor().getMatches(handle, 0.5d, stop);
}
/**
* Returns the a set of potential HIPAA identifiers. Values are matched with the
* given confidence threshold.
*
* @param threshold Confidence threshold
* @return
*/
public HIPAAIdentifierMatch[] getHIPAAIdentifiers(double threshold) {
return new RiskModelHIPAASafeHarbor().getMatches(handle, threshold, stop);
}
/**
* Returns an interruptible instance of this object.
*
* @return
*/
public RiskEstimateBuilderInterruptible getInterruptibleInstance() {
progress.value = 0;
return new RiskEstimateBuilderInterruptible(this);
}
/**
* Returns a class providing population-based uniqueness estimates
*
* @return
*/
public RiskModelPopulationUniqueness getPopulationBasedUniquenessRisk() {
progress.value = 0;
return new RiskModelPopulationUniqueness(population,
getHistogram(0.25),
stop,
progress,
solverconfig,
false);
}
/**
* Returns a class providing sample-based re-identification risk estimates
*
* @return
*/
public RiskModelSampleRisks getSampleBasedReidentificationRisk() {
progress.value = 0;
return new RiskModelSampleRisks(getEquivalenceClassModel(), arxconfig, handle.isAnonymous());
}
/**
* Returns a class representing the distribution of prosecutor risks in the sample
*
* @return
*/
public RiskModelSampleRiskDistribution getSampleBasedRiskDistribution() {
progress.value = 0;
return new RiskModelSampleRiskDistribution(getEquivalenceClassModel(), arxconfig, handle.isAnonymous());
}
/**
* Returns a risk summary
*
* @param threshold Acceptable highest probability of re-identification for a single record
* @return
*/
public RiskModelSampleSummary getSampleBasedRiskSummary(double threshold) {
progress.value = 0;
return new RiskModelSampleSummary(handle, identifiers, threshold, stop, progress);
}
/**
* Returns a class providing sample-based uniqueness estimates
*
* @return
*/
public RiskModelSampleUniqueness getSampleBasedUniquenessRisk() {
progress.value = 0;
return new RiskModelSampleUniqueness(getEquivalenceClassModel());
}
/**
* Returns a model of the equivalence classes in this data set
*
* @return
*/
private RiskModelHistogram getHistogram(double factor) {
synchronized (this) {
if (classes == null) {
progress.value = 0;
classes = new RiskModelHistogram(handle, identifiers, stop, progress, factor);
}
return classes;
}
}
/**
* Returns a class providing population-based uniqueness estimates
*
* @return
*/
RiskModelPopulationUniqueness getPopulationBasedUniquenessRiskInterruptible() {
progress.value = 0;
return new RiskModelPopulationUniqueness(population,
getHistogram(0.25),
stop,
progress,
solverconfig,
true);
}
/**
* Returns progress data, if available
*
* @return
*/
int getProgress() {
return this.progress.value;
}
/**
* Interrupts this instance
*/
void interrupt() {
synchronized (this) {
this.stop.value = true;
}
}
}