/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.examples;
import java.io.IOException;
import org.deidentifier.arx.ARXAnonymizer;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXPopulationModel;
import org.deidentifier.arx.ARXPopulationModel.Region;
import org.deidentifier.arx.ARXResult;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.AttributeType.Hierarchy.DefaultHierarchy;
import org.deidentifier.arx.Data;
import org.deidentifier.arx.Data.DefaultData;
import org.deidentifier.arx.DataHandle;
import org.deidentifier.arx.criteria.AverageReidentificationRisk;
import org.deidentifier.arx.risk.RiskEstimateBuilder;
import org.deidentifier.arx.risk.RiskModelAttributes;
import org.deidentifier.arx.risk.RiskModelAttributes.QuasiIdentifierRisk;
import org.deidentifier.arx.risk.RiskModelHistogram;
import org.deidentifier.arx.risk.RiskModelPopulationUniqueness;
import org.deidentifier.arx.risk.RiskModelPopulationUniqueness.PopulationUniquenessModel;
import org.deidentifier.arx.risk.RiskModelSampleRisks;
import org.deidentifier.arx.risk.RiskModelSampleUniqueness;
/**
* This class implements an example of how to perform risk analyses with the API
*
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class Example29 extends Example {

    /**
     * Entry point. Builds a small in-memory dataset with generalization
     * hierarchies for its three attributes, prints a risk analysis of the
     * input, anonymizes the data and prints a risk analysis of the output.
     *
     * @param args the arguments (not used)
     * @throws IOException if an I/O error is propagated from the ARX API calls made here
     */
    public static void main(String[] args) throws IOException {

        // Define data (first row is the header)
        DefaultData data = Data.create();
        data.add("age", "gender", "zipcode");
        data.add("45", "female", "81675");
        data.add("34", "male", "81667");
        data.add("66", "male", "81925");
        data.add("70", "female", "81931");
        data.add("34", "female", "81931");
        data.add("70", "male", "81931");
        data.add("45", "male", "81931");

        // Define hierarchies: each row lists a value followed by its
        // increasingly general replacements
        DefaultHierarchy age = Hierarchy.create();
        age.add("34", "<50", "*");
        age.add("45", "<50", "*");
        age.add("66", ">=50", "*");
        age.add("70", ">=50", "*");

        DefaultHierarchy gender = Hierarchy.create();
        gender.add("male", "*");
        gender.add("female", "*");

        // Only excerpts for readability
        DefaultHierarchy zipcode = Hierarchy.create();
        zipcode.add("81667", "8166*", "816**", "81***", "8****", "*****");
        zipcode.add("81675", "8167*", "816**", "81***", "8****", "*****");
        zipcode.add("81925", "8192*", "819**", "81***", "8****", "*****");
        zipcode.add("81931", "8193*", "819**", "81***", "8****", "*****");

        // Associate every attribute with its hierarchy
        data.getDefinition().setAttributeType("age", age);
        data.getDefinition().setAttributeType("gender", gender);
        data.getDefinition().setAttributeType("zipcode", zipcode);

        // Perform risk analysis on the input
        System.out.println("\n - Input data");
        print(data.getHandle());
        System.out.println("\n - Quasi-identifiers sorted by risk:");
        analyzeAttributes(data.getHandle());
        System.out.println("\n - Risk analysis:");
        analyzeData(data.getHandle());

        // Create an instance of the anonymizer
        ARXAnonymizer anonymizer = new ARXAnonymizer();
        ARXConfiguration config = ARXConfiguration.create();
        config.addPrivacyModel(new AverageReidentificationRisk(0.5d));
        // NOTE(review): presumably 1d permits suppressing up to 100% of the
        // records - confirm against the ARXConfiguration documentation
        config.setMaxOutliers(1d);

        // Anonymize
        ARXResult result = anonymizer.anonymize(data, config);

        // Perform risk analysis on the output
        System.out.println("\n - Output data");
        print(result.getOutput());
        System.out.println("\n - Risk analysis:");
        analyzeData(result.getOutput());
    }

    /**
     * Prints distinction, separation and identifier for every attribute risk
     * entry reported by the risk estimator of the given handle.
     *
     * @param handle the data handle to analyze
     */
    private static void analyzeAttributes(DataHandle handle) {
        RiskModelAttributes riskmodel = createRiskEstimator(handle).getAttributeRisks();
        for (QuasiIdentifierRisk risk : riskmodel.getAttributeRisks()) {
            System.out.println(" * Distinction: " + risk.getDistinction() + ", Separation: " + risk.getSeparation() + ", Identifier: " + risk.getIdentifier());
        }
    }

    /**
     * Prints equivalence class statistics as well as sample-based and
     * population-based risk estimates for the given handle.
     *
     * @param handle the data handle to analyze
     */
    private static void analyzeData(DataHandle handle) {

        // Obtain all models up front, before anything is printed
        RiskEstimateBuilder builder = createRiskEstimator(handle);
        RiskModelHistogram classes = builder.getEquivalenceClassModel();
        RiskModelSampleRisks sampleReidentificationRisk = builder.getSampleBasedReidentificationRisk();
        RiskModelSampleUniqueness sampleUniqueness = builder.getSampleBasedUniquenessRisk();
        RiskModelPopulationUniqueness populationUniqueness = builder.getPopulationBasedUniquenessRisk();
        int[] histogram = classes.getHistogram();

        System.out.println(" * Equivalence classes:");
        System.out.println(" - Average size: " + classes.getAvgClassSize());
        System.out.println(" - Num classes : " + classes.getNumClasses());
        System.out.println(" - Histogram :");
        // The array is read as consecutive (size, count) pairs; the bound on
        // i + 1 prevents reading past the end should a trailing entry be unpaired
        for (int i = 0; i + 1 < histogram.length; i += 2) {
            System.out.println(" [Size: " + histogram[i] + ", count: " + histogram[i + 1] + "]");
        }

        System.out.println(" * Risk estimates:");
        System.out.println(" - Sample-based measures");
        System.out.println(" + Average risk : " + sampleReidentificationRisk.getAverageRisk());
        System.out.println(" + Lowest risk : " + sampleReidentificationRisk.getLowestRisk());
        System.out.println(" + Tuples affected : " + sampleReidentificationRisk.getFractionOfTuplesAffectedByLowestRisk());
        System.out.println(" + Highest risk : " + sampleReidentificationRisk.getHighestRisk());
        System.out.println(" + Tuples affected : " + sampleReidentificationRisk.getFractionOfTuplesAffectedByHighestRisk());
        System.out.println(" + Sample uniqueness: " + sampleUniqueness.getFractionOfUniqueTuples());
        System.out.println(" - Population-based measures");
        System.out.println(" + Population uniqueness (Zayatz): " + populationUniqueness.getFractionOfUniqueTuples(PopulationUniquenessModel.ZAYATZ));
    }

    /**
     * Creates a risk estimator for the given handle, using a population model
     * created for {@link Region#USA}.
     *
     * @param handle the data handle to analyze
     * @return the risk estimate builder obtained from the handle
     */
    private static RiskEstimateBuilder createRiskEstimator(DataHandle handle) {
        ARXPopulationModel populationmodel = ARXPopulationModel.create(Region.USA);
        return handle.getRiskEstimator(populationmodel);
    }
}