/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx.examples; import java.io.IOException; import org.deidentifier.arx.ARXAnonymizer; import org.deidentifier.arx.ARXConfiguration; import org.deidentifier.arx.ARXPopulationModel; import org.deidentifier.arx.ARXPopulationModel.Region; import org.deidentifier.arx.ARXResult; import org.deidentifier.arx.AttributeType.Hierarchy; import org.deidentifier.arx.AttributeType.Hierarchy.DefaultHierarchy; import org.deidentifier.arx.Data; import org.deidentifier.arx.Data.DefaultData; import org.deidentifier.arx.DataHandle; import org.deidentifier.arx.criteria.AverageReidentificationRisk; import org.deidentifier.arx.risk.RiskEstimateBuilder; import org.deidentifier.arx.risk.RiskModelAttributes; import org.deidentifier.arx.risk.RiskModelAttributes.QuasiIdentifierRisk; import org.deidentifier.arx.risk.RiskModelHistogram; import org.deidentifier.arx.risk.RiskModelPopulationUniqueness; import org.deidentifier.arx.risk.RiskModelPopulationUniqueness.PopulationUniquenessModel; import org.deidentifier.arx.risk.RiskModelSampleRisks; import org.deidentifier.arx.risk.RiskModelSampleUniqueness; /** * This class implements an example of how to perform risk analyses with the API * * @author Fabian Prasser * @author Florian Kohlmayer */ public class Example29 extends Example { /** * Entry point. * * @param args the arguments */ public static void main(String[] args) throws IOException { // Define data DefaultData data = Data.create(); data.add("age", "gender", "zipcode"); data.add("45", "female", "81675"); data.add("34", "male", "81667"); data.add("66", "male", "81925"); data.add("70", "female", "81931"); data.add("34", "female", "81931"); data.add("70", "male", "81931"); data.add("45", "male", "81931"); // Define hierarchies DefaultHierarchy age = Hierarchy.create(); age.add("34", "<50", "*"); age.add("45", "<50", "*"); age.add("66", ">=50", "*"); age.add("70", ">=50", "*"); DefaultHierarchy gender = Hierarchy.create(); gender.add("male", "*"); gender.add("female", "*"); // Only excerpts for readability DefaultHierarchy zipcode = Hierarchy.create(); zipcode.add("81667", "8166*", "816**", "81***", "8****", "*****"); zipcode.add("81675", "8167*", "816**", "81***", "8****", "*****"); zipcode.add("81925", "8192*", "819**", "81***", "8****", "*****"); zipcode.add("81931", "8193*", "819**", "81***", "8****", "*****"); data.getDefinition().setAttributeType("age", age); data.getDefinition().setAttributeType("gender", gender); data.getDefinition().setAttributeType("zipcode", zipcode); // Perform risk analysis System.out.println("\n - Input data"); print(data.getHandle()); System.out.println("\n - Quasi-identifiers sorted by risk:"); analyzeAttributes(data.getHandle()); System.out.println("\n - Risk analysis:"); analyzeData(data.getHandle()); // Create an instance of the anonymizer ARXAnonymizer anonymizer = new ARXAnonymizer(); ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new AverageReidentificationRisk(0.5d)); config.setMaxOutliers(1d); // Anonymize ARXResult result = anonymizer.anonymize(data, config); // Perform risk analysis System.out.println("\n - Output data"); print(result.getOutput()); System.out.println("\n - Risk analysis:"); analyzeData(result.getOutput()); } /** * Perform risk analysis * @param handle */ private static void analyzeAttributes(DataHandle handle) { ARXPopulationModel populationmodel = ARXPopulationModel.create(Region.USA); RiskEstimateBuilder builder = handle.getRiskEstimator(populationmodel); RiskModelAttributes riskmodel = builder.getAttributeRisks(); for (QuasiIdentifierRisk risk : riskmodel.getAttributeRisks()) { System.out.println(" * Distinction: " + risk.getDistinction() + ", Separation: " + risk.getSeparation() + ", Identifier: " + risk.getIdentifier()); } } /** * Perform risk analysis * @param handle */ private static void analyzeData(DataHandle handle) { ARXPopulationModel populationmodel = ARXPopulationModel.create(Region.USA); RiskEstimateBuilder builder = handle.getRiskEstimator(populationmodel); RiskModelHistogram classes = builder.getEquivalenceClassModel(); RiskModelSampleRisks sampleReidentifiationRisk = builder.getSampleBasedReidentificationRisk(); RiskModelSampleUniqueness sampleUniqueness = builder.getSampleBasedUniquenessRisk(); RiskModelPopulationUniqueness populationUniqueness = builder.getPopulationBasedUniquenessRisk(); int[] histogram = classes.getHistogram(); System.out.println(" * Equivalence classes:"); System.out.println(" - Average size: " + classes.getAvgClassSize()); System.out.println(" - Num classes : " + classes.getNumClasses()); System.out.println(" - Histogram :"); for (int i = 0; i < histogram.length; i += 2) { System.out.println(" [Size: " + histogram[i] + ", count: " + histogram[i + 1] + "]"); } System.out.println(" * Risk estimates:"); System.out.println(" - Sample-based measures"); System.out.println(" + Average risk : " + sampleReidentifiationRisk.getAverageRisk()); System.out.println(" + Lowest risk : " + sampleReidentifiationRisk.getLowestRisk()); System.out.println(" + Tuples affected : " + sampleReidentifiationRisk.getFractionOfTuplesAffectedByLowestRisk()); System.out.println(" + Highest risk : " + sampleReidentifiationRisk.getHighestRisk()); System.out.println(" + Tuples affected : " + sampleReidentifiationRisk.getFractionOfTuplesAffectedByHighestRisk()); System.out.println(" + Sample uniqueness: " + sampleUniqueness.getFractionOfUniqueTuples()); System.out.println(" - Population-based measures"); System.out.println(" + Population unqiueness (Zayatz): " + populationUniqueness.getFractionOfUniqueTuples(PopulationUniquenessModel.ZAYATZ)); } }