/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx.test; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.File; import java.io.IOException; import java.util.Arrays; import java.util.HashSet; import org.deidentifier.arx.ARXAnonymizer; import org.deidentifier.arx.ARXConfiguration; import org.deidentifier.arx.ARXResult; import org.deidentifier.arx.AttributeType; import org.deidentifier.arx.AttributeType.Hierarchy; import org.deidentifier.arx.AttributeType.Hierarchy.DefaultHierarchy; import org.deidentifier.arx.Data; import org.deidentifier.arx.Data.DefaultData; import org.deidentifier.arx.DataSubset; import org.deidentifier.arx.criteria.DPresence; import org.deidentifier.arx.criteria.DistinctLDiversity; import org.deidentifier.arx.criteria.EntropyLDiversity; import org.deidentifier.arx.criteria.EqualDistanceTCloseness; import org.deidentifier.arx.criteria.HierarchicalDistanceTCloseness; import org.deidentifier.arx.criteria.KAnonymity; import org.deidentifier.arx.criteria.RecursiveCLDiversity; import org.junit.Assert; import org.junit.Before; import org.junit.Test; /** * Test for data transformations. * * @author Fabian Prasser * @author Florian Kohlmayer */ public class TestAnonymization extends AbstractTest { @Override @Before public void setUp() { super.setUp(); } /** * Performs a test * * @throws IOException */ @Test public void testAllAttributesIdentifying() throws IOException { try { provider.createDataDefinition(); final Data data = provider.getData(); data.getDefinition().setAttributeType("age", AttributeType.IDENTIFYING_ATTRIBUTE); data.getDefinition().setAttributeType("gender", AttributeType.IDENTIFYING_ATTRIBUTE); data.getDefinition().setAttributeType("zipcode", AttributeType.IDENTIFYING_ATTRIBUTE); final ARXAnonymizer anonymizer = new ARXAnonymizer(); final ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(2)); config.setMaxOutliers(0d); anonymizer.anonymize(provider.getData(), config); } catch (final IllegalArgumentException e) { return; } Assert.fail(); } /** * Performs a test * * @throws IOException */ @Test public void testAllAttributesInsensitive() throws IOException { try { provider.createDataDefinition(); final Data data = provider.getData(); data.getDefinition().setAttributeType("age", AttributeType.INSENSITIVE_ATTRIBUTE); data.getDefinition().setAttributeType("gender", AttributeType.INSENSITIVE_ATTRIBUTE); data.getDefinition().setAttributeType("zipcode", AttributeType.INSENSITIVE_ATTRIBUTE); final ARXAnonymizer anonymizer = new ARXAnonymizer(); final ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(2)); config.setMaxOutliers(0d); anonymizer.anonymize(provider.getData(), config); } catch (final IllegalArgumentException e) { return; } Assert.fail(); } /** * Performs a test * * @throws IOException */ @Test public void testAllAttributesSensitive() throws IOException { try { final ARXAnonymizer anonymizer = new ARXAnonymizer(); provider.createDataDefinition(); final Data data = provider.getData(); data.getDefinition().setAttributeType("age", AttributeType.SENSITIVE_ATTRIBUTE); data.getDefinition().setAttributeType("gender", AttributeType.SENSITIVE_ATTRIBUTE); data.getDefinition().setAttributeType("zipcode", AttributeType.SENSITIVE_ATTRIBUTE); final ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(2)); config.setMaxOutliers(-0.2d); anonymizer.anonymize(provider.getData(), config); } catch (final IllegalArgumentException e) { return; } Assert.fail(); } /** * Performs a test * * @throws IOException */ @Test public void testDPresenceWithoutOutliers() throws IOException { // Example taken from the d-presence paper // Define Public Data P final DefaultData data = Data.create(); data.add("identifier", "name", "zip", "age", "nationality", "sen"); data.add("a", "Alice", "47906", "35", "USA", "0"); // 0 data.add("b", "Bob", "47903", "59", "Canada", "1"); // 1 data.add("c", "Christine", "47906", "42", "USA", "1"); // 2 data.add("d", "Dirk", "47630", "18", "Brazil", "0"); // 3 data.add("e", "Eunice", "47630", "22", "Brazil", "0"); // 4 data.add("f", "Frank", "47633", "63", "Peru", "1"); // 5 data.add("g", "Gail", "48973", "33", "Spain", "0"); // 6 data.add("h", "Harry", "48972", "47", "Bulgaria", "1"); // 7 data.add("i", "Iris", "48970", "52", "France", "1"); // 8 final HashSet<Integer> indices = new HashSet<Integer>(); indices.add(1); indices.add(2); indices.add(5); indices.add(7); indices.add(8); final DataSubset subset = DataSubset.create(data, indices); // Define hierarchies final DefaultHierarchy age = Hierarchy.create(); age.add("18", "1*", "<=40", "*"); age.add("22", "2*", "<=40", "*"); age.add("33", "3*", "<=40", "*"); age.add("35", "3*", "<=40", "*"); age.add("42", "4*", ">40", "*"); age.add("47", "4*", ">40", "*"); age.add("52", "5*", ">40", "*"); age.add("59", "5*", ">40", "*"); age.add("63", "6*", ">40", "*"); final DefaultHierarchy nationality = Hierarchy.create(); nationality.add("Canada", "N. America", "America", "*"); nationality.add("USA", "N. America", "America", "*"); nationality.add("Peru", "S. America", "America", "*"); nationality.add("Brazil", "S. America", "America", "*"); nationality.add("Bulgaria", "E. Europe", "Europe", "*"); nationality.add("France", "W. Europe", "Europe", "*"); nationality.add("Spain", "W. Europe", "Europe", "*"); final DefaultHierarchy zip = Hierarchy.create(); zip.add("47630", "4763*", "476*", "47*", "4*", "*"); zip.add("47633", "4763*", "476*", "47*", "4*", "*"); zip.add("47903", "4790*", "479*", "47*", "4*", "*"); zip.add("47906", "4790*", "479*", "47*", "4*", "*"); zip.add("48970", "4897*", "489*", "48*", "4*", "*"); zip.add("48972", "4897*", "489*", "48*", "4*", "*"); zip.add("48973", "4897*", "489*", "48*", "4*", "*"); // Set data attribute types data.getDefinition().setAttributeType("identifier", AttributeType.IDENTIFYING_ATTRIBUTE); data.getDefinition().setAttributeType("name", AttributeType.IDENTIFYING_ATTRIBUTE); data.getDefinition().setAttributeType("zip", zip); data.getDefinition().setAttributeType("age", age); data.getDefinition().setAttributeType("nationality", nationality); data.getDefinition().setAttributeType("sen", AttributeType.INSENSITIVE_ATTRIBUTE); // Create an instance of the anonymizer final ARXAnonymizer anonymizer = new ARXAnonymizer(); final ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(2)); config.addPrivacyModel(new DPresence(1d / 2d, 2d / 3d, subset)); config.setMaxOutliers(0d); config.setQualityModel(org.deidentifier.arx.metric.Metric.createPrecisionMetric()); final String[][] result = resultToArray(anonymizer.anonymize(data, config)); // TODO: check if result is correct! final String[][] expected = { { "identifier", "name", "zip", "age", "nationality", "sen" }, { "*", "*", "47*", "*", "America", "0" }, { "*", "*", "47*", "*", "America", "1" }, { "*", "*", "47*", "*", "America", "1" }, { "*", "*", "47*", "*", "America", "0" }, { "*", "*", "47*", "*", "America", "0" }, { "*", "*", "47*", "*", "America", "1" }, { "*", "*", "48*", "*", "Europe", "0" }, { "*", "*", "48*", "*", "Europe", "1" }, { "*", "*", "48*", "*", "Europe", "1" } }; assertTrue(Arrays.deepEquals(result, expected)); } /** * Performs a test * * @throws IllegalArgumentException * @throws IOException */ @Test public void testHierarchyWithHeightOne() throws IllegalArgumentException, IOException { provider.createDataDefinitionWithHeightOne(); final ARXAnonymizer anonymizer = new ARXAnonymizer(); final ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(2)); config.setMaxOutliers(0d); ARXResult result = anonymizer.anonymize(provider.getData(), config); assertFalse(result.isResultAvailable()); } /** * Performs a test * * @throws IOException */ @Test public void testKAnonymizationWithoutOutliers() throws IOException { provider.createDataDefinition(); final ARXAnonymizer anonymizer = new ARXAnonymizer(); final ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(2)); config.setMaxOutliers(0d); final String[][] result = resultToArray(anonymizer.anonymize(provider.getData(), config)); final String[][] expected = { { "age", "gender", "zipcode" }, { "<50", "*", "816**" }, { "<50", "*", "816**" }, { ">=50", "*", "819**" }, { ">=50", "*", "819**" }, { "<50", "*", "819**" }, { ">=50", "*", "819**" }, { "<50", "*", "819**" } }; assertTrue(Arrays.deepEquals(result, expected)); } /** * Performs a test * * @throws IOException */ @Test public void testLDiversityDistinctWithoutOutliers() throws IOException { provider.createDataDefinition(); final Data data = provider.getData(); data.getDefinition().setAttributeType("age", AttributeType.SENSITIVE_ATTRIBUTE); final ARXAnonymizer anonymizer = new ARXAnonymizer(); final ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new DistinctLDiversity("age", 2)); config.setMaxOutliers(0d); final String[][] result = resultToArray(anonymizer.anonymize(data, config)); // TODO: check if result is correct! final String[][] expected = { { "age", "gender", "zipcode" }, { "34", "male", "81***" }, { "45", "female", "81***" }, { "66", "male", "81***" }, { "70", "female", "81***" }, { "34", "female", "81***" }, { "70", "male", "81***" }, { "45", "male", "81***" } }; assertTrue(Arrays.deepEquals(result, expected)); } /** * Performs a test * * @throws IOException */ @Test public void testLDiversityEntropyWithoutOutliers() throws IOException { provider.createDataDefinition(); final Data data = provider.getData(); data.getDefinition().setAttributeType("age", AttributeType.SENSITIVE_ATTRIBUTE); final ARXAnonymizer anonymizer = new ARXAnonymizer(); final ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new EntropyLDiversity("age", 2)); config.setMaxOutliers(0d); final String[][] result = resultToArray(anonymizer.anonymize(data, config)); // TODO: check if result is correct! final String[][] expected = { { "age", "gender", "zipcode" }, { "34", "male", "81***" }, { "45", "female", "81***" }, { "66", "male", "81***" }, { "70", "female", "81***" }, { "34", "female", "81***" }, { "70", "male", "81***" }, { "45", "male", "81***" } }; assertTrue(Arrays.deepEquals(result, expected)); } /** * Performs a test * * @throws IOException */ @Test public void testLDiversityWithoutOutliers() throws IOException { provider.createDataDefinition(); final Data data = provider.getData(); data.getDefinition().setAttributeType("age", AttributeType.SENSITIVE_ATTRIBUTE); final ARXAnonymizer anonymizer = new ARXAnonymizer(); final ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new RecursiveCLDiversity("age", 3.0d, 2)); config.setMaxOutliers(0d); final String[][] result = resultToArray(anonymizer.anonymize(data, config)); // TODO: check if result is correct! final String[][] expected = { { "age", "gender", "zipcode" }, { "34", "male", "81***" }, { "45", "female", "81***" }, { "66", "male", "81***" }, { "70", "female", "81***" }, { "34", "female", "81***" }, { "70", "male", "81***" }, { "45", "male", "81***" } }; assertTrue(Arrays.deepEquals(result, expected)); } /** * Performs a test * * @throws IOException */ @Test public void testMoreThanOneAttributeSensitive() throws IOException { try { final ARXAnonymizer anonymizer = new ARXAnonymizer(); provider.createDataDefinition(); final Data data = provider.getData(); data.getDefinition().setAttributeType("gender", AttributeType.SENSITIVE_ATTRIBUTE); data.getDefinition().setAttributeType("zipcode", AttributeType.SENSITIVE_ATTRIBUTE); final ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(2)); config.setMaxOutliers(0d); anonymizer.anonymize(data, config); } catch (final IllegalArgumentException e) { return; } Assert.fail(); } /** * Performs a test * * @throws IOException */ @Test public void testMultipleUsesOfDataDefinition() throws IOException { provider.createDataDefinition(); final Data data = provider.getData(); final ARXAnonymizer anonymizer = new ARXAnonymizer(); ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(2)); config.setMaxOutliers(0d); final String[][] result = resultToArray(anonymizer.anonymize(data, config)); data.getHandle().release(); config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(3)); config.setMaxOutliers(0d); final String[][] result3 = resultToArray(anonymizer.anonymize(data, config)); data.getHandle().release(); config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(2)); config.setMaxOutliers(0d); final String[][] result2 = resultToArray(anonymizer.anonymize(data, config)); data.getHandle().release(); final String[][] expected = { { "age", "gender", "zipcode" }, { "<50", "*", "816**" }, { "<50", "*", "816**" }, { ">=50", "*", "819**" }, { ">=50", "*", "819**" }, { "<50", "*", "819**" }, { ">=50", "*", "819**" }, { "<50", "*", "819**" } }; final String[][] expected2 = { { "age", "gender", "zipcode" }, { "*", "male", "81***" }, { "*", "female", "81***" }, { "*", "male", "81***" }, { "*", "female", "81***" }, { "*", "female", "81***" }, { "*", "male", "81***" }, { "*", "male", "81***" } }; assertTrue(Arrays.deepEquals(result, expected)); assertTrue(Arrays.deepEquals(result3, expected2)); assertTrue(Arrays.deepEquals(result2, expected)); assertTrue(Arrays.deepEquals(result, result2)); } /** * Performs a test * * @throws IOException */ @Test public void testSaveData() throws IOException { final Data data = provider.data; data.getHandle().save(new File("junit_test_data.csv"), ';'); } /** * Performs a test * * @throws IOException */ @Test public void testSaveHierarchy() throws IOException { final Hierarchy hier = provider.age; hier.save(new File("junit_test_hierarchy_age.csv"), ';'); } /** * Performs a test * * @throws IOException */ @Test public void testTClosenessEqualWithoutOutliers() throws IOException { // Define data final DefaultData data = Data.create(); data.add("zipcode", "age", "disease"); data.add("47677", "29", "gastric ulcer"); data.add("47602", "22", "gastritis"); data.add("47678", "27", "stomach cancer"); data.add("47905", "43", "gastritis"); data.add("47909", "52", "flu"); data.add("47906", "47", "bronchitis"); data.add("47605", "30", "bronchitis"); data.add("47673", "36", "pneumonia"); data.add("47607", "32", "stomach cancer"); // Define hierarchies final DefaultHierarchy age = Hierarchy.create(); age.add("29", "<=40", "*"); age.add("22", "<=40", "*"); age.add("27", "<=40", "*"); age.add("43", ">40", "*"); age.add("52", ">40", "*"); age.add("47", ">40", "*"); age.add("30", "<=40", "*"); age.add("36", "<=40", "*"); age.add("32", "<=40", "*"); // Only excerpts for readability final DefaultHierarchy zipcode = Hierarchy.create(); zipcode.add("47677", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47602", "4760*", "476**", "47***", "4****", "*****"); zipcode.add("47678", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47905", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47909", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47906", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47605", "4760*", "476**", "47***", "4****", "*****"); zipcode.add("47673", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47607", "4760*", "476**", "47***", "4****", "*****"); data.getDefinition().setAttributeType("age", age); data.getDefinition().setAttributeType("zipcode", zipcode); data.getDefinition().setAttributeType("disease", AttributeType.SENSITIVE_ATTRIBUTE); final ARXAnonymizer anonymizer = new ARXAnonymizer(); ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(2)); config.addPrivacyModel(new EqualDistanceTCloseness("disease", 0.6d)); config.setMaxOutliers(0d); final String[][] result = resultToArray(anonymizer.anonymize(data, config)); // TODO: check if result is correct! final String[][] expected = { { "zipcode", "age", "disease" }, { "4767*", "<=40", "gastric ulcer" }, { "4760*", "<=40", "gastritis" }, { "4767*", "<=40", "stomach cancer" }, { "4790*", ">40", "gastritis" }, { "4790*", ">40", "flu" }, { "4790*", ">40", "bronchitis" }, { "4760*", "<=40", "bronchitis" }, { "4767*", "<=40", "pneumonia" }, { "4760*", "<=40", "stomach cancer" } }; assertTrue(Arrays.deepEquals(result, expected)); } /** * Performs a test * * @throws IOException */ @Test public void testTClosenessHierarchicalWithoutOutliers() throws IOException { // Define data final DefaultData data = Data.create(); data.add("zipcode", "age", "disease"); data.add("47677", "29", "gastric ulcer"); data.add("47602", "22", "gastritis"); data.add("47678", "27", "stomach cancer"); data.add("47905", "43", "gastritis"); data.add("47909", "52", "flu"); data.add("47906", "47", "bronchitis"); data.add("47605", "30", "bronchitis"); data.add("47673", "36", "pneumonia"); data.add("47607", "32", "stomach cancer"); // Define hierarchies final DefaultHierarchy age = Hierarchy.create(); age.add("29", "<=40", "*"); age.add("22", "<=40", "*"); age.add("27", "<=40", "*"); age.add("43", ">40", "*"); age.add("52", ">40", "*"); age.add("47", ">40", "*"); age.add("30", "<=40", "*"); age.add("36", "<=40", "*"); age.add("32", "<=40", "*"); // Only excerpts for readability final DefaultHierarchy zipcode = Hierarchy.create(); zipcode.add("47677", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47602", "4760*", "476**", "47***", "4****", "*****"); zipcode.add("47678", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47905", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47909", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47906", "4790*", "479**", "47***", "4****", "*****"); zipcode.add("47605", "4760*", "476**", "47***", "4****", "*****"); zipcode.add("47673", "4767*", "476**", "47***", "4****", "*****"); zipcode.add("47607", "4760*", "476**", "47***", "4****", "*****"); // Define sensitive value hierarchy final DefaultHierarchy disease = Hierarchy.create(); disease.add("flu", "respiratory infection", "vascular lung disease", "respiratory&digestive system disease"); disease.add("pneumonia", "respiratory infection", "vascular lung disease", "respiratory&digestive system disease"); disease.add("bronchitis", "respiratory infection", "vascular lung disease", "respiratory&digestive system disease"); disease.add("pulmonary edema", "vascular lung disease", "vascular lung disease", "respiratory&digestive system disease"); disease.add("pulmonary embolism", "vascular lung disease", "vascular lung disease", "respiratory&digestive system disease"); disease.add("gastric ulcer", "stomach disease", "digestive system disease", "respiratory&digestive system disease"); disease.add("stomach cancer", "stomach disease", "digestive system disease", "respiratory&digestive system disease"); disease.add("gastritis", "stomach disease", "digestive system disease", "respiratory&digestive system disease"); disease.add("colitis", "colon disease", "digestive system disease", "respiratory&digestive system disease"); disease.add("colon cancer", "colon disease", "digestive system disease", "respiratory&digestive system disease"); data.getDefinition().setAttributeType("age", age); data.getDefinition().setAttributeType("zipcode", zipcode); data.getDefinition().setAttributeType("disease", AttributeType.SENSITIVE_ATTRIBUTE); final ARXAnonymizer anonymizer = new ARXAnonymizer(); ARXConfiguration config = ARXConfiguration.create(); config.addPrivacyModel(new KAnonymity(2)); config.addPrivacyModel(new HierarchicalDistanceTCloseness("disease", 0.4d, disease)); config.setMaxOutliers(0d); final String[][] result = resultToArray(anonymizer.anonymize(data, config)); // TODO: check if result is correct! final String[][] expected = { { "zipcode", "age", "disease" }, { "4767*", "<=40", "gastric ulcer" }, { "4760*", "<=40", "gastritis" }, { "4767*", "<=40", "stomach cancer" }, { "4790*", ">40", "gastritis" }, { "4790*", ">40", "flu" }, { "4790*", ">40", "bronchitis" }, { "4760*", "<=40", "bronchitis" }, { "4767*", "<=40", "pneumonia" }, { "4760*", "<=40", "stomach cancer" } }; assertTrue(Arrays.deepEquals(result, expected)); } }