/*
 * ARX: Powerful Data Anonymization
 * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.deidentifier.arx.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.deidentifier.arx.ARXAnonymizer;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXResult;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.AttributeType.MicroAggregationFunction;
import org.deidentifier.arx.Data;
import org.deidentifier.arx.DataHandle;
import org.deidentifier.arx.DataType;
import org.deidentifier.arx.criteria.KAnonymity;
import org.deidentifier.arx.io.CSVHierarchyInput;
import org.deidentifier.arx.metric.Metric;
import org.deidentifier.arx.metric.Metric.AggregateFunction;
import org.junit.Test;

/**
 * Test for microaggregation.
 *
 * @author Fabian Prasser
 * @author Florian Kohlmayer
 */
public class TestMicroaggregation extends AbstractTest {

    /**
     * Extracts the attribute name (group 1) from a hierarchy file name of the
     * form {@code <dataset>_hierarchy_<attribute>.csv}. The dot is escaped so
     * that only a literal ".csv" suffix terminates the attribute name.
     */
    private static final Pattern HIERARCHY_ATTRIBUTE = Pattern.compile("_hierarchy_(.*?)\\.csv");

    /**
     * Returns the data object for a given dataset. Every file in the dataset's
     * directory named {@code <dataset>_hierarchy_<attribute>.csv} is loaded as
     * the generalization hierarchy of {@code <attribute>}.
     *
     * @param dataset path to the dataset; the last four characters are
     *                stripped as the file extension and "/" is used as the
     *                directory separator
     * @return the data object
     * @throws IOException Signals that an I/O exception has occurred, or that
     *                     the dataset's directory could not be listed.
     */
    private static Data getDataObject(final String dataset) throws IOException {

        final Data data = Data.create(dataset, StandardCharsets.UTF_8, ';');

        // Read generalization hierarchies
        final int separator = dataset.lastIndexOf("/");
        final String baseName = dataset.substring(separator + 1, dataset.length() - 4);

        // Quote the base name so that regex metacharacters in it are matched literally
        final Pattern hierarchyFile = Pattern.compile(Pattern.quote(baseName) + "_hierarchy_(.)+\\.csv");
        final FilenameFilter hierarchyFilter = new FilenameFilter() {
            @Override
            public boolean accept(final File dir, final String name) {
                return hierarchyFile.matcher(name).matches();
            }
        };

        final File testDir = new File(dataset.substring(0, separator));
        final File[] genHierFiles = testDir.listFiles(hierarchyFilter);
        if (genHierFiles == null) {
            // listFiles() returns null if the path is not a directory or an I/O error occurs
            throw new IOException("Cannot list hierarchy files in directory: " + testDir);
        }

        for (final File file : genHierFiles) {
            final Matcher matcher = HIERARCHY_ATTRIBUTE.matcher(file.getName());
            if (matcher.find()) {
                final CSVHierarchyInput hier = new CSVHierarchyInput(file, StandardCharsets.UTF_8, ';');
                final String attributeName = matcher.group(1);

                // use all found attribute hierarchies as qis
                data.getDefinition().setAttributeType(attributeName, Hierarchy.create(hier.getHierarchy()));
            }
        }

        return data;
    }

    /**
     * Anonymizes the data from {@link DataProvider} with 2-anonymity and no
     * outliers, microaggregating "age" with the given function, and checks the
     * complete output table.
     *
     * @param function  the microaggregation function applied to "age"
     * @param maleAge   the expected "age" value in all rows with gender "male"
     * @param femaleAge the expected "age" value in all rows with gender "female"
     * @throws IOException Signals that an I/O exception has occurred.
     */
    private void assertMicroaggregatedAges(final MicroAggregationFunction function,
                                           final String maleAge,
                                           final String femaleAge) throws IOException {

        final DataProvider provider = new DataProvider();
        provider.createDataDefinition();
        provider.data.getDefinition().setAttributeType("age", function);
        provider.data.getDefinition().setAttributeType("gender", provider.getGender());
        provider.data.getDefinition().setAttributeType("zipcode", provider.getZipcode());
        provider.data.getDefinition().setDataType("age", DataType.INTEGER);

        final ARXAnonymizer anonymizer = new ARXAnonymizer();
        final ARXConfiguration config = ARXConfiguration.create();
        config.addPrivacyModel(new KAnonymity(2));
        config.setMaxOutliers(0d);

        final ARXResult result = anonymizer.anonymize(provider.data, config);
        final String[][] resultArray = resultToArray(result);

        final String[][] expectedArray = {
            { "age", "gender", "zipcode" },
            { maleAge, "male", "81***" },
            { femaleAge, "female", "81***" },
            { maleAge, "male", "81***" },
            { femaleAge, "female", "81***" },
            { femaleAge, "female", "81***" },
            { maleAge, "male", "81***" },
            { maleAge, "male", "81***" } };

        // Include the actual table in the message so that a failure is diagnosable
        assertTrue("Unexpected output: " + Arrays.deepToString(resultArray),
                   Arrays.deepEquals(resultArray, expectedArray));
    }

    /**
     * Test microaggregation arithmetic mean with larger dataset
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testMicroaggregationAdult() throws IOException {

        final Data data = getDataObject("./data/adult.csv");
        data.getDefinition().setAttributeType("age", MicroAggregationFunction.createArithmeticMean());
        data.getDefinition().setDataType("age", DataType.INTEGER);

        final ARXAnonymizer anonymizer = new ARXAnonymizer();
        final ARXConfiguration config = ARXConfiguration.create();
        config.addPrivacyModel(new KAnonymity(5));
        config.setMaxOutliers(1d);
        config.setQualityModel(Metric.createLossMetric(AggregateFunction.RANK));

        final ARXResult result = anonymizer.anonymize(data, config);
        final DataHandle expectedOutput = Data.create("./data/adult_age_microaggregated.csv",
                                                      StandardCharsets.UTF_8,
                                                      ';').getHandle();
        final DataHandle output = result.getOutput();

        // Without these checks a truncated output would pass, because the
        // loops below are bounded by the actual output only
        assertEquals(expectedOutput.getNumRows(), output.getNumRows());
        assertEquals(expectedOutput.getNumColumns(), output.getNumColumns());

        for (int i = 0; i < output.getNumRows(); i++) {
            for (int j = 0; j < output.getNumColumns(); j++) {
                assertEquals("Mismatch in row " + i + ", column " + j,
                             expectedOutput.getValue(i, j),
                             output.getValue(i, j));
            }
        }
    }

    /**
     * Test microaggregation arithmetic mean
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testMicroaggregationArithmeticMean() throws IOException {
        assertMicroaggregatedAges(MicroAggregationFunction.createArithmeticMean(), "54", "50");
    }

    /**
     * Test microaggregation geometric mean
     *
     * @throws IOException Signals that an I/O exception has occurred.
     */
    @Test
    public void testMicroaggregationGeometricMean() throws IOException {
        assertMicroaggregatedAges(MicroAggregationFunction.createGeometricMean(), "52", "48");
    }
}