/*
 * ARX: Powerful Data Anonymization
 * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.deidentifier.arx.test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.Writer;
import java.lang.management.ManagementFactory;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.deidentifier.arx.ARXAnonymizer;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXLattice.ARXNode;
import org.deidentifier.arx.ARXLattice.Anonymity;
import org.deidentifier.arx.ARXResult;
import org.deidentifier.arx.AttributeType;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.Data;
import org.deidentifier.arx.DataHandle;
import org.deidentifier.arx.aggregates.HierarchyBuilder;
import org.deidentifier.arx.criteria.BasicBLikeness;
import org.deidentifier.arx.criteria.DDisclosurePrivacy;
import org.deidentifier.arx.criteria.EnhancedBLikeness;
import org.deidentifier.arx.criteria.LDiversity;
import org.deidentifier.arx.criteria.TCloseness;
import org.deidentifier.arx.exceptions.RollbackRequiredException;
import org.deidentifier.arx.gui.resources.Resources;
import org.deidentifier.arx.io.CSVHierarchyInput;
import org.junit.Before;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.TestName;

/**
 * Test for data transformations.
 *
 * @author Fabian Prasser
 * @author Florian Kohlmayer
 */
public abstract class AbstractAnonymizationTest extends AbstractTest {

    /**
     * Represents a test case.
     *
     * @author Fabian Prasser
     * @author Florian Kohlmayer
     */
    public static class ARXAnonymizationTestCase {

        /** Source of the per-instance {@link #id}; incremented for every created test case */
        private static int                    counter;

        /** Sequential id of this test case, written to the benchmark output */
        public final int                      id          = counter++;

        /** Configuration handed to the anonymizer */
        public ARXConfiguration               config;

        /** Path of the input dataset; hierarchy files are looked up in the same directory */
        public String                         dataset;

        /** Name of the attribute that is treated as sensitive (compared case-insensitively) */
        public String                         sensitiveAttribute;

        /** Expected string representation of the highest score of the global optimum */
        public String                         optimalInformationLoss;

        /** Expected transformation of the global optimum, <code>null</code> if no solution is expected */
        public int[]                          optimalTransformation;

        /** Value passed to {@link ARXConfiguration#setPracticalMonotonicity(boolean)} before the run */
        public boolean                        practical;

        /** Expected lattice statistics (seven counters), <code>null</code> to skip the comparison */
        public int[]                          statistics;

        /** Expected hash code of the output for local recoding tests, <code>-1</code> if not such a test */
        public int                            hashcode    = -1;

        /** Flag that is not read anywhere in this class */
        public boolean                        optimizable = false;

        /** Hierarchy builders; if set, exactly their keys become the quasi-identifiers */
        public Map<String, HierarchyBuilder<?>> builders;

        /**
         * Creates a new instance without a sensitive attribute and without expected statistics.
         *
         * @param config
         * @param dataset
         * @param optimalInformationLoss
         * @param optimalTransformation
         * @param practical
         */
        public ARXAnonymizationTestCase(final ARXConfiguration config,
                                        final String dataset,
                                        final double optimalInformationLoss,
                                        final int[] optimalTransformation,
                                        final boolean practical) {
            this(config, "", dataset, optimalInformationLoss, optimalTransformation, practical, null);
        }

        /**
         * Creates a new instance without a sensitive attribute.
         *
         * @param config
         * @param dataset
         * @param optimalInformationLoss
         * @param optimalTransformation
         * @param practical
         * @param statistics
         */
        public ARXAnonymizationTestCase(final ARXConfiguration config,
                                        final String dataset,
                                        final double optimalInformationLoss,
                                        final int[] optimalTransformation,
                                        final boolean practical,
                                        int[] statistics) {
            this(config, "", dataset, optimalInformationLoss, optimalTransformation, practical, statistics);
        }

        /**
         * Creates a new instance without expected statistics.
         *
         * @param config
         * @param sensitiveAttribute
         * @param dataset
         * @param optimalInformationLoss
         * @param optimalTransformation
         * @param practical
         */
        public ARXAnonymizationTestCase(final ARXConfiguration config,
                                        final String sensitiveAttribute,
                                        final String dataset,
                                        final double optimalInformationLoss,
                                        final int[] optimalTransformation,
                                        final boolean practical) {
            this(config, sensitiveAttribute, dataset, optimalInformationLoss, optimalTransformation, practical, null);
        }

        /**
         * Creates a new instance. The numeric information loss is stored as
         * <code>String.valueOf(optimalInformationLoss)</code>.
         *
         * @param config
         * @param sensitiveAttribute
         * @param dataset
         * @param optimalInformationLoss
         * @param optimalTransformation
         * @param practical
         * @param statistics
         */
        public ARXAnonymizationTestCase(final ARXConfiguration config,
                                        final String sensitiveAttribute,
                                        final String dataset,
                                        final double optimalInformationLoss,
                                        final int[] optimalTransformation,
                                        final boolean practical,
                                        int[] statistics) {
            this.config = config;
            this.sensitiveAttribute = sensitiveAttribute;
            this.dataset = dataset;
            this.optimalInformationLoss = String.valueOf(optimalInformationLoss);
            this.optimalTransformation = optimalTransformation;
            this.practical = practical;
            this.statistics = statistics;
        }

        /**
         * Creates a new instance with a textual information loss and without expected statistics.
         *
         * @param config
         * @param sensitiveAttribute
         * @param dataset
         * @param optimalInformationLoss
         * @param optimalTransformation
         * @param practical
         */
        public ARXAnonymizationTestCase(final ARXConfiguration config,
                                        final String sensitiveAttribute,
                                        final String dataset,
                                        final String optimalInformationLoss,
                                        final int[] optimalTransformation,
                                        final boolean practical) {
            this(config, sensitiveAttribute, dataset, optimalInformationLoss, optimalTransformation, practical, null);
        }

        /**
         * Creates a new instance with a textual information loss.
         *
         * @param config
         * @param sensitiveAttribute
         * @param dataset
         * @param optimalInformationLoss
         * @param optimalTransformation
         * @param practical
         * @param statistics
         */
        public ARXAnonymizationTestCase(final ARXConfiguration config,
                                        final String sensitiveAttribute,
                                        final String dataset,
                                        final String optimalInformationLoss,
                                        final int[] optimalTransformation,
                                        final boolean practical,
                                        int[] statistics) {
            this.config = config;
            this.sensitiveAttribute = sensitiveAttribute;
            this.dataset = dataset;
            this.optimalInformationLoss = optimalInformationLoss;
            this.optimalTransformation = optimalTransformation;
            this.practical = practical;
            this.statistics = statistics;
        }

        /**
         * Constructor for local recoding tests
         * @param config
         * @param dataset
         * @param sensitiveAttribute
         * @param hashcode
         */
        public ARXAnonymizationTestCase(final ARXConfiguration config,
                                        final String dataset,
                                        final String sensitiveAttribute,
                                        final int hashcode) {
            this.config = config;
            this.dataset = dataset;
            this.sensitiveAttribute = sensitiveAttribute;
            this.hashcode = hashcode;
        }

        /**
         * Creates a new test case with hierarchy builders
         * @param config
         * @param dataset
         * @param hierarchyBuilders
         * @param informationLoss
         * @param transformation
         * @param practicalMonotonicity
         */
        public ARXAnonymizationTestCase(ARXConfiguration config,
                                        String dataset,
                                        Map<String, HierarchyBuilder<?>> hierarchyBuilders,
                                        double informationLoss,
                                        int[] transformation,
                                        boolean practicalMonotonicity) {
            this(config, dataset, informationLoss, transformation, practicalMonotonicity);
            this.builders = hierarchyBuilders;
        }

        @Override
        public String toString() {
            return config.getPrivacyModels() + "-" + config.getMaxOutliers() + "-" + config.getQualityModel() + "-" + dataset + "-PM:" +
                   config.isPracticalMonotonicity();
        }
    }

    /** Time at which this class was loaded; part of the benchmark file name */
    private static final String timestamp             = new SimpleDateFormat("yyyy-MM-dd'T'HH-mm-ss").format(new Date());

    /** Number of timed runs per test case when benchmarking */
    private static final int    BENCHMARK_REPETITIONS = 5;

    /** Human-readable names of the seven statistics counters, in index order */
    private static final String[] STATISTICS_LABELS   = {
            "transformations",
            "checks",
            "anonymous transformations",
            "non-anonymous transformations",
            "probably anonymous transformations",
            "probably non-anonymous transformations",
            "transformations with utility available" };

    /**
     * Transforms it into a string representation.
     *
     * @param classification Array with at least seven counters, in the order printed below
     * @return
     */
    public static String getClassification(int[] classification) {
        StringBuilder builder = new StringBuilder();
        builder.append("Classification {\n");
        builder.append(" Transformations: ").append(classification[0]).append("\n");
        builder.append(" Checked: ").append(classification[1]).append("\n");
        builder.append(" Anonymous: ").append(classification[2]).append("\n");
        builder.append(" Non-anonymous: ").append(classification[3]).append("\n");
        builder.append(" Probably anonymous: ").append(classification[4]).append("\n");
        builder.append(" Probably non-anonymous: ").append(classification[5]).append("\n");
        builder.append(" Utility available: ").append(classification[6]).append("\n");
        builder.append("}");
        return builder.toString();
    }

    /**
     * Returns the data object for the test case. Hierarchies are read from all files
     * named <code>&lt;dataset&gt;_hierarchy_&lt;attribute&gt;.csv</code> that are located
     * next to the dataset. The dataset path must contain a <code>'/'</code> and its last
     * four characters (the extension) are stripped to obtain the file name prefix.
     *
     * @param testCase
     * @return
     * @throws IOException If the data cannot be read or the dataset directory cannot be listed
     */
    public static Data getDataObject(final ARXAnonymizationTestCase testCase) throws IOException {

        final Data data = Data.create(testCase.dataset, StandardCharsets.UTF_8, ';');

        // Read generalization hierarchies
        final int separator = testCase.dataset.lastIndexOf("/");
        final String baseName = testCase.dataset.substring(separator + 1, testCase.dataset.length() - 4);

        // Quote the base name and escape the dot so that both are matched literally;
        // compile once instead of once per directory entry
        final Pattern hierarchyFile = Pattern.compile(Pattern.quote(baseName) + "_hierarchy_(.)+\\.csv");
        final FilenameFilter hierarchyFilter = new FilenameFilter() {
            @Override
            public boolean accept(final File dir, final String name) {
                return hierarchyFile.matcher(name).matches();
            }
        };

        final File testDir = new File(testCase.dataset.substring(0, separator));
        final File[] genHierFiles = testDir.listFiles(hierarchyFilter);
        if (genHierFiles == null) {
            // listFiles() returns null if the path is not a directory or cannot be read
            throw new IOException("Cannot list hierarchy files in directory: " + testDir);
        }

        final Pattern pattern = Pattern.compile("_hierarchy_(.*?)\\.csv");
        for (final File file : genHierFiles) {
            final Matcher matcher = pattern.matcher(file.getName());
            if (matcher.find()) {
                final CSVHierarchyInput hier = new CSVHierarchyInput(file, StandardCharsets.UTF_8, ';');
                final String attributeName = matcher.group(1);

                if (!attributeName.equalsIgnoreCase(testCase.sensitiveAttribute)) {
                    data.getDefinition().setAttributeType(attributeName, Hierarchy.create(hier.getHierarchy()));
                } else { // sensitive attribute
                    if (testCase.config.isPrivacyModelSpecified(LDiversity.class) ||
                        testCase.config.isPrivacyModelSpecified(TCloseness.class) ||
                        testCase.config.isPrivacyModelSpecified(DDisclosurePrivacy.class) ||
                        testCase.config.isPrivacyModelSpecified(BasicBLikeness.class) ||
                        testCase.config.isPrivacyModelSpecified(EnhancedBLikeness.class)) {
                        data.getDefinition().setAttributeType(attributeName, AttributeType.SENSITIVE_ATTRIBUTE);
                    }
                }
            }
        }

        if (testCase.builders != null) {

            // Remove all QIs
            Set<String> qis = data.getDefinition().getQuasiIdentifyingAttributes();
            for (String qi : qis) {
                data.getDefinition().resetHierarchy(qi);
                data.getDefinition().setAttributeType(qi, AttributeType.INSENSITIVE_ATTRIBUTE);
            }

            // Set only builders as QIs
            for (Entry<String, HierarchyBuilder<?>> entry : testCase.builders.entrySet()) {
                data.getDefinition().setAttributeType(entry.getKey(), AttributeType.QUASI_IDENTIFYING_ATTRIBUTE);
                data.getDefinition().setHierarchy(entry.getKey(), entry.getValue());
            }
        }

        return data;
    }

    /**
     * Returns whether the JVM was started with an argument beginning with
     * <code>-DBenchmark</code>.
     *
     * @return
     */
    private static boolean isBenchmarkRequested() {
        List<String> arguments = ManagementFactory.getRuntimeMXBean().getInputArguments();
        for (String argument : arguments) {
            if (argument.startsWith("-DBenchmark")) {
                return true;
            }
        }
        return false;
    }

    /** The test case. */
    protected final ARXAnonymizationTestCase testCase;

    /** To access the test name */
    @Rule
    public TestName                          name = new TestName();

    /**
     * Creates a new instance.
     *
     * @param testCase
     */
    public AbstractAnonymizationTest(final ARXAnonymizationTestCase testCase) {
        this.testCase = testCase;
    }

    @Override
    @Before
    public void setUp() {
        // Empty by design
        // We also intentionally don't call super.setUp()
    }

    /**
     * Anonymizes the dataset of the test case and verifies the result. If benchmarking is
     * requested, the run is repeated and the mean execution times are appended to a CSV file.
     * Local recoding test cases are verified via the hash code of the output; all others via
     * information loss of the global optimum and, optionally, lattice statistics.
     *
     * @throws IOException
     */
    @Test
    public void test() throws IOException {

        final boolean benchmark = isBenchmarkRequested();
        final Data data = getDataObject(testCase);

        // Create an instance of the anonymizer
        final ARXAnonymizer anonymizer = new ARXAnonymizer();
        testCase.config.setPracticalMonotonicity(testCase.practical);

        // Test or warmup
        ARXResult result = anonymizer.anonymize(data, testCase.config);
        DataHandle output = optimizeIfLocalRecoding(result);

        // Benchmark
        if (benchmark) {

            String version = System.getProperty("Version");
            String path = System.getProperty("Benchmark");
            if (path == null || path.length() == 0) {
                path = ".";
            }
            String testClass = this.getClass().getSimpleName();

            long time = System.currentTimeMillis();
            long time2 = 0;
            for (int i = 0; i < BENCHMARK_REPETITIONS; i++) {
                data.getHandle().release();
                result = anonymizer.anonymize(data, testCase.config);
                output = optimizeIfLocalRecoding(result);
                time2 += result.getTime();
            }
            time = (System.currentTimeMillis() - time) / BENCHMARK_REPETITIONS;
            time2 /= BENCHMARK_REPETITIONS;

            StringBuilder line = new StringBuilder();
            line.append(Resources.getVersion()).append(";");
            line.append(version).append(";");
            line.append(testClass).append(";");
            line.append(testCase.id).append(";");
            line.append(time).append(";");
            line.append(time2);
            output(line.toString(), path + "/benchmark_" + version + "_" + timestamp + "_" + testClass + ".csv");
        }

        // Check if local recoding experiment
        if (testCase.hashcode != -1) {
            // JUnit expects (message, expected, actual)
            assertEquals("Hash code not equal", testCase.hashcode, computeOutputHashCode(output));
            return;
        }

        // Check if no solution
        if (testCase.optimalTransformation == null) {
            assertTrue(result.getGlobalOptimum() == null);
        } else {

            String lossActual = result.getGlobalOptimum().getHighestScore().toString();
            String lossExpected = testCase.optimalInformationLoss;

            assertEquals(testCase.dataset + "-should: " + lossExpected + " is: " + lossActual + "(" +
                         result.getGlobalOptimum().getLowestScore().toString() + ")", lossExpected, lossActual);

            // A different optimum with equal loss is reported, but does not fail the test
            if (!Arrays.equals(result.getGlobalOptimum().getTransformation(), testCase.optimalTransformation)) {
                System.err.println("Note: Information loss equals, but the optimum differs:");
                System.err.println("Should: " + Arrays.toString(testCase.optimalTransformation) + " is: " +
                                   Arrays.toString(result.getGlobalOptimum().getTransformation()));
                System.err.println("Test case: " + testCase.toString());
            }
        }

        if (testCase.statistics != null) {

            // Collect statistics
            int[] statistics = collectLatticeStatistics(result);

            // Compare
            String algorithmConfiguration = getAlgorithmConfiguration(testCase.config);
            for (int i = 0; i < STATISTICS_LABELS.length; i++) {
                assertEquals(algorithmConfiguration + ". Mismatch: number of " + STATISTICS_LABELS[i],
                             testCase.statistics[i],
                             statistics[i]);
            }
        }
    }

    /**
     * For local recoding test cases (expected hash code set), obtains the output of the
     * result and runs the iterative optimization on it.
     *
     * @param result
     * @return The output handle, <code>null</code> if this is not a local recoding test case
     */
    private DataHandle optimizeIfLocalRecoding(ARXResult result) {
        if (testCase.hashcode == -1) {
            return null;
        }
        try {
            DataHandle output = result.getOutput();
            result.optimizeIterative(output, 0.05d, 100, 0.05d);
            return output;
        } catch (RollbackRequiredException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Computes a hash code over all cells of the given handle, row by row.
     *
     * @param output
     * @return
     */
    private int computeOutputHashCode(DataHandle output) {
        int hashcode = 23;
        for (int row = 0; row < output.getNumRows(); row++) {
            for (int column = 0; column < output.getNumColumns(); column++) {
                hashcode = (37 * hashcode) + output.getValue(row, column).hashCode();
            }
        }
        return hashcode;
    }

    /**
     * Counts the nodes of the result's lattice: [0] all, [1] checked, [2] anonymous,
     * [3] not anonymous, [4] probably anonymous, [5] probably not anonymous,
     * [6] nodes whose highest and lowest score are the same object.
     *
     * @param result
     * @return
     */
    private int[] collectLatticeStatistics(ARXResult result) {
        int[] statistics = new int[7];
        for (ARXNode[] level : result.getLattice().getLevels()) {
            for (ARXNode arxNode : level) {
                statistics[0]++;
                if (arxNode.isChecked()) {
                    statistics[1]++;
                }
                if (arxNode.getAnonymity() == Anonymity.ANONYMOUS) {
                    statistics[2]++;
                }
                if (arxNode.getAnonymity() == Anonymity.NOT_ANONYMOUS) {
                    statistics[3]++;
                }
                if (arxNode.getAnonymity() == Anonymity.PROBABLY_ANONYMOUS) {
                    statistics[4]++;
                }
                if (arxNode.getAnonymity() == Anonymity.PROBABLY_NOT_ANONYMOUS) {
                    statistics[5]++;
                }
                // NOTE(review): reference comparison kept on purpose; the expected
                // statistics of existing test cases were recorded with it
                if (arxNode.getHighestScore() == arxNode.getLowestScore()) {
                    statistics[6]++;
                }
            }
        }
        return statistics;
    }

    /**
     * Appends the given value to the file and echoes it to standard output. Writing the
     * benchmark is best effort: failures are printed but do not fail the test.
     *
     * @param value
     * @param file
     */
    private void output(String value, String file) {
        try {
            createHeader(file);
            try (Writer writer = new BufferedWriter(new FileWriter(file, true))) {
                writer.write(value);
                writer.write(System.lineSeparator());
                System.out.println(value);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Writes a two-line header to the given file, unless the file already exists.
     * Failures are printed but not propagated.
     *
     * @param file
     */
    private void createHeader(String file) {
        File f = new File(file);
        if (f.exists()) {
            return;
        }
        try (Writer writer = new BufferedWriter(new FileWriter(f))) {
            writer.write(";;;;Execution Time;Internal Execution Time");
            writer.write(System.lineSeparator());
            writer.write("Version;Git Commit;Test;Testid;Arithmetic Mean;Arithmetic Mean");
            writer.write(System.lineSeparator());
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Returns a description of the monotonicity properties of the given configuration,
     * used as a prefix for assertion messages.
     *
     * @param config
     * @return
     */
    private String getAlgorithmConfiguration(ARXConfiguration config) {
        return config.getMonotonicityOfPrivacy() + " monotonicity of privacy with " + config.getMonotonicityOfUtility() +
               " monotonicity of utility";
    }
}