/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx.test; import org.deidentifier.arx.AttributeType; import org.deidentifier.arx.Data; import org.deidentifier.arx.risk.RiskEstimateBuilder; import org.deidentifier.arx.risk.RiskModelAttributes; import org.junit.Test; import static org.junit.Assert.assertTrue; /** * Test calculations of alpha-distinction and alpha-separation. * * @author Maximilan Zitzmann * @author Fabian Prasser */ public class TestRiskQuasiIdentifiers { /** * Private helper class * * @author Maximilian Zitzmann * @author Fabian Prasser */ private class ResultSet { /** Identifier*/ private final String identifier; /** Distinction*/ private final double distinction; /** Separation*/ private final double separation; /** * Creates a new instance * @param identifier * @param distinction * @param separation */ private ResultSet(String identifier, double distinction, double separation) { this.identifier = identifier; this.distinction = distinction; this.separation = separation; } } @Test public void testWithDefinedDataSet() { // Define data Data.DefaultData data = Data.create(); data.add("age", "sex", "state"); data.add("20", "Female", "CA"); data.add("30", "Female", "CA"); data.add("40", "Female", "TX"); data.add("20", "Male", "NY"); data.add("40", "Male", "CA"); // Calculated by hand ResultSet[] expectedResults = new ResultSet[]{ new ResultSet("[sex]", 0.4, 0.6), new ResultSet("[state]", 0.6, 0.7), new ResultSet("[age]", 0.6, 0.8), new ResultSet("[sex, state]", 0.8, 0.9), new ResultSet("[age, sex]", 1.0, 1.0), new ResultSet("[age, state]", 1.0, 1.0), new ResultSet("[age, sex, state]", 1.0, 1.0), }; // Flag every identifier as quasi identifier for (int i = 0; i < data.getHandle().getNumColumns(); i++) { data.getDefinition().setAttributeType(data.getHandle().getAttributeName(i), AttributeType.QUASI_IDENTIFYING_ATTRIBUTE); } // Perform calculation RiskEstimateBuilder builder = data.getHandle().getRiskEstimator(null); RiskModelAttributes riskmodel = builder.getAttributeRisks(); RiskModelAttributes.QuasiIdentifierRisk risks[] = riskmodel.getAttributeRisks(); // Check length assertTrue("Number of potential quasi-identifiers expected: " + risks.length, expectedResults.length == risks.length); // Check each entry for (int i = 0; i < risks.length; i++) { assertTrue("Identifier expected: " + expectedResults[i].identifier + "; got: " + risks[i].getIdentifier(), expectedResults[i].identifier.equals(risks[i].getIdentifier().toString())); assertTrue("Distinction expected: " + expectedResults[i].distinction + "; got: " + risks[i].getDistinction(), expectedResults[i].distinction == risks[i].getDistinction()); assertTrue("Separation expected: " + expectedResults[i].separation + "; got: " + risks[i].getSeparation(), expectedResults[i].separation == risks[i].getSeparation()); } } }