/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.test;
import static org.junit.Assert.assertEquals;
import java.io.File;
import java.io.FilenameFilter;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.deidentifier.arx.ARXAnonymizer;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXLattice.ARXNode;
import org.deidentifier.arx.ARXResult;
import org.deidentifier.arx.AttributeType;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.Data;
import org.deidentifier.arx.criteria.LDiversity;
import org.deidentifier.arx.criteria.PrivacyCriterion;
import org.deidentifier.arx.criteria.TCloseness;
import org.deidentifier.arx.io.CSVHierarchyInput;
import org.junit.Before;
import org.junit.Test;
/**
* Test for utility metrics.
*
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public abstract class AbstractTestUtilityMetrics extends AbstractTest {
/**
* Represents a test case.
*
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public static class ARXUtilityMetricsTestCase {
/** Config */
public ARXConfiguration config;
/** Dataset */
public String dataset;
/** Attribute */
public String sensitiveAttribute;
/** Score */
public Map<String, String> informationLoss;
/**
* Creates a new instance.
*
* @param config
* @param sensitiveAttribute
* @param dataset
* @param informationLoss pairs of (Arrays.toString(transformation), informationLoss.toString())
*/
public ARXUtilityMetricsTestCase(final ARXConfiguration config,
final String sensitiveAttribute,
final String dataset,
final String... informationLoss) {
this.config = config;
this.sensitiveAttribute = sensitiveAttribute;
this.dataset = dataset;
this.informationLoss = new HashMap<String, String>();
if (informationLoss != null) {
for (int i = 0; i < informationLoss.length; i += 2) {
this.informationLoss.put(informationLoss[i], informationLoss[i + 1]);
}
}
}
/**
* Returns a string description.
*
* @return
*/
public String getDescription() {
StringBuilder builder = new StringBuilder();
builder.append("TestCase{\n");
builder.append(" - Dataset: ").append(dataset).append("\n");
builder.append(" - Sensitive: ").append(sensitiveAttribute).append("\n");
builder.append(" - Suppression: ").append(config.getMaxOutliers()).append("\n");
builder.append(" - Metric: ").append(config.getQualityModel().toString()).append("\n");
builder.append(" - Criteria:\n");
for (PrivacyCriterion c : config.getPrivacyModels()) {
builder.append(" * ").append(c.toString()).append("\n");
}
builder.append("}");
return builder.toString();
}
@Override
public String toString() {
return config.getPrivacyModels() + "-" + config.getMaxOutliers() + "-" + config.getQualityModel() + "-" + dataset + "-PM:" +
config.isPracticalMonotonicity();
}
}
/**
* Returns the data object for the test case.
*
* @param testCase
* @return
* @throws IOException
*/
public static Data getDataObject(final ARXUtilityMetricsTestCase testCase) throws IOException {
final Data data = Data.create(testCase.dataset, StandardCharsets.UTF_8, ';');
// Read generalization hierachies
final FilenameFilter hierarchyFilter = new FilenameFilter() {
@Override
public boolean accept(final File dir, final String name) {
if (name.matches(testCase.dataset.substring(testCase.dataset.lastIndexOf("/") + 1, testCase.dataset.length() - 4) +
"_hierarchy_(.)+.csv")) {
return true;
} else {
return false;
}
}
};
final File testDir = new File(testCase.dataset.substring(0, testCase.dataset.lastIndexOf("/")));
final File[] genHierFiles = testDir.listFiles(hierarchyFilter);
final Pattern pattern = Pattern.compile("_hierarchy_(.*?).csv");
for (final File file : genHierFiles) {
final Matcher matcher = pattern.matcher(file.getName());
if (matcher.find()) {
final CSVHierarchyInput hier = new CSVHierarchyInput(file, StandardCharsets.UTF_8, ';');
final String attributeName = matcher.group(1);
if (!attributeName.equalsIgnoreCase(testCase.sensitiveAttribute)) {
data.getDefinition().setAttributeType(attributeName, Hierarchy.create(hier.getHierarchy()));
} else { // sensitive attribute
if (testCase.config.isPrivacyModelSpecified(LDiversity.class) || testCase.config.isPrivacyModelSpecified(TCloseness.class)) {
data.getDefinition().setAttributeType(attributeName, AttributeType.SENSITIVE_ATTRIBUTE);
}
}
}
}
return data;
}
/** The test case. */
protected final ARXUtilityMetricsTestCase testcase;
/**
* Creates a new instance.
*
* @param testCase
*/
public AbstractTestUtilityMetrics(final ARXUtilityMetricsTestCase testCase) {
this.testcase = testCase;
}
@Override
@Before
public void setUp() {
// Empty by design
}
/**
*
*
* @throws IOException
*/
@Test
public void test() throws IOException {
// Anonymize
Data data = getDataObject(testcase);
ARXAnonymizer anonymizer = new ARXAnonymizer();
ARXResult result = anonymizer.anonymize(data, testcase.config);
// Test information loss for some transformations
for (ARXNode[] level : result.getLattice().getLevels()) {
for (ARXNode node : level) {
String label = Arrays.toString(node.getTransformation());
String loss = testcase.informationLoss.get(label);
if (loss != null) {
if (node.getHighestScore().compareTo(node.getLowestScore()) != 0) {
result.getOutput(node, false);
}
String actualLoss = node.getHighestScore().toString();
String expectedLoss = testcase.informationLoss.get(label);
assertEquals(label, expectedLoss, actualLoss);
}
}
}
}
}