/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.test;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import org.deidentifier.arx.ARXAnonymizer;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXLattice;
import org.deidentifier.arx.ARXLattice.ARXNode;
import org.deidentifier.arx.ARXResult;
import org.deidentifier.arx.AttributeType;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.Data;
import org.deidentifier.arx.DataHandle;
import org.deidentifier.arx.DataSelector;
import org.deidentifier.arx.DataSubset;
import org.deidentifier.arx.criteria.DPresence;
import org.deidentifier.arx.criteria.Inclusion;
import org.deidentifier.arx.criteria.KAnonymity;
import org.junit.Assert;
import org.junit.Test;
/**
* Tests for data handles
*
* @author Fabian Prasser
*/
public class TestDataHandle extends AbstractTest {
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testGetters() throws IllegalArgumentException, IOException {
final DataHandle inHandle = provider.getData().getHandle();
// Read the encoded data
assertTrue(inHandle.getNumRows() == 7);
assertTrue(inHandle.getNumColumns() == 3);
assertTrue(inHandle.getAttributeName(0).equals("age"));
assertTrue(inHandle.getValue(3, 2).equals("81931"));
}
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testMultipleDataHandlesFork() throws IllegalArgumentException, IOException {
provider.createDataDefinition();
final ARXAnonymizer anonymizer = new ARXAnonymizer();
final DataHandle inHandle = provider.getData().getHandle();
final ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new KAnonymity(2));
config.setMaxOutliers(0d);
config.setSuppressionAlwaysEnabled(false);
final ARXResult result = anonymizer.anonymize(provider.getData(), config);
// get top and bottom node
ARXLattice lattice = result.getLattice();
ARXNode topNode = lattice.getTop();
ARXNode bottomNode = lattice.getBottom();
// get various handle copies
DataHandle optimal = result.getOutput();
DataHandle top = result.getOutput(topNode);
DataHandle bottom = result.getOutput(bottomNode);
final String[][] inArray = iteratorToArray(inHandle.iterator());
final String[][] optimalArray = iteratorToArray(optimal.iterator());
final String[][] topArray = iteratorToArray(top.iterator());
final String[][] bottomArray = iteratorToArray(bottom.iterator());
final String[][] topExpected = { { "age", "gender", "zipcode" }, { "*", "*", "*****" }, { "*", "*", "*****" }, { "*", "*", "*****" }, { "*", "*", "*****" }, { "*", "*", "*****" }, { "*", "*", "*****" }, { "*", "*", "*****" } };
final String[][] bottomExpected = { { "age", "gender", "zipcode" }, { "34", "male", "81667" }, { "45", "female", "81675" }, { "66", "male", "81925" }, { "70", "female", "81931" }, { "34", "female", "81931" }, { "70", "male", "81931" }, { "45", "male", "81931" } };
final String[][] optimalExpected = { { "age", "gender", "zipcode" }, { "<50", "*", "816**" }, { "<50", "*", "816**" }, { ">=50", "*", "819**" }, { ">=50", "*", "819**" }, { "<50", "*", "819**" }, { ">=50", "*", "819**" }, { "<50", "*", "819**" } };
assertTrue(Arrays.deepEquals(optimalArray, optimalExpected));
assertTrue(Arrays.deepEquals(topArray, topExpected));
assertTrue(Arrays.deepEquals(inArray, bottomExpected));
assertTrue(Arrays.deepEquals(bottomArray, bottomExpected));
}
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testMultipleDataHandlesForkSync() throws IllegalArgumentException, IOException {
provider.createDataDefinition();
final ARXAnonymizer anonymizer = new ARXAnonymizer();
final DataHandle inHandle = provider.getData().getHandle();
final ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new KAnonymity(2));
config.setMaxOutliers(0d);
config.setSuppressionAlwaysEnabled(false);
final ARXResult result = anonymizer.anonymize(provider.getData(), config);
// get top and bottom node
ARXLattice lattice = result.getLattice();
ARXNode topNode = lattice.getTop();
ARXNode bottomNode = lattice.getBottom();
// get various handle copies
DataHandle optimal = result.getOutput();
DataHandle top = result.getOutput(topNode);
DataHandle bottom = result.getOutput(bottomNode);
// sort input data
optimal.sort(false, 0);
// sort bottom handle
bottom.sort(true, 2);
final String[][] inArray = iteratorToArray(inHandle.iterator());
final String[][] optimalArray = iteratorToArray(optimal.iterator());
final String[][] topArray = iteratorToArray(top.iterator());
final String[][] bottomArray = iteratorToArray(bottom.iterator());
final String[][] topExpected = { { "age", "gender", "zipcode" }, { "*", "*", "*****" }, { "*", "*", "*****" }, { "*", "*", "*****" }, { "*", "*", "*****" }, { "*", "*", "*****" }, { "*", "*", "*****" }, { "*", "*", "*****" } };
final String[][] bottomExpected = { { "age", "gender", "zipcode" }, { "34", "male", "81667" }, { "45", "female", "81675" }, { "66", "male", "81925" }, { "70", "female", "81931" }, { "70", "male", "81931" }, { "34", "female", "81931" }, { "45", "male", "81931" } };
final String[][] optimalExpected = { { "age", "gender", "zipcode" }, { "<50", "*", "816**" }, { "<50", "*", "816**" }, { ">=50", "*", "819**" }, { ">=50", "*", "819**" }, { ">=50", "*", "819**" }, { "<50", "*", "819**" }, { "<50", "*", "819**" } };
assertTrue(Arrays.deepEquals(optimalArray, optimalExpected));
assertTrue(Arrays.deepEquals(topArray, topExpected));
assertTrue(Arrays.deepEquals(bottomArray, bottomExpected));
assertTrue(Arrays.deepEquals(inArray, bottomExpected));
}
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testMultipleDataHandlesNoForkLocked() throws IllegalArgumentException, IOException {
provider.createDataDefinition();
final ARXAnonymizer anonymizer = new ARXAnonymizer();
final ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new KAnonymity(2));
config.setMaxOutliers(0d);
final ARXResult result = anonymizer.anonymize(provider.getData(), config);
// get top and bottom node
ARXLattice lattice = result.getLattice();
ARXNode topNode = lattice.getTop();
// get various handle copies
@SuppressWarnings("unused")
DataHandle optimal = result.getOutput(false);
try {
@SuppressWarnings("unused")
DataHandle top = result.getOutput(topNode);
} catch (RuntimeException e) {
if (e.getMessage().contains("locked")) {
return;
}
}
Assert.fail();
}
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testMultipleDataHandlesNoForkOrphaned() throws IllegalArgumentException, IOException {
provider.createDataDefinition();
final ARXAnonymizer anonymizer = new ARXAnonymizer();
final ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new KAnonymity(2));
config.setMaxOutliers(0d);
final ARXResult result = anonymizer.anonymize(provider.getData(), config);
// get top and bottom node
ARXLattice lattice = result.getLattice();
ARXNode topNode = lattice.getTop();
ARXNode bottomNode = lattice.getBottom();
// get various handle copies
@SuppressWarnings("unused")
DataHandle optimal = result.getOutput();
DataHandle top = result.getOutput(topNode, false);
@SuppressWarnings("unused")
DataHandle bottom = result.getOutput(bottomNode, false);
try {
top.getValue(0, 0);
} catch (RuntimeException e) {
if (e.getMessage().contains("orphaned")) {
return;
}
}
Assert.fail();
}
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testSorting() throws IllegalArgumentException, IOException {
provider.createDataDefinition();
final ARXAnonymizer anonymizer = new ARXAnonymizer();
final ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new KAnonymity(2));
config.setMaxOutliers(0d);
final ARXResult result = anonymizer.anonymize(provider.getData(), config);
final DataHandle outHandle = result.getOutput(false);
final DataHandle inHandle = provider.getData().getHandle();
inHandle.sort(true, 0);
final String[][] inArray = iteratorToArray(inHandle.iterator());
final String[][] resultArray = iteratorToArray(outHandle.iterator());
final String[][] expected = { { "age", "gender", "zipcode" }, { "<50", "*", "816**" }, { "<50", "*", "819**" }, { "<50", "*", "816**" }, { "<50", "*", "819**" }, { ">=50", "*", "819**" }, { ">=50", "*", "819**" }, { ">=50", "*", "819**" } };
final String[][] expectedIn = { { "age", "gender", "zipcode" }, { "34", "male", "81667" }, { "34", "female", "81931" }, { "45", "female", "81675" }, { "45", "male", "81931" }, { "66", "male", "81925" }, { "70", "female", "81931" }, { "70", "male", "81931" } };
assertTrue(Arrays.deepEquals(inArray, expectedIn));
assertTrue(Arrays.deepEquals(resultArray, expected));
}
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testStableSorting() throws IllegalArgumentException, IOException {
provider.createDataDefinition();
final ARXAnonymizer anonymizer = new ARXAnonymizer();
final DataHandle inHandle = provider.getData().getHandle();
// Alter the definition
provider.getData().getDefinition().setAttributeType("gender", AttributeType.IDENTIFYING_ATTRIBUTE);
final ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new KAnonymity(2));
config.setMaxOutliers(0d);
final ARXResult result = anonymizer.anonymize(provider.getData(), config);
final DataHandle outHandle = result.getOutput(false);
outHandle.sort(true, 2);
final String[][] inArray = iteratorToArray(inHandle.iterator());
final String[][] resultArray = iteratorToArray(outHandle.iterator());
final String[][] expected = { { "age", "gender", "zipcode" }, { "<50", "*", "816**" }, { "<50", "*", "816**" }, { ">=50", "*", "819**" }, { ">=50", "*", "819**" }, { "<50", "*", "819**" }, { ">=50", "*", "819**" }, { "<50", "*", "819**" } };
final String[][] expectedIn = { { "age", "gender", "zipcode" }, { "34", "male", "81667" }, { "45", "female", "81675" }, { "66", "male", "81925" }, { "70", "female", "81931" }, { "34", "female", "81931" }, { "70", "male", "81931" }, { "45", "male", "81931" } };
assertTrue(Arrays.deepEquals(resultArray, expected));
assertTrue(Arrays.deepEquals(inArray, expectedIn));
}
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testSubset1() throws IllegalArgumentException, IOException {
provider.createDataDefinition();
final ARXAnonymizer anonymizer = new ARXAnonymizer();
final DataHandle inHandle = provider.getData().getHandle();
// Alter the definition
provider.getData().getDefinition().setAttributeType("gender", AttributeType.IDENTIFYING_ATTRIBUTE);
DataSelector selector = DataSelector.create(provider.getData()).field("age").equals("70").or().equals("34");
DataSubset subset = DataSubset.create(provider.getData(), selector);
final ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new KAnonymity(2));
config.addPrivacyModel(new DPresence(0, 1, subset));
config.setMaxOutliers(0d);
final ARXResult result = anonymizer.anonymize(provider.getData(), config);
final DataHandle outHandle = result.getOutput(false);
outHandle.sort(true, 2);
outHandle.getView().sort(false, 0);
String[][] given = iteratorToArray(inHandle.getView().iterator());
String[][] expected = { { "age", "gender", "zipcode" }, { "70", "female", "81931" }, { "70", "male", "81931" }, { "34", "male", "81667" }, { "34", "female", "81931" } };
assertTrue(Arrays.deepEquals(given, expected));
}
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testSubset2() throws IllegalArgumentException, IOException {
provider.createDataDefinition();
final ARXAnonymizer anonymizer = new ARXAnonymizer();
// Alter the definition
provider.getData().getDefinition().setAttributeType("gender", AttributeType.IDENTIFYING_ATTRIBUTE);
DataSelector selector = DataSelector.create(provider.getData()).field("age").equals("70").or().equals("34");
DataSubset subset = DataSubset.create(provider.getData(), selector);
final ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new KAnonymity(2));
config.addPrivacyModel(new DPresence(0, 1, subset));
config.setMaxOutliers(0d);
final ARXResult result = anonymizer.anonymize(provider.getData(), config);
final DataHandle outHandle = result.getOutput(false);
outHandle.sort(true, 2);
outHandle.getView().sort(false, 0);
String[][] given = iteratorToArray(outHandle.getView().iterator());
String[][] expected = { { "age", "gender", "zipcode" }, { "70", "*", "81***" }, { "70", "*", "81***" }, { "34", "*", "81***" }, { "34", "*", "81***" } };
assertTrue(Arrays.deepEquals(given, expected));
}
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testSubset3() throws IllegalArgumentException, IOException {
Data data = Data.create("./data/dis.csv", StandardCharsets.UTF_8, ';');
data.getDefinition().setAttributeType("age", Hierarchy.create("./data/dis_hierarchy_age.csv", StandardCharsets.UTF_8, ';'));
data.getDefinition().setAttributeType("gender", AttributeType.INSENSITIVE_ATTRIBUTE);
data.getDefinition().setAttributeType("zipcode", AttributeType.INSENSITIVE_ATTRIBUTE);
DataSelector selector = DataSelector.create(data).field("gender").equals("male");
DataSubset subset = DataSubset.create(data, selector);
final ARXAnonymizer anonymizer = new ARXAnonymizer();
final ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new KAnonymity(2));
config.addPrivacyModel(new Inclusion(subset));
final ARXResult result = anonymizer.anonymize(data, config);
final DataHandle outHandle = result.getOutput(false);
data.getHandle().sort(false, 0);
String[][] given = iteratorToArray(outHandle.getView().iterator());
String[][] expected = { { "age", "gender", "zipcode" }, { ">=61", "male", "81825" }, { ">=61", "male", "81925" }, { "20-60", "male", "82667" }, { "20-60", "male", "82451" } };
assertTrue(Arrays.deepEquals(given, expected));
}
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testSubset4() throws IllegalArgumentException, IOException {
Data data = Data.create("./data/dis.csv", StandardCharsets.UTF_8, ';');
data.getDefinition().setAttributeType("age", Hierarchy.create("./data/dis_hierarchy_age.csv", StandardCharsets.UTF_8, ';'));
data.getDefinition().setAttributeType("gender", AttributeType.INSENSITIVE_ATTRIBUTE);
data.getDefinition().setAttributeType("zipcode", AttributeType.INSENSITIVE_ATTRIBUTE);
DataSelector selector = DataSelector.create(data).field("gender").equals("male");
DataSubset subset = DataSubset.create(data, selector);
final ARXAnonymizer anonymizer = new ARXAnonymizer();
final ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new Inclusion(subset));
anonymizer.anonymize(data, config);
String[][] given = iteratorToArray(data.getHandle().getView().iterator());
String[][] expected = { { "age", "gender", "zipcode" }, { "34", "male", "82667" }, { "66", "male", "81925" }, { "70", "male", "81825" }, { "21", "male", "82451" } };
assertTrue(Arrays.deepEquals(given, expected));
}
/**
* Test case
*
* @throws IllegalArgumentException
* @throws IOException
*/
@Test
public void testSubset5() throws IllegalArgumentException, IOException {
Data data = Data.create("./data/dis.csv", StandardCharsets.UTF_8, ';');
data.getDefinition().setAttributeType("age", Hierarchy.create("./data/dis_hierarchy_age.csv", StandardCharsets.UTF_8, ';'));
data.getDefinition().setAttributeType("gender", Hierarchy.create("./data/dis_hierarchy_gender.csv", StandardCharsets.UTF_8, ';'));
data.getDefinition().setAttributeType("zipcode", AttributeType.INSENSITIVE_ATTRIBUTE);
DataSelector selector = DataSelector.create(data).field("gender").equals("male");
DataSubset subset = DataSubset.create(data, selector);
final ARXAnonymizer anonymizer = new ARXAnonymizer();
final ARXConfiguration config = ARXConfiguration.create();
config.addPrivacyModel(new Inclusion(subset));
// Transform
ARXResult result = anonymizer.anonymize(data, config);
// Sort
data.getHandle().sort(false, 0, 1, 2);
// Transform
ARXNode n = result.getLattice().getLevels()[2][1];
DataHandle h = result.getOutput(n, false);
String[][] given = iteratorToArray(h.getView().iterator());
String[][] expected = { { "age", "gender", "zipcode" }, { ">=61", "*", "81825" }, { ">=61", "*", "81925" }, { "20-60", "*", "82667" }, { "20-60", "*", "82451" } };
assertTrue(Arrays.deepEquals(given, expected));
}
}