/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.test;
import static org.junit.Assert.assertTrue;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.deidentifier.arx.ARXAnonymizer;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXResult;
import org.deidentifier.arx.DataSubset;
import org.deidentifier.arx.DataType;
import org.deidentifier.arx.aggregates.StatisticsContingencyTable;
import org.deidentifier.arx.aggregates.StatisticsContingencyTable.Entry;
import org.deidentifier.arx.aggregates.StatisticsFrequencyDistribution;
import org.deidentifier.arx.criteria.DPresence;
import org.deidentifier.arx.criteria.KAnonymity;
import org.junit.Test;
/**
* Test class for data statistics
*
* @author Fabian Prasser
*/
public class TestDataStatistics extends AbstractTest {
/**
 * Wraps a double array so that it can be used as an element of hash-based
 * collections: equality and hash code are derived from the contents of the
 * array (and from the enclosing test instance) instead of array identity.
 *
 * @author Fabian Prasser
 */
class DoubleArrayWrapper {

    /** The wrapped array; stored by reference, no copy is made */
    double[] values;

    /**
     * Creates a new wrapper around the given array
     * @param values the array to wrap
     */
    public DoubleArrayWrapper(double[] values) {
        this.values = values;
    }

    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if ((obj == null) || (obj.getClass() != getClass())) {
            return false;
        }
        DoubleArrayWrapper that = (DoubleArrayWrapper) obj;
        // Both the enclosing instances and the array contents must match
        return getOuterType().equals(that.getOuterType()) &&
               Arrays.equals(this.values, that.values);
    }

    @Override
    public int hashCode() {
        // Combines the hash of the enclosing instance with the hash of the array contents
        int hash = 31 + getOuterType().hashCode();
        return (31 * hash) + Arrays.hashCode(this.values);
    }

    /**
     * Returns the enclosing test instance
     * @return the instance of the outer class this wrapper belongs to
     */
    private TestDataStatistics getOuterType() {
        return TestDataStatistics.this;
    }
}
/**
 * Checks the contingency table of the columns with index 0 and 2, first for
 * the input data and then for the output obtained by anonymizing the data
 * with 2-anonymity and the maximum of outliers set to 0.
 *
 * @throws IllegalArgumentException
 * @throws IOException
 */
@Test
public void testContingency1() throws IllegalArgumentException, IOException {

    // Prepare the input data
    this.provider.createDataDefinition();
    this.provider.getData().getDefinition().setDataType("age", DataType.INTEGER);

    // Configure and run the anonymization
    final ARXConfiguration configuration = ARXConfiguration.create();
    configuration.addPrivacyModel(new KAnonymity(2));
    configuration.setMaxOutliers(0d);
    final ARXResult result = new ARXAnonymizer().anonymize(this.provider.getData(), configuration);

    // Check input
    StatisticsContingencyTable table = this.provider.getData()
                                                    .getHandle()
                                                    .getStatistics()
                                                    .getContingencyTable(0, true, 2, true);
    assertTrue(Arrays.equals(new String[] { "34", "45", "66", "70" }, table.values1));
    assertTrue(Arrays.equals(new String[] { "81667", "81675", "81925", "81931" }, table.values2));
    // Each row is { index into values1, index into values2, frequency }, as produced by toArray()
    double[][] expectedInput = { { 0, 0, 0.14285714285714285 },
                                 { 1, 1, 0.14285714285714285 },
                                 { 2, 2, 0.14285714285714285 },
                                 { 3, 3, 0.2857142857142857 },
                                 { 1, 3, 0.14285714285714285 },
                                 { 0, 3, 0.14285714285714285 } };
    assertTrue("Unexpected result", deepEquals(toArray(table), expectedInput));

    // Check output
    table = result.getOutput(false)
                  .getStatistics()
                  .getContingencyTable(0, true, 2, true);
    assertTrue(Arrays.equals(new String[] { "<50", ">=50" }, table.values1));
    assertTrue(Arrays.equals(new String[] { "816**", "819**" }, table.values2));
    double[][] expectedOutput = { { 0, 0, 0.2857142857142857 },
                                  { 1, 1, 0.42857142857142855 },
                                  { 0, 1, 0.2857142857142857 } };
    assertTrue("Unexpected result", deepEquals(toArray(table), expectedOutput));
}
/**
 * Checks the contingency table of the columns with index 0 and 2 on the views
 * (obtained via getView()) of the input and of the output data. The data is
 * anonymized with 2-anonymity and d-presence (0.0, 1.0) for a subset defined
 * by the indices 0 and 6, with the maximum of outliers set to 0.
 *
 * @throws IllegalArgumentException
 * @throws IOException
 */
@Test
public void testContingency2() throws IllegalArgumentException, IOException {

    // Prepare the input data
    this.provider.createDataDefinition();
    this.provider.getData().getDefinition().setDataType("age", DataType.INTEGER);

    // Subset
    Set<Integer> indices = new HashSet<Integer>(Arrays.asList(0, 6));
    DataSubset subset = DataSubset.create(this.provider.getData(), indices);

    // Configure and run the anonymization
    final ARXConfiguration configuration = ARXConfiguration.create();
    configuration.addPrivacyModel(new KAnonymity(2));
    configuration.addPrivacyModel(new DPresence(0.0d, 1.0d, subset));
    configuration.setMaxOutliers(0d);
    final ARXResult result = new ARXAnonymizer().anonymize(this.provider.getData(), configuration);

    // Check input
    StatisticsContingencyTable table = this.provider.getData()
                                                    .getHandle()
                                                    .getView()
                                                    .getStatistics()
                                                    .getContingencyTable(0, true, 2, true);
    assertTrue(Arrays.equals(new String[] { "34", "45" }, table.values1));
    assertTrue(Arrays.equals(new String[] { "81667", "81931" }, table.values2));
    double[][] expectedInput = { { 0, 0, 0.5 },
                                 { 1, 1, 0.5 } };
    assertTrue("Unexpected result", deepEquals(toArray(table), expectedInput));

    // Check output
    table = result.getOutput(false)
                  .getView()
                  .getStatistics()
                  .getContingencyTable(0, true, 2, true);
    assertTrue(Arrays.equals(new String[] { "<50" }, table.values1));
    assertTrue(Arrays.equals(new String[] { "81***" }, table.values2));
    double[][] expectedOutput = { { 0, 0, 1.0 } };
    assertTrue("Unexpected result", deepEquals(toArray(table), expectedOutput));
}
/**
 * Checks the frequency distribution of the column with index 0, first for the
 * input data and then for the output obtained by anonymizing the data with
 * 2-anonymity and the maximum of outliers set to 0.
 *
 * @throws IllegalArgumentException
 * @throws IOException
 */
@Test
public void testDistribution1() throws IllegalArgumentException, IOException {

    // Prepare the input data
    this.provider.createDataDefinition();
    this.provider.getData().getDefinition().setDataType("age", DataType.INTEGER);

    // Configure and run the anonymization
    final ARXConfiguration configuration = ARXConfiguration.create();
    configuration.addPrivacyModel(new KAnonymity(2));
    configuration.setMaxOutliers(0d);
    final ARXResult result = new ARXAnonymizer().anonymize(this.provider.getData(), configuration);

    // Check input
    StatisticsFrequencyDistribution actual = this.provider.getData()
                                                          .getHandle()
                                                          .getStatistics()
                                                          .getFrequencyDistribution(0, true);
    String[] expectedValues = { "34", "45", "66", "70" };
    double[] expectedFrequencies = { 0.2857142857142857, 0.2857142857142857, 0.14285714285714285, 0.2857142857142857 };
    assertTrue(Arrays.equals(expectedValues, actual.values));
    assertTrue(Arrays.equals(expectedFrequencies, actual.frequency));

    // Check output
    actual = result.getOutput(false)
                   .getStatistics()
                   .getFrequencyDistribution(0, true);
    expectedValues = new String[] { "<50", ">=50" };
    expectedFrequencies = new double[] { 0.5714285714285714, 0.42857142857142855 };
    assertTrue(Arrays.equals(expectedValues, actual.values));
    assertTrue(Arrays.equals(expectedFrequencies, actual.frequency));
}
/**
 * Checks the frequency distribution of the column with index 1, first for the
 * input data and then for the output obtained by anonymizing the data with
 * 2-anonymity and the maximum of outliers set to 0. Note that the second
 * argument of getFrequencyDistribution() is false for the input and true for
 * the output.
 *
 * @throws IllegalArgumentException
 * @throws IOException
 */
@Test
public void testDistribution2() throws IllegalArgumentException, IOException {

    // Prepare the input data
    this.provider.createDataDefinition();
    this.provider.getData().getDefinition().setDataType("age", DataType.INTEGER);

    // Configure and run the anonymization
    final ARXConfiguration configuration = ARXConfiguration.create();
    configuration.addPrivacyModel(new KAnonymity(2));
    configuration.setMaxOutliers(0d);
    final ARXResult result = new ARXAnonymizer().anonymize(this.provider.getData(), configuration);

    // Check input
    StatisticsFrequencyDistribution actual = this.provider.getData()
                                                          .getHandle()
                                                          .getStatistics()
                                                          .getFrequencyDistribution(1, false);
    String[] expectedValues = { "female", "male" };
    double[] expectedFrequencies = { 0.42857142857142855, 0.5714285714285714 };
    assertTrue(Arrays.equals(expectedValues, actual.values));
    assertTrue(Arrays.equals(expectedFrequencies, actual.frequency));

    // Check output
    actual = result.getOutput(false)
                   .getStatistics()
                   .getFrequencyDistribution(1, true);
    expectedValues = new String[] { "*" };
    expectedFrequencies = new double[] { 1.0 };
    assertTrue(Arrays.equals(expectedValues, actual.values));
    assertTrue(Arrays.equals(expectedFrequencies, actual.frequency));
}
/**
 * Checks the two arrays regarding equality, treating a double[][]
 * as a set of comparable double[]'s. The order of the rows is ignored.
 * The arrays are considered equal if they have the same length and
 * contain the same distinct rows. The comparison is symmetric: every row of
 * set1 must occur in set2 and vice versa.
 *
 * @param set1 the first array of rows
 * @param set2 the second array of rows
 * @return true if both arrays have the same length and the same distinct rows
 */
private boolean deepEquals(double[][] set1, double[][] set2) {

    // Arrays of different length can never match
    if (set1.length != set2.length) {
        return false;
    }

    // Collect the distinct rows of both arrays
    Set<DoubleArrayWrapper> rows1 = new HashSet<DoubleArrayWrapper>();
    for (double[] row : set1) {
        rows1.add(new DoubleArrayWrapper(row));
    }
    Set<DoubleArrayWrapper> rows2 = new HashSet<DoubleArrayWrapper>();
    for (double[] row : set2) {
        rows2.add(new DoubleArrayWrapper(row));
    }

    // Compare in both directions. Checking only that the rows of set2 occur in
    // set1 would wrongly accept a set2 containing duplicates of a row of set1.
    return rows1.equals(rows2);
}
/**
 * Converts a contingency table to an array by consuming the iterator of the table
 *
 * @param contingency the table to convert
 * @return one row per entry of the table, each of the form { value1, value2, frequency }
 */
private double[][] toArray(StatisticsContingencyTable contingency) {
    List<double[]> rows = new ArrayList<double[]>();
    while (contingency.iterator.hasNext()) {
        Entry entry = contingency.iterator.next();
        double[] row = new double[3];
        row[0] = entry.value1;
        row[1] = entry.value2;
        row[2] = entry.frequency;
        rows.add(row);
    }
    return rows.toArray(new double[rows.size()][]);
}
}