/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.examples;
import java.io.IOException;
import java.util.List;
import org.apache.commons.math3.util.Pair;
import org.deidentifier.arx.ARXAnonymizer;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXResult;
import org.deidentifier.arx.AttributeType;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.AttributeType.Hierarchy.DefaultHierarchy;
import org.deidentifier.arx.Data;
import org.deidentifier.arx.Data.DefaultData;
import org.deidentifier.arx.DataHandle;
import org.deidentifier.arx.DataType;
import org.deidentifier.arx.DataType.DataTypeWithFormat;
import org.deidentifier.arx.criteria.KAnonymity;
/**
* This class implements an example of how to use the data cleansing capabilities:
* listing the data types that match a column and replacing values in a data handle.
*
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class Example27 extends Example {

    /**
     * Entry point. Builds a small in-memory data set, lists the data types
     * matching each of its four columns, replaces values through the input
     * handle before and after anonymization, and prints the input and the
     * output handle after the individual steps.
     *
     * @param args
     *            the arguments (not used)
     * @throws IOException
     *             declared for the ARX calls made here; which call may raise
     *             it is not visible in this file
     */
    public static void main(String[] args) throws IOException {

        // Input table and the hierarchies for the attributes that get one
        DefaultData data = buildInputData();
        DefaultHierarchy age = buildAgeHierarchy();
        DefaultHierarchy gender = buildGenderHierarchy();
        DefaultHierarchy zipcode = buildZipcodeHierarchy();

        // Attach the hierarchies; "dob" is marked as insensitive
        data.getDefinition().setAttributeType("age", age);
        data.getDefinition().setAttributeType("gender", gender);
        data.getDefinition().setAttributeType("zipcode", zipcode);
        data.getDefinition().setAttributeType("dob", AttributeType.INSENSITIVE_ATTRIBUTE);

        // Anonymizer and configuration: 3-anonymity, outlier limit set to 0
        ARXAnonymizer anonymizer = new ARXAnonymizer();
        ARXConfiguration config = ARXConfiguration.create();
        config.addPrivacyModel(new KAnonymity(3));
        config.setMaxOutliers(0.0d);

        // Show the untouched input
        System.out.println("Input:");
        print(data.getHandle());

        // List candidate data types for the columns 0 to 3
        System.out.println("Determining data types:");
        for (int column = 0; column <= 3; column++) {
            determineDataType(data.getHandle(), column);
        }

        // Cleanse the input before anonymizing it
        System.out.println("Replacing 34 with 99");
        data.getHandle().replace(0, "34", "99");

        System.out.println("New input:");
        print(data.getHandle());

        // Anonymize and show the output
        ARXResult result = anonymizer.anonymize(data, config);
        System.out.println("Output:");
        print(result.getOutput(false));

        // Replace a value via the input handle after anonymization,
        // then print both the output and the input
        System.out.println("Replacing female with f");
        data.getHandle().replace(1, "female", "f");

        System.out.println("New output:");
        print(result.getOutput(false));

        System.out.println("New input:");
        print(data.getHandle());

        // NOTE(review): "81***" only occurs as a generalized value in the
        // zipcode hierarchy, yet the replacement is issued on the input
        // handle - presumably it is propagated to the output; confirm.
        System.out.println("Replacing 81*** with 81xxx");
        data.getHandle().replace(2, "81***", "81xxx");

        System.out.println("New output:");
        print(result.getOutput(false));
    }

    /**
     * Creates the example table: a header row followed by seven records.
     *
     * @return the populated data object
     */
    private static DefaultData buildInputData() {
        DefaultData table = Data.create();
        table.add("age", "gender", "zipcode", "dob");
        table.add("34", "male", "81667", "3.2.1913");
        table.add("45", "female", "81675", "5.5.1955");
        table.add("66", "male", "81925", "3.3.1967");
        table.add("70", "female", "81931", "1.1.1992");
        table.add("34", "female", "81931", "25.11.1988");
        table.add("70", "male", "81931", "13.3.1955");
        table.add("45", "male", "81931", "28.6.2013");
        return table;
    }

    /**
     * Creates the hierarchy for "age". It also contains an entry for "99",
     * the value that "34" is replaced with in {@link #main(String[])}.
     *
     * @return the age hierarchy
     */
    private static DefaultHierarchy buildAgeHierarchy() {
        DefaultHierarchy hierarchy = Hierarchy.create();
        hierarchy.add("34", "<50", "*");
        hierarchy.add("45", "<50", "*");
        hierarchy.add("66", ">=50", "*");
        hierarchy.add("70", ">=50", "*");
        hierarchy.add("99", ">=50", "*");
        return hierarchy;
    }

    /**
     * Creates the two-level hierarchy for "gender".
     *
     * @return the gender hierarchy
     */
    private static DefaultHierarchy buildGenderHierarchy() {
        DefaultHierarchy hierarchy = Hierarchy.create();
        hierarchy.add("male", "*");
        hierarchy.add("female", "*");
        return hierarchy;
    }

    /**
     * Creates the hierarchy for "zipcode", masking one more trailing digit
     * per level. Only excerpts for readability.
     *
     * @return the zipcode hierarchy
     */
    private static DefaultHierarchy buildZipcodeHierarchy() {
        DefaultHierarchy hierarchy = Hierarchy.create();
        hierarchy.add("81667", "8166*", "816**", "81***", "8****", "*****");
        hierarchy.add("81675", "8167*", "816**", "81***", "8****", "*****");
        hierarchy.add("81925", "8192*", "819**", "81***", "8****", "*****");
        hierarchy.add("81931", "8193*", "819**", "81***", "8****", "*****");
        return hierarchy;
    }

    /**
     * Prints the data types reported as matching for one column, one line per
     * type: label, format in brackets (if the type has one) and the value
     * paired with the type.
     *
     * @param handle
     *            the handle to query
     * @param column
     *            index of the column to inspect
     */
    private static void determineDataType(DataHandle handle, int column) {

        System.out.println(" - Potential data types for attribute: " + handle.getAttributeName(column));

        // Entries are printed in the order in which they are returned
        // (presumably sorted by match percentage - confirm against the API)
        List<Pair<DataType<?>, Double>> candidates = handle.getMatchingDataTypes(column);
        for (Pair<DataType<?>, Double> candidate : candidates) {

            DataType<?> type = candidate.getKey();

            StringBuilder line = new StringBuilder(" * ");
            line.append(type.getDescription().getLabel());
            if (type.getDescription().hasFormat()) {
                line.append("[");
                line.append(((DataTypeWithFormat) type).getFormat());
                line.append("]");
            }
            line.append(": ");
            line.append(candidate.getValue());

            System.out.println(line);
        }
    }
}