/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.examples;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.sql.SQLException;
import java.util.Arrays;
import org.deidentifier.arx.ARXAnonymizer;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXResult;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.AttributeType.Hierarchy.DefaultHierarchy;
import org.deidentifier.arx.Data;
import org.deidentifier.arx.DataSource;
import org.deidentifier.arx.DataType;
import org.deidentifier.arx.aggregates.HierarchyBuilderGroupingBased.Level;
import org.deidentifier.arx.aggregates.HierarchyBuilderIntervalBased;
import org.deidentifier.arx.aggregates.HierarchyBuilderIntervalBased.Interval;
import org.deidentifier.arx.aggregates.HierarchyBuilderIntervalBased.Range;
import org.deidentifier.arx.criteria.KAnonymity;
/**
 * This class implements an example on how to use data cleansing using the DataSource functionality.
 * <p>
 * It runs three scenarios in sequence: importing a CSV file with a manually defined hierarchy,
 * building an interval-based hierarchy from an in-memory value list that contains a NULL value,
 * and combining the CSV import with the interval-based hierarchy builder before anonymizing.
 *
 * @author Florian Kohlmayer
 * @author Fabian Prasser
 */
public class Example28 extends Example {

    /**
     * Main entry point. Runs the three example scenarios one after another.
     *
     * @param args command line arguments (not used)
     * @throws IOException if one of the example scenarios fails with an I/O error
     * @throws SQLException declared as part of the signature; none of the scenarios
     *                      called here declares it
     * @throws ClassNotFoundException declared as part of the signature; none of the
     *                                scenarios called here declares it
     */
    public static void main(String[] args) throws IOException,
                                                  SQLException,
                                                  ClassNotFoundException {
        exampleCSV();
        buildHierarchy();
        useBuilderAndAnonymize();
    }

    /**
     * This method uses a hierarchy builder for an interval based hierarchy containing NULL values.
     * It prints the builder's specification (intervals and levels), the levels resulting from
     * preparing the builder with the example data, and finally the built hierarchy.
     *
     * @throws IOException declared as part of the signature
     */
    private static void buildHierarchy() throws IOException {

        // Define hierarchy
        HierarchyBuilderIntervalBased<Long> builder = createAgeHierarchyBuilder();

        // Print hierarchy definition
        System.out.println("------------------------");
        System.out.println("INTERVAL-BASED HIERARCHY");
        System.out.println("------------------------");
        System.out.println("");
        System.out.println("SPECIFICATION");

        // Print specification: base intervals
        for (Interval<Long> interval : builder.getIntervals()) {
            System.out.println(interval);
        }

        // Print specification: levels
        for (Level<Long> level : builder.getLevels()) {
            System.out.println(level);
        }

        // Print info about resulting levels
        System.out.println("Resulting levels: " + Arrays.toString(builder.prepare(getExampleData())));

        System.out.println("");
        System.out.println("RESULT");

        // Print resulting hierarchy
        printArray(builder.build().getHierarchy());
        System.out.println("");
    }

    /**
     * Creates the interval-based hierarchy builder shared by {@link #buildHierarchy()} and
     * {@link #useBuilderAndAnonymize()}, so that both examples use exactly the same specification:
     * data type INTEGER, ranges (0, 0, 0) and (99, 99, 99), base intervals 0-20 and 20-33,
     * and group fanouts of 2 on level 0 and 3 on level 1.
     *
     * @return a freshly configured builder; each call returns a new instance
     */
    private static HierarchyBuilderIntervalBased<Long> createAgeHierarchyBuilder() {

        HierarchyBuilderIntervalBased<Long> builder = HierarchyBuilderIntervalBased.create(DataType.INTEGER,
                                                                                          new Range<Long>(0L, 0L, 0L),
                                                                                          new Range<Long>(99L, 99L, 99L));

        // Define base intervals
        builder.setAggregateFunction(DataType.INTEGER.createAggregate().createIntervalFunction(true, false));
        builder.addInterval(0L, 20L);
        builder.addInterval(20L, 33L);

        // Define grouping fanouts
        builder.getLevel(0).addGroup(2);
        builder.getLevel(1).addGroup(3);

        return builder;
    }

    /**
     * This method imports data from a simple CSV file, sets a data type and replaces all
     * non-matching values with NULL values. It then attaches a manually defined hierarchy
     * (including an entry for "NULL") to the "age" attribute, anonymizes the data and
     * prints both input and output.
     *
     * @throws IOException if importing or anonymizing the data fails with an I/O error
     */
    private static void exampleCSV() throws IOException {

        DataSource source = DataSource.createCSVSource("data/test_dirty.csv", StandardCharsets.UTF_8, ';', true);
        source.addColumn("age", DataType.INTEGER, true);

        // Create data object
        Data data = Data.create(source);

        // Define hierarchy
        DefaultHierarchy age = Hierarchy.create();
        age.add("34", "<50", "*");
        age.add("45", "<50", "*");
        age.add("66", ">=50", "*");
        age.add("70", ">=50", "*");
        age.add("99", ">=50", "*");
        age.add("NULL", "NULL", "*");
        data.getDefinition().setAttributeType("age", age);

        // Print to console
        print(data.getHandle());
        System.out.println("\n");

        // Anonymize
        ARXAnonymizer anonymizer = new ARXAnonymizer();
        ARXConfiguration config = ARXConfiguration.create();
        config.addPrivacyModel(new KAnonymity(3));
        config.setMaxOutliers(0d);
        ARXResult result = anonymizer.anonymize(data, config);

        // Print results
        System.out.println("Output:");
        print(result.getOutput(false));
    }

    /**
     * Returns the in-memory example values used to prepare the interval-based hierarchy,
     * including one "NULL" entry.
     *
     * @return a new array with the example values
     */
    private static String[] getExampleData() {
        return new String[] { "34", "66", "70", "34", "70", "NULL" };
    }

    /**
     * This method imports data from a simple CSV file, attaches the interval-based hierarchy
     * builder to the "age" attribute, prints the data and the hierarchy built for the distinct
     * values of the first column, and finally anonymizes the data and prints the output.
     *
     * @throws IOException if importing or anonymizing the data fails with an I/O error
     */
    private static void useBuilderAndAnonymize() throws IOException {

        DataSource source = DataSource.createCSVSource("data/test_dirty.csv", StandardCharsets.UTF_8, ';', true);
        source.addColumn("age", DataType.INTEGER, true);

        // Create data object
        Data data = Data.create(source);

        // Define hierarchy
        HierarchyBuilderIntervalBased<Long> builder = createAgeHierarchyBuilder();
        data.getDefinition().setAttributeType("age", builder);

        // Print info
        System.out.println("Data:");
        printHandle(data.getHandle());
        System.out.println("Hierarchy:");
        printArray(builder.build(data.getHandle().getDistinctValues(0)).getHierarchy());

        // Anonymize
        ARXAnonymizer anonymizer = new ARXAnonymizer();
        ARXConfiguration config = ARXConfiguration.create();
        config.addPrivacyModel(new KAnonymity(3));
        config.setMaxOutliers(0d);
        ARXResult result = anonymizer.anonymize(data, config);

        // Print results
        System.out.println("Output:");
        print(result.getOutput(false));
    }
}