/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.examples;
import java.io.IOException;
import org.apache.commons.lang.StringUtils;
import org.deidentifier.arx.ARXPopulationModel;
import org.deidentifier.arx.AttributeType;
import org.deidentifier.arx.Data;
import org.deidentifier.arx.DataHandle;
import org.deidentifier.arx.risk.RiskEstimateBuilder;
import org.deidentifier.arx.risk.RiskModelAttributes;
/**
* Example for evaluating distinction and separation of attributes as described in
* R. Motwani et al. "Efficient algorithms for masking and finding quasi-identifiers"
* Proc. VLDB Conf., 2007.
*
* @author Maximilian Zitzmann
* @author Fabian Prasser
*/
public class Example47 extends Example {

    /** Minimum width of the identifier column, so that the column header always fits. */
    private static final int MIN_IDENTIFIER_WIDTH = 12;

    /** Rule fragment below the two numeric columns (13 and 12 characters wide plus one space of padding on each side). */
    private static final String NUMERIC_COLUMNS_RULE = "+---------------+--------------+";

    /**
     * Entry point. Loads the example data, marks every column as quasi-identifying,
     * prints the input data and then prints distinction and separation for the
     * quasi-identifiers reported by the risk model.
     *
     * @param args the arguments (not used)
     * @throws IOException declared for compatibility with callers; nothing in this method throws it directly
     */
    public static void main(String[] args) throws IOException {

        Data data = loadData();
        DataHandle handle = data.getHandle();

        // Flag every attribute as quasi identifier
        int numColumns = handle.getNumColumns();
        for (int column = 0; column < numColumns; column++) {
            data.getDefinition().setAttributeType(handle.getAttributeName(column),
                                                  AttributeType.QUASI_IDENTIFYING_ATTRIBUTE);
        }

        // Perform risk analysis
        System.out.println("\n - Input data");
        print(handle);
        System.out.println("\n - Quasi-identifiers with values (in percent):");
        analyzeAttributes(handle);
    }

    /**
     * Calculates alpha distinction and separation for the given handle and prints
     * the result as a table. The risk estimator is created with a population model
     * for the region USA.
     *
     * @param handle the data handle
     */
    private static void analyzeAttributes(DataHandle handle) {
        ARXPopulationModel populationModel = ARXPopulationModel.create(ARXPopulationModel.Region.USA);
        RiskEstimateBuilder builder = handle.getRiskEstimator(populationModel);
        RiskModelAttributes riskModel = builder.getAttributeRisks();

        // output
        printPrettyTable(riskModel.getAttributeRisks());
    }

    /**
     * Creates the in-memory example dataset. The first row added ("age", "sex", "state")
     * is presumably interpreted as the header; it is followed by twelve records.
     *
     * @return the example data
     */
    private static Data loadData() {
        // Define data
        Data.DefaultData data = Data.create();
        data.add("age", "sex", "state");
        data.add("20", "Female", "CA");
        data.add("30", "Female", "CA");
        data.add("40", "Female", "TX");
        data.add("20", "Male", "NY");
        data.add("40", "Male", "CA");
        data.add("53", "Male", "CA");
        data.add("76", "Male", "EU");
        data.add("40", "Female", "AS");
        data.add("32", "Female", "CA");
        data.add("88", "Male", "CA");
        data.add("48", "Female", "AS");
        data.add("76", "Male", "UU");
        return data;
    }

    /**
     * Helper that prints a table with one row per quasi-identifier, showing its
     * distinction and separation multiplied by 100 (i.e. in percent) with two decimals.
     * <p>
     * The identifier column is sized to the widest identifier of <em>all</em> rows, so the
     * table stays aligned regardless of the order of the array. An empty or {@code null}
     * array yields a table that consists of the header only.
     *
     * @param quasiIdentifiers the risks to print, may be empty or {@code null}
     */
    private static void printPrettyTable(RiskModelAttributes.QuasiIdentifierRisk[] quasiIdentifiers) {

        RiskModelAttributes.QuasiIdentifierRisk[] rows = quasiIdentifiers != null
                ? quasiIdentifiers
                : new RiskModelAttributes.QuasiIdentifierRisk[0];

        // Width of the first column: widest identifier over all rows, but never
        // narrower than the minimum needed for the header
        int identifierWidth = MIN_IDENTIFIER_WIDTH;
        for (RiskModelAttributes.QuasiIdentifierRisk row : rows) {
            // String.valueOf mirrors what "%s" renders, so the measured width is the printed width
            identifierWidth = Math.max(identifierWidth, String.valueOf(row.getIdentifier()).length());
        }

        // Header and rows share the same column widths, which keeps them aligned
        String headerFormat = "| %-" + identifierWidth + "s | %13s | %12s |%n";
        String rowFormat = "| %-" + identifierWidth + "s | %13.2f | %12.2f |%n";

        // The cell content is padded by one space on each side, hence the additional 2
        String rule = "+" + StringUtils.repeat("-", identifierWidth + 2) + NUMERIC_COLUMNS_RULE;

        System.out.println(rule);
        System.out.format(headerFormat, "Identifier", "Distinction", "Separation");
        System.out.println(rule);
        for (RiskModelAttributes.QuasiIdentifierRisk row : rows) {
            // print every Quasi-Identifier
            System.out.format(rowFormat,
                              row.getIdentifier(),
                              row.getDistinction() * 100,
                              row.getSeparation() * 100);
        }
        System.out.println(rule);
    }
}