/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.AttributeType.MicroAggregationFunction;
import org.deidentifier.arx.aggregates.HierarchyBuilder;
import org.deidentifier.arx.certificate.elements.ElementData;
import org.deidentifier.arx.framework.check.distribution.DistributionAggregateFunction.DistributionAggregateFunctionGeneralization;
import org.deidentifier.arx.io.ImportAdapter;
import org.deidentifier.arx.io.ImportConfiguration;
/**
 * Encapsulates a definition of the types of attributes contained in a dataset.
 * <p>
 * For each attribute name this class stores the attribute type, the data type,
 * an optional generalization hierarchy and/or hierarchy builder, an optional
 * microaggregation function and optional minimum/maximum generalization levels.
 * <p>
 * A definition can be locked (see {@link #isLocked()}). While it is locked, all
 * public methods that modify the definition throw an {@link IllegalStateException}.
 *
 * @author Fabian Prasser
 * @author Florian Kohlmayer
 */
public class DataDefinition implements Cloneable{

    /** Is this data definition locked. */
    private boolean locked = false;

    /** The mapped attribute types. */
    private final Map<String, AttributeType> attributeTypes = new HashMap<String, AttributeType>();

    /** The mapped builders. */
    private final Map<String, HierarchyBuilder<?>> builders = new HashMap<String, HierarchyBuilder<?>>();

    /** The mapped hierarchies. */
    private final Map<String, Hierarchy> hierarchies = new HashMap<String, Hierarchy>();

    /** The mapped functions. */
    private final Map<String, MicroAggregationFunction> functions = new HashMap<String, MicroAggregationFunction>();

    /** The mapped data types. */
    private final Map<String, DataType<?>> dataTypes = new HashMap<String, DataType<?>>();

    /** The mapped minimum generalization. */
    private final Map<String, Integer> minGeneralization = new HashMap<String, Integer>();

    /** The mapped maximum generalization. */
    private final Map<String, Integer> maxGeneralization = new HashMap<String, Integer>();

    /**
     * Creates a copy of this definition. Attribute types, data types and
     * microaggregation functions are cloned; hierarchies, hierarchy builders
     * and generalization levels are copied by reference. The copy has the
     * same lock state as this definition.
     *
     * @return the copy
     */
    @Override
    public DataDefinition clone() {

        final DataDefinition d = new DataDefinition();

        // Entries whose values are cloned
        for (final Entry<String, AttributeType> entry : attributeTypes.entrySet()) {
            d.attributeTypes.put(entry.getKey(), entry.getValue().clone());
        }
        for (final Entry<String, DataType<?>> entry : dataTypes.entrySet()) {
            d.dataTypes.put(entry.getKey(), entry.getValue().clone());
        }
        for (final Entry<String, MicroAggregationFunction> entry : functions.entrySet()) {
            // setMicroAggregationFunction() accepts null, so null values may be stored
            final MicroAggregationFunction function = entry.getValue();
            d.functions.put(entry.getKey(), function == null ? null : function.clone());
        }

        // Entries whose values are shared with the copy
        d.hierarchies.putAll(hierarchies);
        d.minGeneralization.putAll(minGeneralization);
        d.maxGeneralization.putAll(maxGeneralization);
        d.builders.putAll(builders);

        d.setLocked(this.isLocked());
        return d;
    }

    /**
     * Returns the type defined for the attribute.
     *
     * @param attribute the attribute name
     * @return the attribute type, or null if none has been defined
     */
    public AttributeType getAttributeType(final String attribute) {
        return attributeTypes.get(attribute);
    }

    /**
     * Returns the data type for the given column.
     *
     * @param columnName the column name
     * @return the defined data type, or {@link DataType#STRING} if none has been defined
     */
    public DataType<?> getDataType(final String columnName) {
        final DataType<?> t = dataTypes.get(columnName);
        if (t != null) {
            return t;
        }
        return DataType.STRING;
    }

    /**
     * Returns the according hierarchy as String array.
     *
     * @param attribute the attribute name
     * @return the hierarchy array, or null if no hierarchy object is stored for the attribute
     */
    public String[][] getHierarchy(final String attribute) {
        final Hierarchy hierarchy = hierarchies.get(attribute);
        return hierarchy == null ? null : hierarchy.getHierarchy();
    }

    /**
     * Returns the associated builder, if any.
     *
     * @param attribute the attribute name
     * @return the builder, or null if none is stored for the attribute
     */
    public HierarchyBuilder<?> getHierarchyBuilder(final String attribute) {
        return builders.get(attribute);
    }

    /**
     * Returns the according hierarchy object.
     *
     * @param attribute the attribute name
     * @return the hierarchy object, or null if none is stored for the attribute
     */
    public Hierarchy getHierarchyObject(final String attribute) {
        return hierarchies.get(attribute);
    }

    /**
     * Returns the direct identifiers.
     *
     * @return a new set containing all attributes of type {@code ATTR_TYPE_ID}
     */
    public Set<String> getIdentifyingAttributes() {
        return getAttributesByType(AttributeType.ATTR_TYPE_ID);
    }

    /**
     * Returns the insensitive attributes.
     *
     * @return a new set containing all attributes of type {@code ATTR_TYPE_IS}
     */
    public Set<String> getInsensitiveAttributes() {
        return getAttributesByType(AttributeType.ATTR_TYPE_IS);
    }

    /**
     * Returns the maximum generalization for the attribute. If no explicit
     * maximum has been set, the value is derived from the number of entries
     * in the first row of the hierarchy array (minus one). If neither is
     * available, or the hierarchy is empty, 0 is returned.
     *
     * @param attribute the attribute name
     * @return the maximum generalization level
     * @throws IllegalArgumentException if the attribute is not a quasi-identifier
     */
    public int getMaximumGeneralization(final String attribute) {
        checkQuasiIdentifier(attribute);
        final Integer result = maxGeneralization.get(attribute);
        if (result != null) {
            return result;
        }
        final String[][] hierarchy = this.getHierarchy(attribute);
        if (hierarchy == null || hierarchy.length == 0 || hierarchy[0] == null) {
            return 0;
        }
        return hierarchy[0].length - 1;
    }

    /**
     * Returns the according microaggregation function.
     *
     * @param attribute the attribute name
     * @return the function, or null if none is stored for the attribute
     */
    public MicroAggregationFunction getMicroAggregationFunction(final String attribute) {
        return functions.get(attribute);
    }

    /**
     * Returns the minimum generalization for the attribute.
     *
     * @param attribute the attribute name
     * @return the explicitly defined minimum, or 0 if none has been defined
     * @throws IllegalArgumentException if the attribute is not a quasi-identifier
     */
    public int getMinimumGeneralization(final String attribute) {
        checkQuasiIdentifier(attribute);
        final Integer result = minGeneralization.get(attribute);
        return result != null ? result : 0;
    }

    /**
     * Returns the quasi-identifiers for which generalization is specified,
     * i.e. all quasi-identifiers without a microaggregation function.
     *
     * @return a new set of attribute names
     */
    public Set<String> getQuasiIdentifiersWithGeneralization() {
        final Set<String> result = new HashSet<String>();
        for (final String attr : getAttributesByType(AttributeType.ATTR_TYPE_QI)) {
            if (getMicroAggregationFunction(attr) == null) {
                result.add(attr);
            }
        }
        return result;
    }

    /**
     * Returns the quasi-identifiers for which microaggregation is specified,
     * i.e. all quasi-identifiers with a microaggregation function.
     *
     * @return a new set of attribute names
     */
    public Set<String> getQuasiIdentifiersWithMicroaggregation() {
        final Set<String> result = new HashSet<String>();
        for (final String attr : getAttributesByType(AttributeType.ATTR_TYPE_QI)) {
            if (getMicroAggregationFunction(attr) != null) {
                result.add(attr);
            }
        }
        return result;
    }

    /**
     * Returns the quasi identifying attributes.
     *
     * @return a new set containing all attributes of type {@code ATTR_TYPE_QI}
     */
    public Set<String> getQuasiIdentifyingAttributes() {
        return getAttributesByType(AttributeType.ATTR_TYPE_QI);
    }

    /**
     * Returns the sensitive attributes.
     *
     * @return a new set containing all attributes of type {@code ATTR_TYPE_SE}
     */
    public Set<String> getSensitiveAttributes() {
        return getAttributesByType(AttributeType.ATTR_TYPE_SE);
    }

    /**
     * Returns whether a hierarchy is available.
     *
     * @param attribute the attribute name
     * @return true if {@link #getHierarchy(String)} returns a non-null array
     */
    public boolean isHierarchyAvailable(String attribute) {
        return getHierarchy(attribute) != null;
    }

    /**
     * Returns whether a hierarchy builder is available.
     *
     * @param attribute the attribute name
     * @return true if a builder is stored for the attribute
     */
    public boolean isHierarchyBuilderAvailable(String attribute) {
        return getHierarchyBuilder(attribute) != null;
    }

    /**
     * Returns whether this definition is locked. A locked definition cannot be
     * altered via its public modification methods.
     *
     * @return true if the definition is locked
     */
    public boolean isLocked(){
        return locked;
    }

    /**
     * Returns whether a maximum generalization level is available, either
     * because it has been set explicitly or because a hierarchy is available.
     *
     * @param attribute the attribute name
     * @return true if a maximum generalization level is available
     * @throws IllegalArgumentException if the attribute is not a quasi-identifier
     */
    public boolean isMaximumGeneralizationAvailable(String attribute) {
        checkQuasiIdentifier(attribute);
        return maxGeneralization.containsKey(attribute) || (this.getHierarchy(attribute) != null);
    }

    /**
     * Returns whether a minimum generalization level is available. This is
     * always the case for quasi-identifiers, because the minimum defaults to 0.
     *
     * @param attribute the attribute name
     * @return true
     * @throws IllegalArgumentException if the attribute is not a quasi-identifier
     */
    public boolean isMinimumGeneralizationAvailable(String attribute) {
        checkQuasiIdentifier(attribute);
        return true;
    }

    /**
     * Reads all settings from the given definition. All settings of this
     * definition are replaced by a copy of the other definition's settings,
     * created as described for {@link #clone()}. The lock state of this
     * definition is not changed.
     *
     * @param other the definition to read from
     * @throws IllegalStateException if this definition is locked
     */
    public void read(DataDefinition other) {

        // Check
        checkLocked();

        // Snapshot first: this decouples both definitions and also makes
        // read(this) safe, which would otherwise wipe all settings
        final DataDefinition copy = other.clone();

        // Replace all settings
        this.attributeTypes.clear();
        this.attributeTypes.putAll(copy.attributeTypes);
        this.builders.clear();
        this.builders.putAll(copy.builders);
        this.hierarchies.clear();
        this.hierarchies.putAll(copy.hierarchies);
        this.functions.clear();
        this.functions.putAll(copy.functions);
        this.dataTypes.clear();
        this.dataTypes.putAll(copy.dataTypes);
        this.minGeneralization.clear();
        this.minGeneralization.putAll(copy.minGeneralization);
        this.maxGeneralization.clear();
        this.maxGeneralization.putAll(copy.maxGeneralization);
    }

    /**
     * Renders this object
     *
     * @return a list with one element per attribute category, followed by one
     *         element per hierarchy/builder and one per microaggregation function
     */
    public List<ElementData> render() {

        // Render attribute types
        final List<ElementData> result = new ArrayList<>();
        result.add(render("Insensitive attributes", getInsensitiveAttributes()));
        result.add(render("Sensitive attributes", getSensitiveAttributes()));
        result.add(render("Identifying attributes", getIdentifyingAttributes()));
        result.add(render("Quasi-identifying attributes", getQuasiIdentifyingAttributes()));

        // Collect all attributes with a type
        final Set<String> attributes = new HashSet<>();
        attributes.addAll(getInsensitiveAttributes());
        attributes.addAll(getSensitiveAttributes());
        attributes.addAll(getIdentifyingAttributes());
        attributes.addAll(getQuasiIdentifyingAttributes());

        // Render hierarchies of attributes without a microaggregation function
        for (final String attribute : attributes) {
            if (this.functions.get(attribute) == null &&
                (this.hierarchies.containsKey(attribute) || this.builders.containsKey(attribute))) {
                result.add(render(attribute, this.hierarchies.get(attribute), this.builders.get(attribute)));
            }
        }

        // Render microaggregation functions
        for (final String attribute : attributes) {
            final MicroAggregationFunction function = this.functions.get(attribute);
            if (function != null) {
                result.add(render(attribute, function));
            }
        }
        return result;
    }

    /**
     * Resets the according setting
     *
     * @param attr the attribute name
     * @throws IllegalStateException if this definition is locked
     */
    public void resetAttributeType(String attr) {
        checkLocked();
        this.attributeTypes.remove(attr);
    }

    /**
     * Resets the according setting
     *
     * @param attr the attribute name
     * @throws IllegalStateException if this definition is locked
     */
    public void resetHierarchy(String attr) {
        checkLocked();
        this.hierarchies.remove(attr);
    }

    /**
     * Resets the according setting
     *
     * @param attr the attribute name
     * @throws IllegalStateException if this definition is locked
     */
    public void resetHierarchyBuilder(String attr) {
        checkLocked();
        this.builders.remove(attr);
    }

    /**
     * Resets the according setting
     *
     * @param attr the attribute name
     * @throws IllegalStateException if this definition is locked
     */
    public void resetMaximumGeneralization(String attr) {
        checkLocked();
        this.maxGeneralization.remove(attr);
    }

    /**
     * Resets the according setting
     *
     * @param attr the attribute name
     * @throws IllegalStateException if this definition is locked
     */
    public void resetMicroAggregationFunction(String attr) {
        checkLocked();
        this.functions.remove(attr);
    }

    /**
     * Resets the according setting
     *
     * @param attr the attribute name
     * @throws IllegalStateException if this definition is locked
     */
    public void resetMinimumGeneralization(String attr) {
        checkLocked();
        this.minGeneralization.remove(attr);
    }

    /**
     * Define the type of a given attribute. If the type is a hierarchy or a
     * microaggregation function, it is additionally stored as the attribute's
     * hierarchy or function, respectively.
     *
     * @param attribute the attribute name
     * @param type the attribute type
     * @throws IllegalStateException if this definition is locked
     * @throws NullPointerException if the type is null
     */
    public void setAttributeType(final String attribute,
                                 final AttributeType type) {
        checkLocked();
        checkNullArgument(type, "Type");
        attributeTypes.put(attribute, type);
        if (type instanceof Hierarchy) {
            this.hierarchies.put(attribute, (Hierarchy)type);
        } else if (type instanceof MicroAggregationFunction) {
            this.functions.put(attribute, (MicroAggregationFunction)type);
        }
    }

    /**
     * Defines the given attribute as a quasi-identifier and stores the functional
     * representation of the generalization hierarchy.
     *
     * @param attribute the attribute name
     * @param builder the hierarchy builder
     * @throws IllegalStateException if this definition is locked
     * @throws NullPointerException if the builder is null
     */
    public void setAttributeType(final String attribute,
                                 final HierarchyBuilder<?> builder) {
        checkLocked();
        checkNullArgument(builder, "Builder");
        attributeTypes.put(attribute, AttributeType.QUASI_IDENTIFYING_ATTRIBUTE);
        builders.put(attribute, builder);
    }

    /**
     * Define the datatype of a given attribute.
     *
     * @param attribute the attribute name
     * @param type the data type
     * @throws IllegalStateException if this definition is locked
     * @throws NullPointerException if the type is null
     */
    public void setDataType(final String attribute, final DataType<?> type) {
        checkLocked();
        checkNullArgument(type, "Type");
        dataTypes.put(attribute, type);
    }

    /**
     * Associates the given hierarchy
     *
     * @param attribute the attribute name
     * @param hierarchy the hierarchy
     * @throws IllegalStateException if this definition is locked
     */
    public void setHierarchy(String attribute, Hierarchy hierarchy) {
        checkLocked();
        this.hierarchies.put(attribute, hierarchy);
    }

    /**
     * Associates the given hierarchy builder
     *
     * @param attribute the attribute name
     * @param builder the hierarchy builder
     * @throws IllegalStateException if this definition is locked
     */
    public void setHierarchy(String attribute, HierarchyBuilder<?> builder) {
        checkLocked();
        this.builders.put(attribute, builder);
    }

    /**
     * Define the maximal generalization of a given attribute.
     *
     * @param attribute the attribute name
     * @param maximum the maximum generalization level
     * @throws IllegalStateException if this definition is locked
     */
    public void setMaximumGeneralization(final String attribute,
                                         final int maximum) {
        checkLocked();
        maxGeneralization.put(attribute, maximum);
    }

    /**
     * Associates the given microaggregation function
     *
     * @param attribute the attribute name
     * @param function the microaggregation function
     * @throws IllegalStateException if this definition is locked
     */
    public void setMicroAggregationFunction(String attribute, MicroAggregationFunction function) {
        checkLocked();
        this.functions.put(attribute, function);
    }

    /**
     * Define the minimal generalization of a given attribute.
     *
     * @param attribute the attribute name
     * @param minimum the minimum generalization level
     * @throws IllegalStateException if this definition is locked
     */
    public void setMinimumGeneralization(final String attribute,
                                         final int minimum) {
        checkLocked();
        minGeneralization.put(attribute, minimum);
    }

    /**
     * Checks whether this definition is locked.
     *
     * @throws IllegalStateException if this definition is locked
     */
    private void checkLocked() throws IllegalStateException{
        if (locked) {throw new IllegalStateException("This definition is currently locked");}
    }

    /**
     * Checks whether the argument is null.
     *
     * @param argument the value to check
     * @param name the name used in the exception message
     * @throws NullPointerException if the argument is null
     */
    private void checkNullArgument(Object argument, String name) throws NullPointerException {
        if (argument == null) { throw new NullPointerException(name + " must not be null"); }
    }

    /**
     * Checks whether the attribute is a quasi-identifier.
     *
     * @param attribute the attribute name
     * @throws IllegalArgumentException if no type is defined for the attribute
     *         or its type is not {@code ATTR_TYPE_QI}
     */
    private void checkQuasiIdentifier(String attribute) throws IllegalArgumentException {
        final AttributeType type = attributeTypes.get(attribute);
        if (type == null || type.getType() != AttributeType.ATTR_TYPE_QI) {
            throw new IllegalArgumentException("Attribute ("+attribute+") is not a quasi-identifier");
        }
    }

    /**
     * Returns attributes by type
     *
     * @param type the type constant to match against {@code AttributeType.getType()}
     * @return a new set containing the names of all matching attributes
     */
    private Set<String> getAttributesByType(int type) {
        final Set<String> result = new HashSet<String>();
        for (final Entry<String, AttributeType> entry : attributeTypes.entrySet()) {
            if (entry.getValue().getType() == type) {
                result.add(entry.getKey());
            }
        }
        return result;
    }

    /**
     * Renders a hierarchy. If a non-empty hierarchy array is available, its
     * height is rendered (and, for quasi-identifiers, the minimum and maximum
     * level); otherwise the type of the builder is rendered, if there is one.
     *
     * @param attribute the attribute name
     * @param hierarchy the hierarchy, may be null
     * @param builder the hierarchy builder, may be null
     * @return the rendered element
     */
    private ElementData render(String attribute, Hierarchy hierarchy, HierarchyBuilder<?> builder) {
        final ElementData result = new ElementData("Generalization hierarchy");
        result.addProperty("Attribute", attribute);
        final String[][] levels = hierarchy == null ? null : hierarchy.getHierarchy();
        if (levels != null && levels.length != 0 && levels[0] != null) {
            result.addProperty("Height", levels[0].length);
            if (this.getQuasiIdentifyingAttributes().contains(attribute)) {
                result.addProperty("Minimum level", this.getMinimumGeneralization(attribute));
                result.addProperty("Maximum level", this.getMaximumGeneralization(attribute));
            }
        } else if (builder != null){
            result.addProperty("Builder type", builder.getType().toString());
        }
        return result;
    }

    /**
     * Renders a microaggregation function
     *
     * @param attribute the attribute name
     * @param function the function, may be null
     * @return the rendered element
     */
    private ElementData render(String attribute, MicroAggregationFunction function) {
        final ElementData result = new ElementData("Microaggregation function");
        result.addProperty("Attribute", attribute);
        if (function != null) {
            result.addProperty("Type", function.getLabel());
        }
        return result;
    }

    /**
     * Renders a set of attributes together with their data types
     *
     * @param title the title of the element
     * @param attributes the attribute names
     * @return the rendered element, containing the item "None" if the set is empty
     */
    private ElementData render(String title, Set<String> attributes) {
        final ElementData result = new ElementData(title);
        if (attributes.isEmpty()) {
            result.addItem("None");
        } else {
            for (final String attribute : attributes) {
                result.addProperty(attribute, this.getDataType(attribute).toString());
            }
        }
        return result;
    }

    /**
     * Materializes all functional hierarchies. A hierarchy is materialized for
     * each quasi-identifier with generalization and for each quasi-identifier
     * with a microaggregation function that is backed by a
     * {@link DistributionAggregateFunctionGeneralization}.
     *
     * @param handle the handle providing the distinct values of each attribute
     * @throws IllegalStateException if a hierarchy builder fails
     */
    protected void materializeHierarchies(DataHandle handle) {

        // For each qi with generalization
        for (final String qi : this.getQuasiIdentifiersWithGeneralization()) {
            materializeHierarchy(handle, qi);
        }

        // For each qi with microaggregation
        for (final String qi : this.getQuasiIdentifiersWithMicroaggregation()) {
            if (this.getMicroAggregationFunction(qi).getFunction() instanceof DistributionAggregateFunctionGeneralization) {
                materializeHierarchy(handle, qi);
            }
        }
    }

    /**
     * Materializes the hierarchy of a single attribute, unless one is already
     * available. Uses the attribute's builder if there is one and otherwise
     * creates a hierarchy in which each distinct value is its own single-entry row.
     * Writes to the map directly, so this also works on a locked definition.
     *
     * @param handle the handle providing the distinct values of the attribute
     * @param attribute the attribute name
     * @throws IllegalStateException if the hierarchy builder fails
     */
    private void materializeHierarchy(DataHandle handle, String attribute) {

        // Nothing to do if a hierarchy is available
        if (isHierarchyAvailable(attribute)) {
            return;
        }

        // Obtain data
        final String[] data = handle.getDistinctValues(handle.getColumnIndexOf(attribute));

        // If builder is available
        if (isHierarchyBuilderAvailable(attribute)) {

            // Compute and store hierarchy
            try {
                this.hierarchies.put(attribute, this.getHierarchyBuilder(attribute).build(data));
            } catch (Exception e) {
                throw new IllegalStateException("Error building hierarchy for attribute ("+attribute+")", e);
            }
        } else {

            // Create empty hierarchy
            final String[][] hierarchy = new String[data.length][];
            for (int i=0; i<data.length; i++) {
                hierarchy[i] = new String[]{data[i]};
            }
            this.hierarchies.put(attribute, Hierarchy.create(hierarchy));
        }
    }

    /**
     * Parses the configuration of the import adapter. Sets the data type of
     * the i-th header entry to the data type of the i-th configured column.
     *
     * @param adapter the import adapter
     * @throws IllegalStateException if this definition is locked
     */
    protected void parse(ImportAdapter adapter) {
        final String[] header = adapter.getHeader();
        final ImportConfiguration config = adapter.getConfig();
        for (int i=0; i<config.getColumns().size(); i++){
            this.setDataType(header[i], config.getColumns().get(i).getDataType());
        }
    }

    /**
     * Lock/unlock the definition.
     *
     * @param locked true to lock the definition, false to unlock it
     */
    protected void setLocked(boolean locked){
        this.locked = locked;
    }
}