/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.aggregates;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import org.deidentifier.arx.AttributeType.Hierarchy;
import org.deidentifier.arx.DataType;
/**
* This abstract base class enables building hierarchies for categorical and non-categorical values.
*
* @author Fabian Prasser
* @param <T> the value type shared by the builder's data type and aggregate functions
*/
public abstract class HierarchyBuilderGroupingBased<T> extends HierarchyBuilder<T> implements Serializable {
/**
 * A group specification: the number of elements that are merged into one
 * group (the "fanout") together with the aggregate function that is
 * associated with the group. Both fields are final.
 *
 * @author Fabian Prasser
 * @param <U> the type parameter of the aggregate function
 */
public static class Group<U> implements Serializable {

    /** Serialization version identifier. */
    private static final long serialVersionUID = -5767501048737045793L;

    /** Fanout; always greater than zero. */
    private final int size;

    /** Aggregate function; never null. */
    private final AggregateFunction<U> function;

    /**
     * Creates a new instance.
     *
     * @param size the fanout, must be greater than zero
     * @param function the aggregate function, must not be null
     * @throws IllegalArgumentException if size is not positive or function is null
     */
    private Group(int size, AggregateFunction<U> function) {
        if (size <= 0) {
            // The check rejects zero as well, so the message must say "> 0"
            throw new IllegalArgumentException("Size must be > 0");
        }
        if (function == null) {
            throw new IllegalArgumentException("Function must not be null");
        }
        this.size = size;
        this.function = function;
    }

    /**
     * Returns the aggregate function of this group.
     *
     * @return the function, never null
     */
    public AggregateFunction<U> getFunction() {
        return function;
    }

    /**
     * Returns the fanout of this group.
     *
     * @return the size, always greater than zero
     */
    public int getSize() {
        return size;
    }

    /**
     * Returns a textual representation containing the size and the function.
     *
     * @return the string representation
     */
    @Override
    public String toString() {
        return "Group[length=" + size + ", function=" + function.toString() + "]";
    }
}
/**
 * This class represents a level in the hierarchy: an ordered list of groups
 * that belongs to a builder. Every modification of the list marks the owning
 * builder as not prepared.
 *
 * @author Fabian Prasser
 * @param <U> the type parameter of the owning builder
 */
public static class Level<U> implements Serializable {

    /** Serialization version identifier. */
    private static final long serialVersionUID = 1410005675926162598L;

    /** Index of this level. */
    private final int level;

    /** List of groups, in insertion order. */
    private final List<Group<U>> list = new ArrayList<Group<U>>();

    /** The builder this level belongs to. */
    private final HierarchyBuilderGroupingBased<U> builder;

    /**
     * Creates a new instance.
     *
     * @param builder the owning builder
     * @param level the index of this level
     */
    private Level(HierarchyBuilderGroupingBased<U> builder, int level) {
        this.level = level;
        this.builder = builder;
    }

    /**
     * Adds a group of the given size that uses the builder's default
     * aggregate function.
     *
     * @param size the fanout, must be greater than zero
     * @return this level, to allow chaining
     * @throws IllegalStateException if the builder has no default aggregate function
     * @throws IllegalArgumentException if the size is rejected by the group
     */
    public Level<U> addGroup(int size) {
        AggregateFunction<U> defaultFunction = builder.getDefaultFunction();
        if (defaultFunction == null) {
            throw new IllegalStateException("No default aggregate function defined");
        }
        return register(new Group<U>(size, defaultFunction));
    }

    /**
     * Adds a group of the given size that uses the given aggregate function.
     *
     * @param size the fanout, must be greater than zero
     * @param function the aggregate function, must not be null
     * @return this level, to allow chaining
     * @throws IllegalArgumentException if the size or the function is rejected by the group
     */
    public Level<U> addGroup(int size, AggregateFunction<U> function) {
        return register(new Group<U>(size, function));
    }

    /**
     * Adds a group of the given size. The group uses a constant function
     * created from the given label for the builder's data type.
     *
     * @param size the fanout, must be greater than zero
     * @param label the label passed to the constant function
     * @return this level, to allow chaining
     * @throws IllegalArgumentException if the size is rejected by the group
     */
    public Level<U> addGroup(int size, String label) {
        return register(new Group<U>(size, AggregateFunction.forType(builder.getDataType()).createConstantFunction(label)));
    }

    /**
     * Removes all groups on this level.
     *
     * @return this level, to allow chaining
     */
    public Level<U> clearGroups() {
        this.list.clear();
        builder.setPrepared(false);
        return this;
    }

    /**
     * Returns a copy of the list of groups. Changes to the returned list do
     * not affect this level.
     *
     * @return a new list containing the groups of this level
     */
    public List<Group<U>> getGroups() {
        // Copy constructor instead of clone(): same shallow copy, no unchecked casts
        return new ArrayList<Group<U>>(this.list);
    }

    /**
     * Returns the index of this level.
     *
     * @return the level
     */
    public int getLevel() {
        return level;
    }

    /**
     * Returns a multi-line representation: a header line followed by one
     * line per group.
     *
     * @return the string representation
     */
    @Override
    public String toString() {
        StringBuilder b = new StringBuilder();
        b.append("Level[height=").append(level).append("]\n");
        for (int i = 0, length = list.size(); i < length; i++) {
            b.append(" ").append(list.get(i).toString());
            // No trailing line break after the last group
            if (i < length - 1) {
                b.append("\n");
            }
        }
        return b.toString();
    }

    /**
     * Appends the group and invalidates the prepared state of the builder.
     *
     * @param group the group to append
     * @return this level
     */
    private Level<U> register(Group<U> group) {
        this.list.add(group);
        builder.setPrepared(false);
        return this;
    }
}
/**
 * Base class for the group objects that subclasses of the builder produce
 * when preparing a hierarchy. It only carries the label of the group.
 * <p>
 * NOTE(review): the enclosing builder stores instances in hash-based
 * collections, so subclasses presumably need suitable equals()/hashCode()
 * implementations - confirm against the concrete subclasses.
 *
 * @author Fabian Prasser
 */
protected abstract static class AbstractGroup implements Serializable {

    /** Serialization version identifier. */
    private static final long serialVersionUID = -7657969446040078411L;

    /** The label of this group. */
    private String label;

    /**
     * Creates a new group with the given label.
     *
     * @param label the label of the group
     */
    protected AbstractGroup(String label) {
        this.label = label;
    }

    /**
     * Returns the label of this group.
     *
     * @return the label passed to the constructor
     */
    protected String getLabel() {
        return this.label;
    }
}
/** Serialization version identifier. */
private static final long serialVersionUID = 3208791665131141362L;
/** The input data array; transient, assigned by prepare()/setData() and reset to null by build(). */
private transient String[] data;
/** The level specifications (and thereby all fanouts), keyed by level index. */
private Map<Integer, Level<T>> groups = new HashMap<Integer, Level<T>>();
/** The prepared groups as returned by prepareGroups(), accessed as [level][data item]; transient. */
private transient AbstractGroup[][] abstractGroups;
/** Whether prepare() has completed so that build() may be called; transient. */
private transient boolean prepared = false;
/** The data type. */
private DataType<T> datatype;
/** The default aggregate function used by groups added without an explicit function, might be null. */
protected AggregateFunction<T> function;
/**
 * Creates a new builder of the given type that operates on values of the
 * given data type.
 *
 * @param type the builder type, forwarded to the superclass
 * @param datatype the data type, later returned by getDataType()
 */
protected HierarchyBuilderGroupingBased(Type type, DataType<T> datatype) {
    super(type);
    this.datatype = datatype;
}
/**
 * Creates a new hierarchy from the data and groups set up by a preceding
 * call to prepare(). Column 0 of every row holds the input value, column
 * i + 1 holds the label of the group the value belongs to on level i.
 * Afterwards the builder is reset and must be prepared again before the
 * next call.
 *
 * @return the hierarchy created from the resulting array
 * @throws IllegalStateException if the builder has not been prepared
 */
public Hierarchy build() {
    if (!prepared) {
        throw new IllegalStateException("Please call prepare() first");
    }

    // Take the number of levels from the prepared groups rather than from
    // result[0], which does not exist when the data array is empty
    final int levels = abstractGroups.length;

    // Add input data (the two-dimensional allocation already creates all rows)
    final String[][] result = new String[data.length][levels + 1];
    for (int row = 0; row < result.length; row++) {
        result[row][0] = data[row];
    }

    // Add levels; labels are disambiguated separately on each level
    for (int level = 0; level < levels; level++) {
        Map<String, Map<AbstractGroup, String>> multiplicities = new HashMap<String, Map<AbstractGroup, String>>();
        for (int row = 0; row < result.length; row++) {
            result[row][level + 1] = getLabel(multiplicities, abstractGroups[level][row]);
        }
    }

    Hierarchy h = Hierarchy.create(result);

    // Reset, so that the next build requires a fresh prepare()
    this.prepared = false;
    this.data = null;
    this.abstractGroups = null;
    return h;
}
/**
 * Convenience method that prepares the builder with the given data and
 * immediately builds the hierarchy.
 *
 * @param data the input values
 * @return the resulting hierarchy
 */
public Hierarchy build(String[] data) {
    this.prepare(data);
    return this.build();
}
/**
 * Returns the data type this builder was created with.
 *
 * @return the data type
 */
public DataType<T> getDataType() {
    return datatype;
}
/**
 * Returns the default aggregate function.
 *
 * @return the default function, or null if none has been set
 */
public AggregateFunction<T> getDefaultFunction() {
    return function;
}
/**
 * Returns the specification of the given level. If the level does not
 * exist yet, an empty level is created and registered, which also marks
 * the builder as not prepared.
 *
 * @param level the index of the level
 * @return the existing or newly created level
 */
public Level<T> getLevel(int level) {
    if (this.groups.containsKey(level)) {
        return this.groups.get(level);
    }
    // Unknown level: create it lazily and invalidate the prepared state
    Level<T> created = new Level<T>(this, level);
    this.groups.put(level, created);
    this.setPrepared(false);
    return created;
}
/**
 * Returns all currently defined levels, sorted by ascending level index.
 * The returned list is a new list; changing it does not affect the builder.
 *
 * @return the sorted list of levels
 */
public List<Level<T>> getLevels() {
    List<Level<T>> levels = new ArrayList<Level<T>>(this.groups.values());
    Collections.sort(levels, new Comparator<Level<T>>() {
        @Override
        public int compare(Level<T> o1, Level<T> o2) {
            // Compare the primitive indices directly instead of boxing them
            // with the deprecated Integer constructor on every comparison
            int first = o1.getLevel();
            int second = o2.getLevel();
            return first < second ? -1 : (first == second ? 0 : 1);
        }
    });
    return levels;
}
/**
 * Checks whether the current configuration is valid. Every level below the
 * highest defined level index must exist and must contain at least one
 * group; only the highest level may be empty.
 *
 * @return <code>null</code> if the configuration is valid, an error message otherwise
 */
public String isValid() {

    // First pass: look for empty levels and determine the highest level index
    final int lastIndex = this.groups.size() - 1;
    int highest = 0;
    for (Entry<Integer, Level<T>> entry : this.groups.entrySet()) {
        final Integer index = entry.getKey();
        if (entry.getValue().getGroups().isEmpty() && index < lastIndex) {
            return "No group specified on level " + index;
        }
        highest = Math.max(index, highest);
    }

    // Second pass: there must be no gaps below the highest level
    for (int index = 0; index < highest; index++) {
        if (!this.groups.containsKey(index) || this.groups.get(index).getGroups().isEmpty()) {
            return "Missing specification for level " + index;
        }
    }
    return null;
}
/**
 * Prepares the builder for the given data: validates the configuration,
 * lets the subclass compute the groups and marks the builder as prepared.
 * If validation or group computation fails, the builder is left in the
 * "not prepared" state.
 *
 * @param data the input values
 * @return an array with one entry per hierarchy level: entry 0 is the number
 *         of input values, entry i + 1 is the number of distinct groups on level i
 * @throws IllegalArgumentException if the configuration is not valid
 */
public int[] prepare(String[] data) {

    // Invalidate first: if anything below throws, the builder must not stay
    // "prepared" with groups that were computed for previously supplied data.
    // Only the flag is cleared here; the groups are replaced further down.
    this.prepared = false;
    this.data = data;

    String error = this.isValid();
    if (error != null) {
        throw new IllegalArgumentException(error);
    }

    this.abstractGroups = prepareGroups();
    this.prepared = true;

    // TODO: This assumes that input data does not contain duplicates
    int[] result = new int[this.abstractGroups.length + 1];
    result[0] = data.length;
    for (int i = 0; i < this.abstractGroups.length; i++) {
        // Count the distinct groups on this level
        Set<AbstractGroup> distinct = new HashSet<AbstractGroup>();
        for (AbstractGroup group : this.abstractGroups[i]) {
            distinct.add(group);
        }
        result[i + 1] = distinct.size();
    }
    return result;
}
/**
 * Sets the default aggregate function. It is used for groups that are added
 * without an explicit function.
 *
 * @param function the new default function, must not be null
 * @throws IllegalArgumentException if the function is null
 */
public void setAggregateFunction(AggregateFunction<T> function) {
    if (function == null) {
        throw new IllegalArgumentException("Function must not be null");
    }
    this.function = function;
}
/**
 * Returns the label to use for the given group and makes sure that two
 * different groups never receive the same label within one bookkeeping map.
 * The first group with a given label keeps it unchanged; every further
 * group with the same label gets the label extended by "-" and the number
 * of groups already registered for that label.
 *
 * @param multiplicities bookkeeping: original label to (group to assigned label)
 * @param group the group to label
 * @return the label assigned to the group
 */
private String getLabel(Map<String, Map<AbstractGroup, String>> multiplicities, AbstractGroup group) {
    final String base = group.getLabel();
    Map<AbstractGroup, String> assigned = multiplicities.get(base);

    // Label seen for the first time: use it as is
    if (assigned == null) {
        assigned = new HashMap<AbstractGroup, String>();
        assigned.put(group, base);
        multiplicities.put(base, assigned);
        return base;
    }

    // Group has already been labeled: reuse that label
    final String known = assigned.get(group);
    if (known != null) {
        return known;
    }

    // Another group with the same label: append a running number
    final String unique = base + "-" + assigned.size();
    assigned.put(group, unique);
    return unique;
}
/**
 * Returns the data array currently held by the builder.
 *
 * @return the data array, may be null (e.g. after build() has been called)
 */
protected String[] getData() {
    return this.data;
}
/**
 * Returns the groups computed by the last preparation.
 *
 * @return the prepared groups, may be null (e.g. after build() or after the
 *         builder has been marked as not prepared)
 */
protected AbstractGroup[][] getPreparedGroups() {
    return abstractGroups;
}
/**
 * Tells the implementing class to prepare the generalization process by
 * computing the groups for the current data. The builder accesses the
 * result as [level][data item], i.e. the first dimension is the number of
 * levels and every level provides one group per input value.
 *
 * @return the prepared groups
 */
protected abstract AbstractGroup[][] prepareGroups();
/**
 * Replaces the data array held by the builder. The prepared state is not
 * touched by this method.
 *
 * @param data the new data array
 */
protected void setData(String[] data) {
    this.data = data;
}
/**
 * Registers the given level specifications, replacing existing levels that
 * have the same index. Because the specification changes, the builder is
 * marked as not prepared, consistent with the other methods that modify
 * levels or groups.
 *
 * @param levels the levels to register
 */
protected void setLevels(List<Level<T>> levels) {
    for (Level<T> level : levels) {
        this.groups.put(level.getLevel(), level);
    }
    // Groups prepared for the previous specification are stale now
    this.setPrepared(false);
}
/**
 * Sets whether this builder is already prepared. Marking the builder as not
 * prepared also discards the prepared groups.
 *
 * @param prepared the new prepared state
 */
protected void setPrepared(boolean prepared) {
    this.prepared = prepared;
    if (!prepared) {
        this.abstractGroups = null;
    }
}
}