/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx.framework.data; import com.carrotsearch.hppc.IntIntOpenHashMap; import com.carrotsearch.hppc.IntOpenHashSet; /** * The class GeneralizationHierarchy. * * @author Fabian Prasser * @author Florian Kohlmayer */ public class GeneralizationHierarchy { /** Level->number of distinct values. */ protected final int[] distinctValues; /** Input->level->output. */ protected final int[][] map; /** Name. */ protected final String attribute; /** * Creates a new generalization hierarchy. * * @param name * @param hierarchy * @param dimension * @param dictionary */ public GeneralizationHierarchy(final String name, final String[][] hierarchy, final int dimension, final Dictionary dictionary) { // Check if (hierarchy == null || hierarchy.length == 0) { throw new RuntimeException("Empty generalization hierarchy for attribute '" + name + "'"); } // Init this.attribute = name; final int height = hierarchy[0].length; // Determine number of unique input values final int uniqueIn = dictionary.getNumUniqueUnfinalizedValues(dimension); // Build hierarchy map = new int[uniqueIn][height]; for (int i = 0; i < hierarchy.length; i++) { final String[] input = hierarchy[i]; final Integer key = dictionary.probe(dimension, input[0]); if (key != null && key < uniqueIn) { for (int j = 0; j < input.length; j++) { final String value = input[j]; final int incode = dictionary.register(dimension, value); map[key][j] = incode; } } } // Count distinct values on each level distinctValues = new int[height]; final IntOpenHashSet vals = new IntOpenHashSet(); // for each column for (int i = 0; i < map[0].length; i++) { for (int k = 0; k < map.length; k++) { vals.add(map[k][i]); } distinctValues[i] = vals.size(); vals.clear(); } // Sanity check if (distinctValues[0] < uniqueIn) { throw new IllegalArgumentException("Attribute '" + name + "': hierarchy misses some values or contains duplicates"); } } /** * Can be used to create a copy of the generalization hierarchy. * * @param name * @param map * @param distinctValues */ protected GeneralizationHierarchy(final String name, final int[][] map, final int[] distinctValues) { this.attribute = name; this.map = map; this.distinctValues = distinctValues; } /** * Throws an exception, if the hierarchy is not monotonic. * * TODO: This is a potentially expensive check that should be done when loading the hierarchy * * @param manager */ public void checkMonotonicity(DataManager manager) { // Obtain dictionary String[] dictionary = null; String[] header = manager.getDataGeneralized().getHeader(); for (int i=0; i<header.length; i++) { if (header[i].equals(attribute)) { dictionary = manager.getDataGeneralized().getDictionary().getMapping()[i]; } } // Check if (dictionary==null) { throw new IllegalStateException("Cannot obtain dictionary for attribute ("+attribute+")"); } // Level value -> level+1 value final IntIntOpenHashMap hMap = new IntIntOpenHashMap(); // Input->level->output. for (int level = 0; level < (map[0].length - 1); level++) { hMap.clear(); for (int i = 0; i < map.length; i++) { final int outputCurrentLevel = map[i][level]; final int outputNextLevel = map[i][level + 1]; if (hMap.containsKey(outputCurrentLevel)) { final int compare = hMap.get(outputCurrentLevel); if (compare != outputNextLevel) { String in = dictionary[outputCurrentLevel]; String out1 = dictionary[compare]; String out2 = dictionary[outputNextLevel]; throw new IllegalArgumentException("The transformation rule for the attribute '" + attribute + "' is not a hierarchy. ("+in+") can either be transformed to ("+out1+") or to ("+out2+")"); } } else { hMap.put(outputCurrentLevel, outputNextLevel); } } } } /** * Returns the array. * * @return */ public int[][] getArray() { return map; } /** * Returns the number of distinct values. * * @return */ public int[] getDistinctValues() { return distinctValues; } /** * Returns the distinct values. * * @param level * @return */ public int[] getDistinctValues(final int level) { final IntOpenHashSet vals = new IntOpenHashSet(); for (int k = 0; k < map.length; k++) { vals.add(map[k][level]); } final int[] result = new int[vals.size()]; final int[] keys = vals.keys; final boolean[] allocated = vals.allocated; int index = 0; for (int i = 0; i < allocated.length; i++) { if (allocated[i]) { result[index++] = keys[i]; } } return result; } /** * Returns the height of the hierarchy. * * @return */ public int getHeight() { return map[0].length; } /** * Returns the name. * * @return */ public String getName() { return attribute; } }