/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.metric.v2;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Arrays;
import com.carrotsearch.hppc.LongDoubleOpenHashMap;
import com.carrotsearch.hppc.ObjectIntOpenHashMap;
/**
* This class represents a set of domain shares for an attribute. The shares are derived from a materialized
* generalization hierarchy. It is assumed that the complete domain of the attribute is represented in
* the hierarchy.
*
* @author Fabian Prasser
*/
public class DomainShareMaterialized implements DomainShare {

    /** SVUID. */
    private static final long serialVersionUID = -8981924690395236648L;

    /** Sentinel stored in {@link #shares} for encoded values that have not been assigned a share. */
    private static final double NOT_AVAILABLE = -Double.MAX_VALUE;

    /** The size of the domain, taken from the number of rows of the raw hierarchy. */
    private final double size;

    /**
     * One entry per encoded value (indexed by the encoded value).
     * <ul>
     * <li>A non-negative entry is returned as the share of the value for every level.</li>
     * <li>A negative entry marks a value whose share differs between generalization levels:
     *     the negated entry is the share that was encountered first, level-specific shares
     *     are stored in {@link #duplicates}.</li>
     * <li>{@link #NOT_AVAILABLE} means that no share has been assigned.</li>
     * </ul>
     */
    private final double[] shares;

    /**
     * If a value exists with different shares on different generalization levels, the share
     * is stored in this map: <code>(((long)value) << 32) | (level & 0xffffffffL) -> share</code>.
     * The map is transient and (de-)serialized manually in {@link #writeObject} and {@link #readObject}.
     */
    private transient LongDoubleOpenHashMap duplicates;

    /**
     * Creates a new set of domain shares derived from the given attribute.
     *
     * @param rawHierarchy the hierarchy as strings, indexed as <code>[row][level]</code>; the number
     *                     of rows defines the domain size. May be empty, in which case no level exists.
     * @param encodedValues maps each encoded value (array index) to its string representation
     * @param encodedHierarchy the hierarchy as encoded values, indexed as <code>[row][level]</code>
     */
    public DomainShareMaterialized(String[][] rawHierarchy,
                                   String[] encodedValues,
                                   int[][] encodedHierarchy) {

        this.size = rawHierarchy.length;
        this.duplicates = new LongDoubleOpenHashMap();
        this.shares = new double[encodedValues.length];
        Arrays.fill(shares, NOT_AVAILABLE);

        // An empty hierarchy has no levels; do not dereference rawHierarchy[0] in this case
        final int levels = rawHierarchy.length == 0 ? 0 : rawHierarchy[0].length;

        @SuppressWarnings("unchecked")
        ObjectIntOpenHashMap<String>[] counts = new ObjectIntOpenHashMap[levels];
        for (int level = 0; level < counts.length; level++) {
            counts[level] = new ObjectIntOpenHashMap<String>();
        }

        // First, count per level how many rows of the raw hierarchy carry each string
        for (String[] transformation : rawHierarchy) {
            for (int level = 0; level < transformation.length; level++) {
                ObjectIntOpenHashMap<String> map = counts[level];
                String label = transformation[level];
                int seen = map.containsKey(label) ? map.get(label) : 0;
                map.put(label, seen + 1);
            }
        }

        // Now transform into an array representation and handle duplicates
        for (int[] strategy : encodedHierarchy) {
            for (int level = 0; level < strategy.length; level++) {

                int value = strategy[level];
                double share = (double) counts[level].get(encodedValues[value]) / size;
                double stored = shares[value];

                // First occurrence of this value: simply store
                if (stored == NOT_AVAILABLE) {
                    shares[value] = share;
                    continue;
                }

                // Same share as the one already stored, either unmarked (stored == share)
                // or marked as duplicate (stored == -share). In both cases getShare()
                // already returns this share, so no map entry is needed.
                if (stored == share || stored == -share) {
                    continue;
                }

                // Mark as duplicate, if not already marked
                if (stored >= 0d) {
                    shares[value] = -stored;
                }

                // Store the level-specific share
                duplicates.put(toKey(value, level), share);
            }
        }
    }

    /**
     * Clone constructor. The arguments are stored as given, without copying.
     *
     * @param size the domain size
     * @param shares the per-value shares
     * @param duplicates the level-specific shares
     */
    private DomainShareMaterialized(double size, double[] shares, LongDoubleOpenHashMap duplicates) {
        this.size = size;
        this.shares = shares;
        this.duplicates = duplicates;
    }

    /**
     * Returns a copy of this object that is backed by clones of the share array and the duplicates map.
     *
     * @return the copy
     */
    @Override
    public DomainShareMaterialized clone() {
        return new DomainShareMaterialized(this.size, this.shares.clone(), this.duplicates.clone());
    }

    /**
     * Returns the size of the domain.
     *
     * @return the domain size
     */
    @Override
    public double getDomainSize() {
        return size;
    }

    /**
     * Returns the share of the given value.
     *
     * @param value the encoded value
     * @param level the generalization level; only relevant for values whose share differs between levels
     * @return the share; for a value to which no share has been assigned and for which
     *         no level-specific entry exists, this is {@link Double#MAX_VALUE}
     */
    @Override
    public double getShare(int value, int level) {
        final double share = shares[value];

        // Non-negative entries are valid for all levels
        if (share >= 0) {
            return share;
        }

        // Negative entries are markers: look up the level-specific share and
        // fall back to the share that was encountered first
        return duplicates.getOrDefault(toKey(value, level), -share);
    }

    /**
     * Packs a value and a level into the key used by {@link #duplicates}:
     * the value occupies the upper 32 bits, the level the lower 32 bits.
     *
     * @param value the encoded value
     * @param level the generalization level
     * @return the key
     */
    private static long toKey(int value, int level) {
        return (((long) value) << 32) | (level & 0xffffffffL);
    }

    /**
     * De-serialization.
     *
     * @param aInputStream the stream to read from
     * @throws ClassNotFoundException
     * @throws IOException
     */
    private void readObject(ObjectInputStream aInputStream) throws ClassNotFoundException, IOException {

        // Default de-serialization
        aInputStream.defaultReadObject();

        // Read map (transient field, not covered by default de-serialization)
        duplicates = IO.readLongDoubleOpenHashMap(aInputStream);
    }

    /**
     * Serialization.
     *
     * @param aOutputStream the stream to write to
     * @throws IOException
     */
    private void writeObject(ObjectOutputStream aOutputStream) throws IOException {

        // Default serialization
        aOutputStream.defaultWriteObject();

        // Write map (transient field, not covered by default serialization)
        IO.writeLongDoubleOpenHashMap(aOutputStream, duplicates);
    }
}