/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.metric.v2;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Arrays;
import java.util.Date;
import org.deidentifier.arx.DataType;
import org.deidentifier.arx.DataType.ARXDate;
import org.deidentifier.arx.DataType.ARXDecimal;
import org.deidentifier.arx.DataType.ARXInteger;
import org.deidentifier.arx.DataType.DataTypeWithRatioScale;
import org.deidentifier.arx.aggregates.HierarchyBuilderIntervalBased;
import com.carrotsearch.hppc.LongDoubleOpenHashMap;
/**
* This class represents a set of domain shares for an attribute. The shares are derived from a functional
* interval-based generalization hierarchy
*
* @author Fabian Prasser
* @param <T>
*/
public class DomainShareInterval<T> extends HierarchyBuilderIntervalBased<T> implements DomainShare {
/** SVUID. */
private static final long serialVersionUID = 3430961217394466615L;
/** The value representing a non-existent entry. */
private static final double NOT_AVAILABLE = -Double.MAX_VALUE;
/** The domain size. */
private double domainSize = 0d;
/** Data type. */
private DataTypeWithRatioScale<T> dataType;
/** One share per attribute. */
private final double[] shares;
/** If an attribute exists with different shares on different generalization levels, store the share in this map: <code>(((long)value) << 32) | (level & 0xffffffffL) -> share </code>. */
private transient LongDoubleOpenHashMap duplicates;
/**
* Creates a new set of domain shares derived from the given functional interval-based hierarchy.
*
* @param builder
* @param hierarchy
* @param dictionary
*/
@SuppressWarnings("unchecked")
public DomainShareInterval(HierarchyBuilderIntervalBased<T> builder,
int[][] hierarchy, String[] dictionary) {
// Super
super(builder.getDataType(), builder.getLowerRange(), builder.getUpperRange());
// Prepare
this.duplicates = new LongDoubleOpenHashMap();
this.shares = new double[dictionary.length];
Arrays.fill(shares, NOT_AVAILABLE);
// Copy intervals
for (Interval<T> interval : builder.getIntervals()) {
this.addInterval(interval);
}
// Copy levels
this.setLevels(builder.getLevels());
// Store ranges and type
this.dataType = (DataTypeWithRatioScale<T>)this.getDataType();
Range<T>[] ranges = this.getAdjustedRanges();
this.domainSize = toDouble(dataType.subtract(ranges[1].getLabelBound(), ranges[0].getLabelBound()));
// Prepare the array
String[] input = new String[hierarchy.length];
for (int i=0; i<hierarchy.length; i++) {
input[i] = dictionary[hierarchy[i][0]];
}
// Re-build the intervals
this.setData(input);
AbstractGroup[][] groups = this.prepareGroups();
// Sanity check
if (groups[0].length != hierarchy.length) {
throw new IllegalStateException("Invalid number of intervals");
} else if (groups.length != hierarchy[0].length - 1) {
throw new IllegalStateException("Invalid number of intervals");
}
for (int i=0; i<hierarchy.length; i++) {
for (int level = 0; level < hierarchy[i].length; level++) {
int value = hierarchy[i][level];
double stored = shares[value];
double share = 0d;
if (level == 0) {
share = 1d / domainSize;
} else {
AbstractGroup group = groups[level - 1][i];
if (group instanceof Interval) {
Interval<T> interval = (Interval<T>)group;
if (interval.isOutOfBound()) {
if (interval.isOutOfLowerBound()) {
share = toDouble(dataType.subtract(builder.getLowerRange().getSnapBound(),
builder.getLowerRange().getLabelBound())) / domainSize;
} else {
share = toDouble(dataType.subtract(builder.getUpperRange().getLabelBound(),
builder.getUpperRange().getSnapBound())) / domainSize;
}
} else if (interval.isNullInterval()) {
share = 1d / domainSize;
} else {
share = toDouble(dataType.subtract(interval.getMax(), interval.getMin())) / domainSize;
}
} else {
// Special case, '*' at the end
share = 1d;
}
}
// If duplicate
if (stored != NOT_AVAILABLE) {
// If same share, simply continue
if (stored == share) {
continue;
}
// Mark as duplicate, if not already marked
if (stored >= 0d) {
shares[value] = -shares[value];
}
// Store duplicate value
long dkey = (((long) value) << 32) | (level & 0xffffffffL);
duplicates.put(dkey, share);
// If its not a duplicate, simply store
} else {
shares[value] = share;
}
}
}
}
/**
* Creates a new instance
*/
@SuppressWarnings("unchecked")
private DomainShareInterval(double domainSize,
DataType<T> dataType,
Range<T> lower, Range<T> upper,
double[] shares,
LongDoubleOpenHashMap duplicates) {
super(dataType, lower, upper);
this.domainSize = domainSize;
this.dataType = (DataTypeWithRatioScale<T>)dataType;
this.shares = shares;
this.duplicates = duplicates;
}
@Override
public DomainShareInterval<T> clone() {
return new DomainShareInterval<T>(this.domainSize, this.getDataType(),
this.getLowerRange(), this.getUpperRange(),
this.shares.clone(), this.duplicates.clone());
}
/**
* Returns the size of the domain.
*
* @return
*/
@Override
public double getDomainSize() {
return this.domainSize;
}
/**
* Returns the share of the given value.
*
* @param value
* @param level
* @return
*/
@Override
public double getShare(int value, int level) {
double share = shares[value];
if (share >= 0) {
return share;
} else {
long key = (((long) value) << 32) | (level & 0xffffffffL);
return duplicates.getOrDefault(key, -share);
}
}
/**
* De-serialization.
*
* @param aInputStream
* @throws ClassNotFoundException
* @throws IOException
*/
private void readObject(ObjectInputStream aInputStream) throws ClassNotFoundException, IOException {
// Default de-serialization
aInputStream.defaultReadObject();
// Read map
duplicates = IO.readLongDoubleOpenHashMap(aInputStream);
}
/**
* Converts the given value of the attribute's data type to a double.
*
* @param value
* @return
*/
private double toDouble(T value) {
if (this.dataType instanceof ARXDate) {
return ((Date)value).getTime();
} else if (this.dataType instanceof ARXDecimal) {
return (Double)value;
} else if (this.dataType instanceof ARXInteger) {
return (Long)value;
} else {
throw new IllegalStateException("Unknown data type");
}
}
/**
* Serialization.
*
* @param aOutputStream
* @throws IOException
*/
private void writeObject(ObjectOutputStream aOutputStream) throws IOException {
// Default serialization
aOutputStream.defaultWriteObject();
// Write map
IO.writeLongDoubleOpenHashMap(aOutputStream, duplicates);
}
}