/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx.framework.check.history;
import java.util.HashMap;
import java.util.Iterator;
import org.deidentifier.arx.ARXConfiguration;
import org.deidentifier.arx.ARXConfiguration.ARXConfigurationInternal;
import org.deidentifier.arx.framework.check.distribution.Distribution;
import org.deidentifier.arx.framework.check.distribution.IntArrayDictionary;
import org.deidentifier.arx.framework.check.groupify.HashGroupify;
import org.deidentifier.arx.framework.check.groupify.HashGroupifyEntry;
import org.deidentifier.arx.framework.lattice.DependentAction;
import org.deidentifier.arx.framework.lattice.SolutionSpace;
import org.deidentifier.arx.framework.lattice.Transformation;
/**
* The Class History.
*
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class History {

    /**
     * Two types of storage strategies for the history.
     *
     * @author Fabian Prasser
     */
    public static enum StorageStrategy {

        /** Store snapshots for all transformations. */
        ALL,

        /** Store snapshots only for transformations found to be non-anonymous. */
        NON_ANONYMOUS
    }

    /** The actual buffer: recency-ordered cache of snapshot metadata. */
    private MRUCache<MRUCacheEntryMetadata> cache = null;

    /** Current configuration. */
    private final ARXConfigurationInternal config;

    /** The dictionary for frequencies of the distributions. */
    private final IntArrayDictionary dictionarySensFreq;

    /** The dictionary for values of the distributions. */
    private final IntArrayDictionary dictionarySensValue;

    /** A map from transformation identifiers to the stored snapshot data. */
    private HashMap<Long, int[]> nodeToSnapshot = null;

    /** The current requirements (bit mask of ARXConfiguration.REQUIREMENT_*). */
    private final int requirements;

    /** The metadata of the node backing the last snapshot returned by get(). */
    private MRUCacheEntryMetadata resultMetadata;

    /** Maximal number of entries. */
    private int size;

    /** Absolute size limit (in equivalence classes) for a snapshot of the dataset. */
    private final long snapshotSizeDataset;

    /** Minimum required relative reduction in size for a snapshot derived from another snapshot. */
    private final double snapshotSizeSnapshot;

    /** The solution space */
    private final SolutionSpace solutionSpace;

    /** Store the results of all types of transformations. */
    private final DependentAction STORAGE_TRIGGER_ALL = new DependentAction(){
        @Override
        public boolean appliesTo(Transformation node) {
            return true;
        }
    };

    /** Store only the results of non-anonymous transformations. */
    private final DependentAction STORAGE_TRIGGER_NON_ANONYMOUS = new DependentAction(){
        @Override
        public boolean appliesTo(Transformation node) {
            return node.hasProperty(solutionSpace.getPropertyNotAnonymous());
        }
    };

    /** The current storage strategy. */
    private DependentAction storageTrigger;

    /**
     * Creates a new history.
     *
     * @param rowCount the number of rows in the dataset
     * @param size the maximal number of snapshots kept in the history
     * @param snapshotSizeDataset the maximal snapshot size, as a fraction of the dataset size
     * @param snapshotSizeSnapshot the minimum required relative reduction in size for a
     *            snapshot derived from an existing snapshot
     * @param config the configuration
     * @param dictionarySensValue the dictionary for values of the distributions
     * @param dictionarySensFreq the dictionary for frequencies of the distributions
     * @param solutionSpace the solution space
     */
    public History(final int rowCount,
                   final int size,
                   final double snapshotSizeDataset,
                   final double snapshotSizeSnapshot,
                   final ARXConfigurationInternal config,
                   final IntArrayDictionary dictionarySensValue,
                   final IntArrayDictionary dictionarySensFreq,
                   final SolutionSpace solutionSpace) {
        // Convert the relative threshold into an absolute number of equivalence classes
        this.snapshotSizeDataset = (long) (rowCount * snapshotSizeDataset);
        this.snapshotSizeSnapshot = snapshotSizeSnapshot;
        this.cache = new MRUCache<MRUCacheEntryMetadata>(size);
        this.nodeToSnapshot = new HashMap<Long, int[]>(size);
        this.size = size;
        this.dictionarySensFreq = dictionarySensFreq;
        this.dictionarySensValue = dictionarySensValue;
        this.config = config;
        this.requirements = config.getRequirements();
        // By default only non-anonymous transformations are stored
        this.storageTrigger = STORAGE_TRIGGER_NON_ANONYMOUS;
        this.solutionSpace = solutionSpace;
    }

    /**
     * Retrieves the smallest stored snapshot belonging to a generalization
     * predecessor of the given transformation, or null if none applies.
     * Also remembers the backing metadata for {@link #getTransformation()}.
     *
     * @param transformation the target transformation
     * @return snapshot
     */
    public int[] get(final int[] transformation) {

        // Init
        int[] resultSnapshot = null;
        MRUCacheEntryMetadata resultMetadata = null;
        int level = solutionSpace.getLevel(transformation);

        // Linear search over all cached entries
        MRUCacheEntry<MRUCacheEntryMetadata> entry = cache.getHead();
        while (entry != null) {
            MRUCacheEntryMetadata currentMetadata = entry.data;
            // Only transformations on strictly lower levels can be predecessors
            if (currentMetadata.level < level) {
                final int[] currentSnapshot = nodeToSnapshot.get(currentMetadata.id);
                // Prefer the smallest snapshot; check the size first because the
                // parent-child test is presumably the more expensive operation
                if ((resultMetadata == null) || (currentSnapshot.length < resultSnapshot.length)) {
                    if (solutionSpace.isParentChildOrEqual(transformation, currentMetadata.transformation)) {
                        resultMetadata = currentMetadata;
                        resultSnapshot = currentSnapshot;
                    }
                }
            }
            entry = entry.next;
        }

        // Mark the returned entry as recently used so it is not evicted soon
        if (resultMetadata != null) {
            cache.touch(resultMetadata);
        }
        this.resultMetadata = resultMetadata;

        // Return
        return resultSnapshot;
    }

    /**
     * Method needed for benchmarking.
     *
     * @return the dictionary for frequencies of the distributions
     */
    public IntArrayDictionary getDictionarySensFreq() {
        return dictionarySensFreq;
    }

    /**
     * Method needed for benchmarking.
     *
     * @return the dictionary for values of the distributions
     */
    public IntArrayDictionary getDictionarySensValue() {
        return dictionarySensValue;
    }

    /**
     * Returns the current storage strategy.
     *
     * @return the trigger deciding which transformations are stored
     */
    public DependentAction getStorageTrigger() {
        return storageTrigger;
    }

    /**
     * Returns the transformation backing the last snapshot returned by
     * {@link #get(int[])}, or null if no snapshot was found.
     *
     * @return the transformation, or null
     */
    public int[] getTransformation() {
        if (resultMetadata == null) {
            return null;
        } else {
            return resultMetadata.transformation;
        }
    }

    /**
     * Clears the history, including both dictionaries.
     */
    public void reset() {
        this.cache.clear();
        this.nodeToSnapshot.clear();
        this.dictionarySensFreq.clear();
        this.dictionarySensValue.clear();
        this.resultMetadata = null;
    }

    /**
     * Sets the size of this history.
     * Note: does not evict entries immediately; eviction happens lazily in store().
     *
     * @param size the new maximal number of entries
     */
    public void setSize(int size) {
        this.size = size;
    }

    /**
     * Sets the storage strategy.
     *
     * @param strategy the strategy to use
     */
    public void setStorageStrategy(StorageStrategy strategy) {
        if (strategy == StorageStrategy.ALL) {
            this.storageTrigger = STORAGE_TRIGGER_ALL;
        } else if (strategy == StorageStrategy.NON_ANONYMOUS) {
            this.storageTrigger = STORAGE_TRIGGER_NON_ANONYMOUS;
        }
    }

    /**
     * Returns the current number of entries in the history.
     *
     * @return the number of cached snapshots
     */
    public int size() {
        return cache.size();
    }

    /**
     * Stores a snapshot in the buffer.
     *
     * @param transformation The transformation
     * @param groupify The groupify operator
     * @param snapshot The snapshot that was previously used, if any
     * @return true if a snapshot was stored, false if any early-abort condition applied
     */
    public boolean store(final Transformation transformation, final HashGroupify groupify, final int[] snapshot) {

        // Early abort if too large relative to the dataset, or no space at all
        if (size == 0 || groupify.getNumberOfEquivalenceClasses() > snapshotSizeDataset) {
            return false;
        }

        // Early abort if not sufficiently smaller than the snapshot it was derived from
        if (snapshot != null) {
            // snapshot.length / snapshotLength is the number of classes in the previous snapshot
            final double relativeSize = (groupify.getNumberOfEquivalenceClasses() / ((double) snapshot.length / config.getSnapshotLength()));
            if (relativeSize > snapshotSizeSnapshot) { return false; }
        }

        // Early abort if conditions are not triggered: a forced snapshot always wins;
        // otherwise skip pruned transformations and those rejected by the storage trigger
        if (!transformation.hasProperty(solutionSpace.getPropertyForceSnapshot()) &&
            (transformation.hasProperty(solutionSpace.getPropertySuccessorsPruned()) || !storageTrigger.appliesTo(transformation))) {
            return false;
        }

        // Drop entries whose transformations have been pruned in the meantime
        cleanUpHistory();

        // Evict the least-recently-used entry, if still too large
        if (cache.size() >= size) {
            removeHistoryEntry(cache.removeHead());
        }

        // Create the snapshot
        final int[] data = createSnapshot(groupify);

        // Assign snapshot and keep reference for cache
        nodeToSnapshot.put(transformation.getIdentifier(), data);
        cache.append(new MRUCacheEntryMetadata(transformation));
        return true;
    }

    /**
     * Removes entries for pruned transformations from the cache and releases
     * their snapshots.
     */
    private final void cleanUpHistory() {
        final Iterator<MRUCacheEntryMetadata> metadata = cache.iterator();
        while (metadata.hasNext()) {
            final MRUCacheEntryMetadata node = metadata.next();
            if (solutionSpace.hasProperty(node.transformation, solutionSpace.getPropertySuccessorsPruned())) {
                // Iterator.remove() keeps the cache consistent while iterating
                metadata.remove();
                removeHistoryEntry(node);
            }
        }
    }

    /**
     * Creates a generic snapshot for all criteria. Per equivalence class the layout is:
     * [representative, count, (pcount)?, (valueDictIdx, freqDictIdx)* ] — the optional
     * parts depend on the requirements; the total width is config.getSnapshotLength().
     * Distribution data is interned via the two dictionaries (probe() increments their
     * reference counts; removeHistoryEntry() decrements them symmetrically).
     *
     * @param g the groupify operator holding the equivalence classes
     * @return the snapshot data
     */
    private final int[] createSnapshot(final HashGroupify g) {

        final int[] data = new int[g.getNumberOfEquivalenceClasses() * config.getSnapshotLength()];
        int index = 0;
        HashGroupifyEntry m = g.getFirstEquivalenceClass();
        while (m != null) {

            // Store element
            data[index] = m.representative;
            data[index + 1] = m.count;

            // Add data for different requirements
            switch (requirements) {
            case ARXConfiguration.REQUIREMENT_COUNTER:
                // do nothing
                break;
            case ARXConfiguration.REQUIREMENT_COUNTER | ARXConfiguration.REQUIREMENT_SECONDARY_COUNTER:
                data[index + 2] = m.pcount;
                break;
            case ARXConfiguration.REQUIREMENT_COUNTER | ARXConfiguration.REQUIREMENT_SECONDARY_COUNTER | ARXConfiguration.REQUIREMENT_DISTRIBUTION:
                data[index + 2] = m.pcount;
                // Distributions start at offset 3, two slots per sensitive attribute
                for (int i=0; i<m.distributions.length; i++) {
                    Distribution distribution = m.distributions[i];
                    distribution.pack();
                    data[index + 3 + i * 2] = dictionarySensValue.probe(distribution.getPackedElements());
                    data[index + 4 + i * 2] = dictionarySensFreq.probe(distribution.getPackedFrequency());
                }
                break;
            // TODO: If we only need a distribution, we should get rid of the primary counter
            case ARXConfiguration.REQUIREMENT_COUNTER | ARXConfiguration.REQUIREMENT_DISTRIBUTION:
            case ARXConfiguration.REQUIREMENT_DISTRIBUTION:
                // No secondary counter: distributions start at offset 2
                for (int i=0; i<m.distributions.length; i++) {
                    Distribution distribution = m.distributions[i];
                    distribution.pack();
                    data[index + 2 + i * 2] = dictionarySensValue.probe(distribution.getPackedElements());
                    data[index + 3 + i * 2] = dictionarySensFreq.probe(distribution.getPackedFrequency());
                }
                break;
            default:
                throw new RuntimeException("Invalid requirements: " + requirements);
            }
            index += config.getSnapshotLength();

            // Next element
            m = m.nextOrdered;
        }
        return data;
    }

    /**
     * Removes a snapshot and decrements the dictionary reference counts for all
     * interned distribution entries (mirroring the probe() calls in createSnapshot()).
     * Requirement combinations without distributions hold no dictionary references,
     * so no case is needed for them.
     *
     * @param metadata the metadata of the entry to remove
     */
    private final void removeHistoryEntry(final MRUCacheEntryMetadata metadata) {
        final int[] snapshot = nodeToSnapshot.remove(metadata.id);

        switch (requirements) {
        case ARXConfiguration.REQUIREMENT_COUNTER | ARXConfiguration.REQUIREMENT_SECONDARY_COUNTER | ARXConfiguration.REQUIREMENT_DISTRIBUTION:
            // Distributions start at offset 3 within each class record
            for (int i = 0; i < snapshot.length; i += config.getSnapshotLength()) {
                for (int j = i + 3; j < i + config.getSnapshotLength() - 1; j += 2) {
                    dictionarySensValue.decrementRefCount(snapshot[j]);
                    dictionarySensFreq.decrementRefCount(snapshot[j+1]);
                }
            }
            break;
        // TODO: If we only need a distribution, we should get rid of the primary counter
        case ARXConfiguration.REQUIREMENT_COUNTER | ARXConfiguration.REQUIREMENT_DISTRIBUTION:
        case ARXConfiguration.REQUIREMENT_DISTRIBUTION:
            // Distributions start at offset 2 within each class record
            for (int i = 0; i < snapshot.length; i += config.getSnapshotLength()) {
                for (int j = i + 2; j < i + config.getSnapshotLength() - 1; j += 2) {
                    dictionarySensValue.decrementRefCount(snapshot[j]);
                    dictionarySensFreq.decrementRefCount(snapshot[j+1]);
                }
            }
        }
    }
}