/*
* ARX: Powerful Data Anonymization
* Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.deidentifier.arx;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.deidentifier.arx.ARXLattice.ARXNode;
import org.deidentifier.arx.ARXLattice.Anonymity;
import org.deidentifier.arx.DataHandleInternal.InterruptHandler;
import org.deidentifier.arx.aggregates.StatisticsBuilder;
import org.deidentifier.arx.framework.data.Data;
import org.deidentifier.arx.framework.data.DataManager;
import org.deidentifier.arx.framework.data.DataManager.AttributeTypeInternal;
import org.deidentifier.arx.framework.data.Dictionary;
/**
* An implementation of the class DataHandle for output data.
*
* @author Fabian Prasser
* @author Florian Kohlmayer
*/
public class DataHandleOutput extends DataHandle {
/**
 * Iterator over the output data. The first element returned is the header
 * array of the enclosing handle (returned as-is, not copied); every following
 * element is a freshly created array holding the string values of one row.
 *
 * @author Fabian Prasser
 * @author Florian Kohlmayer
 */
public class ResultIterator implements Iterator<String[]> {

    /** Index of the next row to return; -1 means the header is still pending. */
    private int row = -1;

    /**
     * Returns whether another element (header or row) is available.
     *
     * @return true while the next row index is below the number of rows in the generalized output
     */
    @Override
    public boolean hasNext() {
        return row < outputGeneralized.getArray().length;
    }

    /**
     * Returns the header on the first call and one output row on each following call.
     *
     * @return the header or the next row
     * @throws java.util.NoSuchElementException if all rows have already been returned
     */
    @Override
    public String[] next() {

        // Honor the Iterator contract instead of failing with an array index error
        if (!hasNext()) {
            throw new java.util.NoSuchElementException();
        }

        String[] result = null;

        /* write header */
        if (row == -1) {
            result = header;

        /* write a normal row */
        } else {

            // Create row
            result = new String[header.length];
            for (int i = 0; i < result.length; i++) {
                result[i] = internalGetValue(row, i, false);
            }
        }

        row++;
        return result;
    }

    /**
     * Removal is not supported.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public void remove() {
        throw new UnsupportedOperationException();
    }
}
/** Input data obtained from {@code manager.getDataAnalyzed()}; backs the SENSITIVE slot of the inverse arrays. */
private Data inputAnalyzed;
/** Input data obtained from {@code manager.getDataStatic()}; backs the INSENSITIVE slot of the inverse arrays. */
private Data inputStatic;
/** Data arrays indexed by the constants of AttributeTypeInternal; the IDENTIFYING entry is null. */
private int[][][] inverseData;
/** Dictionaries indexed by the constants of AttributeTypeInternal; the IDENTIFYING entry is null. */
private Dictionary[] inverseDictionaries;
/**
 * An inverse map for column indices. map[i*2]=attribute type, map[i*2+1]=index position.
 * Columns that are not found in any buffer keep the type IDENTIFYING and the position -1.
 */
private int[] inverseMap;
/**
 * Value of {@code manager.getMicroaggregationStartIndex()}. Columns of the analyzed input
 * with an index below this value are mapped as SENSITIVE.
 */
private final int microaggregationStartIndex;
/** Output buffer passed to the constructor; backs the QUASI_IDENTIFYING_GENERALIZED slot and carries the outlier flag in column 0. */
private Data outputGeneralized;
/** Output buffer passed to the constructor; backs the QUASI_IDENTIFYING_MICROAGGREGATED slot. */
private Data outputMicroaggregated;
/** The result this handle belongs to; used to release the buffer and to obtain the configuration. */
private ARXResult result;
/**
 * Suppression handling: bitset over internal attribute types (bit {@code 1 << type}).
 * Values of these types are reported as {@code DataType.ANY_VALUE} in outlier rows.
 */
private final int suppressedAttributeTypes;
/** Flag determining whether this buffer has been optimized */
private boolean optimized = false;
/** Flag determining whether this buffer is anonymous; derived from the anonymity of the node at construction time. */
private boolean anonymous = false;
/**
 * Instantiates a new handle for the output of the given transformation,
 * registers it with the registry and builds the inverse lookup structures
 * (column -> attribute type / position, type -> data array, type -> dictionary).
 *
 * @param result the result this handle belongs to
 * @param registry the registry in which this handle is registered as the output of {@code node}
 * @param manager provides the analyzed and static input data, the header and the microaggregation start index
 * @param outputGeneralized buffer backing the generalized quasi-identifiers
 * @param outputMicroaggregated buffer backing the microaggregated quasi-identifiers
 * @param node the transformation represented by this handle
 * @param definition the data definition used to resolve data types
 * @param config supplies the suppressed attribute types and is passed on when creating the output subset
 */
protected DataHandleOutput(final ARXResult result,
final DataRegistry registry,
final DataManager manager,
final Data outputGeneralized,
final Data outputMicroaggregated,
final ARXNode node,
final DataDefinition definition,
final ARXConfiguration config) {
// Register this handle before anything else is initialized
registry.updateOutput(node, this);
this.setRegistry(registry);
// Init
this.suppressedAttributeTypes = convert(config.getSuppressedAttributeTypes());
this.result = result;
this.definition = definition;
this.anonymous = node.getAnonymity() == Anonymity.ANONYMOUS;
this.node = node;
// Extract data
this.outputGeneralized = outputGeneralized;
this.outputMicroaggregated = outputMicroaggregated;
this.inputAnalyzed = manager.getDataAnalyzed();
this.inputStatic = manager.getDataStatic();
this.header = manager.getHeader();
this.microaggregationStartIndex = manager.getMicroaggregationStartIndex();
// Build map inverse: two slots per column (type, position)
this.inverseMap = new int[header.length * 2];
// Init with attribute type ID; columns not overwritten below stay IDENTIFYING with position -1
for (int i = 0; i < this.inverseMap.length; i += 2) {
this.inverseMap[i] = AttributeTypeInternal.IDENTIFYING;
this.inverseMap[i + 1] = -1;
}
// Columns contained in the generalized output buffer
for (int i = 0; i < this.outputGeneralized.getMap().length; i++) {
final int pos = outputGeneralized.getMap()[i] * 2;
this.inverseMap[pos] = AttributeTypeInternal.QUASI_IDENTIFYING_GENERALIZED;
this.inverseMap[pos + 1] = i;
}
// Only the columns of the analyzed input below the microaggregation start index are mapped as sensitive
for (int i = 0; i < this.microaggregationStartIndex; i++) {
final int pos = inputAnalyzed.getMap()[i] * 2;
this.inverseMap[pos] = AttributeTypeInternal.SENSITIVE;
this.inverseMap[pos + 1] = i;
}
// Columns contained in the microaggregated output buffer; runs after the loops above,
// so it overwrites an earlier entry for the same column
for (int i = 0; i < outputMicroaggregated.getMap().length; i++) {
final int pos = outputMicroaggregated.getMap()[i] * 2;
this.inverseMap[pos] = AttributeTypeInternal.QUASI_IDENTIFYING_MICROAGGREGATED;
this.inverseMap[pos + 1] = i;
}
// Columns contained in the static input; likewise overwrites earlier entries for the same column
for (int i = 0; i < inputStatic.getMap().length; i++) {
final int pos = inputStatic.getMap()[i] * 2;
this.inverseMap[pos] = AttributeTypeInternal.INSENSITIVE;
this.inverseMap[pos + 1] = i;
}
// Build inverse data array, indexed by internal attribute type
this.inverseData = new int[5][][];
this.inverseData[AttributeTypeInternal.INSENSITIVE] = this.inputStatic.getArray();
this.inverseData[AttributeTypeInternal.SENSITIVE] = this.inputAnalyzed.getArray();
this.inverseData[AttributeTypeInternal.QUASI_IDENTIFYING_GENERALIZED] = this.outputGeneralized.getArray();
this.inverseData[AttributeTypeInternal.IDENTIFYING] = null;
this.inverseData[AttributeTypeInternal.QUASI_IDENTIFYING_MICROAGGREGATED] = this.outputMicroaggregated.getArray();
// Build inverse dictionary array, indexed by internal attribute type
this.inverseDictionaries = new Dictionary[5];
this.inverseDictionaries[AttributeTypeInternal.INSENSITIVE] = this.inputStatic.getDictionary();
this.inverseDictionaries[AttributeTypeInternal.SENSITIVE] = this.inputAnalyzed.getDictionary();
this.inverseDictionaries[AttributeTypeInternal.QUASI_IDENTIFYING_GENERALIZED] = this.outputGeneralized.getDictionary();
this.inverseDictionaries[AttributeTypeInternal.IDENTIFYING] = null;
this.inverseDictionaries[AttributeTypeInternal.QUASI_IDENTIFYING_MICROAGGREGATED] = this.outputMicroaggregated.getDictionary();
// Create view
this.getRegistry().createOutputSubset(node, config);
// Obtain data types
// NOTE(review): getDataTypeArray() is an overridable method called from the constructor;
// it reads the buffers, the definition and the node assigned above, so it must stay last.
this.dataTypes = getDataTypeArray();
}
/**
 * Returns the name of the attribute stored in the given column.
 *
 * @param col the column index; validated via {@code checkColumn}
 * @return the header entry at the given index
 */
@Override
public String getAttributeName(final int col) {
    checkRegistry();
    checkColumn(col);
    final String name = header[col];
    return name;
}
/**
 * Returns the data type of the given attribute in the output. Attributes
 * mapped as identifying are always reported as strings; for all other
 * attributes the type is looked up in the data type array.
 *
 * @param attribute the attribute name
 * @return the data type of the attribute
 */
@Override
public DataType<?> getDataType(String attribute) {
    checkRegistry();
    final int column = this.getColumnIndexOf(attribute);

    // First slot of the pair holds the internal attribute type
    final int type = inverseMap[column * 2];
    if (type == AttributeTypeInternal.IDENTIFYING) {
        return DataType.STRING;
    }

    // Second slot holds the position within the buffer of that type
    final int position = inverseMap[column * 2 + 1];
    return dataTypes[type][position];
}
/**
 * Returns the generalization level the node of this handle reports for the given attribute.
 *
 * @param attribute the attribute name
 * @return the value of {@code node.getGeneralization(attribute)}
 */
@Override
public int getGeneralization(final String attribute) {
    checkRegistry();
    final int level = node.getGeneralization(attribute);
    return level;
}
/**
 * Returns the number of columns, which is the length of the header.
 *
 * @return the number of columns
 */
@Override
public int getNumColumns() {
    checkRegistry();
    final int columns = header.length;
    return columns;
}
/**
 * Returns the number of rows, as reported by the data length of the generalized output buffer.
 *
 * @return the number of rows
 */
@Override
public int getNumRows() {
    checkRegistry();
    final int rows = outputGeneralized.getDataLength();
    return rows;
}
/**
 * Creates a new statistics builder operating on an internal view of this handle.
 *
 * @return a new statistics builder
 */
@Override
public StatisticsBuilder getStatistics() {
    final DataHandleInternal internal = new DataHandleInternal(this);
    return new StatisticsBuilder(internal);
}
/**
 * Returns the value of the given cell, with suppression applied.
 *
 * @param row the row index; validated against the data length of the generalized output
 * @param col the column index; validated via {@code checkColumn}
 * @return the value of the cell
 */
@Override
public String getValue(final int row, final int col) {

    // Validate state and arguments
    checkRegistry();
    checkColumn(col);
    final int rows = outputGeneralized.getDataLength();
    checkRow(row, rows);

    // Suppression is never ignored for the public accessor
    return internalGetValue(row, col, false);
}
/**
 * Returns the current value of the optimized flag.
 *
 * @return whether this handle has been marked as optimized
 */
@Override
public boolean isOptimized() {
    return optimized;
}
/**
 * Returns a new iterator over the header and the rows of this handle.
 *
 * @return a fresh {@code ResultIterator}
 */
@Override
public Iterator<String[]> iterator() {
    checkRegistry();
    final Iterator<String[]> rows = new ResultIterator();
    return rows;
}
/**
 * Not supported by output handles.
 *
 * @param column ignored
 * @param original ignored
 * @param replacement ignored
 * @return never returns normally
 * @throws UnsupportedOperationException always
 */
@Override
public boolean replace(int column, String original, String replacement) {
    final String message = "This operation is only supported by handles for data input";
    throw new UnsupportedOperationException(message);
}
/**
 * Used to update data when loading projects after local recoding. This is part of the internal API
 * and should never be called by users.
 * <p>
 * Copies the values of the given handle into both output buffers, rewrites the
 * outlier flags of the generalized buffer, switches generalized attributes to
 * {@code DataType.STRING} where the given type map says so, and finally marks
 * this handle as optimized.
 *
 * @param data handle providing the new values
 * @param types data types per attribute name
 * @param outliers row indices to be flagged as outliers. Flags are cleared only between
 *                 consecutive indices in the given order, so the array is expected to be
 *                 sorted in ascending order.
 */
public void updateData(DataHandle data,
                       Map<String, DataType<?>> types,
                       int[] outliers) {

    // Transfer values into both buffers
    updateData(data, outputGeneralized, types, outliers);
    updateData(data, outputMicroaggregated, types, outliers);

    // Rewrite outlier flags: clear everything in front of an outlier, then set the outlier itself
    int start = 0;
    for (final int outlier : outliers) {
        for (int row = start; row < outlier; row++) {
            outputGeneralized.getArray()[row][0] &= Data.REMOVE_OUTLIER_MASK;
        }
        outputGeneralized.getArray()[outlier][0] |= Data.OUTLIER_MASK;
        start = outlier + 1;
    }

    // Clear the flag of all rows behind the last outlier
    for (int row = start; row < this.getNumRows(); row++) {
        outputGeneralized.getArray()[row][0] &= Data.REMOVE_OUTLIER_MASK;
    }

    // Update data types; only generalized quasi-identifiers are affected
    for (int type = 0; type < dataTypes.length; type++) {
        final DataType<?>[] slot = dataTypes[type];
        if (slot == null || type != AttributeTypeInternal.QUASI_IDENTIFYING_GENERALIZED) {
            continue;
        }
        for (int column = 0; column < slot.length; column++) {
            final String attribute = this.outputGeneralized.getHeader()[column];
            if (types.get(attribute) == DataType.STRING) {
                slot[column] = DataType.STRING;
            }
        }
    }

    // Mark as optimized
    this.optimized = true;
}
/**
 * Converts the suppressed attribute type bitset to the internal attribute types.
 * Each set bit that corresponds to one of the public attribute type constants is
 * translated into the bit(s) of the matching internal type(s); quasi-identifiers
 * map to both the generalized and the microaggregated internal type. Other bits
 * are ignored.
 *
 * @param suppressedAttributeTypes bitset over the public attribute type constants
 * @return bitset over the internal attribute type constants
 */
private int convert(int suppressedAttributeTypes) {
    int internal = 0;
    for (int bit = 0; bit < 32; bit++) {

        // Skip bits that are not set
        final boolean set = (suppressedAttributeTypes & (1 << bit)) != 0;
        if (!set) {
            continue;
        }

        switch (bit) {
        case AttributeType.ATTR_TYPE_ID:
            internal |= (1 << AttributeTypeInternal.IDENTIFYING);
            break;
        case AttributeType.ATTR_TYPE_IS:
            internal |= (1 << AttributeTypeInternal.INSENSITIVE);
            break;
        case AttributeType.ATTR_TYPE_QI:
            internal |= (1 << AttributeTypeInternal.QUASI_IDENTIFYING_GENERALIZED) | (1 << AttributeTypeInternal.QUASI_IDENTIFYING_MICROAGGREGATED);
            break;
        case AttributeType.ATTR_TYPE_SE:
            internal |= (1 << AttributeTypeInternal.SENSITIVE);
            break;
        }
    }
    return internal;
}
/**
 * Used to update data when loading projects after local recoding. This is part of the internal API
 * and should never be called by users.
 * <p>
 * For every attribute of the given buffer, the values of all rows that are not
 * listed as outliers are read from the input handle, registered in the buffer's
 * dictionary and written into the buffer. The dictionary is de-finalized before
 * and finalized after the update.
 *
 * @param input handle providing the new values
 * @param output buffer to be updated
 * @param types not used by this method
 * @param outliers row indices to skip; processed in the given order
 */
private void updateData(DataHandle input,
                        Data output,
                        Map<String, DataType<?>> types,
                        int[] outliers) {

    // Init
    final String[] attributes = output.getHeader();
    final int[][] buffer = output.getData();
    final Dictionary dictionary = output.getDictionary();

    // De-finalize
    dictionary.definalizeAll();

    for (int column = 0; column < attributes.length; column++) {
        final int source = input.getColumnIndexOf(attributes[column]);

        // Walk over the ranges delimited by the outliers; the final range ends at the last row
        int from = 0;
        for (int k = 0; k <= outliers.length; k++) {
            final int to = (k < outliers.length) ? outliers[k] : input.getNumRows();
            for (int row = from; row < to; row++) {
                final String value = input.internalGetValue(row, source, false);
                buffer[row][column] = dictionary.register(column, value);
            }
            // Skip the outlier itself
            from = to + 1;
        }
    }

    // Finalize
    dictionary.finalizeAll();
}
/**
 * Releases all resources: notifies the result that this buffer is released and
 * then drops the references held by this handle. The reference to the result
 * itself is kept.
 */
protected void doRelease() {

    // Must happen first, while this handle is still fully initialized
    result.releaseBuffer(this);

    // Buffers
    inputAnalyzed = null;
    inputStatic = null;
    outputGeneralized = null;
    outputMicroaggregated = null;

    // Inverse lookup structures
    inverseMap = null;
    inverseData = null;
    inverseDictionaries = null;

    // State inherited from the base handle
    header = null;
    dataTypes = null;
    definition = null;
    subset = null;
    registry = null;
    node = null;
}
/**
 * Returns the configuration of the result this handle belongs to.
 *
 * @return the configuration
 */
@Override
protected ARXConfiguration getConfiguration() {
    final ARXConfiguration configuration = result.getConfiguration();
    return configuration;
}
/**
 * Creates the data type array, indexed by internal attribute type and by the
 * position of the attribute within the buffer of that type. The slot for
 * identifying attributes is null. Types are taken from the data definition,
 * but are replaced by {@code DataType.STRING} for generalized attributes with a
 * transformation level above zero and for microaggregated attributes whose
 * aggregation function is not type preserving.
 *
 * @return the data type array
 */
@Override
protected DataType<?>[][] getDataTypeArray() {

    // Allocate one slot per internal attribute type
    final DataType<?>[][] types = new DataType[5][];
    types[AttributeTypeInternal.INSENSITIVE] = new DataType[inputStatic.getHeader().length];
    types[AttributeTypeInternal.SENSITIVE] = new DataType[inputAnalyzed.getHeader().length];
    types[AttributeTypeInternal.QUASI_IDENTIFYING_GENERALIZED] = new DataType[outputGeneralized.getHeader().length];
    types[AttributeTypeInternal.QUASI_IDENTIFYING_MICROAGGREGATED] = new DataType[outputMicroaggregated.getHeader().length];
    types[AttributeTypeInternal.IDENTIFYING] = null;

    for (int type = 0; type < types.length; type++) {

        // Select the header of the buffer that backs this attribute type
        final String[] attributes;
        if (type == AttributeTypeInternal.INSENSITIVE) {
            attributes = inputStatic.getHeader();
        } else if (type == AttributeTypeInternal.QUASI_IDENTIFYING_GENERALIZED) {
            attributes = outputGeneralized.getHeader();
        } else if (type == AttributeTypeInternal.SENSITIVE) {
            attributes = inputAnalyzed.getHeader();
        } else if (type == AttributeTypeInternal.QUASI_IDENTIFYING_MICROAGGREGATED) {
            attributes = outputMicroaggregated.getHeader();
        } else {
            attributes = null;
        }

        final DataType<?>[] slot = types[type];
        if (slot == null) {
            continue;
        }

        final boolean generalized = (type == AttributeTypeInternal.QUASI_IDENTIFYING_GENERALIZED);
        final boolean microaggregated = (type == AttributeTypeInternal.QUASI_IDENTIFYING_MICROAGGREGATED);

        for (int column = 0; column < slot.length; column++) {
            final String attribute = attributes[column];
            slot[column] = definition.getDataType(attribute);

            // Transformed values are no longer guaranteed to match the original type
            final boolean asString =
                    (generalized && node.getTransformation()[column] > 0) ||
                    (microaggregated && !definition.getMicroAggregationFunction(attribute).isTypePreserving());
            if (asString) {
                slot[column] = DataType.STRING;
            }
        }
    }
    return types;
}
/**
 * Collects the distinct values of the given column over all rows.
 *
 * @param col the column; validated via {@code checkColumn}
 * @param ignoreSuppression passed through to {@code internalGetValue}
 * @param handler checked for an interrupt before every row and once after the last row
 * @return the distinct values, in no particular order
 */
@Override
protected String[] getDistinctValues(final int col, final boolean ignoreSuppression, InterruptHandler handler) {

    // Check
    checkRegistry();
    checkColumn(col);

    // Collect
    final Set<String> distinct = new HashSet<String>();
    int row = 0;
    while (row < getNumRows()) {
        handler.checkInterrupt();
        distinct.add(internalGetValue(row, col, ignoreSuppression));
        row++;
    }
    handler.checkInterrupt();

    // Convert
    final String[] values = new String[distinct.size()];
    return distinct.toArray(values);
}
/**
 * Returns the input buffer of the input handle registered in the registry.
 *
 * @return the input buffer
 */
protected int[][] getInputBuffer() {
    checkRegistry();
    return this.registry.getInputHandle().getInputBuffer();
}
/**
 * Returns the output buffer holding the generalized attributes.
 *
 * @return the generalized output buffer
 */
protected Data getOutputBufferGeneralized() {
    return this.outputGeneralized;
}
/**
 * Returns the output buffer holding the microaggregated attributes.
 *
 * @return the microaggregated output buffer
 */
protected Data getOutputBufferMicroaggregated() {
    return this.outputMicroaggregated;
}
/**
 * Compares two rows on the given columns, in the given column order. Returns a
 * negative integer, zero, or a positive integer as the first row is less than,
 * equal to, or greater than the second (inverted if not ascending). Columns
 * mapped as identifying are skipped. Suppressed values are ordered behind all
 * other values; everything else is compared with the data type registered for
 * the column.
 *
 * @param row1 the first row
 * @param row2 the second row
 * @param columns the columns to compare on
 * @param ascending whether to sort in ascending order
 * @return the comparison result of the first column on which the rows differ, or 0
 * @throws RuntimeException wrapping any exception raised while comparing values
 */
@Override
protected int internalCompare(final int row1,
                              final int row2,
                              final int[] columns,
                              final boolean ascending) {

    for (final int column : columns) {

        final int attributeType = inverseMap[column * 2];
        final int position = inverseMap[column * 2 + 1];

        // Identifying attributes are removed from output data
        if (attributeType == AttributeTypeInternal.IDENTIFYING) {
            continue;
        }

        int order = 0;
        try {
            final String first = internalGetValue(row1, column, false);
            final String second = internalGetValue(row2, column, false);

            // NOTE(review): identity comparison on purpose is assumed here: only the
            // DataType.ANY_VALUE instance itself is treated as suppressed - confirm.
            final boolean firstAny = (first == DataType.ANY_VALUE);
            final boolean secondAny = (second == DataType.ANY_VALUE);

            if (firstAny && secondAny) {
                order = 0;
            } else if (firstAny) {
                order = +1;
            } else if (secondAny) {
                order = -1;
            } else {
                order = dataTypes[attributeType][position].compare(first, second);
            }
        } catch (final Exception e) {
            throw new RuntimeException(e);
        }

        // The first differing column decides
        if (order != 0) {
            return ascending ? order : -order;
        }
    }
    return 0;
}
/**
 * Returns the value of the given cell without validating the arguments.
 * Columns mapped as identifying always yield {@code DataType.ANY_VALUE}. The
 * same value is returned if suppression is not ignored, the attribute type is
 * contained in the suppressed attribute types and the row is flagged as an
 * outlier. Otherwise the value is decoded via the dictionary of the attribute type.
 *
 * @param row the row
 * @param col the column
 * @param ignoreSuppression if true, the outlier flag of the row is not considered
 * @return the value of the cell
 */
@Override
protected String internalGetValue(final int row,
                                  final int col,
                                  final boolean ignoreSuppression) {

    // Resolve the attribute type of the column
    final int type = inverseMap[col * 2];
    if (type == AttributeTypeInternal.IDENTIFYING) {
        return DataType.ANY_VALUE;
    }

    final int index = inverseMap[col * 2 + 1];
    final int[][] data = inverseData[type];

    // Suppressed types are hidden in rows that carry the outlier flag
    final boolean typeSuppressed = (suppressedAttributeTypes & (1 << type)) != 0;
    if (!ignoreSuppression && typeSuppressed) {
        final int flags = outputGeneralized.getArray()[row][0];
        if ((flags & Data.OUTLIER_MASK) != 0) {
            return DataType.ANY_VALUE;
        }
    }

    // Strip the outlier bit before decoding
    final int code = data[row][index] & Data.REMOVE_OUTLIER_MASK;
    final String[][] mapping = inverseDictionaries[type].getMapping();
    return mapping[index][code];
}
/**
 * Returns whether the given row is an outlier, i.e. whether the outlier bit is
 * set in the first column of the row in the generalized output buffer.
 *
 * @param row the row
 * @return true if the row is flagged as an outlier
 */
protected boolean internalIsOutlier(final int row) {
    final int flags = outputGeneralized.getArray()[row][0];
    return (flags & Data.OUTLIER_MASK) != 0;
}
/**
 * Replaces all occurrences of the given value in the dictionary that backs the
 * given column. Nothing is replaced, and false is returned, if the column is
 * out of range, if it is mapped as identifying (such columns have no
 * dictionary), or if the value does not occur.
 *
 * @param column the column index
 * @param original the value to replace
 * @param replacement the new value
 * @return true if at least one dictionary entry was replaced
 */
@Override
protected boolean internalReplace(int column,
                                  String original,
                                  String replacement) {

    // The inverse map holds two slots per column, so the valid range is half its length
    if (column < 0 || column >= inverseMap.length / 2) {
        return false;
    }
    final int key = column * 2;

    // Resolve the dictionary; it is null for identifying attributes
    final int type = inverseMap[key];
    if (type < 0 || type >= inverseDictionaries.length) {
        return false;
    }
    final Dictionary typeDictionary = inverseDictionaries[type];
    if (typeDictionary == null) {
        return false;
    }

    // Resolve the values of the column; the position is -1 for unmapped columns
    final String[][] dictionary = typeDictionary.getMapping();
    final int index = inverseMap[key + 1];
    if (index < 0 || index >= dictionary.length) {
        return false;
    }
    final String[] values = dictionary[index];

    // Replace
    boolean found = false;
    for (int i = 0; i < values.length; i++) {
        if (values[i].equals(original)) {
            values[i] = replacement;
            found = true;
        }
    }

    // Return
    return found;
}
/**
 * Swaps two rows in the generalized output buffer and, if it contains any
 * rows, also in the microaggregated output buffer.
 *
 * @param row1 the first row
 * @param row2 the second row
 */
protected void internalSwap(final int row1, final int row2) {

    // Swap in the generalized buffer
    final int[][] generalized = outputGeneralized.getArray();
    final int[] generalizedRow = generalized[row1];
    generalized[row1] = generalized[row2];
    generalized[row2] = generalizedRow;

    // Swap in the microaggregated buffer, which may be empty
    final int[][] microaggregated = outputMicroaggregated.getArray();
    if (microaggregated.length != 0) {
        final int[] microaggregatedRow = microaggregated[row1];
        microaggregated[row1] = microaggregated[row2];
        microaggregated[row2] = microaggregatedRow;
    }
}
/**
 * Returns the anonymity flag determined when this handle was created.
 *
 * @return whether the output is anonymous
 */
@Override
protected boolean isAnonymous() {
    return anonymous;
}
/**
 * Sets the flag that marks this handle as optimized.
 *
 * @param optimized the new value of the flag
 */
protected void setOptimized(boolean optimized) {
    // Honor the argument; previously the flag was set to true unconditionally
    this.optimized = optimized;
}
/**
 * Used to update data types after local recoding. Every generalized
 * quasi-identifier whose level in the given transformation is above zero is
 * switched to {@code DataType.STRING}; all other data types are left untouched.
 *
 * @param transformation generalization levels, indexed like the generalized attributes
 */
protected void updateDataTypes(int[] transformation) {
    for (int type = 0; type < dataTypes.length; type++) {

        // Only the slot of the generalized quasi-identifiers is affected
        final DataType<?>[] slot = dataTypes[type];
        if (slot == null || type != AttributeTypeInternal.QUASI_IDENTIFYING_GENERALIZED) {
            continue;
        }

        for (int column = 0; column < slot.length; column++) {
            if (transformation[column] > 0) {
                slot[column] = DataType.STRING;
            }
        }
    }
}
}