/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.Map.Entry; import org.deidentifier.arx.ARXLattice.ARXNode; import cern.colt.GenericSorting; import cern.colt.Swapper; import cern.colt.function.IntComparator; /** * This class implements sorting and swapping for a set of paired data handles. * * @author Fabian Prasser */ class DataRegistry { /** The input handle, if any. */ private DataHandleInput input; /** The input subset handle, if any. */ private DataHandleSubset inputSubset; /** The output handle, if any. */ private Map<ARXNode, DataHandleOutput> output = new HashMap<ARXNode, DataHandleOutput>(); /** The output subset handle, if any. */ private Map<ARXNode, DataHandleSubset> outputSubset = new HashMap<ARXNode, DataHandleSubset>(); /** * Default constructor. */ public DataRegistry(){ // Empty by design } /** * Replace a value in the given column * @param column * @param original * @param replacement * @return */ public boolean replace(int column, String original, String replacement) { boolean replaced = false; replaced |= input.internalReplace(column, original, replacement); if (!output.isEmpty()) { replaced |= output.values().iterator().next().internalReplace(column, original, replacement); } return replaced; } /** * Helper that creates a view on a research subset. * * @param handle * @param subset * @return */ private DataHandleSubset createSubset(DataHandle handle, DataSubset subset) { DataHandleSubset result = new DataHandleSubset(handle, subset); result.setRegistry(this); return result; } /** * Returns any of the registered subsets. * * @return */ private DataHandleSubset getSubset() { DataHandleSubset subset = null; if (inputSubset!=null){ subset = inputSubset; } else if (!outputSubset.isEmpty()){ return outputSubset.values().iterator().next(); } return subset; } /** * Sort. * * @param handle * @param swapper * @param from * @param to * @param ascending * @param columns */ private void sortAll(final DataHandle handle, final Swapper swapper, final int from, final int to, final boolean ascending, final int... columns) { final DataHandle outer = handle; final DataHandleSubset subset = getSubset(); final IntComparator c = new IntComparator() { @Override public int compare(final int arg0, final int arg1) { return outer.internalCompare(arg0, arg1, columns, ascending); } }; final Swapper s = new Swapper() { @Override public void swap(final int arg0, final int arg1) { if (input != null) input.internalSwap(arg0, arg1); for (DataHandleOutput handle : output.values()) handle.internalSwap(arg0, arg1); if (subset != null) subset.internalSwap(arg0, arg1); if (swapper != null) swapper.swap(arg0, arg1); } }; GenericSorting.mergeSort(from, to, c, s); if (subset != null){ subset.internalRebuild(); } } /** * Sort. * * @param handle * @param swapper * @param from * @param to * @param ascending * @param columns */ private void sortSubset(final DataHandleSubset handle, final Swapper swapper, final int from, final int to, final boolean ascending, final int... columns) { final DataHandleSubset outer = handle; final IntComparator c = new IntComparator() { @Override public int compare(final int arg0, final int arg1) { return outer.internalCompare(arg0, arg1, columns, ascending); } }; final Swapper s = new Swapper() { @Override public void swap(final int arg0, final int arg1) { if (input != null) input.internalSwap(outer.internalTranslate(arg0), outer.internalTranslate(arg1)); for (DataHandleOutput handle : output.values()) handle.internalSwap(outer.internalTranslate(arg0), outer.internalTranslate(arg1)); if (swapper != null) swapper.swap(outer.internalTranslate(arg0), outer.internalTranslate(arg1)); } }; // No need to swap and rebuild the subset views GenericSorting.mergeSort(from, to, c, s); } /** * Swap. * * @param handle * @param row1 * @param row2 */ private void swapAll(DataHandle handle, int row1, int row2) { if (input!=null) input.internalSwap(row1, row2); for (DataHandleOutput outhandle : output.values()) outhandle.internalSwap(row1, row2); // Important to swap in only one subset DataHandleSubset subset = getSubset(); if (subset!=null){ subset.internalSwap(row1, row2); subset.internalRebuild(); } } /** * Swap. * * @param handle * @param row1 * @param row2 */ private void swapSubset(DataHandleSubset handle, int row1, int row2) { // Nothing to do for subsets row1 = handle.internalTranslate(row1); row2 = handle.internalTranslate(row2); if (input!=null) input.internalSwap(row1, row2); for (DataHandleOutput outhandle : output.values()) outhandle.internalSwap(row1, row2); } /** * Creates the views on the subset. * * @param config */ protected void createInputSubset(ARXConfiguration config){ if (config.getSubset() != null) { this.inputSubset = createSubset(this.input, config.getSubset()); } else { this.inputSubset = null; } this.input.setView(this.inputSubset); } /** * Creates the views on the subset. * * @param node * @param config */ protected void createOutputSubset(ARXNode node, ARXConfiguration config){ if (config.getSubset() != null) { this.outputSubset.put(node, createSubset(this.output.get(node), config.getSubset())); } else { this.outputSubset.remove(node); } this.output.get(node).setView(this.outputSubset.get(node)); } /** * Returns the base data type without generalization. * * @param attribute * @return */ protected DataType<?> getBaseDataType(String attribute) { return this.input.getBaseDataType(attribute); } /** * Returns the input handle * @return */ protected DataHandleInput getInputHandle() { return this.input; } /** * Returns a registered handle, if any. * * @param node * @return */ protected DataHandle getOutputHandle(ARXNode node) { return this.output.get(node); } /** * Implementation of {@link DataHandle#isOutlier(row)}. * * @param handle * @param row * @return */ protected boolean isOutlier(DataHandle handle, int row){ if (handle instanceof DataHandleInput){ return false; } else if (handle instanceof DataHandleOutput){ return ((DataHandleOutput)handle).internalIsOutlier(row); } else if (handle instanceof DataHandleSubset){ return isOutlier(((DataHandleSubset)handle).getSource(), row); } else { throw new RuntimeException("Illegal state"); } } /** * Releases the given handle. * * @param handle */ protected void release(DataHandle handle) { // Handle subsets if (handle instanceof DataHandleSubset) { return; } // Handle output Iterator<Entry<ARXNode, DataHandleOutput>> iter = output.entrySet().iterator(); while (iter.hasNext()) { Entry<ARXNode, DataHandleOutput> entry = iter.next(); if (entry.getValue().equals(handle)) { outputSubset.remove(entry.getKey()); iter.remove(); handle.doRelease(); return; } } // Handle input if (handle.equals(input)) { this.reset(); input.doRelease(); } } /** * Removes the association to all handles, but the input handle. */ protected void reset() { while (!this.output.entrySet().isEmpty()) { Entry<ARXNode, DataHandleOutput> entry = this.output.entrySet().iterator().next(); release(entry.getValue()); this.output.remove(entry.getKey()); } this.output.clear(); for (DataHandle handle : this.outputSubset.values()) { handle.setRegistry(null); } this.outputSubset.clear(); if (this.inputSubset != null) { this.inputSubset.setRegistry(null); this.inputSubset = null; } } /** * Implementation of {@link DataHandle#sort(boolean, int...)} * @param handle * @param ascending * @param columns */ protected void sort(final DataHandle handle, final boolean ascending, final int... columns) { sort(handle, 0, handle.getNumRows(), ascending, columns); } /** * Implementation of {@link DataHandle#sort(int, int, boolean, int...)} * @param handle * @param from * @param to * @param ascending * @param columns */ protected void sort(final DataHandle handle, final int from, final int to, final boolean ascending, final int... columns) { this.sort(handle, null, from, to, ascending, columns); } /** * Implementation of {@link DataHandle#sort(Swapper, boolean, int...)} * @param handle * @param swapper * @param ascending * @param columns */ protected void sort(final DataHandle handle, final Swapper swapper, final boolean ascending, final int... columns) { sort(handle, swapper, 0, handle.getNumRows(), ascending, columns); } /** * Implementation of {@link DataHandle#sort(Swapper, int, int, boolean, int...)} * @param handle * @param swapper * @param from * @param to * @param ascending * @param columns */ protected void sort(final DataHandle handle, final Swapper swapper, final int from, final int to, final boolean ascending, final int... columns) { handle.checkColumns(columns); handle.checkRow(from, handle.getNumRows()); handle.checkRow(to, handle.getNumRows()); if (handle instanceof DataHandleSubset){ sortSubset((DataHandleSubset)handle, swapper, from, to, ascending, columns); } else { sortAll(handle, swapper, from, to, ascending, columns); } } /** * Implementation of {@link DataHandle#swap(int, int)}. * * @param handle * @param row1 * @param row2 */ protected void swap(DataHandle handle, int row1, int row2) { if (handle instanceof DataHandleSubset){ swapSubset((DataHandleSubset)handle, row1, row2); } else { swapAll(handle, row1, row2); } } /** * Update the registry. * * @param input */ protected void updateInput(DataHandleInput input){ this.input = input; } /** * Update the registry. * * @param inputSubset */ protected void updateInputSubset(DataHandleSubset inputSubset){ this.inputSubset = inputSubset; } /** * Update the registry. * * @param node * @param output */ protected void updateOutput(ARXNode node, DataHandleOutput output){ this.output.put(node, output); } /** * Update the registry. * * @param node * @param outputSubset */ protected void updateOutputSubset(ARXNode node, DataHandleSubset outputSubset){ this.outputSubset.put(node, outputSubset); } }