/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx.aggregates; import java.io.File; import java.io.FileInputStream; import java.io.IOException; import java.io.ObjectInputStream; import java.io.Serializable; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; import java.util.HashMap; import java.util.List; import java.util.Map; import org.deidentifier.arx.DataType; /** * This class enables building hierarchies for categorical and non-categorical values * by ordering the data items and merging into groups with predefined sizes. * * @author Fabian Prasser * @param <T> */ public class HierarchyBuilderOrderBased<T> extends HierarchyBuilderGroupingBased<T> { /** * A serializable comparator. * * @author Fabian Prasser * @param <T> */ public static abstract class SerializableComparator<T> implements Comparator<T>, Serializable { /** Method */ private static final long serialVersionUID = 3851134667082727602L; } /** * Internal helper class * * @param <T> */ @SuppressWarnings("hiding") protected class CloseElements<T> extends AbstractGroup { /** Method */ private static final long serialVersionUID = 7224062023293601561L; /** Values */ private String[] values; /** * Creates a new instance * * @param values * @param function */ protected CloseElements(String[] values, AggregateFunction<T> function) { super(function.aggregate(values)); this.values = values; } /** * Returns the values * * @return */ protected String[] getValues(){ return values; } /** * Method * * @param list * @param function * @return */ @SuppressWarnings("rawtypes") protected CloseElements merge(List<CloseElements<T>> list, AggregateFunction<T> function) { List<String> values = new ArrayList<String>(); for (CloseElements group : list){ for (String s : ((CloseElements)group).getValues()) { values.add(s); } } return new CloseElements<T>(values.toArray(new String[values.size()]), function); } } /** TODO */ private static final long serialVersionUID = -2749758635401073668L; /** * Creates a new instance. Either preserves the given order, or * sorts the items according to the order induced by the given data type * * @param <T> * @param type The data type is also used for ordering data items * @param order Should the items be sorted according to the order induced by the data type * @return */ public static <T> HierarchyBuilderOrderBased<T> create(final DataType<T> type, boolean order) { return new HierarchyBuilderOrderBased<T>(type, order); } /** * Creates a new instance. Uses the comparator for ordering data items * * @param <T> * @param type The data type * @param comparator Use this comparator for ordering data items * @return */ public static <T> HierarchyBuilderOrderBased<T> create(final DataType<T> type, final Comparator<T> comparator) { return new HierarchyBuilderOrderBased<T>(type, comparator); } /** * Creates a new instance. Uses the defined order for data items * * @param <T> * @param type The data type * @param order Use this for ordering data items * @return */ public static <T> HierarchyBuilderOrderBased<T> create(final DataType<T> type, final String[] order) { return new HierarchyBuilderOrderBased<T>(type, order); } /** * Loads a builder specification from the given file. * * @param <T> * @param file * @return * @throws IOException */ @SuppressWarnings("unchecked") public static <T> HierarchyBuilderOrderBased<T> create(File file) throws IOException{ ObjectInputStream ois = null; try { ois = new ObjectInputStream(new FileInputStream(file)); HierarchyBuilderOrderBased<T> result = (HierarchyBuilderOrderBased<T>)ois.readObject(); return result; } catch (Exception e) { throw new IOException(e); } finally { if (ois != null) ois.close(); } } /** * Loads a builder specification from the given file. * * @param <T> * @param file * @return * @throws IOException */ public static <T> HierarchyBuilderOrderBased<T> create(String file) throws IOException{ return create(new File(file)); } /** Comparator */ private final Comparator<String> comparator; /** * Creates a new instance. * * @param type The data type is also used for ordering data items * @param order Should the items be sorted according to the order induced by the data type */ private HierarchyBuilderOrderBased(final DataType<T> type, boolean order) { super(Type.ORDER_BASED, type); if (order) { this.comparator = new SerializableComparator<String>(){ private static final long serialVersionUID = -5728888259809544706L; @Override public int compare(String o1, String o2) { try { return type.compare(o1, o2); } catch (Exception e) { throw new IllegalArgumentException(e); } } }; } else { this.comparator = null; } this.function = AggregateFunction.forType(type).createSetFunction(); } /** * Creates a new instance. * * @param type The data type * @param order Use this for ordering data items */ private HierarchyBuilderOrderBased(final DataType<T> type, final String[] order) { super(Type.ORDER_BASED, type); final Map<String, Integer> map = new HashMap<String, Integer>(); for (int i=0; i<order.length; i++) { map.put(order[i], i); } this.comparator = new SerializableComparator<String>(){ private static final long serialVersionUID = 8016783606581696832L; @Override public int compare(String o1, String o2) { try { return map.get(o1).compareTo(map.get(o2)); } catch (Exception e) { throw new IllegalArgumentException(e); } } }; this.function = AggregateFunction.forType(type).createSetFunction(); } /** * Creates a new instance. * * @param type The data type * @param comparator Use this comparator for ordering data items */ private HierarchyBuilderOrderBased(final DataType<T> type, final Comparator<T> comparator) { super(Type.ORDER_BASED, type); if (!(comparator instanceof Serializable)) { throw new IllegalArgumentException("Comparator must be serializable"); } this.comparator = new SerializableComparator<String>(){ private static final long serialVersionUID = -487411642974218418L; @Override public int compare(String o1, String o2) { try { return comparator.compare(type.parse(o1), type.parse(o2)); } catch (Exception e) { throw new IllegalArgumentException(e); } } }; this.function = AggregateFunction.forType(type).createSetFunction(); } /** * Returns the comparator. * * @return */ public Comparator<String> getComparator(){ return comparator; } @SuppressWarnings("unchecked") @Override protected AbstractGroup[][] prepareGroups() { if (comparator != null) { try { Arrays.sort(super.getData(), comparator); } catch (Exception e){ throw new IllegalArgumentException(e.getMessage()); } } List<Group<T>> groups = super.getLevel(0).getGroups(); List<String> items = new ArrayList<String>(); // Prepare String[] data = getData(); List<AbstractGroup[]> result = new ArrayList<AbstractGroup[]>(); int index = 0; int resultIndex = 0; int groupCount = 0; // Break if no groups specified if (!super.getLevels().isEmpty() && !super.getLevel(0).getGroups().isEmpty()) { // Create first column AbstractGroup[] first = new AbstractGroup[data.length]; outer: while (true) { for (Group<T> group : groups) { for (int i = 0; i<group.getSize(); i++){ items.add(data[index++]); if (index == data.length) break; } CloseElements<T> element = new CloseElements<T>(items.toArray(new String[items.size()]), group.getFunction()); for (int i=0; i<items.size(); i++) { first[resultIndex++] = element; } groupCount++; items.clear(); if (index == data.length) break outer; } } result.add(first); // Break if done if (groupCount>1) { // Build higher-level columns for (int i=1; i<super.getLevels().size(); i++){ // Break if done if (groupCount==1) break; // Prepare groupCount = 0; groups = super.getLevel(i).getGroups(); Map<AbstractGroup, AbstractGroup> map = new HashMap<AbstractGroup, AbstractGroup>(); List<AbstractGroup> list = new ArrayList<AbstractGroup>(); AbstractGroup[] column = result.get(i-1); for (int j=0; j<column.length; j++){ if (!map.containsKey(column[j])) { map.put(column[j], column[j]); list.add(column[j]); } } // Build index = 0; resultIndex = 0; List<CloseElements<T>> gItems = new ArrayList<CloseElements<T>>(); outer: while (true) { for (Group<T> group : groups) { for (int j = 0; j<group.getSize(); j++){ gItems.add((CloseElements<T>)list.get(index++)); if (index == list.size()) break; } CloseElements<T> element = gItems.get(0).merge(gItems, group.getFunction()); groupCount++; for (int j=0; j<gItems.size(); j++) { map.put(gItems.get(j), element); } gItems.clear(); if (index == list.size()) break outer; } } // Store AbstractGroup[] ccolumn = new AbstractGroup[data.length]; for (int j=0; j<column.length; j++){ ccolumn[j] = map.get(column[j]); } result.add(ccolumn); } } } else { groupCount = data.length; } // Add one last column if more than one group left if (groupCount>1) { AbstractGroup[] column = new AbstractGroup[data.length]; CloseElements<T> element = new CloseElements<T>(new String[]{}, AggregateFunction.forType(getDataType()).createConstantFunction(DataType.ANY_VALUE)); for (int i=0; i<column.length; i++){ column[i] = element; } result.add(column); } // Return return result.toArray(new AbstractGroup[0][0]); } }