/* * ARX: Powerful Data Anonymization * Copyright 2012 - 2017 Fabian Prasser, Florian Kohlmayer and contributors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.deidentifier.arx.aggregates; import java.io.Serializable; import java.text.ParseException; import java.util.Arrays; import java.util.Date; import org.deidentifier.arx.DataType; import org.deidentifier.arx.DataType.ARXDate; import org.deidentifier.arx.DataType.ARXDecimal; import org.deidentifier.arx.DataType.ARXInteger; /** * This abstract class represents an aggregate function. * * @author Fabian Prasser * @param <T> */ public abstract class AggregateFunction<T> implements Serializable{ /** * A builder for aggregate functions. * * @author Fabian Prasser * @param <T> */ public static class AggregateFunctionBuilder<T> { /** SVUID */ private DataType<T> type; /** * Creates a new instance. * * @param type */ private AggregateFunctionBuilder(DataType<T> type){ this.type = type; } /** * An aggregate function that returns a the arithmetic mean, if it can be computed, NULL otherwise. * * @return */ public final AggregateFunction<T> createArithmeticMeanFunction() { return new GenericArithmeticMean<T>(type); } /** * An aggregate function that returns a the arithmetic mean of min & max, if it can be computed, NULL otherwise. * * @return */ public final AggregateFunction<T> createArithmeticMeanOfBoundsFunction() { return new GenericArithmeticMeanOfBounds<T>(type); } /** * An aggregate function that returns an interval consisting of the * first and the last element following the predefined order. * * @return */ public final AggregateFunction<T> createBoundsFunction() { return new GenericBounds<T>(type); } /** * An aggregate function that returns a constant value. * * @param value * @return */ public final AggregateFunction<T> createConstantFunction(String value) { return new GenericConstant<T>(type, value); } /** * An aggregate function that returns a the geometric mean, if it can be computed, NULL otherwise. * * @return */ public final AggregateFunction<T> createGeometricMeanFunction() { return new GenericGeometricMean<T>(type); } /** * An aggregate function that returns a the geometric mean of min & max, if it can be computed, NULL otherwise. * * @return */ public final AggregateFunction<T> createGeometricMeanOfBoundsFunction() { return new GenericGeometricMeanOfBounds<T>(type); } /** * An aggregate function that returns an interval [min, max]. * * @return */ public final AggregateFunction<T> createIntervalFunction() { return new GenericInterval<T>(type, true, true); } /** * An aggregate function that returns an interval [min, max]. * * @param lowerIncluded * @param upperIncluded * @return */ public final AggregateFunction<T> createIntervalFunction(boolean lowerIncluded, boolean upperIncluded) { return new GenericInterval<T>(type, lowerIncluded, upperIncluded); } /** * An aggregate function that returns a common prefix. * * @return */ public final AggregateFunction<T> createPrefixFunction() { return new GenericCommonPrefix<T>(type, null); } /** * An aggregate function that returns a common prefix. The remaining characters will be redacted with * the given character * * @param redaction * @return */ public final AggregateFunction<T> createPrefixFunction(Character redaction) { return new GenericCommonPrefix<T>(type, redaction); } /** * * An aggregate function that returns a set of all data values . * * @return */ public final AggregateFunction<T> createSetFunction() { return new GenericSet<T>(type); } /** * * An aggregate function that returns a set of the prefixes of the data values. Length is 1 * * @return */ public final AggregateFunction<T> createSetOfPrefixesFunction() { return new GenericSetOfPrefixes<T>(type, 1); } /** * * An aggregate function that returns a set of the prefixes of the data values. * * @param length * @return */ public final AggregateFunction<T> createSetOfPrefixesFunction(int length) { return new GenericSetOfPrefixes<T>(type, length); } } /** * An aggregate function that has a parameter. * * @author Fabian Prasser * @param <T> */ public static abstract class AggregateFunctionWithParameter<T> extends AggregateFunction<T>{ /** SVUID */ private static final long serialVersionUID = 1L; /** * Creates a new instance. * * @param type */ protected AggregateFunctionWithParameter(DataType<T> type) { super(type); } /** * Returns whether the function accepts this parameter. * * @param parameter * @return */ public abstract boolean acceptsParameter(String parameter); /** * Returns the parameter. * * @return */ public abstract String getParameter(); /** * Creates a new instance with the given parameter. * * @param parameter * @return */ public abstract AggregateFunctionWithParameter<T> newInstance(String parameter); } /** * An aggregate function that returns the arithmetic mean, if it may be computed, "NULL" * otherwise. * * @author Fabian Prasser * @param <T> */ public static class GenericArithmeticMean<T> extends AggregateFunction<T> { /** SVUID*/ private static final long serialVersionUID = -901667129625212217L; /** * Creates a new instance. * * @param type */ private GenericArithmeticMean(DataType<T> type) { super(type); } @Override public String aggregate(String[] values) { // Count the number of non-null values double count = 0; for (String value : values) { count += value != null && !DataType.isNull(value) ? 1 : 0; } // Data-type specific implementation if (super.type.getDescription().getWrappedClass() == Date.class) { double result = 0d; for (String value : values) { Date date = ((ARXDate)type).parse(value); result += date != null ? (double)date.getTime() / count : 0d; } return ((ARXDate)type).format(new Date((long)result)); // Data-type specific implementation } else if (super.type.getDescription().getWrappedClass() == Long.class) { double result = 0d; for (String value : values) { Long longValue = ((ARXInteger)type).parse(value); result += longValue != null ? (double)longValue / count : 0d; } return ((ARXInteger)type).format((long)result); // Data-type specific implementation } else if (super.type.getDescription().getWrappedClass() == Double.class) { double result = 0d; for (String value : values) { Double doubleValue = ((ARXDecimal)type).parse(value); result += doubleValue != null ? doubleValue / count : 0d; } return ((ARXDecimal)type).format(result); // Data-type specific implementation } else { return DataType.NULL_VALUE; } } @Override public String toLabel() { return "Arithmetic mean"; } @Override public String toString(){ return "ArithmeticMean"; } } /** * An aggregate function that returns the arithmetic mean of min & max, if it may be computed, "NULL" * otherwise. * * @author Fabian Prasser * @param <T> */ public static class GenericArithmeticMeanOfBounds<T> extends AggregateFunction<T> { /** SVUID*/ private static final long serialVersionUID = 5067728720270473715L; /** * Creates a new instance. * * @param type */ private GenericArithmeticMeanOfBounds(DataType<T> type) { super(type); } @Override public String aggregate(String[] values) { String min = null; String max = null; for (String value : values) { try { if (value != null && !DataType.isNull(value) && (min == null || type.compare(min, value) > 0)){ min = value; } if (value != null && !DataType.isNull(value) && (max == null || type.compare(max, value) < 0)){ max = value; } } catch (Exception e) { return DataType.NULL_VALUE; } } if (min == null || max == null) { return DataType.NULL_VALUE; } else { return new GenericArithmeticMean<T>(this.type).aggregate(new String[]{min, max}); } } @Override public String toLabel() { return "Arithmetic mean of bounds"; } @Override public String toString(){ return "ArithmeticMeanOfBounds"; } } /** * An aggregate function that returns an interval consisting of the * first and the last element following the predefined order . * * @author Fabian Prasser * @param <T> */ public static class GenericBounds<T> extends AggregateFunction<T> { /** SVUID */ private static final long serialVersionUID = -8884657842545379206L; /** * Creates a new instance. * * @param type */ private GenericBounds(DataType<T> type) { super(type); } @Override public String aggregate(String[] values) { return new StringBuilder().append("[") .append(values[0]) .append(", ") .append(values[values.length - 1]) .append("]") .toString(); } @Override public String toLabel() { return "Bounding values"; } @Override public String toString(){ return "Bounds"; } } /** * An aggregate function that returns a common prefix. * * @author Fabian Prasser * @param <T> */ public static class GenericCommonPrefix<T> extends AggregateFunctionWithParameter<T> { /** SVUID */ private static final long serialVersionUID = 526809670467390820L; /** SVUID */ private Character redaction; /** * Creates a new instance. * * @param type * @param redaction */ private GenericCommonPrefix(DataType<T> type, final Character redaction) { super(type); this.redaction = redaction; } @Override public boolean acceptsParameter(String parameter) { return parameter == null || parameter.length()<=1; } @Override public String aggregate(String[] values) { // Determine length int length = Integer.MIN_VALUE; if (redaction != null) { for (String s : values) { length = Math.max(length, s.length()); } } // Determine largest common prefix int position = 0; outer: while (true) { if (values[0].length()==position) break outer; char c = values[0].charAt(position); for (int i = 1; i < values.length; i++) { if (values[i].charAt(position) != c) { break outer; } } position++; } position--; char[] result; if (redaction != null) { result = new char[length]; Arrays.fill(result, position + 1, length, redaction); } else { result = new char[position + 1]; } for (int i = 0; i <= position; i++) { result[i] = values[0].charAt(i); } return new String(result); } @Override public String getParameter() { if (redaction == null) return null; else return String.valueOf(redaction); } @Override public AggregateFunctionWithParameter<T> newInstance(String parameter) { if (parameter == null || parameter.length()==0) return new GenericCommonPrefix<T>(this.type, null); else return new GenericCommonPrefix<T>(this.type, parameter.toCharArray()[0]); } @Override public String toLabel() { return "Common prefix"; } @Override public String toString(){ if (redaction == null){ return "CommonPrefix"; } else { return "CommonPrefix[redaction="+redaction+"]"; } } } /** * An aggregate function that returns a constant value. * * @author Fabian Prasser * @param <T> */ public static class GenericConstant<T> extends AggregateFunctionWithParameter<T> { /** SVUID */ private static final long serialVersionUID = -8995068916108125096L; /** SVUID */ private String value; /** * Creates a new instance. * * @param type * @param value */ private GenericConstant(DataType<T> type, String value) { super(type); this.value = value; } @Override public boolean acceptsParameter(String parameter) { return parameter != null; } @Override public String aggregate(String[] values) { return value; } @Override public String getParameter() { return value; } @Override public AggregateFunctionWithParameter<T> newInstance(String parameter) { return new GenericConstant<T>(this.type, parameter); } @Override public String toLabel() { return "Constant value"; } @Override public String toString(){ return "Constant[value="+value+"]"; } } /** * An aggregate function that returns the geometric mean, if it may be computed, "NULL" * otherwise. * * @author Fabian Prasser * @param <T> */ public static class GenericGeometricMean<T> extends AggregateFunction<T> { /** SVUID*/ private static final long serialVersionUID = -1756610766270481335L; /** * Creates a new instance. * * @param type */ private GenericGeometricMean(DataType<T> type) { super(type); } @Override public String aggregate(String[] values) { // Count the number of non-null values double count = 0; for (String value : values) { count += value != null && !DataType.isNull(value) ? 1 : 0; } // Data-type specific implementation if (super.type.getDescription().getWrappedClass() == Date.class) { double result = 0d; for (String value : values) { Date date = ((ARXDate)type).parse(value); result += date != null ? Math.log10((double)date.getTime()) / count : 0d; } return ((ARXDate)type).format(new Date((long)Math.pow(10d, result))); // Data-type specific implementation } else if (super.type.getDescription().getWrappedClass() == Long.class) { double result = 0d; for (String value : values) { Long longValue = ((ARXInteger)type).parse(value); result += longValue != null ? Math.log10((double)longValue) / count : 0d; } return ((ARXInteger)type).format((long)Math.pow(10d, result)); // Data-type specific implementation } else if (super.type.getDescription().getWrappedClass() == Double.class) { double result = 0d; for (String value : values) { Double doubleValue = ((ARXDecimal)type).parse(value); result += doubleValue != null ? Math.log10(doubleValue) / count : 0d; } return ((ARXDecimal)type).format(Math.pow(10d, result)); // Data-type specific implementation } else { return DataType.NULL_VALUE; } } @Override public String toLabel() { return "Geometric mean"; } @Override public String toString(){ return "GeometricMean"; } } /** * An aggregate function that returns the geometric mean of min & max, if it may be computed, "NULL" * otherwise. * * @author Fabian Prasser * @param <T> */ public static class GenericGeometricMeanOfBounds<T> extends AggregateFunction<T> { /** SVUID*/ private static final long serialVersionUID = 8155390779775522723L; /** * Creates a new instance. * * @param type */ private GenericGeometricMeanOfBounds(DataType<T> type) { super(type); } @Override public String aggregate(String[] values) { String min = null; String max = null; for (String value : values) { try { if (value != null && !DataType.isNull(value) && (min == null || type.compare(min, value) > 0)){ min = value; } if (value != null && !DataType.isNull(value) && (max == null || type.compare(max, value) < 0)){ max = value; } } catch (Exception e) { return DataType.NULL_VALUE; } } if (min == null || max == null) { return DataType.NULL_VALUE; } else { return new GenericGeometricMean<T>(this.type).aggregate(new String[]{min, max}); } } @Override public String toLabel() { return "Geometric mean of bounds"; } @Override public String toString(){ return "GeometricMeanOfBounds"; } } /** * An aggregate function that returns an interval [min, max] . * * @author Fabian Prasser * @param <T> */ public static class GenericInterval<T> extends AggregateFunction<T> { /** SVUID */ private static final long serialVersionUID = -5182521036467379023L; /** SVUID */ private final boolean lowerIncluded; /** SVUID */ private final boolean upperIncluded; /** * Creates a new instance. * * @param type * @param lowerIncluded * @param upperIncluded */ private GenericInterval(DataType<T> type, boolean lowerIncluded, boolean upperIncluded) { super(type); this.lowerIncluded = lowerIncluded; this.upperIncluded = upperIncluded; } @Override public String aggregate(String[] values) { String min = null; String max = null; for (String value : values) { try { if (min == null || type.compare(min, value) > 0){ min = value; } if (max == null || type.compare(max, value) < 0){ max = value; } } catch (NumberFormatException | ParseException e) { throw new RuntimeException(e); } } return new StringBuilder().append(lowerIncluded ? "[" : "]") .append(min) .append(", ") .append(max) .append(upperIncluded ? "]" : "[") .toString(); } @Override public String toLabel() { return "Interval"; } @Override public String toString(){ return "Interval"; } } /** * An aggregate function that returns a set of all data values. * * @author Fabian Prasser * @param <T> */ public static class GenericSet<T> extends AggregateFunction<T> { /** SVUID */ private static final long serialVersionUID = -4029191421720743653L; /** * Creates a new instance. * * @param type */ private GenericSet(DataType<T> type) { super(type); } @Override public String aggregate(String[] values) { StringBuilder b = new StringBuilder(); b.append("{"); for (int i = 0; i < values.length; i++) { b.append(values[i]); if (i < values.length - 1) { b.append(", "); } } b.append("}"); return b.toString(); } @Override public String toLabel() { return "Set of values"; } @Override public String toString(){ return "Set"; } }; /** * An aggregate function that returns a set of the prefixes of the data values. * * @author Fabian Prasser * @param <T> */ public static class GenericSetOfPrefixes<T> extends AggregateFunctionWithParameter<T> { /** SVUID */ private static final long serialVersionUID = -4164142474804296433L; /** SVUID */ private int length; /** * Creates a new instance. * * @param type * @param length */ private GenericSetOfPrefixes(DataType<T> type, int length) { super(type); this.length = length; } @Override public boolean acceptsParameter(String parameter) { try { return Integer.parseInt(parameter) > 0; } catch (Exception e) { return false; } } @Override public String aggregate(String[] values) { StringBuilder b = new StringBuilder(); for (int i = 0; i < values.length; i++) { int size = Math.min(length, values[i].length()); b.append(values[i].substring(0, size)); if (i < values.length - 1) { b.append("-"); } } return b.toString(); } @Override public String getParameter() { return String.valueOf(length); } @Override public AggregateFunctionWithParameter<T> newInstance(String parameter) { return new GenericSetOfPrefixes<T>(this.type, Integer.parseInt(parameter)); } @Override public String toLabel() { return "Set of prefixes"; } @Override public String toString(){ return "SetOfPrefixes[length="+length+"]"; } }; /** SVUID */ private static final long serialVersionUID = 3803318906010996154L; /** * Returns a builder for the given data type. * * @param <T> * @param type * @return */ public static <T> AggregateFunctionBuilder<T> forType(DataType<T> type){ return new AggregateFunctionBuilder<T>(type); } /** The data type. */ protected DataType<T> type; /** * Constructor. * * @param type */ protected AggregateFunction(DataType<T> type){ this.type = type; } /** * * This function returns an aggregate value. * * @param values * @return */ public abstract String aggregate (String[] values); /** * * Returns whether the function accepts a parameter. * * @return */ public boolean hasParameter() { return (this instanceof AggregateFunctionWithParameter); } /** * Returns a label. * * @return */ public abstract String toLabel(); @Override public abstract String toString (); }