/*******************************************************************************
 * Copyright (c) 2017 École Polytechnique de Montréal
 *
 * All rights reserved. This program and the accompanying materials are
 * made available under the terms of the Eclipse Public License v1.0 which
 * accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *******************************************************************************/

package org.eclipse.tracecompass.analysis.timing.core.statistics;

import java.util.function.Function;

import org.eclipse.jdt.annotation.NonNull;
import org.eclipse.jdt.annotation.Nullable;
import org.eclipse.tracecompass.common.core.NonNullUtils;

/**
 * Class that calculates statistics on a certain type of object. If the object
 * is not a {@link Long}, a mapper function should be passed in the constructor
 * to retrieve the long value to make statistics on from an object.
 *
 * The statistics are maintained incrementally: each call to
 * {@link #update(Object)} or {@link #merge(IStatistics)} adjusts the minimum,
 * maximum, element count, mean, total and the running sum of squared
 * deviations used to derive the standard deviation.
 *
 * @author Bernd Hufmann
 * @author Geneviève Bastien
 *
 * @param <E>
 *            The type of object to calculate statistics on
 * @since 1.3
 */
public class Statistics<@NonNull E> implements IStatistics<E> {

    /** Extracts the long value to compute statistics on from an element */
    private final Function<E, @NonNull Long> fMapper;

    /** Element with the smallest mapped value seen so far, if any */
    private @Nullable E fMin = null;
    /** Element with the largest mapped value seen so far, if any */
    private @Nullable E fMax = null;
    private long fNbElements;
    private double fMean;
    /**
     * Reminder: this is not the variance itself but the running sum of the
     * squared differences from the mean, as per the online algorithm. It is
     * divided by (number of elements - 1) in {@link #getStdDev()}.
     */
    private double fVariance;
    private double fTotal;

    /**
     * Constructor for statistics on objects that are themselves {@link Long}
     * values. The default mapper throws an {@link IllegalStateException} when
     * it is applied to an object that is not a {@link Long}.
     */
    public Statistics() {
        this(e -> {
            if (!(e instanceof Long)) {
                throw new IllegalStateException("The object " + e + " is not a number"); //$NON-NLS-1$//$NON-NLS-2$
            }
            return (Long) e;
        });
    }

    /**
     * Constructor
     *
     * @param mapper
     *            A mapper function that takes an object to compute
     *            statistics for and returns the value to use for the
     *            statistics
     */
    public Statistics(Function<E, Long> mapper) {
        fNbElements = 0;
        fMean = 0.0;
        fVariance = 0.0;
        fTotal = 0.0;
        fMapper = mapper;
    }

    /**
     * Gets the mapped value of the minimum element.
     *
     * @return the minimum value, or {@link Long#MAX_VALUE} if no minimum
     *         element has been recorded yet
     */
    @Override
    public long getMin() {
        @Nullable E min = fMin;
        if (min == null) {
            return Long.MAX_VALUE;
        }
        return NonNullUtils.checkNotNull(fMapper.apply(min));
    }

    /**
     * Gets the mapped value of the maximum element.
     *
     * @return the maximum value, or {@link Long#MIN_VALUE} if no maximum
     *         element has been recorded yet
     */
    @Override
    public long getMax() {
        @Nullable E max = fMax;
        if (max == null) {
            return Long.MIN_VALUE;
        }
        return NonNullUtils.checkNotNull(fMapper.apply(max));
    }

    @Override
    public @Nullable E getMinObject() {
        return fMin;
    }

    @Override
    public @Nullable E getMaxObject() {
        return fMax;
    }

    @Override
    public long getNbElements() {
        return fNbElements;
    }

    @Override
    public double getMean() {
        return fMean;
    }

    /**
     * Gets the standard deviation of the elements. It uses the online algorithm
     * shown here <a href=
     * "https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Online_algorithm">
     * Wikipedia article of dec 3 2015 </a>
     *
     * @return the standard deviation of the elements, will return NaN if there
     *         are fewer than 3 elements
     */
    @Override
    public double getStdDev() {
        return fNbElements > 2 ? Math.sqrt(fVariance / (fNbElements - 1)) : Double.NaN;
    }

    @Override
    public double getTotal() {
        return fTotal;
    }

    /**
     * Adds one element to the statistics.
     *
     * On ties with the current minimum or maximum value, the new object
     * replaces the previously stored one.
     *
     * @param object
     *            The element to add
     */
    @Override
    public void update(E object) {
        Long value = NonNullUtils.checkNotNull(fMapper.apply(object));
        /*
         * Min and max are trivial, as well as number of segments
         */
        fMin = value <= getMin() ? object : fMin;
        fMax = value >= getMax() ? object : fMax;

        fNbElements++;
        /*
         * The running mean is not trivial, see the online algorithm referenced
         * in the javadoc of getStdDev().
         *
         * TODO: Check if saturated math would be required here
         */
        double delta = value - fMean;
        fMean += delta / fNbElements;
        /* Uses the delta from the old mean and the delta from the new mean */
        fVariance += delta * (value - fMean);

        fTotal += value;
    }

    /**
     * Merges another statistics object into this one.
     *
     * @param o
     *            The statistics to merge, it must be an instance of
     *            {@link Statistics}
     * @throws IllegalArgumentException
     *             if the other statistics is not a {@link Statistics}
     */
    @Override
    public void merge(IStatistics<E> o) {
        if (!(o instanceof Statistics)) {
            throw new IllegalArgumentException("Can only merge statistics of the same class"); //$NON-NLS-1$
        }
        Statistics<E> other = (Statistics<E>) o;
        if (other.fNbElements == 0) {
            /* Nothing to merge */
            return;
        } else if (fNbElements == 0) {
            /* This object is empty, take the other's values as is */
            copy(other);
        } else if (other.fNbElements == 1) {
            /* With a single element, min and max are the same object */
            update(NonNullUtils.checkNotNull(other.getMaxObject()));
        } else if (fNbElements == 1) {
            /*
             * Add this object's single element to a copy of the other
             * statistics, then take the result
             */
            Statistics<E> copyOther = new Statistics<>(fMapper);
            copyOther.copy(other);
            copyOther.update(NonNullUtils.checkNotNull(getMaxObject()));
            copy(copyOther);
        } else {
            internalMerge(other);
        }
    }

    /**
     * Combines the values of another non-empty statistics object into this
     * non-empty one.
     *
     * @param other
     *            The statistics to merge into this one
     */
    private void internalMerge(Statistics<E> other) {
        /*
         * TODO: Check if saturated math would be required in this method
         *
         * Min and max are trivial, as well as number of segments
         */
        long min = getMin();
        long max = getMax();
        fMin = other.getMin() <= min ? other.getMinObject() : fMin;
        fMax = other.getMax() >= max ? other.getMaxObject() : fMax;

        long oldNbSeg = fNbElements;
        double oldAverage = fMean;
        long otherSegments = other.getNbElements();
        double otherAverage = other.getMean();
        /* Read before any field is modified, in case other is this object */
        double otherSumSquares = other.fVariance;

        fNbElements += otherSegments;
        fTotal += other.getTotal();

        /*
         * Average is a weighted average
         */
        fMean = ((oldNbSeg * oldAverage) + (otherAverage * otherSegments)) / fNbElements;

        /*
         * fVariance holds the sum of the squared differences from the mean.
         * The sum for the union of two sets A and B is
         *
         * M2 = M2_A + M2_B + delta^2 * n_A * n_B / (n_A + n_B)
         *
         * where delta is the difference between the two means. Working with
         * the difference of the means, instead of the difference of their
         * squares, also avoids losing precision when the means are large
         * compared to the spread of the values.
         */
        double delta = otherAverage - oldAverage;
        double weight = ((double) oldNbSeg * otherSegments) / fNbElements;
        fVariance = fVariance + otherSumSquares + delta * delta * weight;
    }

    /**
     * Overwrites all the values of this object, except the mapper, with those
     * of another statistics object.
     *
     * @param copyOther
     *            The statistics to copy the values from
     */
    private void copy(Statistics<E> copyOther) {
        fMean = copyOther.fMean;
        fMax = copyOther.fMax;
        fMin = copyOther.fMin;
        fNbElements = copyOther.fNbElements;
        fTotal = copyOther.fTotal;
        fVariance = copyOther.fVariance;
    }

    @Override
    public String toString() {
        return this.getClass() + ": Avg: " + getMean() + " on " + getNbElements() + " elements"; //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
    }
}