// ============================================================================ // // Copyright (C) 2006-2016 Talend Inc. - www.talend.com // // This source code is available under agreement available at // %InstallDIR%\features\org.talend.rcp.branding.%PRODUCTNAME%\%PRODUCTNAME%license.txt // // You should have received a copy of the agreement // along with this program; if not, write to Talend SA // 9 rue Pages 92150 Suresnes, France // // ============================================================================ package org.talend.algorithms; import java.math.BigDecimal; import java.util.Collection; import java.util.Iterator; import java.util.Map; import java.util.Set; import java.util.TreeMap; /** * @author scorreia * * Utility class. */ public final class AlgoUtils { private AlgoUtils() { } /** * Method "incrementValueCounts". Stores in map the object as key, and counts how many times it appears. * * @param <T> the type of the key * @param value the value to store * @param valueToCount the map [object -> number of objects] * @return true if ok */ public static <T> boolean incrementValueCounts(T value, final Map<T, Long> valueToCount) { Long freq = valueToCount.get(value); if (freq == null) { freq = 0L; } freq++; valueToCount.put(value, freq); return freq > 0; } /** * DOC scorreia Comment method "getMedian". * * @param totalCount * @param valueToCount * @return */ public static double getMedian(long totalCount, final TreeMap<Object, Long> valueToCount) { return getQuantile(totalCount, valueToCount, 1, 2); } /** * Method "getQuantile". * * @param totalCount the total number of values * @param valueToCount the frequency table of each value * @param nthQuantile the nth q-quantile (e.g. 3 for the upper quartile) * @param qQuantile the q-quantile (e.g. 4 when we compute quartile) * @return the nth quantile */ public static double getQuantile(long totalCount, final Map<Object, Long> valueToCount, int nthQuantile, int qQuantile) { double p = (double) nthQuantile / qQuantile; Set<Object> keys = valueToCount.keySet(); Collection<Long> counts = valueToCount.values(); double kthValue = totalCount * p; // TDQ-8185: set the default value is, becasue sometimes there is no data, 0 is not exactly double localMedian = Double.NaN; long sumCount = 0; Iterator<Object> keyIterator = keys.iterator(); Number searchedKey = null; for (Long curValue : counts) { // MOD msjian 2011-12-16 TDQ-4164: there is something wrong about this compute searchedKey = new BigDecimal(String.valueOf(keyIterator.next()).trim()); // CAST here sumCount += curValue; // TDQ-4164 ~ if (sumCount >= kthValue) { // compute median if (totalCount % qQuantile != 0) { // odd number of value, take the middle localMedian = extracted(searchedKey).doubleValue(); } else { // even number of values localMedian = extracted(searchedKey).doubleValue(); if (sumCount - kthValue < 1) { // in case there are not many identical values Number nextKey = (Number) keyIterator.next(); // CAST here if (Double.isNaN(localMedian)) { localMedian = (extracted(nextKey).doubleValue()) / 2; // with averaging } else { localMedian = (localMedian + extracted(nextKey).doubleValue()) / 2; // with averaging } } } break; // we got it. } // else loop } return localMedian; } private static Number extracted(Number searchedKey) { if (searchedKey != null) { return searchedKey; } return null; } }