/* * The MIT License * * Copyright (c) 2011 The Broad Institute * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal * in the Software without restriction, including without limitation the rights * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell * copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in * all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN * THE SOFTWARE. */ package picard.util; import java.math.BigDecimal; import java.util.Arrays; import static java.lang.Math.log1p; import static java.lang.Math.pow; /** * General math utilities * * @author Tim Fennell */ public class MathUtil { /** The double value closest to 1 while still being less than 1. */ public static final double MAX_PROB_BELOW_ONE = 0.9999999999999999d; /** * this function mimics the behavior of log_1p but resulting in log _base 10_ of (1+x) instead of natural log of 1+x */ public static double log10_1p(final double x){ return log1p(x)/LOG_10_MATH.log_of_base; } /** Calculated the mean of an array of doubles. */ public static double mean(final double[] in, final int start, final int stop) { double total = 0; for (int i = start; i < stop; ++i) { total += in[i]; } return total / (stop - start); } /** Calculated the standard deviation of an array of doubles. */ public static double stddev(final double[] in, final int start, final int length) { return stddev(in, start, length, mean(in, start, length)); } /** Calculated the standard deviation of an array of doubles. */ public static double stddev(final double[] in, final int start, final int stop, final double mean) { double total = 0; for (int i = start; i < stop; ++i) { total += (in[i] * in[i]); } return Math.sqrt((total / (stop - start)) - (mean * mean)); } public static int compare(final int v1, final int v2) { return (v1 < v2 ? -1 : (v1 == v2 ? 0 : 1)); } /** Calculate the median of an array of doubles. Assumes that the input is sorted */ public static double median(final double... in) { if (in.length == 0) { throw new IllegalArgumentException("Attempting to find the median of an empty array"); } final double[] data = Arrays.copyOf(in, in.length); Arrays.sort(data); final int middle = data.length / 2; return data.length % 2 == 1 ? data[middle] : (data[middle - 1] + data[middle]) / 2.0; } /** * Obtains percentage of two Longs * @param numerator dividend * @param denominator divisor * @return numerator/(double)denominator if both are non-null and denominator != 0, else returns null. */ public static Double percentageOrNull(final Long numerator, final Long denominator) { if (numerator != null && denominator != null && denominator != 0) { return numerator.doubleValue() / denominator.doubleValue(); } else { return null; } } /** * Round off the value to the specified precision. */ public static double round(final double num, final int precision) { BigDecimal bd = new BigDecimal(num); bd = bd.setScale(precision, BigDecimal.ROUND_HALF_UP); return bd.doubleValue(); } /** Returns the largest value stored in the array. */ public static double max(final double[] nums) { return nums[indexOfMax(nums)]; } /** * Returns the index of the largest element in the array. If there are multiple equal maxima then * the earliest one in the array is returned. */ public static int indexOfMax(final double[] nums) { double max = nums[0]; int index = 0; for (int i = 1; i < nums.length; ++i) { if (nums[i] > max) { max = nums[i]; index = i; } } return index; } /** Returns the smallest value stored in the array. */ public static double min(final double[] nums) { double min = nums[0]; for (int i = 1; i < nums.length; ++i) { if (nums[i] < min) min = nums[i]; } return min; } /** Mimic's R's seq() function to produce a sequence of equally spaced numbers. */ public static double[] seq(final double from, final double to, final double by) { if (from < to && by <= 0) return new double[0]; if (from > to && by >= 0) return new double[0]; final int values = 1 + (int) Math.floor((to - from) / by); final double[] results = new double[values]; BigDecimal value = new BigDecimal(from); BigDecimal increment = new BigDecimal(by); for (int i=0; i<values; ++i) { results[i] = value.doubleValue(); value = value.add(increment); } return results; } /** "Promotes" an int[] into a double array with the same values (or as close as precision allows). */ public static double[] promote(final int[] is) { final double[] ds = new double[is.length]; for (int i = 0; i < is.length; ++i) ds[i] = is[i]; return ds; } @Deprecated // use pNormalizeLogProbability instead (renamed) public static double[] logLikelihoodsToProbs(final double[] likelihoods) { return pNormalizeLogProbability(likelihoods); } /** * Takes a complete set of mutually exclusive logPosteriors and converts them to probabilities * that sum to 1 with as much fidelity as possible. Limits probabilities to be in the space: * 0.9999999999999999 >= p >= (1-0.9999999999999999)/(lPosteriors.length-1) */ public static double[] pNormalizeLogProbability(final double[] lPosterior) { // Note: bumping all the LLs so that the biggest is 300 ensures that we have the // widest range possible when unlogging them before one of them underflows. 10^300 is // near the maximum before you hit positive infinity. final double maxLikelihood = max(lPosterior); final double bump = 300 - maxLikelihood; final double[] tmp = new double[lPosterior.length]; double total = 0; for (int i = 0; i < lPosterior.length; ++i) { tmp[i] = pow(10, lPosterior[i] + bump); total += tmp[i]; } final double maxP = MAX_PROB_BELOW_ONE; final double minP = (1 - MAX_PROB_BELOW_ONE) / (tmp.length - 1); for (int i = 0; i < lPosterior.length; ++i) { tmp[i] /= total; if (tmp[i] > maxP) tmp[i] = maxP; else if (tmp[i] < minP) tmp[i] = minP; } return tmp; } /** Calculates the ratio of two arrays of the same length. */ public static double[] divide(final double[] numerators, final double[] denominators) { if (numerators.length != denominators.length) throw new IllegalArgumentException("Arrays must be of same length."); final int len = numerators.length; final double[] result = new double[len]; for (int i = 0; i < len; ++i) result[i] = numerators[i] / denominators[i]; return result; } /** * Takes a vector of numbers and converts it to a vector of probabilities * that sum to 1 with as much fidelity as possible. Limits probabilities to be in the space: * 0.9999999999999999 >= p >= (1-0.9999999999999999)/(likelihoods.length-1) */ public static double[] pNormalizeVector(final double[] pPosterior) { final double[] tmp = new double[pPosterior.length]; final double total = sum(pPosterior); final double maxP = MAX_PROB_BELOW_ONE; final double minP = (1 - MAX_PROB_BELOW_ONE) / (tmp.length - 1); for (int i = 0; i < pPosterior.length; ++i) { tmp[i] = pPosterior[i] / total; if (tmp[i] > maxP) tmp[i] = maxP; else if (tmp[i] < minP) tmp[i] = minP; } return tmp; } /** Calculates the product of two arrays of the same length. */ public static double[] multiply(final double[] lhs, final double[] rhs) { if (lhs.length != rhs.length) throw new IllegalArgumentException("Arrays must be of same length."); final int len = lhs.length; final double[] result = new double[len]; for (int i = 0; i < len; ++i) result[i] = lhs[i] * rhs[i]; return result; } /** calculates the sum of the arrays as a third array. */ public static double[] sum(final double[] lhs, final double[] rhs) { if (lhs.length != rhs.length) throw new IllegalArgumentException("Arrays must be of same length."); final int len = lhs.length; final double [] result = new double[len]; for (int i = 0; i < len; ++i) result[i] = lhs[i] + rhs[i]; return result; } /** Returns the sum of the elements in the array. */ public static double sum(final double[] arr) { double result = 0; for (final double next : arr) result += next; return result; } /** Returns the sum of the elements in the array starting with start and ending before stop. */ public static long sum(final long[] arr, final int start, final int stop) { long result = 0; for (int i=start; i<stop; ++i) { result += arr[i]; } return result; } public static final LogMath LOG_2_MATH = new LogMath(2); public static final LogMath NATURAL_LOG_MATH = new LogMath(Math.exp(1)) { @Override public double getLogValue(final double nonLogValue) { return Math.log(nonLogValue); } }; public static final LogMath LOG_10_MATH = new LogMath(10) { @Override public double getLogValue(final double nonLogValue) { return Math.log10(nonLogValue); } }; /** * A collection of common math operations that work with log values. To use it, pass values from log space, the operation will be * computed in non-log space, and a value in log space will be returned. */ public static class LogMath { private final double base; private final double log_of_base; public double getLog_of_base() { return log_of_base; } private LogMath(final double base) { this.base = base; this.log_of_base=Math.log(base); } /** Returns the decimal representation of the provided log values. */ public double getNonLogValue(final double logValue) { return Math.pow(base, logValue); } /** Returns the log-representation of the provided decimal value. */ public double getLogValue(final double nonLogValue) { return Math.log(nonLogValue) / log_of_base; } /** Computes the mean of the provided log values. */ public double mean(final double... logValues) { return sum(logValues) - getLogValue(logValues.length); } /** Computes the sum of the provided log values. */ public double sum(final double... logValues) { // Avoid overflow via scaling. final double scalingFactor = max(logValues); double simpleAdditionResult = 0; for (final double v : logValues) { simpleAdditionResult += getNonLogValue(v - scalingFactor); } return getLogValue(simpleAdditionResult) + scalingFactor; } /** Computes the sum of the provided log values. */ public double product(final double... logValues) { return MathUtil.sum(logValues); } } }