package edu.stanford.nlp.math;
import java.io.IOException;
import java.text.NumberFormat;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Random;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.RuntimeInterruptedException;
import edu.stanford.nlp.util.StringUtils;
/**
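* Static utility methods for working with primitive arrays as numeric vectors:
* element-wise arithmetic, norms, log-space sums, normalization and simple
* sample statistics.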
*
* @author Teg Grenager
*/
public class ArrayMath {
// Shared default random number generator; used by sampleFromDistribution(double[]).
private static final Random rand = new Random();
// Private constructor: the class only offers static methods.
private ArrayMath() { } // not instantiable
// BASIC INFO -----------------------------------------------------------------
/**
 * Returns the number of entries in a vector.
 *
 * @param v the vector to measure
 * @return the length of {@code v}
 */
public static int numRows(double[] v) {
  final int rows = v.length;
  return rows;
}
// GENERATION -----------------------------------------------------------------
/**
 * Generates the consecutive integers from {@code start} (inclusive) to
 * {@code end} (exclusive), similar to the Python {@code range()} builtin.
 * As in Python, an empty array is returned when {@code end <= start}.
 *
 * @param start the first value of the range (inclusive)
 * @param end the upper bound of the range (exclusive)
 * @return the integers in [start...end), or an empty array if {@code end <= start}
 * @throws IllegalArgumentException if the range would hold more than
 *         {@code Integer.MAX_VALUE} values
 */
public static int[] range(int start, int end) {
  if (end <= start) {
    return new int[0];
  }
  // compute the size in long arithmetic: end - start can overflow an int
  long size = (long) end - start;
  if (size > Integer.MAX_VALUE) {
    throw new IllegalArgumentException("Range too large: [" + start + ", " + end + ")");
  }
  int[] range = new int[(int) size];
  for (int i = 0; i < range.length; ++i) {
    range[i] = start + i;
  }
  return range;
}
// CASTS ----------------------------------------------------------------------
/**
 * Converts an array of doubles to an array of floats by narrowing each entry.
 *
 * @param a the values to convert
 * @return a new float array of the same length as {@code a}
 */
public static float[] doubleArrayToFloatArray(double[] a) {
  float[] narrowed = new float[a.length];
  int pos = 0;
  for (double value : a) {
    narrowed[pos++] = (float) value;
  }
  return narrowed;
}
/**
 * Converts an array of floats to an array of doubles by widening each entry.
 *
 * @param a the values to convert
 * @return a new double array of the same length as {@code a}
 */
public static double[] floatArrayToDoubleArray(float[] a) {
  double[] widened = new double[a.length];
  int pos = 0;
  for (float value : a) {
    widened[pos++] = value;
  }
  return widened;
}
/**
 * Converts a (possibly ragged) matrix of floats to a matrix of doubles.
 *
 * @param a the rows to convert
 * @return a new matrix with the same row lengths as {@code a}
 */
public static double[][] floatArrayToDoubleArray(float[][] a) {
  double[][] widened = new double[a.length][];
  int r = 0;
  for (float[] row : a) {
    double[] out = new double[row.length];
    for (int c = 0; c < out.length; c++) {
      out[c] = row[c];
    }
    widened[r++] = out;
  }
  return widened;
}
/**
 * Converts a (possibly ragged) matrix of doubles to a matrix of floats.
 *
 * @param a the rows to convert
 * @return a new matrix with the same row lengths as {@code a}
 */
public static float[][] doubleArrayToFloatArray(double[][] a) {
  float[][] narrowed = new float[a.length][];
  int r = 0;
  for (double[] row : a) {
    float[] out = new float[row.length];
    for (int c = 0; c < out.length; c++) {
      out[c] = (float) row[c];
    }
    narrowed[r++] = out;
  }
  return narrowed;
}
// OPERATIONS ON AN ARRAY - NONDESTRUCTIVE
/**
 * Returns a new array holding {@code Math.exp} of each entry; the input is not modified.
 *
 * @param a the exponents
 * @return a new array of the same length as {@code a}
 */
public static double[] exp(double[] a) {
  double[] powers = new double[a.length];
  int pos = 0;
  for (double value : a) {
    powers[pos++] = Math.exp(value);
  }
  return powers;
}
/**
 * Returns a new array holding {@code Math.log} of each entry; the input is not modified.
 *
 * @param a the values to take the natural logarithm of
 * @return a new array of the same length as {@code a}
 */
public static double[] log(double[] a) {
  double[] logs = new double[a.length];
  int pos = 0;
  for (double value : a) {
    logs[pos++] = Math.log(value);
  }
  return logs;
}
// OPERATIONS ON AN ARRAY - DESTRUCTIVE
/**
 * Replaces each entry of the array with its {@code Math.exp}.
 *
 * @param a the array to overwrite
 */
public static void expInPlace(double[] a) {
  int i = 0;
  while (i < a.length) {
    a[i] = Math.exp(a[i]);
    i++;
  }
}
/**
 * Replaces each entry of the array with its natural logarithm ({@code Math.log}).
 *
 * @param a the array to overwrite
 */
public static void logInPlace(double[] a) {
  int i = 0;
  while (i < a.length) {
    a[i] = Math.log(a[i]);
    i++;
  }
}
/**
 * Computes the softmax of an array: each entry is exponentiated and the
 * results are divided by their sum. The input is not modified.
 * <p>
 * The largest finite input is subtracted from every entry before
 * exponentiating. This leaves the mathematical result unchanged but keeps
 * {@code Math.exp} from overflowing to infinity (or underflowing to zero for
 * every entry), which would otherwise produce NaN results.
 *
 * @param scales the unnormalized scores
 * @return a new array of the same length as {@code scales}
 */
public static double[] softmax(double[] scales) {
  double[] newScales = new double[scales.length];
  double max = Double.NEGATIVE_INFINITY;
  for (double s : scales) {
    if (s > max) {
      max = s;
    }
  }
  // An infinite max cannot be used as a shift (inf - inf is NaN); fall back to no shift.
  double shift = Double.isInfinite(max) ? 0.0 : max;
  double sum = 0;
  for (int i = 0; i < scales.length; i++) {
    newScales[i] = Math.exp(scales[i] - shift);
    sum += newScales[i];
  }
  for (int i = 0; i < scales.length; i++) {
    newScales[i] /= sum;
  }
  return newScales;
}
// OPERATIONS WITH SCALAR - DESTRUCTIVE
/**
 * Adds the scalar {@code b} to every entry of {@code a}, overwriting the array.
 *
 * @param a The array to modify
 * @param b The amount added to each entry
 */
public static void addInPlace(double[] a, double b) {
  for (int idx = 0; idx < a.length; idx++) {
    a[idx] += b;
  }
}
/**
 * Adds the scalar {@code b} to every entry of {@code a}, overwriting the array.
 * Each sum is formed in double precision and then narrowed to float.
 *
 * @param a The array to modify
 * @param b The amount added to each entry
 */
public static void addInPlace(float[] a, double b) {
  for (int idx = 0; idx < a.length; idx++) {
    // compound assignment on a float narrows the double result back to float
    a[idx] += b;
  }
}
/**
 * Adds {@code c} times the array {@code b} to the array {@code a}, in place.
 *
 * @param a the array that is updated
 * @param b the array that is scaled and added; read for every index of {@code a}
 * @param c the scale applied to {@code b}
 */
public static void addMultInPlace(double[] a, double[] b, double c) {
  int idx = 0;
  while (idx < a.length) {
    a[idx] = a[idx] + c * b[idx];
    idx++;
  }
}
/**
 * Multiplies every entry of {@code a} by the scalar {@code b}, in place.
 *
 * @param a the array to modify
 * @param b the scale factor
 */
public static void multiplyInPlace(double[] a, double b) {
  for (int idx = 0; idx < a.length; idx++) {
    a[idx] *= b;
  }
}
/**
 * Multiplies every entry of {@code a} by the scalar {@code b}, in place.
 * Each product is formed in double precision and then narrowed to float.
 *
 * @param a the array to modify
 * @param b the scale factor
 */
public static void multiplyInPlace(float[] a, double b) {
  for (int idx = 0; idx < a.length; idx++) {
    // compound assignment on a float narrows the double result back to float
    a[idx] *= b;
  }
}
/**
 * Divides every entry of {@code a} by the scalar {@code b}, in place.
 *
 * @param a the array to modify
 * @param b the divisor
 */
public static void divideInPlace(double[] a, double b) {
  for (int idx = 0; idx < a.length; idx++) {
    a[idx] /= b;
  }
}
/**
 * Raises every entry of {@code a} to the power {@code c}, in place.
 *
 * @param a the array to modify
 * @param c the exponent
 */
public static void powInPlace(double[] a, double c) {
  int idx = 0;
  while (idx < a.length) {
    a[idx] = Math.pow(a[idx], c);
    idx++;
  }
}
/**
 * Raises every entry of {@code a} to the power {@code c}, in place.
 * The power is computed with {@code Math.pow} and narrowed to float.
 *
 * @param a the array to modify
 * @param c the exponent
 */
public static void powInPlace(float[] a, float c) {
  int idx = 0;
  while (idx < a.length) {
    a[idx] = (float) Math.pow(a[idx], c);
    idx++;
  }
}
// OPERATIONS WITH SCALAR - NONDESTRUCTIVE
/**
 * Returns a new array whose entries are those of {@code a} plus the scalar {@code c}.
 *
 * @param a the input array (not modified)
 * @param c the amount added to each entry
 * @return a new array of the same length as {@code a}
 */
public static double[] add(double[] a, double c) {
  double[] shifted = new double[a.length];
  int pos = 0;
  for (double value : a) {
    shifted[pos++] = value + c;
  }
  return shifted;
}
/**
 * Returns a new array whose entries are those of {@code a} plus the scalar {@code c}.
 * Each sum is formed in double precision and then narrowed to float.
 *
 * @param a the input array (not modified)
 * @param c the amount added to each entry
 * @return a new array of the same length as {@code a}
 */
public static float[] add(float[] a, double c) {
  float[] shifted = new float[a.length];
  int pos = 0;
  for (float value : a) {
    shifted[pos++] = (float) (value + c);
  }
  return shifted;
}
/**
 * Returns a new array whose entries are those of {@code a} multiplied by {@code c}.
 *
 * @param a the input array (not modified)
 * @param c the scale factor
 * @return a new array of the same length as {@code a}
 */
public static double[] multiply(double[] a, double c) {
  double[] scaled = new double[a.length];
  int pos = 0;
  for (double value : a) {
    scaled[pos++] = value * c;
  }
  return scaled;
}
/**
 * Returns a new array whose entries are those of {@code a} multiplied by {@code c}.
 *
 * @param a the input array (not modified)
 * @param c the scale factor
 * @return a new array of the same length as {@code a}
 */
public static float[] multiply(float[] a, float c) {
  float[] scaled = new float[a.length];
  int pos = 0;
  for (float value : a) {
    scaled[pos++] = value * c;
  }
  return scaled;
}
/**
 * Returns a new array holding each entry of {@code a} raised to the power {@code c}.
 *
 * @param a the bases (not modified)
 * @param c the exponent
 * @return a new array of the same length as {@code a}
 */
public static double[] pow(double[] a, double c) {
  double[] powers = new double[a.length];
  int pos = 0;
  for (double base : a) {
    powers[pos++] = Math.pow(base, c);
  }
  return powers;
}
/**
 * Returns a new array holding each entry of {@code a} raised to the power {@code c}.
 * The power is computed with {@code Math.pow} and narrowed to float.
 *
 * @param a the bases (not modified)
 * @param c the exponent
 * @return a new array of the same length as {@code a}
 */
public static float[] pow(float[] a, float c) {
  float[] powers = new float[a.length];
  int pos = 0;
  for (float base : a) {
    powers[pos++] = (float) Math.pow(base, c);
  }
  return powers;
}
// OPERATIONS WITH TWO ARRAYS - DESTRUCTIVE
/**
 * Adds {@code from} to {@code to} element by element, storing the sums in {@code to}.
 *
 * @param to the array that is updated
 * @param from the array that is added
 * @throws RuntimeException if the two arrays differ in length
 */
public static void pairwiseAddInPlace(float[] to, float[] from) {
  final int n = to.length;
  if (n != from.length) {
    throw new RuntimeException("to length:" + to.length + " from length:" + from.length);
  }
  for (int idx = 0; idx < n; idx++) {
    to[idx] += from[idx];
  }
}
/**
 * Adds {@code from} to {@code to} element by element, storing the sums in {@code to}.
 *
 * @param to the array that is updated
 * @param from the array that is added
 * @throws RuntimeException if the two arrays differ in length
 */
public static void pairwiseAddInPlace(double[] to, double[] from) {
  final int n = to.length;
  if (n != from.length) {
    throw new RuntimeException("to length:" + to.length + " from length:" + from.length);
  }
  for (int idx = 0; idx < n; idx++) {
    to[idx] += from[idx];
  }
}
/**
 * Adds the int array {@code from} to {@code to} element by element, storing the sums in {@code to}.
 *
 * @param to the array that is updated
 * @param from the array that is added
 * @throws RuntimeException if the two arrays differ in length
 */
public static void pairwiseAddInPlace(double[] to, int[] from) {
  final int n = to.length;
  if (n != from.length) {
    throw new RuntimeException();
  }
  for (int idx = 0; idx < n; idx++) {
    to[idx] += from[idx];
  }
}
/**
 * Adds the short array {@code from} to {@code to} element by element, storing the sums in {@code to}.
 *
 * @param to the array that is updated
 * @param from the array that is added
 * @throws RuntimeException if the two arrays differ in length
 */
public static void pairwiseAddInPlace(double[] to, short[] from) {
  final int n = to.length;
  if (n != from.length) {
    throw new RuntimeException();
  }
  for (int idx = 0; idx < n; idx++) {
    to[idx] += from[idx];
  }
}
/**
 * Subtracts {@code from} from {@code to} element by element, storing the differences in {@code to}.
 *
 * @param to the array that is updated
 * @param from the array that is subtracted
 * @throws RuntimeException if the two arrays differ in length
 */
public static void pairwiseSubtractInPlace(double[] to, double[] from) {
  final int n = to.length;
  if (n != from.length) {
    throw new RuntimeException();
  }
  for (int idx = 0; idx < n; idx++) {
    to[idx] -= from[idx];
  }
}
/**
 * Adds {@code fromScale * from[i]} to each {@code to[i]}, storing the result in {@code to}.
 *
 * @param to the array that is updated
 * @param from the array that is scaled and added
 * @param fromScale the scale applied to {@code from}
 * @throws RuntimeException if the two arrays differ in length
 */
public static void pairwiseScaleAddInPlace(double[] to, double[] from, double fromScale) {
  final int n = to.length;
  if (n != from.length) {
    throw new RuntimeException();
  }
  for (int idx = 0; idx < n; idx++) {
    to[idx] += fromScale * from[idx];
  }
}
// OPERATIONS WITH TWO ARRAYS - NONDESTRUCTIVE
/**
 * Returns the element-wise sum of two int arrays. The result has the length
 * of {@code a}; {@code b} is read at every index of {@code a}.
 *
 * @param a the first addend
 * @param b the second addend
 * @return a new array of sums
 */
public static int[] pairwiseAdd(int[] a, int[] b) {
  final int n = a.length;
  int[] sums = new int[n];
  for (int idx = 0; idx < n; idx++) {
    sums[idx] = b[idx] + a[idx];
  }
  return sums;
}
/**
 * Returns the element-wise sum of two double arrays. The result has the
 * length of {@code a}; where {@code b} is shorter than {@code a}, the
 * remaining entries of {@code a} are copied unchanged.
 *
 * @param a the first addend; determines the result length
 * @param b the second addend
 * @return a new array of sums
 */
public static double[] pairwiseAdd(double[] a, double[] b) {
  double[] sums = new double[a.length];
  for (int idx = 0; idx < a.length; idx++) {
    sums[idx] = (idx < b.length) ? a[idx] + b[idx] : a[idx];
  }
  return sums;
}
/**
 * Returns the element-wise sum of two float arrays. The result has the length
 * of {@code a}; {@code b} is read at every index of {@code a}.
 *
 * @param a the first addend
 * @param b the second addend
 * @return a new array of sums
 */
public static float[] pairwiseAdd(float[] a, float[] b) {
  final int n = a.length;
  float[] sums = new float[n];
  for (int idx = 0; idx < n; idx++) {
    sums[idx] = b[idx] + a[idx];
  }
  return sums;
}
/**
 * Returns a new array holding {@code a[i] + bScale * b[i]} for every index of {@code a}.
 *
 * @param a the base array
 * @param b the array that is scaled and added
 * @param bScale the scale applied to {@code b}
 * @return a new array of the same length as {@code a}
 */
public static double[] pairwiseScaleAdd(double[] a, double[] b, double bScale) {
  final int n = a.length;
  double[] combined = new double[n];
  for (int idx = 0; idx < n; idx++) {
    final double scaled = bScale * b[idx];
    combined[idx] = a[idx] + scaled;
  }
  return combined;
}
/**
 * Returns a new array holding {@code a[i] - b[i]} for every index of {@code a}.
 *
 * @param a the minuend
 * @param b the subtrahend
 * @return a new array of the same length as {@code a}
 */
public static double[] pairwiseSubtract(double[] a, double[] b) {
  final int n = a.length;
  double[] diffs = new double[n];
  int idx = 0;
  while (idx < n) {
    diffs[idx] = a[idx] - b[idx];
    idx++;
  }
  return diffs;
}
/**
 * Returns a new array holding {@code a[i] - b[i]} for every index of {@code a}.
 *
 * @param a the minuend
 * @param b the subtrahend
 * @return a new array of the same length as {@code a}
 */
public static float[] pairwiseSubtract(float[] a, float[] b) {
  final int n = a.length;
  float[] diffs = new float[n];
  int idx = 0;
  while (idx < n) {
    diffs[idx] = a[idx] - b[idx];
    idx++;
  }
  return diffs;
}
/**
 * Computes the dot product of two arrays of equal length.
 *
 * @param a the first vector
 * @param b the second vector
 * @return the sum of {@code a[i] * b[i]} over all indices
 * @throws RuntimeException if the two arrays differ in length
 */
public static double dotProduct(double[] a, double[] b) {
  final int n = a.length;
  if (n != b.length) {
    throw new RuntimeException("Can't calculate dot product of multiple different lengths: a.length=" + a.length + " b.length=" + b.length);
  }
  double acc = 0;
  int idx = 0;
  while (idx < n) {
    acc += a[idx] * b[idx];
    idx++;
  }
  return acc;
}
/**
 * Returns the element-wise product of two arrays of equal length.
 *
 * @param a the first factor
 * @param b the second factor
 * @return a new array holding {@code a[i] * b[i]}
 * @throws RuntimeException if the two arrays differ in length
 */
public static double[] pairwiseMultiply(double[] a, double[] b) {
  final int n = a.length;
  if (n != b.length) {
    throw new RuntimeException("Can't pairwise multiple different lengths: a.length=" + a.length + " b.length=" + b.length);
  }
  double[] products = new double[n];
  int idx = 0;
  while (idx < n) {
    products[idx] = a[idx] * b[idx];
    idx++;
  }
  return products;
}
/**
 * Returns the element-wise product of two arrays of equal length.
 *
 * @param a the first factor
 * @param b the second factor
 * @return a new array holding {@code a[i] * b[i]}
 * @throws RuntimeException if the two arrays differ in length
 */
public static float[] pairwiseMultiply(float[] a, float[] b) {
  final int n = a.length;
  if (n != b.length) {
    throw new RuntimeException();
  }
  float[] products = new float[n];
  int idx = 0;
  while (idx < n) {
    products[idx] = a[idx] * b[idx];
    idx++;
  }
  return products;
}
/**
 * Writes the element-wise product of {@code a} and {@code b} into {@code result}.
 * Only the lengths of {@code a} and {@code b} are compared; one product is
 * written for every index of {@code result}.
 *
 * @param a the first factor
 * @param b the second factor
 * @param result the array that receives {@code a[i] * b[i]}
 * @throws RuntimeException if {@code a} and {@code b} differ in length
 */
public static void pairwiseMultiply(double[] a, double[] b, double[] result) {
  if (a.length != b.length) {
    throw new RuntimeException();
  }
  final int n = result.length;
  int idx = 0;
  while (idx < n) {
    result[idx] = a[idx] * b[idx];
    idx++;
  }
}
/**
 * Writes the element-wise product of {@code a} and {@code b} into {@code result}.
 * Only the lengths of {@code a} and {@code b} are compared; one product is
 * written for every index of {@code result}.
 *
 * @param a the first factor
 * @param b the second factor
 * @param result the array that receives {@code a[i] * b[i]}
 * @throws RuntimeException if {@code a} and {@code b} differ in length
 */
public static void pairwiseMultiply(float[] a, float[] b, float[] result) {
  if (a.length != b.length) {
    throw new RuntimeException();
  }
  final int n = result.length;
  int idx = 0;
  while (idx < n) {
    result[idx] = a[idx] * b[idx];
    idx++;
  }
}
/**
 * Divides {@code a} by {@code b} element by element, storing the quotients in {@code a}.
 *
 * @param a the dividends; overwritten with the quotients
 * @param b the divisors
 * @throws RuntimeException if the two arrays differ in length
 */
public static void pairwiseDivideInPlace(double[] a, double[] b) {
  final int n = a.length;
  if (n != b.length) {
    throw new RuntimeException();
  }
  for (int idx = 0; idx < n; idx++) {
    a[idx] /= b[idx];
  }
}
// ERROR CHECKING
/**
 * Tests whether any entry of the array is NaN.
 *
 * @param a the array to scan
 * @return {@code true} if at least one entry is NaN
 */
public static boolean hasNaN(double[] a) {
  int idx = 0;
  while (idx < a.length) {
    if (Double.isNaN(a[idx])) {
      return true;
    }
    idx++;
  }
  return false;
}
/**
 * Tests whether any entry of the array is positive or negative infinity.
 *
 * @param a the array to scan
 * @return {@code true} if at least one entry is infinite
 */
public static boolean hasInfinite(double[] a) {
  int idx = 0;
  while (idx < a.length) {
    if (Double.isInfinite(a[idx])) {
      return true;
    }
    idx++;
  }
  return false;
}
/**
 * Tests whether any entry of the array is NaN.
 *
 * @param a the array to scan
 * @return {@code true} if at least one entry is NaN
 */
public static boolean hasNaN(float[] a) {
  int idx = 0;
  while (idx < a.length) {
    if (Float.isNaN(a[idx])) {
      return true;
    }
    idx++;
  }
  return false;
}
// methods for filtering vectors ------------------------------------------
/**
 * Counts the NaN entries of a vector.
 *
 * @param v the vector to scan
 * @return the number of entries that are NaN
 */
public static int countNaN(double[] v) {
  int count = 0;
  for (int idx = 0; idx < v.length; idx++) {
    count += Double.isNaN(v[idx]) ? 1 : 0;
  }
  return count;
}
/**
 * Returns a copy of the vector with all NaN entries removed; the order of
 * the remaining entries is preserved.
 *
 * @param v the vector to filter (not modified)
 * @return a new array containing the non-NaN entries of {@code v}
 */
public static double[] filterNaN(double[] v) {
  // first pass: size the output
  int kept = 0;
  for (int idx = 0; idx < v.length; idx++) {
    if (!Double.isNaN(v[idx])) {
      kept++;
    }
  }
  // second pass: copy the survivors
  double[] filtered = new double[kept];
  int next = 0;
  for (int idx = 0; idx < v.length; idx++) {
    final double value = v[idx];
    if (!Double.isNaN(value)) {
      filtered[next++] = value;
    }
  }
  return filtered;
}
/**
 * Counts the entries of a vector that are positive or negative infinity.
 *
 * @param v the vector to scan
 * @return the number of infinite entries
 */
public static int countInfinite(double[] v) {
  int count = 0;
  for (int idx = 0; idx < v.length; idx++) {
    count += Double.isInfinite(v[idx]) ? 1 : 0;
  }
  return count;
}
/**
 * Counts the entries of a vector that are not equal to 0.0.
 *
 * @param v the vector to scan
 * @return the number of entries for which {@code entry != 0.0}
 */
public static int countNonZero(double[] v) {
  int count = 0;
  for (int idx = 0; idx < v.length; idx++) {
    if (v[idx] != 0.0) {
      count++;
    }
  }
  return count;
}
/**
 * Counts the entries of a vector whose absolute value is strictly below {@code epsilon}.
 *
 * @param v the vector to scan
 * @param epsilon the exclusive bound on the absolute value
 * @return the number of entries with {@code Math.abs(entry) < epsilon}
 */
public static int countCloseToZero(double[] v, double epsilon) {
  int count = 0;
  for (int idx = 0; idx < v.length; idx++) {
    final double magnitude = Math.abs(v[idx]);
    if (magnitude < epsilon) {
      count++;
    }
  }
  return count;
}
/**
 * Counts the entries of a vector that are strictly greater than 0.0.
 *
 * @param v the vector to scan
 * @return the number of positive entries
 */
public static int countPositive(double[] v) {
  int count = 0;
  for (int idx = 0; idx < v.length; idx++) {
    count += (v[idx] > 0.0) ? 1 : 0;
  }
  return count;
}
/**
 * Counts the entries of a vector that are strictly less than 0.0.
 *
 * @param v the vector to scan
 * @return the number of negative entries
 */
public static int countNegative(double[] v) {
  int count = 0;
  for (int idx = 0; idx < v.length; idx++) {
    count += (v[idx] < 0.0) ? 1 : 0;
  }
  return count;
}
/**
 * Returns a copy of the vector with all infinite entries removed; the order
 * of the remaining entries is preserved.
 *
 * @param v the vector to filter (not modified)
 * @return a new array containing the non-infinite entries of {@code v}
 */
public static double[] filterInfinite(double[] v) {
  // first pass: size the output
  int kept = 0;
  for (int idx = 0; idx < v.length; idx++) {
    if (!Double.isInfinite(v[idx])) {
      kept++;
    }
  }
  // second pass: copy the survivors
  double[] filtered = new double[kept];
  int next = 0;
  for (int idx = 0; idx < v.length; idx++) {
    final double value = v[idx];
    if (!Double.isInfinite(value)) {
      filtered[next++] = value;
    }
  }
  return filtered;
}
/**
 * Returns a copy of the vector with all NaN and infinite entries removed;
 * the order of the remaining entries is preserved.
 *
 * @param v the vector to filter (not modified)
 * @return a new array containing only the finite entries of {@code v}
 */
public static double[] filterNaNAndInfinite(double[] v) {
  // first pass: size the output
  int kept = 0;
  for (int idx = 0; idx < v.length; idx++) {
    final double value = v[idx];
    if (!Double.isNaN(value) && !Double.isInfinite(value)) {
      kept++;
    }
  }
  // second pass: copy the survivors
  double[] finite = new double[kept];
  int next = 0;
  for (int idx = 0; idx < v.length; idx++) {
    final double value = v[idx];
    if (!Double.isNaN(value) && !Double.isInfinite(value)) {
      finite[next++] = value;
    }
  }
  return finite;
}
// VECTOR PROPERTIES
/**
 * Returns the sum of all entries of an array of numbers.
 *
 * @param a the values to add up
 * @return the sum over the whole array
 */
public static double sum(double[] a) {
  final int end = a.length;
  return sum(a, 0, end);
}
/**
 * Returns the sum of the entries of an array between {@code fromIndex},
 * inclusive, and {@code toIndex}, exclusive.
 *
 * @param a the values to add up
 * @param fromIndex the first index included in the sum
 * @param toIndex the index after the last one included in the sum
 * @return the partial sum, or 0 if {@code fromIndex >= toIndex}
 */
public static double sum(double[] a, int fromIndex, int toIndex) {
  double total = 0.0;
  int idx = fromIndex;
  while (idx < toIndex) {
    total += a[idx];
    idx++;
  }
  return total;
}
/**
 * Returns the sum of all entries of an int array, accumulated as an int.
 *
 * @param a the values to add up
 * @return the sum of the entries
 */
public static int sum(int[] a) {
  int total = 0;
  for (int idx = 0; idx < a.length; idx++) {
    total += a[idx];
  }
  return total;
}
/**
 * Returns the sum of all entries of a float array, accumulated as a float.
 *
 * @param a the values to add up
 * @return the sum of the entries
 */
public static float sum(float[] a) {
  float total = 0.0F;
  for (int idx = 0; idx < a.length; idx++) {
    total += a[idx];
  }
  return total;
}
/**
 * Returns the sum of all entries of a (possibly ragged) int matrix.
 *
 * @param a the rows to add up
 * @return the sum over every entry of every row
 */
public static int sum(int[][] a) {
  int total = 0;
  for (int r = 0; r < a.length; r++) {
    final int[] row = a[r];
    for (int c = 0; c < row.length; c++) {
      total += row[c];
    }
  }
  return total;
}
/**
 * Returns the diagonal entries {@code a[i][i]} of the given (square) matrix.
 *
 * @param a the matrix to read
 * @return a new array with one diagonal entry per row of {@code a}
 */
public static int[] diag(int[][] a) {
  int[] diagonal = new int[a.length];
  int k = 0;
  for (int[] row : a) {
    diagonal[k] = row[k];
    k++;
  }
  return diagonal;
}
/**
 * Returns the arithmetic mean of an array: its sum divided by its length.
 *
 * @param a the values to average
 * @return the sum of {@code a} divided by {@code a.length}
 */
public static double average(double[] a) {
  final int count = a.length;
  return ArrayMath.sum(a) / count;
}
/**
 * Computes the mean incrementally, updating a running average with each
 * entry instead of forming the full sum. This version avoids any
 * possibility of overflow.
 *
 * @param a the values to average
 * @return the running average after the last entry, or 0.0 for an empty array
 */
public static double iterativeAverage(double[] a) {
  double running = 0.0;
  for (int seen = 1; seen <= a.length; seen++) {
    final double x = a[seen - 1];
    running += (x - running) / seen;
  }
  return running;
}
/**
 * Computes the inf-norm of a vector, i.e. the largest absolute value of
 * any entry. The search starts from negative infinity, which is what an
 * empty array yields.
 *
 * @param a Array of double
 * @return inf-norm of a
 */
public static double norm_inf(double[] a) {
  double largest = Double.NEGATIVE_INFINITY;
  for (int idx = 0; idx < a.length; idx++) {
    final double magnitude = Math.abs(a[idx]);
    if (magnitude > largest) {
      largest = magnitude;
    }
  }
  return largest;
}
/**
 * Computes the inf-norm of a vector, i.e. the largest absolute value of
 * any entry. The search starts from negative infinity, which is what an
 * empty array yields.
 *
 * @param a Array of float
 * @return inf-norm of a
 */
public static double norm_inf(float[] a) {
  double largest = Double.NEGATIVE_INFINITY;
  for (int idx = 0; idx < a.length; idx++) {
    final float magnitude = Math.abs(a[idx]);
    if (magnitude > largest) {
      largest = magnitude;
    }
  }
  return largest;
}
/**
 * Computes the 1-norm of a vector: the sum of the absolute values of its entries.
 *
 * @param a A vector of double
 * @return 1-norm of a
 */
public static double norm_1(double[] a) {
  double total = 0;
  for (int idx = 0; idx < a.length; idx++) {
    final double value = a[idx];
    if (value < 0) {
      total -= value;
    } else {
      total += value;
    }
  }
  return total;
}
/**
 * Computes the 1-norm of a vector: the sum of the absolute values of its
 * entries, accumulated in double precision.
 *
 * @param a A vector of floats
 * @return 1-norm of a
 */
public static double norm_1(float[] a) {
  double total = 0;
  for (int idx = 0; idx < a.length; idx++) {
    final float value = a[idx];
    if (value < 0) {
      total -= value;
    } else {
      total += value;
    }
  }
  return total;
}
/**
 * Computes the 2-norm of a vector: the square root of the sum of squared entries.
 *
 * @param a A vector of double
 * @return Euclidean norm of a
 */
public static double norm(double[] a) {
  double sumOfSquares = 0;
  for (int idx = 0; idx < a.length; idx++) {
    final double value = a[idx];
    sumOfSquares += value * value;
  }
  return Math.sqrt(sumOfSquares);
}
/**
 * Computes the 2-norm of a vector: the square root of the sum of squared
 * entries. Each square is a float product; the sum is kept as a double.
 *
 * @param a A vector of floats
 * @return Euclidean norm of a
 */
public static double norm(float[] a) {
  double sumOfSquares = 0;
  for (int idx = 0; idx < a.length; idx++) {
    final float value = a[idx];
    final float square = value * value;
    sumOfSquares += square;
  }
  return Math.sqrt(sumOfSquares);
}
/**
 * Finds the position of the largest entry. An entry only replaces the
 * current best when it is strictly greater, and the search starts from
 * negative infinity at index 0.
 *
 * @param a the array to search
 * @return the index of the max value; if max is a tie, returns the first one.
 */
public static int argmax(double[] a) {
  int bestIdx = 0;
  double best = Double.NEGATIVE_INFINITY;
  int idx = 0;
  while (idx < a.length) {
    final double candidate = a[idx];
    if (candidate > best) {
      best = candidate;
      bestIdx = idx;
    }
    idx++;
  }
  return bestIdx;
}
/**
 * Finds the position of the largest entry. An entry replaces the current
 * best when it is greater than or equal to it, and the search starts from
 * negative infinity at index 0.
 *
 * @param a the array to search
 * @return the index of the max value; if max is a tie, returns the last one.
 */
public static int argmax_tieLast(double[] a) {
  int bestIdx = 0;
  double best = Double.NEGATIVE_INFINITY;
  int idx = 0;
  while (idx < a.length) {
    final double candidate = a[idx];
    if (candidate >= best) {
      best = candidate;
      bestIdx = idx;
    }
    idx++;
  }
  return bestIdx;
}
/**
 * Returns the entry of the array found at the index reported by {@code argmax}.
 *
 * @param a the array to search
 * @return the entry at index {@code argmax(a)}
 */
public static double max(double[] a) {
  final int bestIdx = argmax(a);
  return a[bestIdx];
}
/**
 * Returns the largest value in a collection of doubles.
 *
 * @param a the values to search
 * @return the largest value, or negative infinity if no value exceeds it
 *         (in particular for an empty collection)
 */
public static double max(Collection<Double> a) {
  double best = Double.NEGATIVE_INFINITY;
  for (Double boxed : a) {
    final double value = boxed;
    if (value > best) {
      best = value;
    }
  }
  return best;
}
/**
 * Finds the position of the largest entry. An entry only replaces the
 * current best when it is strictly greater, and the search starts from
 * negative infinity at index 0.
 *
 * @param a the array to search
 * @return the index of the max value; if max is a tie, returns the first one.
 */
public static int argmax(float[] a) {
  int bestIdx = 0;
  float best = Float.NEGATIVE_INFINITY;
  int idx = 0;
  while (idx < a.length) {
    final float candidate = a[idx];
    if (candidate > best) {
      best = candidate;
      bestIdx = idx;
    }
    idx++;
  }
  return bestIdx;
}
/**
 * Returns the entry of the array found at the index reported by {@code argmax}.
 *
 * @param a the array to search
 * @return the entry at index {@code argmax(a)}
 */
public static float max(float[] a) {
  final int bestIdx = argmax(a);
  return a[bestIdx];
}
/**
 * Finds the position of the smallest entry. An entry only replaces the
 * current best when it is strictly smaller, and the search starts from
 * positive infinity at index 0.
 *
 * @param a the array to search
 * @return the index of the min value; if min is a tie, returns the first one.
 */
public static int argmin(double[] a) {
  int bestIdx = 0;
  double best = Double.POSITIVE_INFINITY;
  int idx = 0;
  while (idx < a.length) {
    final double candidate = a[idx];
    if (candidate < best) {
      best = candidate;
      bestIdx = idx;
    }
    idx++;
  }
  return bestIdx;
}
/**
 * Returns the entry of the array found at the index reported by {@code argmin}.
 *
 * @param a the array to search
 * @return The minimum value in an array.
 */
public static double min(double[] a) {
  final int bestIdx = argmin(a);
  return a[bestIdx];
}
/**
 * Returns the smallest value in a vector of doubles, ignoring any values
 * which are NaN or infinite.
 *
 * @param v the vector to search
 * @return the smallest remaining value, or 0.0 if nothing remains after filtering
 */
public static double safeMin(double[] v) {
  final double[] finite = filterNaNAndInfinite(v);
  return (numRows(finite) == 0) ? 0.0 : min(finite);
}
/**
 * Finds the position of the smallest entry. An entry only replaces the
 * current best when it is strictly smaller, and the search starts from
 * positive infinity at index 0.
 *
 * @param a the array to search
 * @return the index of the min value; if min is a tie, returns the first one.
 */
public static int argmin(float[] a) {
  int bestIdx = 0;
  float best = Float.POSITIVE_INFINITY;
  int idx = 0;
  while (idx < a.length) {
    final float candidate = a[idx];
    if (candidate < best) {
      best = candidate;
      bestIdx = idx;
    }
    idx++;
  }
  return bestIdx;
}
/**
 * Returns the entry of the array found at the index reported by {@code argmin}.
 *
 * @param a the array to search
 * @return the entry at index {@code argmin(a)}
 */
public static float min(float[] a) {
  final int bestIdx = argmin(a);
  return a[bestIdx];
}
/**
 * Finds the position of the smallest entry. An entry only replaces the
 * current best when it is strictly smaller, and the search starts from
 * {@code Integer.MAX_VALUE} at index 0.
 *
 * @param a the array to search
 * @return the index of the min value; if min is a tie, returns the first one.
 */
public static int argmin(int[] a) {
  int bestIdx = 0;
  int best = Integer.MAX_VALUE;
  int idx = 0;
  while (idx < a.length) {
    final int candidate = a[idx];
    if (candidate < best) {
      best = candidate;
      bestIdx = idx;
    }
    idx++;
  }
  return bestIdx;
}
/**
 * Returns the entry of the array found at the index reported by {@code argmin}.
 *
 * @param a the array to search
 * @return the entry at index {@code argmin(a)}
 */
public static int min(int[] a) {
  final int bestIdx = argmin(a);
  return a[bestIdx];
}
/**
 * Finds the position of the largest entry. An entry only replaces the
 * current best when it is strictly greater, and the search starts from
 * {@code Integer.MIN_VALUE} at index 0.
 *
 * @param a the array to search
 * @return the index of the max value; if max is a tie, returns the first one.
 */
public static int argmax(int[] a) {
  int bestIdx = 0;
  int best = Integer.MIN_VALUE;
  int idx = 0;
  while (idx < a.length) {
    final int candidate = a[idx];
    if (candidate > best) {
      best = candidate;
      bestIdx = idx;
    }
    idx++;
  }
  return bestIdx;
}
/**
 * Returns the entry of the array found at the index reported by {@code argmax}.
 *
 * @param a the array to search
 * @return the entry at index {@code argmax(a)}
 */
public static int max(int[] a) {
  final int bestIdx = argmax(a);
  return a[bestIdx];
}
/**
 * Returns the smallest element of the matrix.
 *
 * @param matrix the (possibly ragged) matrix to search
 * @return the smallest entry, or {@code Integer.MAX_VALUE} if there are no entries
 */
public static int min(int[][] matrix) {
  int smallest = Integer.MAX_VALUE;
  for (int r = 0; r < matrix.length; r++) {
    final int[] row = matrix[r];
    for (int c = 0; c < row.length; c++) {
      if (row[c] < smallest) {
        smallest = row[c];
      }
    }
  }
  return smallest;
}
/**
 * Returns the largest element of the matrix.
 *
 * @param matrix the (possibly ragged) matrix to search
 * @return the largest entry, or {@code Integer.MIN_VALUE} if there are no entries
 */
public static int max(int[][] matrix) {
  int largest = Integer.MIN_VALUE;
  for (int r = 0; r < matrix.length; r++) {
    final int[] row = matrix[r];
    for (int c = 0; c < row.length; c++) {
      if (row[c] > largest) {
        largest = row[c];
      }
    }
  }
  return largest;
}
/**
 * Returns the largest value in a vector of doubles, ignoring any values
 * which are NaN or infinite.
 *
 * @param v the vector to search
 * @return the largest remaining value, or 0.0 if nothing remains after filtering
 */
public static double safeMax(double[] v) {
  final double[] finite = filterNaNAndInfinite(v);
  return (numRows(finite) == 0) ? 0.0 : max(finite);
}
/**
 * Returns the log of the sum of an array of numbers, which are themselves
 * input in log form (natural logarithms). Delegates to the ranged
 * {@code logSum} over the whole array.
 *
 * @param logInputs An array of numbers [log(x1), ..., log(xn)]
 * @return {@literal log(x1 + ... + xn)}
 */
public static double logSum(double... logInputs) {
  final int end = logInputs.length;
  return logSum(logInputs, 0, end);
}
/**
 * Returns the log of the sum of the portion between {@code fromIndex},
 * inclusive, and {@code toIndex}, exclusive, of an array of numbers, which
 * are themselves input in log form. This is all natural logarithms.
 * <p>
 * The largest term is factored out and the remaining terms are added
 * relative to it; terms that are not above
 * {@code max - SloppyMath.LOGTOLERANCE} are skipped.
 * <p>
 * The current thread's interrupt flag is checked (and cleared) on entry.
 *
 * @param logInputs An array of numbers [log(x1), ..., log(xn)]
 * @param fromIndex The array index to start the sum from
 * @param toIndex The array index after the last element to be summed
 * @return {@literal log(x1 + ... + xn)}, or {@code Double.NEGATIVE_INFINITY}
 *         if the range is empty ({@code fromIndex >= toIndex}) and lies
 *         within the array
 * @throws IllegalArgumentException if {@code logInputs} is of length zero
 * @throws RuntimeInterruptedException if the current thread was interrupted
 */
public static double logSum(double[] logInputs, int fromIndex, int toIndex) {
  if (Thread.interrupted()) { // A good place to check for interrupts -- many functions call this
    throw new RuntimeInterruptedException();
  }
  if (logInputs.length == 0) {
    throw new IllegalArgumentException();
  }
  // toIndex is exclusive, so toIndex == logInputs.length is a valid bound:
  // an empty range ending at the array end must not fall through to the
  // logInputs[fromIndex] read below.
  if (fromIndex >= 0 && toIndex <= logInputs.length && fromIndex >= toIndex) {
    return Double.NEGATIVE_INFINITY;
  }
  int maxIdx = fromIndex;
  double max = logInputs[fromIndex];
  for (int i = fromIndex + 1; i < toIndex; i++) {
    if (logInputs[i] > max) {
      maxIdx = i;
      max = logInputs[i];
    }
  }
  boolean haveTerms = false;
  double intermediate = 0.0;
  double cutoff = max - SloppyMath.LOGTOLERANCE;
  // we avoid rearranging the array and so test indices each time!
  for (int i = fromIndex; i < toIndex; i++) {
    if (i != maxIdx && logInputs[i] > cutoff) {
      haveTerms = true;
      intermediate += Math.exp(logInputs[i] - max);
    }
  }
  if (haveTerms) {
    return max + Math.log(1.0 + intermediate);
  } else {
    return max;
  }
}
/**
 * Returns the log of the sum of the portion between {@code fromIndex},
 * inclusive, and {@code afterIndex}, exclusive, of an array of numbers,
 * which are themselves input in log form. This is all natural logarithms.
 * This version incorporates a stride, so only every {@code stride}-th
 * entry starting at {@code fromIndex} is summed.
 * <p>
 * The largest term is factored out and the remaining terms are added
 * relative to it; terms that are not above
 * {@code max - SloppyMath.LOGTOLERANCE} are skipped.
 *
 * @param logInputs An array of numbers [log(x1), ..., log(xn)]
 * @param fromIndex The array index to start the sum from
 * @param afterIndex The array index after the last element to be summed
 * @param stride The distance between consecutive summed indices; must be positive
 * @return {@literal log(x1 + ... + xn)} over the selected entries, or
 *         {@code Double.NEGATIVE_INFINITY} if the range is empty
 *         ({@code fromIndex >= afterIndex}) and lies within the array
 * @throws IllegalArgumentException if {@code logInputs} is of length zero,
 *         or if {@code stride} is not positive for a non-empty range
 */
public static double logSum(double[] logInputs, int fromIndex, int afterIndex, int stride) {
  if (logInputs.length == 0) {
    throw new IllegalArgumentException();
  }
  // afterIndex is exclusive, so afterIndex == logInputs.length is a valid
  // bound: an empty range ending at the array end must not fall through
  // to the logInputs[fromIndex] read below.
  if (fromIndex >= 0 && afterIndex <= logInputs.length && fromIndex >= afterIndex) {
    return Double.NEGATIVE_INFINITY;
  }
  // a zero stride would loop forever and a negative one walks off the array
  if (stride <= 0) {
    throw new IllegalArgumentException("stride must be positive: " + stride);
  }
  int maxIdx = fromIndex;
  double max = logInputs[fromIndex];
  for (int i = fromIndex + stride; i < afterIndex; i += stride) {
    if (logInputs[i] > max) {
      maxIdx = i;
      max = logInputs[i];
    }
  }
  boolean haveTerms = false;
  double intermediate = 0.0;
  double cutoff = max - SloppyMath.LOGTOLERANCE;
  // we avoid rearranging the array and so test indices each time!
  for (int i = fromIndex; i < afterIndex; i += stride) {
    if (i != maxIdx && logInputs[i] > cutoff) {
      haveTerms = true;
      intermediate += Math.exp(logInputs[i] - max);
    }
  }
  if (haveTerms) {
    return max + Math.log(1.0 + intermediate); // using Math.log1p(intermediate) may be more accurate, but is slower
  } else {
    return max;
  }
}
/**
 * Returns the log of the sum of a list of numbers, which are themselves
 * input in log form. Delegates to the ranged {@code logSum} over the whole list.
 *
 * @param logInputs A list of numbers [log(x1), ..., log(xn)]
 * @return the result of the ranged {@code logSum} from index 0 to {@code logInputs.size()}
 */
public static double logSum(List<Double> logInputs) {
  final int end = logInputs.size();
  return logSum(logInputs, 0, end);
}
/**
 * Returns the log of the sum of the portion between {@code fromIndex},
 * inclusive, and {@code toIndex}, exclusive, of a list of numbers, which
 * are themselves input in log form. This is all natural logarithms.
 * <p>
 * The largest term is factored out and the remaining terms are added
 * relative to it; terms that are not above
 * {@code max - SloppyMath.LOGTOLERANCE} are skipped.
 *
 * @param logInputs A list of numbers [log(x1), ..., log(xn)]
 * @param fromIndex The list index to start the sum from
 * @param toIndex The list index after the last element to be summed
 * @return {@literal log(x1 + ... + xn)}, or {@code Double.NEGATIVE_INFINITY}
 *         if the range is empty ({@code fromIndex >= toIndex}) and lies
 *         within the list
 * @throws IllegalArgumentException if {@code logInputs} is empty
 */
public static double logSum(List<Double> logInputs, int fromIndex, int toIndex) {
  int length = logInputs.size();
  if (length == 0) {
    throw new IllegalArgumentException();
  }
  // toIndex is exclusive, so toIndex == length is a valid bound: an empty
  // range ending at the list end must not fall through to the
  // logInputs.get(fromIndex) read below.
  if (fromIndex >= 0 && toIndex <= length && fromIndex >= toIndex) {
    return Double.NEGATIVE_INFINITY;
  }
  int maxIdx = fromIndex;
  double max = logInputs.get(fromIndex);
  for (int i = fromIndex + 1; i < toIndex; i++) {
    double d = logInputs.get(i);
    if (d > max) {
      maxIdx = i;
      max = d;
    }
  }
  boolean haveTerms = false;
  double intermediate = 0.0;
  double cutoff = max - SloppyMath.LOGTOLERANCE;
  // we avoid rearranging the list and so test indices each time!
  for (int i = fromIndex; i < toIndex; i++) {
    double d = logInputs.get(i);
    if (i != maxIdx && d > cutoff) {
      haveTerms = true;
      intermediate += Math.exp(d - max);
    }
  }
  if (haveTerms) {
    return max + Math.log(1.0 + intermediate);
  } else {
    return max;
  }
}
/**
 * Returns the log of the sum of an array of numbers, which are themselves
 * input in log form. This is all natural logarithms.
 * <p>
 * The largest term is factored out and the remaining terms are added
 * relative to it; terms that are not above
 * {@code max - SloppyMath.LOGTOLERANCE_F} are skipped.
 *
 * @param logInputs An array of numbers [log(x1), ..., log(xn)]
 * @return log(x1 + ... + xn)
 * @throws IllegalArgumentException if {@code logInputs} is of length zero
 */
public static float logSum(float[] logInputs) {
  final int n = logInputs.length;
  if (n == 0) {
    throw new IllegalArgumentException();
  }
  // locate the largest entry (first occurrence on ties)
  int peakIdx = 0;
  float peak = logInputs[0];
  for (int i = 1; i < n; i++) {
    final float candidate = logInputs[i];
    if (candidate > peak) {
      peak = candidate;
      peakIdx = i;
    }
  }
  final float cutoff = peak - SloppyMath.LOGTOLERANCE_F;
  boolean sawOthers = false;
  double rest = 0.0f;
  // the array is not rearranged, so the peak is skipped by index
  for (int i = 0; i < n; i++) {
    if (i == peakIdx) {
      continue;
    }
    final float term = logInputs[i];
    if (term > cutoff) {
      sawOthers = true;
      rest += Math.exp(term - peak);
    }
  }
  if (!sawOthers) {
    return peak;
  }
  return peak + (float) Math.log(1.0 + rest);
}
// LINEAR ALGEBRAIC FUNCTIONS
/**
 * Computes the inner product of two arrays over their common prefix, i.e.
 * over the first {@code min(a.length, b.length)} indices.
 *
 * @param a the first vector
 * @param b the second vector
 * @return the sum of {@code a[i] * b[i]} over the shared indices
 */
public static double innerProduct(double[] a, double[] b) {
  final int shared = (a.length <= b.length) ? a.length : b.length;
  double acc = 0.0;
  int idx = 0;
  while (idx < shared) {
    acc += a[idx] * b[idx];
    idx++;
  }
  return acc;
}
/**
 * Computes the inner product of two arrays over their common prefix, i.e.
 * over the first {@code min(a.length, b.length)} indices. Each product is
 * a float multiplication; the sum is kept as a double.
 *
 * @param a the first vector
 * @param b the second vector
 * @return the sum of {@code a[i] * b[i]} over the shared indices
 */
public static double innerProduct(float[] a, float[] b) {
  final int shared = (a.length <= b.length) ? a.length : b.length;
  double acc = 0.0;
  int idx = 0;
  while (idx < shared) {
    final float product = a[idx] * b[idx];
    acc += product;
    idx++;
  }
  return acc;
}
// UTILITIES
/**
 * Reads a matrix of doubles from a text file. The file contents are split
 * into rows on runs of CR/LF characters, and each row is split into cells
 * on runs of whitespace; every cell is parsed with {@code Double.parseDouble}.
 *
 * @param filename the file to read (loaded with {@code IOUtils.slurpFile})
 * @return one array per row of the file
 * @throws IOException declared by this method; presumably raised by the file read
 */
public static double[][] load2DMatrixFromFile(String filename) throws IOException {
  final String contents = IOUtils.slurpFile(filename);
  final String[] lines = contents.split("[\r\n]+");
  final double[][] matrix = new double[lines.length][];
  int r = 0;
  for (String line : lines) {
    final String[] cells = line.split("\\s+");
    final double[] row = new double[cells.length];
    for (int c = 0; c < cells.length; c++) {
      row[c] = Double.parseDouble(cells[c]);
    }
    matrix[r++] = row;
  }
  return matrix;
}
/**
 * Converts an array of ints to an array of {@code Integer} objects.
 *
 * @param assignment the values to box
 * @return a new array of the same length holding the boxed values
 */
public static Integer[] box(int[] assignment) {
  Integer[] boxed = new Integer[assignment.length];
  int pos = 0;
  for (int value : assignment) {
    boxed[pos++] = Integer.valueOf(value);
  }
  return boxed;
}
/**
 * Copies a collection of {@code Integer} objects into an int array, in iteration order.
 *
 * @param list the values to unbox
 * @return a new array holding the unboxed values
 */
public static int[] unboxToInt(Collection<Integer> list) {
  int[] values = new int[list.size()];
  int next = 0;
  for (Integer boxed : list) {
    values[next] = boxed;
    next++;
  }
  return values;
}
/**
 * Converts an array of doubles to an array of {@code Double} objects.
 *
 * @param assignment the values to box
 * @return a new array of the same length holding the boxed values
 */
public static Double[] box(double[] assignment) {
  Double[] boxed = new Double[assignment.length];
  int pos = 0;
  for (double value : assignment) {
    boxed[pos++] = Double.valueOf(value);
  }
  return boxed;
}
/**
 * Copies a collection of {@code Double} objects into a double array, in iteration order.
 *
 * @param list the values to unbox
 * @return a new array holding the unboxed values
 */
public static double[] unbox(Collection<Double> list) {
  double[] values = new double[list.size()];
  int next = 0;
  for (Double boxed : list) {
    values[next] = boxed;
    next++;
  }
  return values;
}
/**
 * Finds the first position of a value in an int array.
 *
 * @param n the value to look for
 * @param a the array to search
 * @return the lowest index {@code i} with {@code a[i] == n}, or -1 if there is none
 */
public static int indexOf(int n, int[] a) {
  int idx = 0;
  while (idx < a.length) {
    if (n == a[idx]) {
      return idx;
    }
    idx++;
  }
  return -1;
}
/**
 * Converts a (possibly ragged) matrix of doubles to a matrix of ints by
 * casting each entry with {@code (int)}.
 *
 * @param doubleCounts the matrix to convert
 * @return a new matrix with the same row lengths as {@code doubleCounts}
 */
public static int[][] castToInt(double[][] doubleCounts) {
  int[][] counts = new int[doubleCounts.length][];
  int r = 0;
  for (double[] row : doubleCounts) {
    int[] out = new int[row.length];
    for (int c = 0; c < out.length; c++) {
      out[c] = (int) row[c];
    }
    counts[r++] = out;
  }
  return counts;
}
// PROBABILITY FUNCTIONS
/**
 * Rescales the array in place so that its values sum to 1.0, by
 * multiplying every entry by the reciprocal of the current sum.
 *
 * @param a the array to normalize
 * @throws RuntimeException if the sum of the entries is 0.0 or NaN
 */
public static void normalize(double[] a) {
  final double total = sum(a);
  final boolean unusable = total == 0.0 || Double.isNaN(total);
  if (unusable) {
    throw new RuntimeException("Can't normalize an array with sum 0.0 or NaN: " + Arrays.toString(a));
  }
  final double scale = 1.0 / total;
  multiplyInPlace(a, scale); // same as dividing each value by total
}
/**
 * Rescales the array in place so that its L1 norm is 1.0, by multiplying
 * every entry by the reciprocal of {@code L1Norm(a)}.
 *
 * @param a the array to normalize
 * @throws RuntimeException if the L1 norm is 0.0 or NaN; the message shows
 *         the array, cut to its first 100 entries when it has 100 or more
 */
public static void L1normalize(double[] a) {
  final double total = L1Norm(a);
  if (total == 0.0 || Double.isNaN(total)) {
    // keep the error message readable for long arrays
    final String shown = (a.length < 100)
        ? Arrays.toString(a)
        : Arrays.toString(Arrays.copyOf(a, 100)) + " ... ";
    throw new RuntimeException("Can't normalize an array with sum 0.0 or NaN: " + shown);
  }
  final double scale = 1.0 / total;
  multiplyInPlace(a, scale); // same as dividing each value by total
}
/**
 * Rescales the array in place so that its L2 norm is 1.0, by multiplying
 * every entry by the reciprocal of {@code L2Norm(a)}.
 *
 * @param a the array to normalize
 * @throws RuntimeException if the L2 norm is 0.0 or NaN; the message shows
 *         the array, cut to its first 100 entries when it has 100 or more
 */
public static void L2normalize(double[] a) {
  final double total = L2Norm(a);
  if (total == 0.0 || Double.isNaN(total)) {
    // keep the error message readable for long arrays
    final String shown = (a.length < 100)
        ? Arrays.toString(a)
        : Arrays.toString(Arrays.copyOf(a, 100)) + " ... ";
    throw new RuntimeException("Can't normalize an array with sum 0.0 or NaN: " + shown);
  }
  final double scale = 1.0 / total;
  multiplyInPlace(a, scale); // same as dividing each value by total
}
/**
 * Rescales the array in place so that its values sum to 1.0, by
 * multiplying every entry by the reciprocal of the current sum.
 *
 * @param a the array to normalize
 * @throws RuntimeException if the sum of the entries is 0.0 or NaN
 */
public static void normalize(float[] a) {
  final float total = sum(a);
  final boolean unusable = total == 0.0f || Double.isNaN(total);
  if (unusable) {
    throw new RuntimeException("Can't normalize an array with sum 0.0 or NaN");
  }
  final float scale = 1.0f / total;
  multiplyInPlace(a, scale); // same as dividing each value by total
}
/**
 * Rescales the array in place so that its L2 norm is 1.0, by multiplying
 * every entry by the reciprocal of {@code L2Norm(a)}.
 *
 * @param a the array to normalize
 * @throws RuntimeException if the L2 norm is 0.0 or NaN; the message shows
 *         the array, cut to its first 100 entries when it has 100 or more
 */
public static void L2normalize(float[] a) {
  final float total = L2Norm(a);
  if (total == 0.0 || Float.isNaN(total)) {
    // keep the error message readable for long arrays
    final String shown = (a.length < 100)
        ? Arrays.toString(a)
        : Arrays.toString(Arrays.copyOf(a, 100)) + " ... ";
    throw new RuntimeException("Can't normalize an array with sum 0.0 or NaN: " + shown);
  }
  final double scale = 1.0 / total;
  multiplyInPlace(a, scale); // same as dividing each value by total
}
/**
 * Standardizes the values in this array in place, i.e., subtracts the mean
 * and then divides by the standard deviation (as computed by {@code stdev}).
 *
 * @param a the array to standardize
 * @throws RuntimeException if the mean is NaN, or if the standard deviation
 *         is 0.0 or NaN
 */
public static void standardize(double[] a) {
  final double center = mean(a);
  if (Double.isNaN(center)) {
    throw new RuntimeException("Can't standardize array whose mean is NaN");
  }
  final double spread = stdev(a);
  final boolean unusable = spread == 0.0 || Double.isNaN(spread);
  if (unusable) {
    throw new RuntimeException("Can't standardize array whose standard deviation is 0.0 or NaN");
  }
  addInPlace(a, -center); // subtract mean
  final double scale = 1.0 / spread;
  multiplyInPlace(a, scale); // divide by standard deviation
}
/**
 * Computes the L2 (Euclidean) norm of a vector: the square root of the sum
 * of the squared entries.
 *
 * @param a the vector
 * @return the L2 norm of {@code a}
 */
public static double L2Norm(double[] a) {
  double sumOfSquares = 0.0;
  for (int idx = 0; idx < a.length; idx++) {
    sumOfSquares += Math.pow(a[idx], 2);
  }
  return Math.sqrt(sumOfSquares);
}
/**
 * Computes the L2 (Euclidean) norm of a vector: the square root of the sum
 * of the squared entries. The sum is accumulated in double precision and
 * the result narrowed to float.
 *
 * @param a the vector
 * @return the L2 norm of {@code a}
 */
public static float L2Norm(float[] a) {
  double sumOfSquares = 0;
  for (int idx = 0; idx < a.length; idx++) {
    sumOfSquares += Math.pow(a[idx], 2);
  }
  return (float) Math.sqrt(sumOfSquares);
}
/**
 * Computes the L1 norm of a vector: the sum of the absolute values of its entries.
 *
 * @param a the vector
 * @return the L1 norm of {@code a}
 */
public static double L1Norm(double[] a) {
  double total = 0.0;
  for (int idx = 0; idx < a.length; idx++) {
    total += Math.abs(a[idx]);
  }
  return total;
}
/**
 * Normalizes an array of log values in place by subtracting
 * {@code logSum(a)} from every entry.
 * If the log total is Double.NEGATIVE_INFINITY, the array is instead
 * overwritten with the uniform value {@code -Math.log(a.length)}, which
 * avoids NaN values. CDM: This last bit is weird! Do we really want that?
 *
 * @param a the log values to normalize
 */
public static void logNormalize(double[] a) {
  final double logTotal = logSum(a);
  if (logTotal != Double.NEGATIVE_INFINITY) {
    addInPlace(a, -logTotal); // subtract log total from each value
    return;
  }
  // to avoid NaN values
  final double uniform = -Math.log(a.length);
  Arrays.fill(a, uniform);
}
/**
 * Samples an index from the distribution given by {@code d}, using this
 * class's shared random number generator.
 * Assumes that the distribution sums to 1.0.
 *
 * @param d the distribution to sample from
 * @return the index chosen by {@code sampleFromDistribution(d, rand)}
 */
public static int sampleFromDistribution(double[] d) {
  final Random source = rand;
  return sampleFromDistribution(d, source);
}
/**
 * Samples an index from the distribution given by {@code d}.
 * Assumes that the distribution sums to 1.0. A uniform draw from
 * {@code random.nextDouble()} is compared with the cumulative totals of
 * all entries but the last; the last index is the fallback.
 *
 * @param d the distribution to sample from
 * @param random the source of the uniform draw
 * @return an index from 0 to {@code d.length - 1}
 * @throws RuntimeException if a NaN entry is met before an index is chosen
 */
public static int sampleFromDistribution(double[] d, Random random) {
  final double draw = random.nextDouble();
  final int last = d.length - 1;
  double cumulative = 0;
  int idx = 0;
  // find the interval of the cumulative distribution that the draw falls in
  while (idx < last) {
    final double p = d[idx];
    if (Double.isNaN(p)) {
      throw new RuntimeException("Can't sample from NaN");
    }
    cumulative += p;
    if (draw < cumulative) {
      return idx;
    }
    idx++;
  }
  return last; // in case the "double-math" didn't total to exactly 1.0
}
/**
 * Samples an index from the distribution given by {@code d}.
 * Assumes that the distribution sums to 1.0. A uniform draw from
 * {@code random.nextDouble()} is compared with the cumulative totals of
 * all entries but the last; the last index is the fallback.
 *
 * @param d the distribution to sample from
 * @param random the source of the uniform draw
 * @return an index from 0 to {@code d.length - 1}
 * @throws RuntimeException if a NaN entry is met before an index is chosen
 */
public static int sampleFromDistribution(float[] d, Random random) {
  final double draw = random.nextDouble();
  final int last = d.length - 1;
  double cumulative = 0;
  int idx = 0;
  // find the interval of the cumulative distribution that the draw falls in
  while (idx < last) {
    final float p = d[idx];
    if (Float.isNaN(p)) {
      throw new RuntimeException("Can't sample from NaN");
    }
    cumulative += p;
    if (draw < cumulative) {
      return idx;
    }
    idx++;
  }
  return last; // in case the "double-math" didn't total to exactly 1.0
}
/**
 * Computes a Kullback-Leibler divergence, measured in bits (log base 2),
 * between two arrays of weights. Each array is first divided by its own sum,
 * so the inputs do not need to be normalized. Positions where {@code from}
 * is exactly 0.0 contribute nothing.
 *
 * @param from the weights the divergence is measured from
 * @param to the weights the divergence is measured to; read at every index
 *     of {@code from} that holds a non-zero value
 * @return the sum over i of p_i * log2(p_i / q_i), where p and q are the
 *     sum-normalized {@code from} and {@code to}
 */
public static double klDivergence(double[] from, double[] to) {
  final double fromTotal = sum(from);
  final double toTotal = sum(to);
  double bits = 0.0;
  for (int idx = 0; idx < from.length; idx++) {
    if (from[idx] != 0.0) {
      final double p = from[idx] / fromTotal;
      final double q = to[idx] / toTotal;
      // Dividing by ln 2 converts the natural log into a base-2 log.
      bits += p * (Math.log(p / q) / Math.log(2.0));
    }
  }
  return bits;
}
/**
* Returns the Jensen Shannon divergence (information radius) between
* a and b, computed as the average of the KL divergences from a and
* from b to the array obtained by adding a and b pairwise and halving
* the result.
*/
public static double jensenShannonDivergence(double[] a, double[] b) {
  // Reference array: pairwiseAdd(a, b) scaled in place by one half.
  final double[] midpoint = pairwiseAdd(a, b);
  multiplyInPlace(midpoint, .5);
  final double fromA = klDivergence(a, midpoint);
  final double fromB = klDivergence(b, midpoint);
  return .5 * fromA + .5 * fromB;
}
/**
 * Overwrites {@code a} in place so that position {@code i} holds 0.0 and
 * every other position holds negative infinity (the logs of 1 and 0).
 * If {@code i} is not a valid index, every position ends up negative infinity.
 *
 * @param a the array to overwrite
 * @param i the index that receives 0.0
 */
public static void setToLogDeterministic(float[] a, int i) {
  Arrays.fill(a, Float.NEGATIVE_INFINITY);
  if (i >= 0 && i < a.length) {
    a[i] = 0.0F;
  }
}
/**
 * Overwrites {@code a} in place so that position {@code i} holds 0.0 and
 * every other position holds negative infinity (the logs of 1 and 0).
 * If {@code i} is not a valid index, every position ends up negative infinity.
 *
 * @param a the array to overwrite
 * @param i the index that receives 0.0
 */
public static void setToLogDeterministic(double[] a, int i) {
  Arrays.fill(a, Double.NEGATIVE_INFINITY);
  if (i >= 0 && i < a.length) {
    a[i] = 0.0;
  }
}
// SAMPLE ANALYSIS
/**
 * Returns the arithmetic mean of an array of doubles: the result of
 * {@code sum(a)} divided by the array length.
 *
 * @param a the values to average
 * @return sum(a) / a.length
 */
public static double mean(double[] a) {
  final double total = sum(a);
  return total / a.length;
}
/** Return the mean of an array of int. */
public static double mean(int[] a) {
  // Convert the total to double first so the division is not an integer one.
  final double total = (double) sum(a);
  return total / a.length;
}
/**
 * Returns the median of the values in {@code a}. The input array is not
 * modified; a sorted copy is used internally.
 *
 * @param a the values to take the median of
 * @return the middle value of the sorted data for an odd length, the average
 *     of the two middle values for an even length, or {@code Double.NaN}
 *     when {@code a} is empty
 */
public static double median(double[] a) {
  // An empty array has no middle element; without this guard the even-length
  // branch would index position -1.
  if (a.length == 0) {
    return Double.NaN;
  }
  double[] sorted = Arrays.copyOf(a, a.length);
  Arrays.sort(sorted);
  int mid = sorted.length / 2;
  if (sorted.length % 2 == 0) {
    return (sorted[mid - 1] + sorted[mid]) / 2.0;
  }
  return sorted[mid];
}
/**
* Returns the mean of a vector of doubles. Any values which are NaN or
* infinite are ignored. If the vector is empty, 0.0 is returned.
*/
public static double safeMean(double[] v) {
  final double[] usable = filterNaNAndInfinite(v);
  // Nothing left after filtering: return the fallback value 0.0.
  return (usable.length == 0) ? 0.0 : mean(usable);
}
/**
 * Returns the sum of squared deviations of the values in {@code a} from
 * their mean.
 *
 * @param a the sample values
 * @return the sum over i of (a[i] - mean(a))^2
 */
public static double sumSquaredError(double[] a) {
  final double center = mean(a);
  double total = 0.0;
  for (int idx = 0; idx < a.length; idx++) {
    final double deviation = a[idx] - center;
    total += deviation * deviation;
  }
  return total;
}
/**
 * Returns the sum of the squares of the values in {@code a}.
 *
 * @param a the values to square and add up
 * @return the sum over i of a[i]^2; 0.0 for an empty array
 */
public static double sumSquared(double[] a) {
  double total = 0.0;
  for (int idx = 0; idx < a.length; idx++) {
    total += a[idx] * a[idx];
  }
  return total;
}
/**
 * Returns the variance of {@code a} computed with an (n - 1) denominator.
 *
 * @param a the sample values
 * @return sumSquaredError(a) / (a.length - 1)
 */
public static double variance(double[] a) {
  final double squaredError = sumSquaredError(a);
  final int denominator = a.length - 1;
  return squaredError / denominator;
}
/**
 * Returns the standard deviation of {@code a}: the square root of
 * {@code variance(a)}.
 *
 * @param a the sample values
 * @return Math.sqrt(variance(a))
 */
public static double stdev(double[] a) {
  final double spread = variance(a);
  return Math.sqrt(spread);
}
/**
* Returns the standard deviation of a vector of doubles. Any values which
* are NaN or infinite are ignored. If the vector contains fewer than two
* values, 1.0 is returned.
*/
public static double safeStdev(double[] v) {
  final double[] usable = filterNaNAndInfinite(v);
  // Fewer than two usable values: return the fallback value 1.0.
  return (usable.length < 2) ? 1.0 : stdev(usable);
}
/**
 * Returns the standard error of the mean of {@code a}: its standard
 * deviation divided by the square root of the number of values.
 *
 * @param a the sample values
 * @return stdev(a) / sqrt(a.length)
 */
public static double standardErrorOfMean(double[] a) {
  final double deviation = stdev(a);
  final double rootN = Math.sqrt(a.length);
  return deviation / rootN;
}
/**
* Fills the array with sample from 0 to numArgClasses-1 without replacement.
*/
public static void sampleWithoutReplacement(int[] array, int numArgClasses) {
  // Same as the three-argument overload, driven by the class-level generator.
  final Random sharedGenerator = rand;
  sampleWithoutReplacement(array, numArgClasses, sharedGenerator);
}
/**
* Fills the array with sample from 0 to numArgClasses-1 without replacement.
*/
public static void sampleWithoutReplacement(int[] array, int numArgClasses, Random rand) {
  // Start from the sequence 0, 1, ..., numArgClasses - 1.
  final int[] pool = new int[numArgClasses];
  int next = 0;
  while (next < pool.length) {
    pool[next] = next;
    next++;
  }
  shuffle(pool, rand);
  // The first array.length entries of the shuffled pool become the sample.
  System.arraycopy(pool, 0, array, 0, array.length);
}
/**
 * Shuffles the array in place using this class's shared random number
 * generator.
 *
 * @param a the array to shuffle
 */
public static void shuffle(int[] a) {
  final Random sharedGenerator = rand;
  shuffle(a, sharedGenerator);
}
/**
* Shuffles the integers in an array in place using a source of randomness.
* Uses the Fisher-Yates shuffle, which makes all orderings equally likely
* iff the randomizer is good.
*
* @param a The array to shuffle
* @param rand The source of randomness
*/
public static void shuffle(int[] a, Random rand) {
  // Walk the unshuffled prefix from full length down to two elements,
  // swapping its last slot with a uniformly chosen slot (possibly itself).
  for (int prefix = a.length; prefix > 1; prefix--) {
    final int last = prefix - 1;
    final int pick = rand.nextInt(prefix);
    final int held = a[last];
    a[last] = a[pick];
    a[pick] = held;
  }
}
/**
 * Reverses the order of the elements of {@code a} in place.
 *
 * @param a the array to reverse
 */
public static void reverse(int[] a) {
  int left = 0;
  int right = a.length - 1;
  // Swap from both ends toward the middle; a middle element stays put.
  while (left < right) {
    final int held = a[left];
    a[left] = a[right];
    a[right] = held;
    left++;
    right--;
  }
}
/**
 * Reports whether the value {@code i} occurs anywhere in {@code a}.
 *
 * @param a the array to search
 * @param i the value to look for
 * @return true if some element of a equals i, false otherwise
 */
public static boolean contains(int[] a, int i) {
  for (int idx = 0; idx < a.length; idx++) {
    if (a[idx] == i) {
      return true;
    }
  }
  return false;
}
/**
 * Reports whether the value {@code i} occurs in {@code a} at some index in
 * the range from {@code begin} (inclusive) to {@code end} (exclusive).
 *
 * @param a the array to search
 * @param begin the first index examined
 * @param end the index just past the last one examined
 * @param i the value to look for
 * @return true if a[j] equals i for some j with begin <= j < end
 */
public static boolean containsInSubarray(int[] a, int begin, int end, int i) {
  int pos = begin;
  while (pos < end) {
    if (a[pos] == i) {
      return true;
    }
    pos++;
  }
  return false;
}
/**
* Direct computation of Pearson product-moment correlation coefficient.
* Note that if x and y are involved in several computations of
* pearsonCorrelation, it is perhaps more advisable to first standardize
* x and y, then compute innerProduct(x,y)/(x.length-1).
*/
public static double pearsonCorrelation(double[] x, double[] y) {
  // Single pass over the data: running means are updated together with
  // running sums of squared deviations and of the co-deviation, each new
  // term weighted by (n - 1) / n where n counts the elements seen so far.
  double meanX = x[0];
  double meanY = y[0];
  double sumSqX = 0;
  double sumSqY = 0;
  double sumCoproduct = 0;
  for (int n = 2; n <= x.length; n++) {
    final double weight = (n - 1) * 1.0 / n;
    final double dx = x[n - 1] - meanX;
    final double dy = y[n - 1] - meanY;
    sumSqX += dx * dx * weight;
    sumSqY += dy * dy * weight;
    sumCoproduct += dx * dy * weight;
    meanX += dx / n;
    meanY += dy / n;
  }
  final double sdX = Math.sqrt(sumSqX / x.length);
  final double sdY = Math.sqrt(sumSqY / y.length);
  final double covariance = sumCoproduct / x.length;
  final double denom = sdX * sdY;
  // A zero denominator (no spread in x or in y) yields 0.0 rather than a
  // division by zero.
  if (denom == 0.0) {
    return 0.0;
  }
  return covariance / denom;
}
/**
* Computes the significance level by approximate randomization, using a
* default value of 1000 iterations. See documentation for other version
* of method.
*/
public static double sigLevelByApproxRand(double[] A, double[] B) {
  final int defaultIterations = 1000;
  return sigLevelByApproxRand(A, B, defaultIterations);
}
/**
* Takes a pair of arrays, A and B, which represent corresponding
* outcomes of a pair of random variables: say, results for two different
* classifiers on a sequence of inputs. Returns the estimated
* probability that the difference between the means of A and B is not
* significant, that is, the significance level. This is computed by
* "approximate randomization". The test statistic is the absolute
* difference between the means of the two arrays. A randomized test
* statistic is computed the same way after initially randomizing the
* arrays by swapping each pair of elements with 50% probability. For
* the given number of iterations, we generate a randomized test
* statistic and compare it to the actual test statistic. The return
* value is (s + 1) / (iterations + 1), where s is the number of
* iterations in which the randomized test statistic was greater than or
* equal to the actual test statistic.
*
* @param A Outcome of one r.v.
* @param B Outcome of another r.v.
* @param iterations Number of randomized trials to run; must be positive
* @return Significance level by randomization
* @throws IllegalArgumentException if the arrays are empty or differ in
* length, or if iterations is not positive
*/
public static double sigLevelByApproxRand(double[] A, double[] B, int iterations) {
  if (A.length == 0) {
    throw new IllegalArgumentException("Input arrays must not be empty!");
  }
  if (A.length != B.length) {
    throw new IllegalArgumentException("Input arrays must have equal length!");
  }
  if (iterations <= 0) {
    throw new IllegalArgumentException("Number of iterations must be positive!");
  }
  // Statistic on the data as given, with no swapping.
  final double observed = absDiffOfMeans(A, B, false);
  int atLeastAsExtreme = 0;
  int remaining = iterations;
  while (remaining-- > 0) {
    // Statistic on a randomly swapped version of the data.
    final double randomized = absDiffOfMeans(A, B, true);
    if (randomized >= observed) {
      atLeastAsExtreme++;
    }
  }
  // One is added to both the count and the number of trials.
  return (double) (atLeastAsExtreme + 1) / (double) (iterations + 1);
}
/**
 * Computes the significance level by approximate randomization for two
 * int arrays, using 1000 iterations.
 *
 * @param A Outcome of one r.v.
 * @param B Outcome of another r.v.
 * @return Significance level by randomization
 */
public static double sigLevelByApproxRand(int[] A, int[] B) {
  final int defaultIterations = 1000;
  return sigLevelByApproxRand(A, B, defaultIterations);
}
/**
 * Computes the significance level by approximate randomization for two
 * int arrays. The inputs are validated, copied element by element into
 * double arrays, and handed to the double[] overload.
 *
 * @param A Outcome of one r.v.
 * @param B Outcome of another r.v.
 * @param iterations Number of randomized trials to run; must be positive
 * @return Significance level by randomization
 * @throws IllegalArgumentException if the arrays are empty or differ in
 *     length, or if iterations is not positive
 */
public static double sigLevelByApproxRand(int[] A, int[] B, int iterations) {
  if (A.length == 0) {
    throw new IllegalArgumentException("Input arrays must not be empty!");
  }
  if (A.length != B.length) {
    throw new IllegalArgumentException("Input arrays must have equal length!");
  }
  if (iterations <= 0) {
    throw new IllegalArgumentException("Number of iterations must be positive!");
  }
  final int n = A.length;
  final double[] first = new double[n];
  final double[] second = new double[B.length];
  for (int idx = 0; idx < n; idx++) {
    first[idx] = A[idx];
    second[idx] = B[idx];
  }
  return sigLevelByApproxRand(first, second, iterations);
}
/**
 * Computes the significance level by approximate randomization for two
 * boolean arrays, using 1000 iterations.
 *
 * @param A Outcome of one r.v.
 * @param B Outcome of another r.v.
 * @return Significance level by randomization
 */
public static double sigLevelByApproxRand(boolean[] A, boolean[] B) {
  final int defaultIterations = 1000;
  return sigLevelByApproxRand(A, B, defaultIterations);
}
/**
 * Computes the significance level by approximate randomization for two
 * boolean arrays. The inputs are validated, converted to double arrays
 * (true becomes 1.0, false becomes 0.0), and handed to the double[] overload.
 *
 * @param A Outcome of one r.v.
 * @param B Outcome of another r.v.
 * @param iterations Number of randomized trials to run; must be positive
 * @return Significance level by randomization
 * @throws IllegalArgumentException if the arrays are empty or differ in
 *     length, or if iterations is not positive
 */
public static double sigLevelByApproxRand(boolean[] A, boolean[] B, int iterations) {
  if (A.length == 0) {
    throw new IllegalArgumentException("Input arrays must not be empty!");
  }
  if (A.length != B.length) {
    throw new IllegalArgumentException("Input arrays must have equal length!");
  }
  if (iterations <= 0) {
    throw new IllegalArgumentException("Number of iterations must be positive!");
  }
  final int n = A.length;
  final double[] first = new double[n];
  final double[] second = new double[B.length];
  for (int idx = 0; idx < n; idx++) {
    first[idx] = A[idx] ? 1.0 : 0.0;
    second[idx] = B[idx] ? 1.0 : 0.0;
  }
  return sigLevelByApproxRand(first, second, iterations);
}
// Returns the absolute difference between the means of arrays A and B.
// If 'randomize' is true, swaps matched A & B entries with 50% probability
// Assumes input arrays have equal, non-zero length.
private static double absDiffOfMeans(double[] A, double[] B, boolean randomize) {
  final Random coin = new Random();
  double sumFirst = 0.0;
  double sumSecond = 0.0;
  for (int idx = 0; idx < A.length; idx++) {
    // The coin is only flipped when randomization was requested.
    final boolean swap = randomize && coin.nextBoolean();
    sumFirst += swap ? B[idx] : A[idx];
    sumSecond += swap ? A[idx] : B[idx];
  }
  final double meanFirst = sumFirst / A.length;
  final double meanSecond = sumSecond / B.length;
  return Math.abs(meanFirst - meanSecond);
}
// PRINTING FUNCTIONS
/**
 * Renders each byte as eight '0'/'1' characters, most significant bit
 * first, with a single space appended after every byte.
 *
 * @param b the bytes to render
 * @return the bit string; empty for an empty array
 */
public static String toBinaryString(byte[] b) {
  final StringBuilder bits = new StringBuilder();
  for (int idx = 0; idx < b.length; idx++) {
    for (int shift = 7; shift >= 0; shift--) {
      bits.append(((b[idx] >> shift) & 1) == 1 ? '1' : '0');
    }
    bits.append(' ');
  }
  return bits.toString();
}
/**
 * Formats the array with the two-argument overload and no NumberFormat.
 *
 * @param a the array to format
 * @return the result of {@code toString(a, null)}
 */
public static String toString(double[] a) {
  final NumberFormat noFormat = null;
  return toString(a, noFormat);
}
/**
 * Formats the array as a bracketed list such as {@code [1.0, 2.5]}, with
 * elements separated by a comma and a space.
 *
 * @param a the array to format; may be null
 * @param nf the formatter applied to each element, or null to use
 *     {@code String.valueOf}
 * @return the formatted list, "[]" for an empty array, or null when
 *     {@code a} is null
 */
public static String toString(double[] a, NumberFormat nf) {
  if (a == null) {
    return null;
  }
  final StringBuilder text = new StringBuilder("[");
  for (int idx = 0; idx < a.length; idx++) {
    if (idx > 0) {
      text.append(", ");
    }
    text.append(nf != null ? nf.format(a[idx]) : String.valueOf(a[idx]));
  }
  return text.append(']').toString();
}
/**
 * Formats the array with the two-argument overload and no NumberFormat.
 *
 * @param a the array to format
 * @return the result of {@code toString(a, null)}
 */
public static String toString(float[] a) {
  final NumberFormat noFormat = null;
  return toString(a, noFormat);
}
/**
 * Formats the array as a bracketed list such as {@code [1.0, 2.5]}, with
 * elements separated by a comma and a space.
 *
 * @param a the array to format; may be null
 * @param nf the formatter applied to each element, or null to use
 *     {@code String.valueOf}
 * @return the formatted list, "[]" for an empty array, or null when
 *     {@code a} is null
 */
public static String toString(float[] a, NumberFormat nf) {
  if (a == null) {
    return null;
  }
  final StringBuilder text = new StringBuilder("[");
  for (int idx = 0; idx < a.length; idx++) {
    if (idx > 0) {
      text.append(", ");
    }
    text.append(nf != null ? nf.format(a[idx]) : String.valueOf(a[idx]));
  }
  return text.append(']').toString();
}
/**
 * Formats the array with the two-argument overload and no NumberFormat.
 *
 * @param a the array to format
 * @return the result of {@code toString(a, null)}
 */
public static String toString(int[] a) {
  final NumberFormat noFormat = null;
  return toString(a, noFormat);
}
/**
 * Formats the array as a bracketed list such as {@code [1, 2, 3]}, with
 * elements separated by a comma and a space.
 *
 * @param a the array to format; may be null
 * @param nf the formatter applied to each element, or null to use
 *     {@code String.valueOf}
 * @return the formatted list, "[]" for an empty array, or null when
 *     {@code a} is null
 */
public static String toString(int[] a, NumberFormat nf) {
  if (a == null) {
    return null;
  }
  final StringBuilder text = new StringBuilder("[");
  for (int idx = 0; idx < a.length; idx++) {
    if (idx > 0) {
      text.append(", ");
    }
    text.append(nf != null ? nf.format(a[idx]) : String.valueOf(a[idx]));
  }
  return text.append(']').toString();
}
/**
 * Formats the array with the two-argument overload and no NumberFormat.
 *
 * @param a the array to format
 * @return the result of {@code toString(a, null)}
 */
public static String toString(byte[] a) {
  final NumberFormat noFormat = null;
  return toString(a, noFormat);
}
/**
 * Formats the array as a bracketed list such as {@code [1, -2]}, with
 * elements separated by a comma and a space.
 *
 * @param a the array to format; may be null
 * @param nf the formatter applied to each element, or null to use
 *     {@code String.valueOf}
 * @return the formatted list, "[]" for an empty array, or null when
 *     {@code a} is null
 */
public static String toString(byte[] a, NumberFormat nf) {
  if (a == null) {
    return null;
  }
  final StringBuilder text = new StringBuilder("[");
  for (int idx = 0; idx < a.length; idx++) {
    if (idx > 0) {
      text.append(", ");
    }
    text.append(nf != null ? nf.format(a[idx]) : String.valueOf(a[idx]));
  }
  return text.append(']').toString();
}
/**
 * Formats the matrix as a table with no row or column labels, a label width
 * and cell width of 10, the default {@code NumberFormat}, and no totals.
 *
 * @param counts the matrix to format
 * @return the formatted table
 */
public static String toString(int[][] counts) {
  final int width = 10;
  final NumberFormat defaultFormat = NumberFormat.getInstance();
  return toString(counts, null, null, width, width, defaultFormat, false);
}
/**
 * Formats a matrix of counts as a text table, one matrix row per line, with
 * optional row labels, a column header line, and row/column totals.
 *
 * @param counts the matrix to format; column totals are sized from the
 *     first row, so the rows are assumed to be equally long
 * @param rowLabels labels written at the start of each row, or null for none;
 *     a null element is written as "null"
 * @param colLabels labels written in a header line, or null for no header;
 *     each is cut to at most cellSize - 1 characters, and a null element is
 *     written as "null"
 * @param labelSize width passed to the padding helpers for the row-label column
 * @param cellSize width passed to the padding helpers for every value column
 * @param nf the formatter applied to every cell and total
 * @param printTotals whether to add a "Total" column and a final "Total" line
 * @return the formatted table, or "" when the matrix has no rows or its first
 *     row is empty
 */
public static String toString(int[][] counts, Object[] rowLabels, Object[] colLabels, int labelSize, int cellSize, NumberFormat nf, boolean printTotals) {
  if (counts.length == 0 || counts[0].length == 0) {
    return "";
  }
  // Accumulate the row sums, column sums and grand total up front.
  final int rowCount = counts.length;
  final int colCount = counts[0].length;
  final int[] rowSums = new int[rowCount];
  final int[] colSums = new int[colCount];
  int grandTotal = 0;
  for (int r = 0; r < rowCount; r++) {
    final int[] row = counts[r];
    for (int c = 0; c < row.length; c++) {
      final int cell = row[c];
      rowSums[r] += cell;
      colSums[c] += cell;
      grandTotal += cell;
    }
  }
  final StringBuilder out = new StringBuilder();
  // Header line.
  if (colLabels != null) {
    // Blank filler standing in for the row-label column.
    out.append(StringUtils.padLeft("", labelSize));
    final int maxChars = cellSize - 1;
    for (int c = 0; c < colCount; c++) {
      String header = String.valueOf(colLabels[c]);
      if (header.length() > maxChars) {
        header = header.substring(0, maxChars);
      }
      out.append(StringUtils.padLeft(header, cellSize));
    }
    if (printTotals) {
      out.append(StringUtils.padLeftOrTrim("Total", cellSize));
    }
    out.append('\n');
  }
  // One line per matrix row: label, cells, then the row total.
  for (int r = 0; r < rowCount; r++) {
    if (rowLabels != null) {
      out.append(StringUtils.padOrTrim(String.valueOf(rowLabels[r]), labelSize));
    }
    for (int cell : counts[r]) {
      out.append(StringUtils.padLeft(nf.format(cell), cellSize));
    }
    if (printTotals) {
      out.append(StringUtils.padLeft(nf.format(rowSums[r]), cellSize));
    }
    out.append('\n');
  }
  // Closing line with the column totals and the grand total.
  if (printTotals) {
    out.append(StringUtils.pad("Total", labelSize));
    for (int c = 0; c < colSums.length; c++) {
      out.append(StringUtils.padLeft(nf.format(colSums[c]), cellSize));
    }
    out.append(StringUtils.padLeft(nf.format(grandTotal), cellSize));
  }
  return out.toString();
}
/**
 * Formats the matrix as a table with a cell width of 10, no row or column
 * labels, the default {@code NumberFormat}, and no totals.
 *
 * @param counts the matrix to format
 * @return the formatted table
 */
public static String toString(double[][] counts) {
  final int width = 10;
  final NumberFormat defaultFormat = NumberFormat.getInstance();
  return toString(counts, width, null, null, defaultFormat, false);
}
/**
 * Formats a matrix of doubles as a text table, one matrix row per line, with
 * optional row labels, a column header line, and row/column totals.
 *
 * @param counts the matrix to format; column totals are sized from the
 *     first row, so the rows are assumed to be equally long
 * @param cellSize width passed to the padding helpers for every column,
 *     including the row-label column
 * @param rowLabels labels written at the start of each row, or null for none;
 *     a null element is written as "null"
 * @param colLabels labels written in a header line, or null for no header;
 *     each is cut to at most cellSize - 1 characters, and a null element is
 *     written as "null"
 * @param nf the formatter applied to every cell and total
 * @param printTotals whether to add a "Total" column and a final "Total" line
 * @return the formatted table, null when {@code counts} is null, or "" when
 *     the matrix has no rows
 */
public static String toString(double[][] counts, int cellSize, Object[] rowLabels, Object[] colLabels, NumberFormat nf, boolean printTotals) {
  if (counts == null) {
    return null;
  }
  // A matrix without rows has no first row to size the columns from. The
  // int[][] overload returns "" in that case, so do the same here instead of
  // failing on counts[0].
  if (counts.length == 0) {
    return "";
  }
  // first compute row totals and column totals
  double[] rowTotals = new double[counts.length];
  double[] colTotals = new double[counts[0].length]; // assume it's square
  double total = 0.0;
  for (int i = 0; i < counts.length; i++) {
    for (int j = 0; j < counts[i].length; j++) {
      rowTotals[i] += counts[i][j];
      colTotals[j] += counts[i][j];
      total += counts[i][j];
    }
  }
  StringBuilder result = new StringBuilder();
  // column labels
  if (colLabels != null) {
    result.append(StringUtils.padLeft("", cellSize)); // spacing for the row labels
    for (int j = 0; j < counts[0].length; j++) {
      // String.valueOf renders a null label as "null", matching the int[][] overload.
      String s = String.valueOf(colLabels[j]);
      if (s.length() > cellSize - 1) {
        s = s.substring(0, cellSize - 1);
      }
      s = StringUtils.padLeft(s, cellSize);
      result.append(s);
    }
    if (printTotals) {
      result.append(StringUtils.padLeftOrTrim("Total", cellSize));
    }
    result.append('\n');
  }
  for (int i = 0; i < counts.length; i++) {
    // row label
    if (rowLabels != null) {
      String s = String.valueOf(rowLabels[i]);
      s = StringUtils.padOrTrim(s, cellSize); // left align this guy only
      result.append(s);
    }
    // value
    for (int j = 0; j < counts[i].length; j++) {
      result.append(StringUtils.padLeft(nf.format(counts[i][j]), cellSize));
    }
    // the row total
    if (printTotals) {
      result.append(StringUtils.padLeft(nf.format(rowTotals[i]), cellSize));
    }
    result.append('\n');
  }
  // the col totals
  if (printTotals) {
    result.append(StringUtils.pad("Total", cellSize));
    for (double colTotal : colTotals) {
      result.append(StringUtils.padLeft(nf.format(colTotal), cellSize));
    }
    result.append(StringUtils.padLeft(nf.format(total), cellSize));
  }
  return result.toString();
}
/**
 * Formats the matrix as a table with a cell width of 10, no row or column
 * labels, the integer {@code NumberFormat} instance, and no totals.
 *
 * @param counts the matrix to format
 * @return the formatted table
 */
public static String toString(float[][] counts) {
  final int width = 10;
  final NumberFormat integerFormat = NumberFormat.getIntegerInstance();
  return toString(counts, width, null, null, integerFormat, false);
}
/**
 * Formats a matrix of floats as a text table, one matrix row per line, with
 * optional row labels, a column header line, and row/column totals. Totals
 * are accumulated as doubles.
 *
 * @param counts the matrix to format; column totals are sized from the
 *     first row, so the rows are assumed to be equally long
 * @param cellSize width passed to the padding helpers for every column,
 *     including the row-label column
 * @param rowLabels labels written at the start of each row, or null for none;
 *     a null element is written as "null"
 * @param colLabels labels written in a header line, or null for no header;
 *     a null element is written as "null"
 * @param nf the formatter applied to every cell and total
 * @param printTotals whether to add a "Total" column and a final "Total" line
 * @return the formatted table, or "" when the matrix has no rows
 */
public static String toString(float[][] counts, int cellSize, Object[] rowLabels, Object[] colLabels, NumberFormat nf, boolean printTotals) {
  // A matrix without rows has no first row to size the columns from. The
  // int[][] overload returns "" in that case, so do the same here instead of
  // failing on counts[0].
  if (counts.length == 0) {
    return "";
  }
  // first compute row totals and column totals
  double[] rowTotals = new double[counts.length];
  double[] colTotals = new double[counts[0].length]; // assume it's square
  double total = 0.0;
  for (int i = 0; i < counts.length; i++) {
    for (int j = 0; j < counts[i].length; j++) {
      rowTotals[i] += counts[i][j];
      colTotals[j] += counts[i][j];
      total += counts[i][j];
    }
  }
  StringBuilder result = new StringBuilder();
  // column labels
  if (colLabels != null) {
    result.append(StringUtils.padLeft("", cellSize)); // spacing for the row labels
    for (int j = 0; j < counts[0].length; j++) {
      // String.valueOf renders a null label as "null", matching the int[][] overload.
      String s = String.valueOf(colLabels[j]);
      s = StringUtils.padLeftOrTrim(s, cellSize);
      result.append(s);
    }
    if (printTotals) {
      result.append(StringUtils.padLeftOrTrim("Total", cellSize));
    }
    result.append('\n');
  }
  for (int i = 0; i < counts.length; i++) {
    // row label
    if (rowLabels != null) {
      String s = String.valueOf(rowLabels[i]);
      s = StringUtils.pad(s, cellSize); // left align this guy only
      result.append(s);
    }
    // value
    for (int j = 0; j < counts[i].length; j++) {
      result.append(StringUtils.padLeft(nf.format(counts[i][j]), cellSize));
    }
    // the row total
    if (printTotals) {
      result.append(StringUtils.padLeft(nf.format(rowTotals[i]), cellSize));
    }
    result.append('\n');
  }
  // the col totals
  if (printTotals) {
    result.append(StringUtils.pad("Total", cellSize));
    for (double colTotal : colTotals) {
      result.append(StringUtils.padLeft(nf.format(colTotal), cellSize));
    }
    result.append(StringUtils.padLeft(nf.format(total), cellSize));
  }
  return result.toString();
}
/**
* For testing only.
* @param args Ignored
*/
public static void main(String[] args) {
  final int sampleSize = 100;
  final double centerA = 70.0;
  final double centerB = 70.5;
  final Random noise = new Random();
  final double[] A = new double[sampleSize];
  final double[] B = new double[sampleSize];
  // Two samples of Gaussian noise around centers that differ by 0.5.
  for (int idx = 0; idx < sampleSize; idx++) {
    A[idx] = centerA + noise.nextGaussian();
    B[idx] = centerB + noise.nextGaussian();
  }
  System.out.println("A has length " + A.length + " and mean " + mean(A));
  System.out.println("B has length " + B.length + " and mean " + mean(B));
  // Print ten independently computed p-values for the same data.
  int trial = 0;
  while (trial < 10) {
    System.out.println("p-value: " + sigLevelByApproxRand(A, B));
    trial++;
  }
}
/**
 * Returns a copy of the matrix in which every row is a freshly allocated
 * array, so changes to the copy do not affect the original.
 *
 * @param counts the matrix to copy; rows may have different lengths
 * @return a new matrix with the same row lengths and contents
 */
public static int[][] deepCopy(int[][] counts) {
  final int[][] copy = new int[counts.length][];
  int row = 0;
  while (row < counts.length) {
    copy[row] = Arrays.copyOf(counts[row], counts[row].length);
    row++;
  }
  return copy;
}
/**
 * Builds a square matrix with one row and one column per row of
 * {@code data}. Entry (j, k) is computed by summing, over every column index
 * of the first data row, the product of the deviations of rows j and k from
 * their own means, then taking the square root of that sum and dividing it
 * by the number of columns.
 *
 * NOTE(review): because the square root is applied to the summed products, a
 * negative sum produces NaN for that entry. Confirm this is the intended
 * formula before relying on the off-diagonal values.
 *
 * @param data the input rows; every row is read at each column index of
 *     the first row
 * @return a data.length by data.length matrix
 */
public static double[][] covariance(double[][] data) {
  final int rowCount = data.length;
  final double[] rowMeans = new double[rowCount];
  for (int r = 0; r < rowCount; r++) {
    rowMeans[r] = mean(data[r]);
  }
  final double[][] result = new double[rowCount][rowCount];
  final int colCount = data[0].length;
  // Accumulate the products of deviations one column at a time.
  for (int col = 0; col < colCount; col++) {
    for (int j = 0; j < rowCount; j++) {
      final double devJ = rowMeans[j] - data[j][col];
      for (int k = 0; k < rowCount; k++) {
        result[j][k] += devJ * (rowMeans[k] - data[k][col]);
      }
    }
  }
  // Square root of each sum, scaled by the number of columns.
  for (double[] resultRow : result) {
    for (int k = 0; k < resultRow.length; k++) {
      resultRow[k] = Math.sqrt(resultRow[k]) / colCount;
    }
  }
  return result;
}
/**
 * Stores {@code b[i] + c[i] * d} into {@code a[i]} for every index of
 * {@code a}.
 *
 * @param a the destination array; its length determines how many entries
 *     are written
 * @param b the array added in
 * @param c the array that is scaled before being added
 * @param d the scale factor applied to c
 */
public static void addMultInto(double[] a, double[] b, double[] c, double d) {
  int idx = 0;
  while (idx < a.length) {
    a[idx] = b[idx] + c[idx] * d;
    idx++;
  }
}
/**
 * Stores {@code b[i] * c} into {@code a[i]} for every index of {@code a}.
 *
 * @param a the destination array; its length determines how many entries
 *     are written
 * @param b the array to scale
 * @param c the scale factor
 */
public static void multiplyInto(double[] a, double[] b, double c) {
  int idx = 0;
  while (idx < a.length) {
    a[idx] = b[idx] * c;
    idx++;
  }
}
/**
 * Returns the entropy of {@code probs} in nats (natural logarithm): the
 * negated sum of p * ln(p). Entries that are exactly 0.0 are skipped.
 *
 * @param probs the probabilities
 * @return the negated sum of p * ln(p) over the non-zero entries
 */
public static double entropy(double[] probs) {
  double nats = 0.0;
  for (int idx = 0; idx < probs.length; idx++) {
    final double p = probs[idx];
    if (p == 0.0) {
      continue; // skip exact zeros so log(0) is never taken
    }
    nats -= p * Math.log(p);
  }
  return nats;
}
/**
 * Checks that every element of {@code vector} is a finite number, failing on
 * the first element that is not.
 *
 * @param vector the values to check
 * @param vectorName the name used for the vector in the exception message
 * @throws InvalidElementException if an element is NaN or infinite; the
 *     message names the vector and the offending index
 */
public static void assertFinite(double[] vector, String vectorName) throws InvalidElementException {
  int idx = 0;
  while (idx < vector.length) {
    final double value = vector[idx];
    if (Double.isNaN(value)) {
      throw new InvalidElementException("NaN found in " + vectorName + " element " + idx);
    }
    if (Double.isInfinite(value)) {
      throw new InvalidElementException("Infinity found in " + vectorName + " element " + idx);
    }
    idx++;
  }
}
/**
 * Unchecked exception thrown by {@code assertFinite} when an array element
 * is NaN or infinite.
 */
public static class InvalidElementException extends RuntimeException {

  private static final long serialVersionUID = 1647150702529757545L;

  /**
   * Creates the exception with the given detail message.
   *
   * @param message the detail message
   */
  public InvalidElementException(String message) {
    super(message);
  }

}
}