/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.mahout.clustering.dirichlet;
import java.util.Random;
import org.apache.mahout.common.RandomUtils;
import org.apache.mahout.math.DenseVector;
import org.apache.mahout.math.Vector;
import org.uncommons.maths.random.GaussianGenerator;
public final class UncommonDistributions {
public static final double SQRT2PI = Math.sqrt(2.0 * Math.PI);
private static final Random RANDOM = RandomUtils.getRandom();
private UncommonDistributions() {
}
// =============== start of BSD licensed code. See LICENSE.txt
/**
* Returns a double sampled according to this distribution. Uniformly fast for all k > 0. (Reference:
* Non-Uniform Random Variate Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html) Uses
* Cheng's rejection algorithm (GB) for k>=1, rejection from Weibull distribution for 0 < k < 1.
*/
public static double rGamma(double k, double lambda) {
boolean accept = false;
if (k >= 1.0) {
// Cheng's algorithm
double b = k - Math.log(4.0);
double c = k + Math.sqrt(2.0 * k - 1.0);
double lam = Math.sqrt(2.0 * k - 1.0);
double cheng = 1.0 + Math.log(4.5);
double x;
do {
double u = RANDOM.nextDouble();
double v = RANDOM.nextDouble();
double y = 1.0 / lam * Math.log(v / (1.0 - v));
x = k * Math.exp(y);
double z = u * v * v;
double r = b + c * y - x;
if (r >= 4.5 * z - cheng || r >= Math.log(z)) {
accept = true;
}
} while (!accept);
return x / lambda;
} else {
// Weibull algorithm
double c = 1.0 / k;
double d = (1.0 - k) * Math.pow(k, k / (1.0 - k));
double x;
do {
double u = RANDOM.nextDouble();
double v = RANDOM.nextDouble();
double z = -Math.log(u);
double e = -Math.log(v);
x = Math.pow(z, c);
if (z + e >= d + x) {
accept = true;
}
} while (!accept);
return x / lambda;
}
}
// ============= end of BSD licensed code
/**
* Returns a random sample from a beta distribution with the given shapes
*
* @param shape1
* a double representing shape1
* @param shape2
* a double representing shape2
* @return a Vector of samples
*/
public static double rBeta(double shape1, double shape2) {
double gam1 = rGamma(shape1, 1.0);
double gam2 = rGamma(shape2, 1.0);
return gam1 / (gam1 + gam2);
}
/**
* Returns a vector of random samples from a beta distribution with the given shapes
*
* @param k
* the number of samples to return
* @param shape1
* a double representing shape1
* @param shape2
* a double representing shape2
* @return a Vector of samples
*/
public static Vector rBeta(int k, double shape1, double shape2) {
// List<Double> params = new ArrayList<Double>(2);
// params.add(shape1);
// params.add(Math.max(0, shape2));
Vector result = new DenseVector(k);
for (int i = 0; i < k; i++) {
result.set(i, rBeta(shape1, shape2));
}
return result;
}
/**
* Return a random sample from the chi-squared (chi^2) distribution with df degrees of freedom.
*
* @return a double sample
*/
public static double rChisq(double df) {
double result = 0.0;
for (int i = 0; i < df; i++) {
double sample = rNorm(0.0, 1.0);
result += sample * sample;
}
return result;
}
/**
* Return a random value from a normal distribution with the given mean and standard deviation
*
* @param mean
* a double mean value
* @param sd
* a double standard deviation
* @return a double sample
*/
public static double rNorm(double mean, double sd) {
GaussianGenerator dist = new GaussianGenerator(mean, sd, RANDOM);
return dist.nextValue();
}
/**
* Return the normal density function value for the sample x
*
* pdf = 1/[sqrt(2*p)*s] * e^{-1/2*[(x-m)/s]^2}
*
* @param x
* a double sample value
* @param m
* a double mean value
* @param s
* a double standard deviation
* @return a double probability value
*/
public static double dNorm(double x, double m, double s) {
double xms = (x - m) / s;
double ex = xms * xms / 2.0;
double exp = Math.exp(-ex);
return exp / (SQRT2PI * s);
}
/** Returns one sample from a multinomial. */
public static int rMultinom(Vector probabilities) {
// our probability argument are not normalized.
double total = probabilities.zSum();
double nextDouble = RANDOM.nextDouble();
double p = nextDouble * total;
for (int i = 0; i < probabilities.size(); i++) {
double pi = probabilities.get(i);
if (p < pi) {
return i;
} else {
p -= pi;
}
}
// can't happen except for round-off error so we don't care what we return here
return 0;
}
/**
* Returns a multinomial vector sampled from the given probabilities
*
* rmultinom should be implemented as successive binomial sampling.
*
* Keep a normalizing amount that starts with 1 (I call it total).
*
* For each i k[i] = rbinom(p[i] / total, size); total -= p[i]; size -= k[i];
*
* @param size
* the size parameter of the binomial distribution
* @param probabilities
* a Vector of probabilities
* @return a multinomial distribution Vector
*/
public static Vector rMultinom(int size, Vector probabilities) {
// our probability argument may not be normalized.
double total = probabilities.zSum();
int cardinality = probabilities.size();
Vector result = new DenseVector(cardinality);
for (int i = 0; total > 0 && i < cardinality; i++) {
double p = probabilities.get(i);
int ki = rBinomial(size, p / total);
total -= p;
size -= ki;
result.set(i, ki);
}
return result;
}
/**
* Returns an integer sampled according to this distribution. Takes time proportional to np + 1. (Reference:
* Non-Uniform Random Variate Generation, Devroye http://cgm.cs.mcgill.ca/~luc/rnbookindex.html) Second
* time-waiting algorithm.
*/
public static int rBinomial(int n, double p) {
if (p >= 1.0) {
return n; // needed to avoid infinite loops and negative results
}
double q = -Math.log1p(-p);
double sum = 0.0;
int x = 0;
while (sum <= q) {
double u = RANDOM.nextDouble();
double e = -Math.log(u);
sum += e / (n - x);
x++;
}
if (x == 0) {
return 0;
}
return x - 1;
}
/**
* Sample from a Dirichlet distribution, returning a vector of probabilities using a stick-breaking
* algorithm
*
* @param totalCounts
* an unnormalized count Vector
* @param alpha0
* a double
* @return a Vector of probabilities
*/
public static Vector rDirichlet(Vector totalCounts, double alpha0) {
Vector pi = totalCounts.like();
double total = totalCounts.zSum();
double remainder = 1.0;
for (int k = 0; k < pi.size(); k++) {
double countK = totalCounts.get(k);
total -= countK;
double betaK = rBeta(1.0 + countK, Math.max(0.0, alpha0 + total));
double piK = betaK * remainder;
pi.set(k, piK);
remainder -= piK;
}
return pi;
}
}