/*
* Apache License
* Version 2.0, January 2004
* http://www.apache.org/licenses/
*
* Copyright 2013 Aurelian Tutuianu
* Copyright 2014 Aurelian Tutuianu
* Copyright 2015 Aurelian Tutuianu
* Copyright 2016 Aurelian Tutuianu
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
*/
package rapaio.core.distributions;
import rapaio.math.MathTools;
import rapaio.sys.WS;
import static rapaio.math.MathTools.*;
/**
* Binomial distribution.
* It models the number of successes from n trials, where all trials
* are independent Bernoulli random variables with parameter p.
*
* @author <a href="mailto:padreati@yahoo.com>Aurelian Tutuianu</a>
*/
public class Binomial implements Distribution {
private static final long serialVersionUID = 8813621560796556828L;
private final double p;
private final double n;
public Binomial(double p, double n) {
this.p = p;
this.n = n;
}
@Override
public boolean discrete() {
return true;
}
@Override
public String name() {
return "Binomial(p=" + WS.formatFlex(p) + ",n=" + ((int) n) + ")";
}
@Override
public double pdf(double x) {
if (x < min() || x > max()) return 0;
if (Math.abs(Math.rint(x) - x) < 1e-12)
return Math.exp(logBinomial(x, n, p));
return 0.0;
}
@Override
public double cdf(double x) {
if (x >= n)
return 1.0;
x = MathTools.floor(x);
return betaIncReg(1 - p, n - x, x + 1);
}
@Override
public double quantile(double p) {
double pr = this.p;
double q, mu, sigma, gamma, z, y;
if (Double.isNaN(p) || Double.isNaN(n) || Double.isNaN(pr)) return p + n + pr;
if (Double.isInfinite(n) || Double.isInfinite(pr)) return Double.NaN;
/* if log_p is true, p = -Inf is a legitimate value */
if (Double.isInfinite(p)) return Double.NaN;
//if(n != floor(n + 0.5)) return Double.NaN;
if (n != rint(n)) return Double.NaN;
if (pr < 0 || pr > 1 || n < 0) return Double.NaN;
// R_Q_P01_boundaries(p, 0, n);
/* !log_p */
if (p < 0 || p > 1)
return Double.NaN;
if (p == 0)
return 0;
if (p == 1)
return n;
if (pr == 0. || n == 0) return 0.;
q = 1 - pr;
if (q == 0.) return n; /* covers the full range of the densities */
mu = n * pr;
sigma = sqrt(n * pr * q);
gamma = (q - pr) / sigma;
/* Note : "same" code in qpois.c, qbinom.c, qnbinom.c --
* FIXME: This is far from optimal [cancellation for p ~= 1, etc]: */
/* temporary hack --- FIXME --- */
if (p + 1.01 * DBL_EPSILON >= 1.) return n;
/* y := approx.value (Cornish-Fisher expansion) : */
z = new Normal(0, 1).quantile(p);
//y = floor(mu + sigma * (z + gamma * (z*z - 1) / 6) + 0.5);
y = rint(mu + sigma * (z + gamma * (z * z - 1) / 6));
if (y > n) /* way off */ y = n;
z = new Binomial(pr, n).cdf(y);
/* fuzz to ensure left continuity: */
p *= 1 - 64 * DBL_EPSILON;
double[] zp = new double[]{z};
if (n < 1e5) return do_search(y, zp, p, n, pr, 1);
/* Otherwise be a bit cleverer in the search */
{
double incr = floor(n * 0.001), oldincr;
do {
oldincr = incr;
y = do_search(y, zp, p, n, pr, incr);
incr = Math.max(1, floor(incr / 100));
} while (oldincr > 1 && incr > n * 1e-15);
return y;
}
}
private static double do_search(double y, double[] z, double p, double n, double pr, double incr) {
if (z[0] >= p) {
/* search to the left */
while (true) {
double newz = new Binomial(pr, n).cdf(y - incr);
if (y == 0 || newz < p)
return y;
y = Math.max(0, y - incr);
z[0] = newz;
}
} else { /* search to the right */
while (true) {
y = Math.min(y + incr, n);
if (y == n || (z[0] = new Binomial(pr, n).cdf(y)) >= p)
return y;
}
}
}
@Override
public double min() {
return 0;
}
@Override
public double max() {
return n;
}
@Override
public double mean() {
return n * p;
}
@Override
public double mode() {
double low = floor((n + 1) * p);
double p1 = pdf(low - 1);
double p2 = pdf(low);
return (p1 > p2) ? low - 1 : low;
}
@Override
public double var() {
return n * p * (1 - p);
}
@Override
public double skewness() {
return (1 - 2 * p) / Math.sqrt(n * p * (1 - p));
}
@Override
public double kurtosis() {
return (1 - 6 * p * (1 - p)) / (n * p * (1 - p));
}
/**
* The wikipedia dedicated page (http://en.wikipedia.org/wiki/Binomial_distribution)
* states that entropy for binomial is:
* $$\frac1 2 \log_2 \big( 2\pi e\, np(1-p) \big) + O \left( \frac{1}{n} \right)$$
* <p>
* According to this page is lighter to use an approximation. The following page
* http://math.stackexchange.com/questions/244455/entropy-of-a-binomial-distribution
* documents how this entropy is approximated.
*
* @return entropy value
*/
@Override
public double entropy() {
return log(2 * Math.PI * Math.E * n * p * (1 - p)) / (2.0 * Math.log(2));
}
}