/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math.distribution;

import java.io.Serializable;

import org.apache.commons.math.MathRuntimeException;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.util.FastMath;

/**
 * Implementation for the {@link ZipfDistribution}.
 * <p>
 * For a number of elements <code>N</code> and an exponent <code>s</code>,
 * the probability mass at an integer <code>x</code> in <code>[1, N]</code> is
 * <code>(1 / x^s) / H(N, s)</code>, where <code>H(N, s)</code> is the sum of
 * <code>1 / k^s</code> for <code>k = 1..N</code>. The mass is zero outside
 * that range.
 * </p>
 *
 * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $
 */
public class ZipfDistributionImpl extends AbstractIntegerDistribution
    implements ZipfDistribution, Serializable {

    /** Serializable version identifier. */
    private static final long serialVersionUID = -140627372283420404L;

    /** Number of elements. */
    private int numberOfElements;

    /** Exponent parameter of the distribution. */
    private double exponent;

    /**
     * Create a new Zipf distribution with the given number of elements and
     * exponent. Both values must be positive; otherwise an
     * <code>IllegalArgumentException</code> is thrown.
     *
     * @param numberOfElements the number of elements
     * @param exponent the exponent
     * @exception IllegalArgumentException if n &le; 0, or if s is NaN or
     * s &le; 0.0
     */
    public ZipfDistributionImpl(final int numberOfElements, final double exponent)
        throws IllegalArgumentException {
        setNumberOfElementsInternal(numberOfElements);
        setExponentInternal(exponent);
    }

    /**
     * Get the number of elements (e.g. corpus size) for the distribution.
     *
     * @return the number of elements
     */
    public int getNumberOfElements() {
        return numberOfElements;
    }

    /**
     * Set the number of elements (e.g. corpus size) for the distribution.
     * The parameter value must be positive; otherwise an
     * <code>IllegalArgumentException</code> is thrown.
     *
     * @param n the number of elements
     * @exception IllegalArgumentException if n &le; 0
     * @deprecated as of 2.1 (class will become immutable in 3.0)
     */
    @Deprecated
    public void setNumberOfElements(final int n) {
        setNumberOfElementsInternal(n);
    }

    /**
     * Set the number of elements (e.g. corpus size) for the distribution.
     * The parameter value must be positive; otherwise an
     * <code>IllegalArgumentException</code> is thrown and the current value
     * is left untouched.
     *
     * @param n the number of elements
     * @exception IllegalArgumentException if n &le; 0
     */
    private void setNumberOfElementsInternal(final int n)
        throws IllegalArgumentException {
        if (n <= 0) {
            // The smallest accepted value is 1; report that bound (and not 0,
            // which is itself rejected) as the required minimum.
            throw MathRuntimeException.createIllegalArgumentException(
                LocalizedFormats.INSUFFICIENT_DIMENSION, n, 1);
        }
        this.numberOfElements = n;
    }

    /**
     * Get the exponent characterising the distribution.
     *
     * @return the exponent
     */
    public double getExponent() {
        return exponent;
    }

    /**
     * Set the exponent characterising the distribution.
     * The parameter value must be positive; otherwise an
     * <code>IllegalArgumentException</code> is thrown.
     *
     * @param s the exponent
     * @exception IllegalArgumentException if s is NaN or s &le; 0.0
     * @deprecated as of 2.1 (class will become immutable in 3.0)
     */
    @Deprecated
    public void setExponent(final double s) {
        setExponentInternal(s);
    }

    /**
     * Set the exponent characterising the distribution.
     * The parameter value must be positive; otherwise an
     * <code>IllegalArgumentException</code> is thrown and the current value
     * is left untouched.
     *
     * @param s the exponent
     * @exception IllegalArgumentException if s is NaN or s &le; 0.0
     */
    private void setExponentInternal(final double s)
        throws IllegalArgumentException {
        // Written as a negated "greater than" so that NaN, for which every
        // ordered comparison is false, is rejected along with s <= 0.
        if (!(s > 0.0)) {
            throw MathRuntimeException.createIllegalArgumentException(
                LocalizedFormats.NOT_POSITIVE_EXPONENT, s);
        }
        this.exponent = s;
    }

    /**
     * The probability mass function P(X = x) for a Zipf distribution.
     *
     * @param x the value at which the probability mass function is evaluated.
     * @return the value of the probability mass function at x; 0 when x lies
     * outside <code>[1, numberOfElements]</code>
     */
    public double probability(final int x) {
        if (x <= 0 || x > numberOfElements) {
            return 0.0;
        }

        return (1.0 / FastMath.pow(x, exponent)) /
            generalizedHarmonic(numberOfElements, exponent);
    }

    /**
     * The cumulative distribution function P(X &le; x) for a Zipf
     * distribution.
     *
     * @param x the value at which the CDF is evaluated.
     * @return Zipf distribution function evaluated at x; 0 when x &le; 0 and
     * 1 when x &ge; numberOfElements
     */
    @Override
    public double cumulativeProbability(final int x) {
        if (x <= 0) {
            return 0.0;
        } else if (x >= numberOfElements) {
            return 1.0;
        }

        return generalizedHarmonic(x, exponent) /
            generalizedHarmonic(numberOfElements, exponent);
    }

    /**
     * Access the domain value lower bound, based on <code>p</code>, used to
     * bracket a CDF root. This implementation ignores <code>p</code> and
     * always returns 0, which lies just below the smallest value carrying
     * probability mass.
     *
     * @param p the desired probability for the critical value
     * @return domain value lower bound, i.e.
     * P(X &lt; <i>lower bound</i>) &lt; <code>p</code>
     */
    @Override
    protected int getDomainLowerBound(final double p) {
        return 0;
    }

    /**
     * Access the domain value upper bound, based on <code>p</code>, used to
     * bracket a CDF root. This implementation ignores <code>p</code> and
     * always returns the number of elements, the largest value carrying
     * probability mass.
     *
     * @param p the desired probability for the critical value
     * @return domain value upper bound, i.e.
     * P(X &lt; <i>upper bound</i>) &gt; <code>p</code>
     */
    @Override
    protected int getDomainUpperBound(final double p) {
        return numberOfElements;
    }

    /**
     * Calculates the Nth generalized harmonic number, i.e. the sum of
     * <code>1 / k^m</code> for <code>k = 1..n</code>. See
     * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic
     * Series</a>.
     *
     * @param n the term in the series to calculate (must be &ge; 1); for
     * n &lt; 1 the loop does not execute and 0 is returned
     * @param m the exponent; special case m == 1.0 is the harmonic series
     * @return the nth generalized harmonic number
     */
    private double generalizedHarmonic(final int n, final double m) {
        double value = 0;
        // Accumulate from k = n down to k = 1. For positive m this adds the
        // smallest terms first (presumably chosen to limit rounding error).
        for (int k = n; k > 0; --k) {
            value += 1.0 / FastMath.pow(k, m);
        }
        return value;
    }

    /**
     * Returns the lower bound of the support for the distribution.
     *
     * The lower bound of the support is always 1 no matter the parameters.
     *
     * @return lower bound of the support (always 1)
     * @since 2.2
     */
    public int getSupportLowerBound() {
        return 1;
    }

    /**
     * Returns the upper bound of the support for the distribution.
     *
     * The upper bound of the support is the number of elements.
     *
     * @return upper bound of the support
     * @since 2.2
     */
    public int getSupportUpperBound() {
        return getNumberOfElements();
    }

    /**
     * Returns the mean.
     *
     * For number of elements N and exponent s, the mean is
     * <code>Hs1 / Hs</code> where
     * <ul>
     *  <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
     *  <li><code>Hs = generalizedHarmonic(N, s)</code></li>
     * </ul>
     *
     * @return the mean
     * @since 2.2
     */
    protected double getNumericalMean() {
        final int n = getNumberOfElements();
        final double s = getExponent();

        final double hs1 = generalizedHarmonic(n, s - 1);
        final double hs = generalizedHarmonic(n, s);

        return hs1 / hs;
    }

    /**
     * Returns the variance.
     *
     * For number of elements N and exponent s, the variance is
     * <code>(Hs2 / Hs) - (Hs1^2 / Hs^2)</code> where
     * <ul>
     *  <li><code>Hs2 = generalizedHarmonic(N, s - 2)</code></li>
     *  <li><code>Hs1 = generalizedHarmonic(N, s - 1)</code></li>
     *  <li><code>Hs = generalizedHarmonic(N, s)</code></li>
     * </ul>
     *
     * @return the variance
     * @since 2.2
     */
    protected double getNumericalVariance() {
        final int n = getNumberOfElements();
        final double s = getExponent();

        final double hs2 = generalizedHarmonic(n, s - 2);
        final double hs1 = generalizedHarmonic(n, s - 1);
        final double hs = generalizedHarmonic(n, s);

        return (hs2 / hs) - ((hs1 * hs1) / (hs * hs));
    }
}