/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.commons.math3.distribution; import org.apache.commons.math3.exception.NotStrictlyPositiveException; import org.apache.commons.math3.exception.util.LocalizedFormats; import org.apache.commons.math3.random.RandomGenerator; import org.apache.commons.math3.random.Well19937c; import org.apache.commons.math3.util.FastMath; /** * Implementation of the Zipf distribution. * * @see <a href="http://mathworld.wolfram.com/ZipfDistribution.html">Zipf distribution (MathWorld)</a> */ public class ZipfDistribution extends AbstractIntegerDistribution { /** Serializable version identifier. */ private static final long serialVersionUID = -140627372283420404L; /** Number of elements. */ private final int numberOfElements; /** Exponent parameter of the distribution. */ private final double exponent; /** Cached numerical mean */ private double numericalMean = Double.NaN; /** Whether or not the numerical mean has been calculated */ private boolean numericalMeanIsCalculated = false; /** Cached numerical variance */ private double numericalVariance = Double.NaN; /** Whether or not the numerical variance has been calculated */ private boolean numericalVarianceIsCalculated = false; /** * Create a new Zipf distribution with the given number of elements and * exponent. * <p> * <b>Note:</b> this constructor will implicitly create an instance of * {@link Well19937c} as random generator to be used for sampling only (see * {@link #sample()} and {@link #sample(int)}). In case no sampling is * needed for the created distribution, it is advised to pass {@code null} * as random generator via the appropriate constructors to avoid the * additional initialisation overhead. * * @param numberOfElements Number of elements. * @param exponent Exponent. * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0} * or {@code exponent <= 0}. */ public ZipfDistribution(final int numberOfElements, final double exponent) { this(new Well19937c(), numberOfElements, exponent); } /** * Creates a Zipf distribution. * * @param rng Random number generator. * @param numberOfElements Number of elements. * @param exponent Exponent. * @exception NotStrictlyPositiveException if {@code numberOfElements <= 0} * or {@code exponent <= 0}. * @since 3.1 */ public ZipfDistribution(RandomGenerator rng, int numberOfElements, double exponent) throws NotStrictlyPositiveException { super(rng); if (numberOfElements <= 0) { throw new NotStrictlyPositiveException(LocalizedFormats.DIMENSION, numberOfElements); } if (exponent <= 0) { throw new NotStrictlyPositiveException(LocalizedFormats.EXPONENT, exponent); } this.numberOfElements = numberOfElements; this.exponent = exponent; } /** * Get the number of elements (e.g. corpus size) for the distribution. * * @return the number of elements */ public int getNumberOfElements() { return numberOfElements; } /** * Get the exponent characterizing the distribution. * * @return the exponent */ public double getExponent() { return exponent; } /** {@inheritDoc} */ public double probability(final int x) { if (x <= 0 || x > numberOfElements) { return 0.0; } return (1.0 / FastMath.pow(x, exponent)) / generalizedHarmonic(numberOfElements, exponent); } /** {@inheritDoc} */ @Override public double logProbability(int x) { if (x <= 0 || x > numberOfElements) { return Double.NEGATIVE_INFINITY; } return -FastMath.log(x) * exponent - FastMath.log(generalizedHarmonic(numberOfElements, exponent)); } /** {@inheritDoc} */ public double cumulativeProbability(final int x) { if (x <= 0) { return 0.0; } else if (x >= numberOfElements) { return 1.0; } return generalizedHarmonic(x, exponent) / generalizedHarmonic(numberOfElements, exponent); } /** * {@inheritDoc} * * For number of elements {@code N} and exponent {@code s}, the mean is * {@code Hs1 / Hs}, where * <ul> * <li>{@code Hs1 = generalizedHarmonic(N, s - 1)},</li> * <li>{@code Hs = generalizedHarmonic(N, s)}.</li> * </ul> */ public double getNumericalMean() { if (!numericalMeanIsCalculated) { numericalMean = calculateNumericalMean(); numericalMeanIsCalculated = true; } return numericalMean; } /** * Used by {@link #getNumericalMean()}. * * @return the mean of this distribution */ protected double calculateNumericalMean() { final int N = getNumberOfElements(); final double s = getExponent(); final double Hs1 = generalizedHarmonic(N, s - 1); final double Hs = generalizedHarmonic(N, s); return Hs1 / Hs; } /** * {@inheritDoc} * * For number of elements {@code N} and exponent {@code s}, the mean is * {@code (Hs2 / Hs) - (Hs1^2 / Hs^2)}, where * <ul> * <li>{@code Hs2 = generalizedHarmonic(N, s - 2)},</li> * <li>{@code Hs1 = generalizedHarmonic(N, s - 1)},</li> * <li>{@code Hs = generalizedHarmonic(N, s)}.</li> * </ul> */ public double getNumericalVariance() { if (!numericalVarianceIsCalculated) { numericalVariance = calculateNumericalVariance(); numericalVarianceIsCalculated = true; } return numericalVariance; } /** * Used by {@link #getNumericalVariance()}. * * @return the variance of this distribution */ protected double calculateNumericalVariance() { final int N = getNumberOfElements(); final double s = getExponent(); final double Hs2 = generalizedHarmonic(N, s - 2); final double Hs1 = generalizedHarmonic(N, s - 1); final double Hs = generalizedHarmonic(N, s); return (Hs2 / Hs) - ((Hs1 * Hs1) / (Hs * Hs)); } /** * Calculates the Nth generalized harmonic number. See * <a href="http://mathworld.wolfram.com/HarmonicSeries.html">Harmonic * Series</a>. * * @param n Term in the series to calculate (must be larger than 1) * @param m Exponent (special case {@code m = 1} is the harmonic series). * @return the n<sup>th</sup> generalized harmonic number. */ private double generalizedHarmonic(final int n, final double m) { double value = 0; for (int k = n; k > 0; --k) { value += 1.0 / FastMath.pow(k, m); } return value; } /** * {@inheritDoc} * * The lower bound of the support is always 1 no matter the parameters. * * @return lower bound of the support (always 1) */ public int getSupportLowerBound() { return 1; } /** * {@inheritDoc} * * The upper bound of the support is the number of elements. * * @return upper bound of the support */ public int getSupportUpperBound() { return getNumberOfElements(); } /** * {@inheritDoc} * * The support of this distribution is connected. * * @return {@code true} */ public boolean isSupportConnected() { return true; } }