/* XXL: The eXtensible and fleXible Library for data processing
Copyright (C) 2000-2011 Prof. Dr. Bernhard Seeger
Head of the Database Research Group
Department of Mathematics and Computer Science
University of Marburg
Germany
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 3 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; If not, see <http://www.gnu.org/licenses/>.
http://code.google.com/p/xxl/
*/
package xxl.core.math.statistics.nonparametric;
import xxl.core.functions.AbstractFunction;
import xxl.core.functions.Function;
import xxl.core.math.functions.AbstractRealFunctionFunction;
import xxl.core.math.queries.WindowQuery;
/**
* This class realizes a real-valued function providing an empirical cdf
* (cumulative distribution function)
* based on a simple random sample (iid)
* for data given by objects of type <tt>Number</tt>. <br>
* The empirical distribution function <tt>F_n(y)</tt> of a sample
* is the proportion of observations less than or equal to <tt>y</tt>.
* <br>
* Generally, a more detailed coverage of the empirical cdf
* is given in [Sco92]: David W. Scott. Multivariate Density Estimation:
* Theory, Practice, and Visualization. 1992.
* <br>
* The cdf is implemented as a one-dimensional
* {@link xxl.core.functions.Function function} expecting
* objects of type <tt>Number</tt> as well as a
* {@link xxl.core.math.functions.RealFunction RealFunction} expecting
* data of type <tt>double</tt>.
* A sample of any kind of data given by an iterator could
* be easily obtained through the {@link xxl.core.cursors.mappers.ReservoirSampler reservoir sampler
* cursor}.
*
* @see xxl.core.functions.Function
* @see xxl.core.math.functions.AbstractRealFunctionFunction
* @see xxl.core.math.functions.RealFunction
* @see xxl.core.math.queries.WindowQuery
*/
public class EmpiricalCDF extends AbstractRealFunctionFunction implements WindowQuery {

    /**
     * Provides a factory for a
     * {@link xxl.core.math.statistics.nonparametric.EmpiricalCDF sampling based} cdf estimator.
     * The factory receives the sample as an <tt>Object[]</tt> and passes it unchanged to
     * {@link #EmpiricalCDF(Object[])}.
     */
    public static Function FACTORY = new AbstractFunction<Object[],EmpiricalCDF>() {
        public EmpiricalCDF invoke(Object[] o) {
            return new EmpiricalCDF(o);
        }
    };

    /** The sample the cdf is computed from; every element is read as a <tt>Number</tt>. */
    protected Object[] sample;

    /**
     * Set to <tt>true</tt> whenever a different sample array is installed via
     * {@link #setSample(Object[])}; cleared on the next evaluation.
     */
    protected boolean hasChanged = true;

    /**
     * Confidence parameter with P( | Fn(x) - F(x) | &lt; epsilon ) = 1 - alpha = p.
     * A negative value means that no confidence level has been given.
     */
    protected double alpha;

    /** Function value computed by the most recent call of {@link #eval(double)}. */
    protected double y;

    /**
     * Constructs a new empirical cdf based upon the given sample
     * and a confidence level.
     *
     * @param sample sample used to build the cdf; the array is stored by reference, not copied
     * @param alpha confidence with P( | Fn(x) - F(x) | &lt; epsilon ) = 1 - alpha = p
     */
    public EmpiricalCDF(Object[] sample, double alpha) {
        this.sample = sample;
        this.alpha = alpha;
    }

    /**
     * Constructs a new empirical cdf based upon the given sample without a
     * confidence level (alpha is set to -1).
     *
     * @param sample sample used to build up the cdf
     */
    public EmpiricalCDF(Object[] sample) {
        this(sample, -1);
    }

    /**
     * Evaluates the empirical cdf at the given point x, i.e., returns the fraction
     * of sample elements that are less than or equal to x. The result is also
     * remembered as the last computed function value.
     * <br>
     * For an empty sample the result is <tt>NaN</tt> (0/0 in floating point arithmetic).
     *
     * @param x argument where to evaluate the empirical cdf
     * @return value of the empirical cdf at point x
     * @throws ClassCastException if a sample element is not a <tt>Number</tt>
     */
    public double eval(double x) {
        if (hasChanged())
            reinit();
        int count = 0;
        for (Object element : sample) {
            // Cast to Number rather than Double: the class is documented to accept any
            // Number, and windowQuery already treats the sample that way.
            if (((Number) element).doubleValue() <= x)
                count++;
        }
        y = (double) count / (double) sample.length;
        return y;
    }

    /**
     * Sets a new sample. The change flag is raised if the given array is not the very
     * same array object as the current sample. Arrays are compared by reference, not
     * element-wise, so a different array with equal content counts as a change.
     *
     * @param newSample new sample to set
     * @throws NullPointerException if <tt>newSample</tt> is <tt>null</tt>
     */
    public void setSample(Object[] newSample) {
        if (newSample == null)
            throw new NullPointerException("newSample must not be null");
        if (newSample != sample) {
            hasChanged = true;
            sample = newSample;
        }
    }

    /**
     * Indicates whether the sample has been replaced since the last evaluation.
     *
     * @return true, if something has changed
     */
    public boolean hasChanged() {
        return hasChanged;
    }

    /** Reinitializes the object after changes; currently this only clears the change flag. */
    private void reinit() {
        hasChanged = false;
    }

    /**
     * Returns the current confidence interval at the last computed function point with
     * P( | Fn(x) - F(x) | &lt; epsilon ) = 1 - alpha = p.
     * <br>
     * <b>Note:</b> This functionality is not available yet! Every call of this method
     * currently ends with an <tt>UnsupportedOperationException</tt>.
     *
     * @throws UnsupportedOperationException always; the message distinguishes between
     *         a missing confidence level alpha, a too small sample, and the
     *         computation not being implemented yet
     * @return value of the current confidence interval given by [ F(x)-epsilon, F(x)+epsilon]
     */
    public double epsilon() throws UnsupportedOperationException {
        if (alpha < 0)
            throw new UnsupportedOperationException("computing of confidence intervals not supported by this estimator");
        // NOTE(review): the check rejects a sample of exactly 40 elements although the
        // message says "smaller than 40" - confirm the intended threshold.
        if (sample.length <= 40)
            throw new UnsupportedOperationException("computing of confidence intervals not supported for sample sizes smaller than 40");
        throw new UnsupportedOperationException("computing of confidence intervals not yet supported by this estimator");
    }

    /**
     * Returns the current confidence interval at the given value x with
     * P( | Fn(x) - F(x) | &lt; epsilon ) = 1 - alpha = p.
     * The cdf is evaluated at x first, then {@link #epsilon()} is called.
     *
     * @param x argument where to evaluate the confidence interval
     * @throws UnsupportedOperationException always, see {@link #epsilon()}
     * @return value of the current confidence interval given by [ F(x)-epsilon, F(x)+epsilon]
     */
    public double epsilon(double x) throws UnsupportedOperationException {
        eval(x); // stores the function value at x as the last computed value
        return epsilon();
    }

    /**
     * Returns the confidence level.
     *
     * @return p = 1-alpha
     * @throws UnsupportedOperationException if no confidence level has been given and
     *         hence no confidence interval could be computed
     */
    public double confidence() throws UnsupportedOperationException {
        if (alpha < 0)
            throw new UnsupportedOperationException("computing of confidence intervals not supported by this estimator");
        return 1 - alpha;
    }

    /**
     * Returns the difference F(b) - F(a), i.e., the fraction of sample elements
     * lying in the half-open interval (a, b].
     * The implementation of this method is not based on the {@link #eval(double) eval-method};
     * the matching elements are counted directly in a single pass over the sample and
     * the last computed function value is left untouched.
     * <br>
     * For an empty sample the result is <tt>NaN</tt> (0/0 in floating point arithmetic).
     *
     * @param left left border a of the window query, must be a <tt>Number</tt>
     * @param right right border b of the window query, must be a <tt>Number</tt>
     * @return the difference F(b) - F(a) respectively F(right) - F(left)
     * @throws ClassCastException if a border or a sample element is not a <tt>Number</tt>
     */
    public double windowQuery(Object left, Object right) {
        double a = ((Number) left).doubleValue();
        double b = ((Number) right).doubleValue();
        if (hasChanged())
            reinit();
        int count = 0;
        for (Object element : sample) {
            double xi = ((Number) element).doubleValue();
            if (xi > a && xi <= b)
                count++;
        }
        return (double) count / (double) sample.length;
    }
}