KMeansPlusPlusClusterer.java example

Explorer

javajoy-master
- src
  - it
    - enricod
      - joy
        IStack.java
        IStackNode.java
        JoyFactory.java
        JoyStack.java
        Pair.java
        StackNodeType.java
        nodes
        AbstractNode.java
        BoolStackNode.java
        OperatorStackNode.java
        RealFieldStackNode.java
        operators
        AbstractBaseOperator.java
        AddOp.java
        DivideOp.java
        DummyOperator.java
        DupOperator.java
        IOperator.java
        IOperatorsRegistry.java
        MultiplyOp.java
        OperatorException.java
        OperatorsRegistry.java
        PushFieldElementOp.java
        SubtractOp.java
        parser
        BaseParser.java
        IParser.java
        ParserUtils.java
  - org
    - apache
      - commons
        math
        ArgumentOutsideDomainException.java
        ConvergenceException.java
        ConvergingAlgorithm.java
        ConvergingAlgorithmImpl.java
        DimensionMismatchException.java
        DuplicateSampleAbscissaException.java
        Field.java
        FieldElement.java
        FunctionEvaluationException.java
        MathConfigurationException.java
        MathException.java
        MathRuntimeException.java
        MaxEvaluationsExceededException.java
        MaxIterationsExceededException.java
        analysis
        BinaryFunction.java
        BivariateRealFunction.java
        ComposableFunction.java
        DifferentiableMultivariateRealFunction.java
        DifferentiableMultivariateVectorialFunction.java
        DifferentiableUnivariateMatrixFunction.java
        DifferentiableUnivariateRealFunction.java
        DifferentiableUnivariateVectorialFunction.java
        MultivariateMatrixFunction.java
        MultivariateRealFunction.java
        MultivariateVectorialFunction.java
        TrivariateRealFunction.java
        UnivariateMatrixFunction.java
        UnivariateRealFunction.java
        UnivariateVectorialFunction.java
        integration
        LegendreGaussIntegrator.java
        RombergIntegrator.java
        SimpsonIntegrator.java
        TrapezoidIntegrator.java
        UnivariateRealIntegrator.java
        UnivariateRealIntegratorImpl.java
        interpolation
        BicubicSplineInterpolatingFunction.java
        BicubicSplineInterpolator.java
        BivariateRealGridInterpolator.java
        DividedDifferenceInterpolator.java
        LinearInterpolator.java
        LoessInterpolator.java
        MicrosphereInterpolatingFunction.java
        MicrosphereInterpolator.java
        MultivariateRealInterpolator.java
        NevilleInterpolator.java
        SmoothingBicubicSplineInterpolator.java
        SmoothingPolynomialBicubicSplineInterpolator.java
        SplineInterpolator.java
        TricubicSplineInterpolatingFunction.java
        TricubicSplineInterpolator.java
        TrivariateRealGridInterpolator.java
        UnivariateRealInterpolator.java
        polynomials
        PolynomialFunction.java
        PolynomialFunctionLagrangeForm.java
        PolynomialFunctionNewtonForm.java
        PolynomialSplineFunction.java
        PolynomialsUtils.java
        solvers
        BisectionSolver.java
        BrentSolver.java
        LaguerreSolver.java
        MullerSolver.java
        NewtonSolver.java
        RiddersSolver.java
        SecantSolver.java
        UnivariateRealSolver.java
        UnivariateRealSolverFactory.java
        UnivariateRealSolverFactoryImpl.java
        UnivariateRealSolverImpl.java
        UnivariateRealSolverUtils.java
        complex
        Complex.java
        ComplexField.java
        ComplexFormat.java
        ComplexUtils.java
        dfp
        Dfp.java
        DfpDec.java
        DfpField.java
        DfpMath.java
        distribution
        AbstractContinuousDistribution.java
        AbstractDistribution.java
        AbstractIntegerDistribution.java
        BetaDistribution.java
        BetaDistributionImpl.java
        BinomialDistribution.java
        BinomialDistributionImpl.java
        CauchyDistribution.java
        CauchyDistributionImpl.java
        ChiSquaredDistribution.java
        ChiSquaredDistributionImpl.java
        ContinuousDistribution.java
        DiscreteDistribution.java
        Distribution.java
        ExponentialDistribution.java
        ExponentialDistributionImpl.java
        FDistribution.java
        FDistributionImpl.java
        GammaDistribution.java
        GammaDistributionImpl.java
        HasDensity.java
        HypergeometricDistribution.java
        HypergeometricDistributionImpl.java
        IntegerDistribution.java
        NormalDistribution.java
        NormalDistributionImpl.java
        PascalDistribution.java
        PascalDistributionImpl.java
        PoissonDistribution.java
        PoissonDistributionImpl.java
        SaddlePointExpansion.java
        TDistribution.java
        TDistributionImpl.java
        WeibullDistribution.java
        WeibullDistributionImpl.java
        ZipfDistribution.java
        ZipfDistributionImpl.java
        estimation
        AbstractEstimator.java
        EstimatedParameter.java
        EstimationException.java
        EstimationProblem.java
        Estimator.java
        GaussNewtonEstimator.java
        LevenbergMarquardtEstimator.java
        SimpleEstimationProblem.java
        WeightedMeasurement.java
        exception
        ConvergenceException.java
        DimensionMismatchException.java
        MathIllegalArgumentException.java
        MathIllegalNumberException.java
        MathIllegalStateException.java
        MathInternalError.java
        MathThrowable.java
        MathUnsupportedOperationException.java
        NoDataException.java
        NonMonotonousSequenceException.java
        NotPositiveException.java
        NotStrictlyPositiveException.java
        NullArgumentException.java
        NumberIsTooLargeException.java
        NumberIsTooSmallException.java
        OutOfRangeException.java
        ZeroException.java
        util
        ArgUtils.java
        DummyLocalizable.java
        Localizable.java
        LocalizedFormats.java
        MessageFactory.java
        fraction
        AbstractFormat.java
        BigFraction.java
        BigFractionField.java
        BigFractionFormat.java
        Fraction.java
        FractionConversionException.java
        FractionField.java
        FractionFormat.java
        ProperBigFractionFormat.java
        ProperFractionFormat.java
        genetics
        AbstractListChromosome.java
        BinaryChromosome.java
        BinaryMutation.java
        Chromosome.java
        ChromosomePair.java
        CrossoverPolicy.java
        ElitisticListPopulation.java
        Fitness.java
        FixedGenerationCount.java
        GeneticAlgorithm.java
        InvalidRepresentationException.java
        ListPopulation.java
        MutationPolicy.java
        OnePointCrossover.java
        PermutationChromosome.java
        Population.java
        RandomKey.java
        RandomKeyMutation.java
        SelectionPolicy.java
        StoppingCondition.java
        TournamentSelection.java
        geometry
        CardanEulerSingularityException.java
        NotARotationMatrixException.java
        Rotation.java
        RotationOrder.java
        Vector3D.java
        Vector3DFormat.java
        linear
        AbstractFieldMatrix.java
        AbstractRealMatrix.java
        AbstractRealVector.java
        AnyMatrix.java
        Array2DRowFieldMatrix.java
        Array2DRowRealMatrix.java
        ArrayFieldVector.java
        ArrayRealVector.java
        BiDiagonalTransformer.java
        BigMatrix.java
        BigMatrixImpl.java
        BlockFieldMatrix.java
        BlockRealMatrix.java
        CholeskyDecomposition.java
        CholeskyDecompositionImpl.java
        DecompositionSolver.java
        DefaultFieldMatrixChangingVisitor.java
        DefaultFieldMatrixPreservingVisitor.java
        DefaultRealMatrixChangingVisitor.java
        DefaultRealMatrixPreservingVisitor.java
        EigenDecomposition.java
        EigenDecompositionImpl.java
        FieldDecompositionSolver.java
        FieldLUDecomposition.java
        FieldLUDecompositionImpl.java
        FieldMatrix.java
        FieldMatrixChangingVisitor.java
        FieldMatrixPreservingVisitor.java
        FieldVector.java
        InvalidMatrixException.java
        LUDecomposition.java
        LUDecompositionImpl.java
        MatrixIndexException.java
        MatrixUtils.java
        MatrixVisitorException.java
        NonSquareMatrixException.java
        NotPositiveDefiniteMatrixException.java
        NotSymmetricMatrixException.java
        OpenMapRealMatrix.java
        OpenMapRealVector.java
        QRDecomposition.java
        QRDecompositionImpl.java
        RealMatrix.java
        RealMatrixChangingVisitor.java
        RealMatrixImpl.java
        RealMatrixPreservingVisitor.java
        RealVector.java
        RealVectorFormat.java
        SingularMatrixException.java
        SingularValueDecomposition.java
        SingularValueDecompositionImpl.java
        SparseFieldMatrix.java
        SparseFieldVector.java
        SparseRealMatrix.java
        SparseRealVector.java
        TriDiagonalTransformer.java
        ode
        AbstractIntegrator.java
        ContinuousOutputModel.java
        DerivativeException.java
        ExtendedFirstOrderDifferentialEquations.java
        FirstOrderConverter.java
        FirstOrderDifferentialEquations.java
        FirstOrderIntegrator.java
        IntegratorException.java
        MultistepIntegrator.java
        ODEIntegrator.java
        SecondOrderDifferentialEquations.java
        SecondOrderIntegrator.java
        events
        CombinedEventsManager.java
        EventException.java
        EventHandler.java
        EventState.java
        jacobians
        EventHandlerWithJacobians.java
        FirstOrderIntegratorWithJacobians.java
        ODEWithJacobians.java
        ParameterizedODE.java
        StepHandlerWithJacobians.java
        StepInterpolatorWithJacobians.java
        nonstiff
        AdamsBashforthIntegrator.java
        AdamsIntegrator.java
        AdamsMoultonIntegrator.java
        AdamsNordsieckTransformer.java
        AdaptiveStepsizeIntegrator.java
        ClassicalRungeKuttaIntegrator.java
        ClassicalRungeKuttaStepInterpolator.java
        DormandPrince54Integrator.java
        DormandPrince54StepInterpolator.java
        DormandPrince853Integrator.java
        DormandPrince853StepInterpolator.java
        EmbeddedRungeKuttaIntegrator.java
        EulerIntegrator.java
        EulerStepInterpolator.java
        GillIntegrator.java
        GillStepInterpolator.java
        GraggBulirschStoerIntegrator.java
        GraggBulirschStoerStepInterpolator.java
        HighamHall54Integrator.java
        HighamHall54StepInterpolator.java
        MidpointIntegrator.java
        MidpointStepInterpolator.java
        RungeKuttaIntegrator.java
        RungeKuttaStepInterpolator.java
        ThreeEighthesIntegrator.java
        ThreeEighthesStepInterpolator.java
        sampling
        AbstractStepInterpolator.java
        DummyStepHandler.java
        DummyStepInterpolator.java
        FixedStepHandler.java
        NordsieckStepInterpolator.java
        StepHandler.java
        StepInterpolator.java
        StepNormalizer.java
        optimization
        DifferentiableMultivariateRealOptimizer.java
        DifferentiableMultivariateVectorialOptimizer.java
        GoalType.java
        LeastSquaresConverter.java
        MultiStartDifferentiableMultivariateRealOptimizer.java
        MultiStartDifferentiableMultivariateVectorialOptimizer.java
        MultiStartMultivariateRealOptimizer.java
        MultiStartUnivariateRealOptimizer.java
        MultivariateRealOptimizer.java
        OptimizationException.java
        RealConvergenceChecker.java
        RealPointValuePair.java
        SimpleRealPointChecker.java
        SimpleScalarValueChecker.java
        SimpleVectorialPointChecker.java
        SimpleVectorialValueChecker.java
        UnivariateRealOptimizer.java
        VectorialConvergenceChecker.java
        VectorialPointValuePair.java
        direct
        DirectSearchOptimizer.java
        MultiDirectional.java
        NelderMead.java
        PowellOptimizer.java
        fitting
        CurveFitter.java
        GaussianDerivativeFunction.java
        GaussianFitter.java
        GaussianFunction.java
        GaussianParametersGuesser.java
        HarmonicCoefficientsGuesser.java
        HarmonicFitter.java
        HarmonicFunction.java
        ParametricGaussianFunction.java
        ParametricRealFunction.java
        PolynomialFitter.java
        WeightedObservedPoint.java
        general
        AbstractLeastSquaresOptimizer.java
        AbstractScalarDifferentiableOptimizer.java
        ConjugateGradientFormula.java
        GaussNewtonOptimizer.java
        LevenbergMarquardtOptimizer.java
        NonLinearConjugateGradientOptimizer.java
        Preconditioner.java
        linear
        AbstractLinearOptimizer.java
        LinearConstraint.java
        LinearObjectiveFunction.java
        LinearOptimizer.java
        NoFeasibleSolutionException.java
        Relationship.java
        SimplexSolver.java
        SimplexTableau.java
        UnboundedSolutionException.java
        univariate
        AbstractUnivariateRealOptimizer.java
        BracketFinder.java
        BrentOptimizer.java
        random
        AbstractRandomGenerator.java
        AbstractWell.java
        BitsStreamGenerator.java
        CorrelatedRandomVectorGenerator.java
        EmpiricalDistribution.java
        EmpiricalDistributionImpl.java
        GaussianRandomGenerator.java
        JDKRandomGenerator.java
        MersenneTwister.java
        NormalizedRandomGenerator.java
        RandomAdaptor.java
        RandomData.java
        RandomDataImpl.java
        RandomGenerator.java
        RandomVectorGenerator.java
        UncorrelatedRandomVectorGenerator.java
        UniformRandomGenerator.java
        UnitSphereRandomVectorGenerator.java
        ValueServer.java
        Well1024a.java
        Well19937a.java
        Well19937c.java
        Well44497a.java
        Well44497b.java
        Well512a.java
        special
        Beta.java
        Erf.java
        Gamma.java
        stat
        Frequency.java
        StatUtils.java
        clustering
        Cluster.java
        Clusterable.java
        EuclideanIntegerPoint.java
        KMeansPlusPlusClusterer.java
        correlation
        Covariance.java
        PearsonsCorrelation.java
        SpearmansCorrelation.java
        descriptive
        AbstractStorelessUnivariateStatistic.java
        AbstractUnivariateStatistic.java
        AggregateSummaryStatistics.java
        DescriptiveStatistics.java
        MultivariateSummaryStatistics.java
        StatisticalMultivariateSummary.java
        StatisticalSummary.java
        StatisticalSummaryValues.java
        StorelessUnivariateStatistic.java
        SummaryStatistics.java
        SynchronizedDescriptiveStatistics.java
        SynchronizedMultivariateSummaryStatistics.java
        SynchronizedSummaryStatistics.java
        UnivariateStatistic.java
        WeightedEvaluation.java
        moment
        FirstMoment.java
        FourthMoment.java
        GeometricMean.java
        Kurtosis.java
        Mean.java
        SecondMoment.java
        SemiVariance.java
        Skewness.java
        StandardDeviation.java
        ThirdMoment.java
        Variance.java
        VectorialCovariance.java
        VectorialMean.java
        rank
        Max.java
        Median.java
        Min.java
        Percentile.java
        summary
        Product.java
        Sum.java
        SumOfLogs.java
        SumOfSquares.java
        inference
        ChiSquareTest.java
        ChiSquareTestImpl.java
        OneWayAnova.java
        OneWayAnovaImpl.java
        TTest.java
        TTestImpl.java
        TestUtils.java
        UnknownDistributionChiSquareTest.java
        ranking
        NaNStrategy.java
        NaturalRanking.java
        RankingAlgorithm.java
        TiesStrategy.java
        regression
        AbstractMultipleLinearRegression.java
        GLSMultipleLinearRegression.java
        MultipleLinearRegression.java
        OLSMultipleLinearRegression.java
        SimpleRegression.java
        transform
        FastCosineTransformer.java
        FastFourierTransformer.java
        FastHadamardTransformer.java
        FastSineTransformer.java
        RealTransformer.java
        util
        BigReal.java
        BigRealField.java
        CompositeFormat.java
        ContinuedFraction.java
        DefaultTransformer.java
        DoubleArray.java
        FastMath.java
        MathUtils.java
        MultidimensionalCounter.java
        NumberTransformer.java
        OpenIntToDoubleHashMap.java
        OpenIntToFieldHashMap.java
        ResizableDoubleArray.java
        TransformerMap.java
- srcTest
  - it
    - enricod
      - joy
        JoyStackTests.java
        parser
        BaseParserTest.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math.stat.clustering;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Random;

import org.apache.commons.math.exception.ConvergenceException;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.stat.descriptive.moment.Variance;

/**
 * Clustering algorithm based on David Arthur and Sergei Vassilvitski k-means++ algorithm.
 * @param <T> type of the points to cluster
 * @see <a href="http://en.wikipedia.org/wiki/K-means%2B%2B">K-means++ (wikipedia)</a>
 * @version $Revision: 1054333 $ $Date: 2011-01-02 01:34:58 +0100 (dim. 02 janv. 2011) $
 * @since 2.0
 */
public class KMeansPlusPlusClusterer<T extends Clusterable<T>> {

    /** Strategies to use for replacing an empty cluster. */
    public static enum EmptyClusterStrategy {

        /** Split the cluster with largest distance variance. */
        LARGEST_VARIANCE,

        /** Split the cluster with largest number of points. */
        LARGEST_POINTS_NUMBER,

        /** Create a cluster around the point farthest from its centroid. */
        FARTHEST_POINT,

        /** Generate an error. */
        ERROR

    }

    /** Random generator for choosing initial centers. */
    private final Random random;

    /** Selected strategy for empty clusters. */
    private final EmptyClusterStrategy emptyStrategy;

    /** Build a clusterer.
     * <p>
     * The default strategy for handling empty clusters that may appear during
     * algorithm iterations is to split the cluster with largest distance variance.
     * </p>
     * @param random random generator to use for choosing initial centers
     */
    public KMeansPlusPlusClusterer(final Random random) {
        this(random, EmptyClusterStrategy.LARGEST_VARIANCE);
    }

    /** Build a clusterer.
     * @param random random generator to use for choosing initial centers
     * @param emptyStrategy strategy to use for handling empty clusters that
     * may appear during algorithm iterations
     * @since 2.2
     */
    public KMeansPlusPlusClusterer(final Random random, final EmptyClusterStrategy emptyStrategy) {
        this.random        = random;
        this.emptyStrategy = emptyStrategy;
    }

    /**
     * Runs the K-means++ clustering algorithm.
     *
     * @param points the points to cluster
     * @param k the number of clusters to split the data into
     * @param maxIterations the maximum number of iterations to run the algorithm
     *     for.  If negative, no maximum will be used
     * @return a list of clusters containing the points
     */
    public List<Cluster<T>> cluster(final Collection<T> points,
                                    final int k, final int maxIterations) {
        // create the initial clusters
        List<Cluster<T>> clusters = chooseInitialCenters(points, k, random);
        assignPointsToClusters(clusters, points);

        // iterate through updating the centers until we're done
        final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
        for (int count = 0; count < max; count++) {
            boolean clusteringChanged = false;
            List<Cluster<T>> newClusters = new ArrayList<Cluster<T>>();
            for (final Cluster<T> cluster : clusters) {
                final T newCenter;
                if (cluster.getPoints().isEmpty()) {
                    switch (emptyStrategy) {
                        case LARGEST_VARIANCE :
                            newCenter = getPointFromLargestVarianceCluster(clusters);
                            break;
                        case LARGEST_POINTS_NUMBER :
                            newCenter = getPointFromLargestNumberCluster(clusters);
                            break;
                        case FARTHEST_POINT :
                            newCenter = getFarthestPoint(clusters);
                            break;
                        default :
                            throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
                    }
                    clusteringChanged = true;
                } else {
                    newCenter = cluster.getCenter().centroidOf(cluster.getPoints());
                    if (!newCenter.equals(cluster.getCenter())) {
                        clusteringChanged = true;
                    }
                }
                newClusters.add(new Cluster<T>(newCenter));
            }
            if (!clusteringChanged) {
                return clusters;
            }
            assignPointsToClusters(newClusters, points);
            clusters = newClusters;
        }
        return clusters;
    }

    /**
     * Adds the given points to the closest {@link Cluster}.
     *
     * @param <T> type of the points to cluster
     * @param clusters the {@link Cluster}s to add the points to
     * @param points the points to add to the given {@link Cluster}s
     */
    private static <T extends Clusterable<T>> void
        assignPointsToClusters(final Collection<Cluster<T>> clusters, final Collection<T> points) {
        for (final T p : points) {
            Cluster<T> cluster = getNearestCluster(clusters, p);
            cluster.addPoint(p);
        }
    }

    /**
     * Use K-means++ to choose the initial centers.
     *
     * @param <T> type of the points to cluster
     * @param points the points to choose the initial centers from
     * @param k the number of centers to choose
     * @param random random generator to use
     * @return the initial centers
     */
    private static <T extends Clusterable<T>> List<Cluster<T>>
        chooseInitialCenters(final Collection<T> points, final int k, final Random random) {

        final List<T> pointSet = new ArrayList<T>(points);
        final List<Cluster<T>> resultSet = new ArrayList<Cluster<T>>();

        // Choose one center uniformly at random from among the data points.
        final T firstPoint = pointSet.remove(random.nextInt(pointSet.size()));
        resultSet.add(new Cluster<T>(firstPoint));

        final double[] dx2 = new double[pointSet.size()];
        while (resultSet.size() < k) {
            // For each data point x, compute D(x), the distance between x and
            // the nearest center that has already been chosen.
            int sum = 0;
            for (int i = 0; i < pointSet.size(); i++) {
                final T p = pointSet.get(i);
                final Cluster<T> nearest = getNearestCluster(resultSet, p);
                final double d = p.distanceFrom(nearest.getCenter());
                sum += d * d;
                dx2[i] = sum;
            }

            // Add one new data point as a center. Each point x is chosen with
            // probability proportional to D(x)2
            final double r = random.nextDouble() * sum;
            for (int i = 0 ; i < dx2.length; i++) {
                if (dx2[i] >= r) {
                    final T p = pointSet.remove(i);
                    resultSet.add(new Cluster<T>(p));
                    break;
                }
            }
        }

        return resultSet;

    }

    /**
     * Get a random point from the {@link Cluster} with the largest distance variance.
     *
     * @param clusters the {@link Cluster}s to search
     * @return a random point from the selected cluster
     */
    private T getPointFromLargestVarianceCluster(final Collection<Cluster<T>> clusters) {

        double maxVariance = Double.NEGATIVE_INFINITY;
        Cluster<T> selected = null;
        for (final Cluster<T> cluster : clusters) {
            if (!cluster.getPoints().isEmpty()) {

                // compute the distance variance of the current cluster
                final T center = cluster.getCenter();
                final Variance stat = new Variance();
                for (final T point : cluster.getPoints()) {
                    stat.increment(point.distanceFrom(center));
                }
                final double variance = stat.getResult();

                // select the cluster with the largest variance
                if (variance > maxVariance) {
                    maxVariance = variance;
                    selected = cluster;
                }

            }
        }

        // did we find at least one non-empty cluster ?
        if (selected == null) {
            throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
        }

        // extract a random point from the cluster
        final List<T> selectedPoints = selected.getPoints();
        return selectedPoints.remove(random.nextInt(selectedPoints.size()));

    }

    /**
     * Get a random point from the {@link Cluster} with the largest number of points
     *
     * @param clusters the {@link Cluster}s to search
     * @return a random point from the selected cluster
     */
    private T getPointFromLargestNumberCluster(final Collection<Cluster<T>> clusters) {

        int maxNumber = 0;
        Cluster<T> selected = null;
        for (final Cluster<T> cluster : clusters) {

            // get the number of points of the current cluster
            final int number = cluster.getPoints().size();

            // select the cluster with the largest number of points
            if (number > maxNumber) {
                maxNumber = number;
                selected = cluster;
            }

        }

        // did we find at least one non-empty cluster ?
        if (selected == null) {
            throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
        }

        // extract a random point from the cluster
        final List<T> selectedPoints = selected.getPoints();
        return selectedPoints.remove(random.nextInt(selectedPoints.size()));

    }

    /**
     * Get the point farthest to its cluster center
     *
     * @param clusters the {@link Cluster}s to search
     * @return point farthest to its cluster center
     */
    private T getFarthestPoint(final Collection<Cluster<T>> clusters) {

        double maxDistance = Double.NEGATIVE_INFINITY;
        Cluster<T> selectedCluster = null;
        int selectedPoint = -1;
        for (final Cluster<T> cluster : clusters) {

            // get the farthest point
            final T center = cluster.getCenter();
            final List<T> points = cluster.getPoints();
            for (int i = 0; i < points.size(); ++i) {
                final double distance = points.get(i).distanceFrom(center);
                if (distance > maxDistance) {
                    maxDistance     = distance;
                    selectedCluster = cluster;
                    selectedPoint   = i;
                }
            }

        }

        // did we find at least one non-empty cluster ?
        if (selectedCluster == null) {
            throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
        }

        return selectedCluster.getPoints().remove(selectedPoint);

    }

    /**
     * Returns the nearest {@link Cluster} to the given point
     *
     * @param <T> type of the points to cluster
     * @param clusters the {@link Cluster}s to search
     * @param point the point to find the nearest {@link Cluster} for
     * @return the nearest {@link Cluster} to the given point
     */
    private static <T extends Clusterable<T>> Cluster<T>
        getNearestCluster(final Collection<Cluster<T>> clusters, final T point) {
        double minDistance = Double.MAX_VALUE;
        Cluster<T> minCluster = null;
        for (final Cluster<T> c : clusters) {
            final double distance = point.distanceFrom(c.getCenter());
            if (distance < minDistance) {
                minDistance = distance;
                minCluster = c;
            }
        }
        return minCluster;
    }

}