KMeansPlusPlusClusterer.java example

Explorer

ewjUtil-master
- src
  - com
    - ewjordan
      - evolution
        sim
        SpeedLimit.java
      - util
        ArrayUtil.java
        DoubleHistogram.java
        DoubleList.java
        MathUtil.java
        ObservationSummary.java
        SimpleIO.java
        StoredObservationSummary.java
        Strings.java
        objectWrap
        HasValue.java
        MutationInfo.java
        ObjectOptimizer.java
        OptimizableWrappedObject.java
        PrimitiveReference.java
        PrimitiveReferenceIntoArray.java
        ReferenceType.java
        Unwrapped.java
        WrappedObject.java
        WrappedObjectChromosome.java
        WrappedObjectToDoubleFunction.java
        test
        FunctionMaximizationTest.java
        PolynomialFunction.java
        WrappedObjectTest.java
        random
        Shuffler.java
        Spinner.java
  - org
    - apache
      - commons
        math
        ConvergenceException.java
        Field.java
        FieldElement.java
        MathConfigurationException.java
        MathException.java
        MathRuntimeException.java
        MaxEvaluationsExceededException.java
        analysis
        BivariateRealFunction.java
        DifferentiableMultivariateRealFunction.java
        DifferentiableMultivariateVectorialFunction.java
        DifferentiableUnivariateMatrixFunction.java
        DifferentiableUnivariateRealFunction.java
        DifferentiableUnivariateVectorialFunction.java
        FunctionUtils.java
        MultivariateMatrixFunction.java
        MultivariateRealFunction.java
        MultivariateVectorialFunction.java
        ParametricUnivariateRealFunction.java
        TrivariateRealFunction.java
        UnivariateMatrixFunction.java
        UnivariateRealFunction.java
        UnivariateVectorialFunction.java
        function
        Abs.java
        Acos.java
        Acosh.java
        Add.java
        Asin.java
        Asinh.java
        Atan.java
        Atan2.java
        Atanh.java
        Cbrt.java
        Ceil.java
        Constant.java
        Cos.java
        Cosh.java
        Divide.java
        Exp.java
        Expm1.java
        Floor.java
        Gaussian.java
        HarmonicOscillator.java
        Identity.java
        Inverse.java
        Log.java
        Log10.java
        Log1p.java
        Logistic.java
        Logit.java
        Max.java
        Min.java
        Minus.java
        Multiply.java
        Pow.java
        Power.java
        Rint.java
        Sigmoid.java
        Signum.java
        Sin.java
        Sinc.java
        Sinh.java
        Sqrt.java
        StepFunction.java
        Subtract.java
        Tan.java
        Tanh.java
        Ulp.java
        integration
        LegendreGaussIntegrator.java
        RombergIntegrator.java
        SimpsonIntegrator.java
        TrapezoidIntegrator.java
        UnivariateRealIntegrator.java
        UnivariateRealIntegratorImpl.java
        interpolation
        BicubicSplineInterpolatingFunction.java
        BicubicSplineInterpolator.java
        BivariateRealGridInterpolator.java
        DividedDifferenceInterpolator.java
        LinearInterpolator.java
        LoessInterpolator.java
        MicrosphereInterpolatingFunction.java
        MicrosphereInterpolator.java
        MultivariateRealInterpolator.java
        NevilleInterpolator.java
        SmoothingPolynomialBicubicSplineInterpolator.java
        SplineInterpolator.java
        TricubicSplineInterpolatingFunction.java
        TricubicSplineInterpolator.java
        TrivariateRealGridInterpolator.java
        UnivariateRealInterpolator.java
        UnivariateRealPeriodicInterpolator.java
        polynomials
        PolynomialFunction.java
        PolynomialFunctionLagrangeForm.java
        PolynomialFunctionNewtonForm.java
        PolynomialSplineFunction.java
        PolynomialsUtils.java
        solvers
        AbstractDifferentiableUnivariateRealSolver.java
        AbstractPolynomialSolver.java
        AbstractUnivariateRealSolver.java
        BaseAbstractUnivariateRealSolver.java
        BaseUnivariateRealSolver.java
        BisectionSolver.java
        BrentSolver.java
        DifferentiableUnivariateRealSolver.java
        LaguerreSolver.java
        MullerSolver.java
        MullerSolver2.java
        NewtonSolver.java
        PolynomialSolver.java
        RiddersSolver.java
        SecantSolver.java
        UnivariateRealSolver.java
        UnivariateRealSolverUtils.java
        complex
        Complex.java
        ComplexField.java
        ComplexFormat.java
        ComplexUtils.java
        dfp
        Dfp.java
        DfpDec.java
        DfpField.java
        DfpMath.java
        distribution
        AbstractContinuousDistribution.java
        AbstractDistribution.java
        AbstractIntegerDistribution.java
        BetaDistribution.java
        BetaDistributionImpl.java
        BinomialDistribution.java
        BinomialDistributionImpl.java
        CauchyDistribution.java
        CauchyDistributionImpl.java
        ChiSquaredDistribution.java
        ChiSquaredDistributionImpl.java
        ContinuousDistribution.java
        DiscreteDistribution.java
        Distribution.java
        ExponentialDistribution.java
        ExponentialDistributionImpl.java
        FDistribution.java
        FDistributionImpl.java
        GammaDistribution.java
        GammaDistributionImpl.java
        HypergeometricDistribution.java
        HypergeometricDistributionImpl.java
        IntegerDistribution.java
        KolmogorovSmirnovDistribution.java
        KolmogorovSmirnovDistributionImpl.java
        NormalDistribution.java
        NormalDistributionImpl.java
        PascalDistribution.java
        PascalDistributionImpl.java
        PoissonDistribution.java
        PoissonDistributionImpl.java
        SaddlePointExpansion.java
        TDistribution.java
        TDistributionImpl.java
        WeibullDistribution.java
        WeibullDistributionImpl.java
        ZipfDistribution.java
        ZipfDistributionImpl.java
        exception
        ConvergenceException.java
        DimensionMismatchException.java
        MathArithmeticException.java
        MathIllegalArgumentException.java
        MathIllegalNumberException.java
        MathIllegalStateException.java
        MathInternalError.java
        MathParseException.java
        MathRuntimeException.java
        MathThrowable.java
        MathUnsupportedOperationException.java
        MathUserException.java
        MaxCountExceededException.java
        MultiDimensionMismatchException.java
        NoBracketingException.java
        NoDataException.java
        NonMonotonousSequenceException.java
        NotFiniteNumberException.java
        NotPositiveException.java
        NotStrictlyPositiveException.java
        NullArgumentException.java
        NumberIsTooLargeException.java
        NumberIsTooSmallException.java
        OutOfRangeException.java
        TooManyEvaluationsException.java
        ZeroException.java
        util
        ArgUtils.java
        DummyLocalizable.java
        ExceptionContext.java
        ExceptionContextProvider.java
        Localizable.java
        LocalizedFormats.java
        fraction
        AbstractFormat.java
        BigFraction.java
        BigFractionField.java
        BigFractionFormat.java
        Fraction.java
        FractionConversionException.java
        FractionField.java
        FractionFormat.java
        ProperBigFractionFormat.java
        ProperFractionFormat.java
        genetics
        AbstractListChromosome.java
        BinaryChromosome.java
        BinaryMutation.java
        Chromosome.java
        ChromosomePair.java
        CrossoverPolicy.java
        ElitisticListPopulation.java
        Fitness.java
        FixedGenerationCount.java
        GeneticAlgorithm.java
        InvalidRepresentationException.java
        ListPopulation.java
        MutationPolicy.java
        OnePointCrossover.java
        PermutationChromosome.java
        Population.java
        RandomKey.java
        RandomKeyMutation.java
        SelectionPolicy.java
        StoppingCondition.java
        TournamentSelection.java
        geometry
        Space.java
        Vector.java
        VectorFormat.java
        euclidean
        oned
        Euclidean1D.java
        Interval.java
        IntervalsSet.java
        OrientedPoint.java
        SubOrientedPoint.java
        Vector1D.java
        Vector1DFormat.java
        threed
        CardanEulerSingularityException.java
        Euclidean3D.java
        Line.java
        NotARotationMatrixException.java
        OutlineExtractor.java
        Plane.java
        PolyhedronsSet.java
        Rotation.java
        RotationOrder.java
        SubPlane.java
        Vector3D.java
        Vector3DFormat.java
        twod
        Euclidean2D.java
        Line.java
        NestedLoops.java
        PolygonsSet.java
        Segment.java
        SegmentBuilder.java
        SubLine.java
        Vector2D.java
        Vector2DFormat.java
        partitioning
        AbstractRegion.java
        AbstractSubHyperplane.java
        BSPTree.java
        BSPTreeVisitor.java
        BoundaryAttribute.java
        BoundarySizeVisitor.java
        Characterization.java
        Embedding.java
        Hyperplane.java
        Region.java
        RegionFactory.java
        Side.java
        SubHyperplane.java
        Transform.java
        utilities
        AVLTree.java
        OrderedTuple.java
        linear
        AbstractFieldMatrix.java
        AbstractRealMatrix.java
        AbstractRealVector.java
        AnyMatrix.java
        Array2DRowFieldMatrix.java
        Array2DRowRealMatrix.java
        ArrayFieldVector.java
        ArrayRealVector.java
        BiDiagonalTransformer.java
        BlockFieldMatrix.java
        BlockRealMatrix.java
        CholeskyDecomposition.java
        CholeskyDecompositionImpl.java
        DecompositionSolver.java
        DefaultFieldMatrixChangingVisitor.java
        DefaultFieldMatrixPreservingVisitor.java
        DefaultRealMatrixChangingVisitor.java
        DefaultRealMatrixPreservingVisitor.java
        EigenDecomposition.java
        EigenDecompositionImpl.java
        FieldDecompositionSolver.java
        FieldLUDecomposition.java
        FieldLUDecompositionImpl.java
        FieldMatrix.java
        FieldMatrixChangingVisitor.java
        FieldMatrixPreservingVisitor.java
        FieldVector.java
        LUDecomposition.java
        LUDecompositionImpl.java
        MatrixDimensionMismatchException.java
        MatrixUtils.java
        NonPositiveDefiniteMatrixException.java
        NonSquareMatrixException.java
        NonSymmetricMatrixException.java
        OpenMapRealMatrix.java
        OpenMapRealVector.java
        QRDecomposition.java
        QRDecompositionImpl.java
        RealMatrix.java
        RealMatrixChangingVisitor.java
        RealMatrixPreservingVisitor.java
        RealVector.java
        RealVectorFormat.java
        RectangularCholeskyDecomposition.java
        RectangularCholeskyDecompositionImpl.java
        SingularMatrixException.java
        SingularValueDecomposition.java
        SingularValueDecompositionImpl.java
        SparseFieldMatrix.java
        SparseFieldVector.java
        SparseRealMatrix.java
        SparseRealVector.java
        TriDiagonalTransformer.java
        ode
        AbstractIntegrator.java
        ContinuousOutputModel.java
        ExtendedFirstOrderDifferentialEquations.java
        FirstOrderConverter.java
        FirstOrderDifferentialEquations.java
        FirstOrderIntegrator.java
        IntegratorException.java
        MultistepIntegrator.java
        ODEIntegrator.java
        SecondOrderDifferentialEquations.java
        SecondOrderIntegrator.java
        events
        EventException.java
        EventHandler.java
        EventState.java
        nonstiff
        AdamsBashforthIntegrator.java
        AdamsIntegrator.java
        AdamsMoultonIntegrator.java
        AdamsNordsieckTransformer.java
        AdaptiveStepsizeIntegrator.java
        ClassicalRungeKuttaIntegrator.java
        ClassicalRungeKuttaStepInterpolator.java
        DormandPrince54Integrator.java
        DormandPrince54StepInterpolator.java
        DormandPrince853Integrator.java
        DormandPrince853StepInterpolator.java
        EmbeddedRungeKuttaIntegrator.java
        EulerIntegrator.java
        EulerStepInterpolator.java
        GillIntegrator.java
        GillStepInterpolator.java
        GraggBulirschStoerIntegrator.java
        GraggBulirschStoerStepInterpolator.java
        HighamHall54Integrator.java
        HighamHall54StepInterpolator.java
        MidpointIntegrator.java
        MidpointStepInterpolator.java
        RungeKuttaIntegrator.java
        RungeKuttaStepInterpolator.java
        ThreeEighthesIntegrator.java
        ThreeEighthesStepInterpolator.java
        sampling
        AbstractStepInterpolator.java
        DummyStepHandler.java
        DummyStepInterpolator.java
        FixedStepHandler.java
        NordsieckStepInterpolator.java
        StepHandler.java
        StepInterpolator.java
        StepNormalizer.java
        optimization
        AbstractConvergenceChecker.java
        BaseMultiStartMultivariateRealOptimizer.java
        BaseMultiStartMultivariateVectorialOptimizer.java
        BaseMultivariateRealOptimizer.java
        BaseMultivariateVectorialOptimizer.java
        BaseOptimizer.java
        ConvergenceChecker.java
        DifferentiableMultivariateRealOptimizer.java
        DifferentiableMultivariateVectorialOptimizer.java
        GoalType.java
        LeastSquaresConverter.java
        MultiStartDifferentiableMultivariateRealOptimizer.java
        MultiStartDifferentiableMultivariateVectorialOptimizer.java
        MultiStartMultivariateRealOptimizer.java
        MultivariateRealOptimizer.java
        OptimizationException.java
        RealPointValuePair.java
        SimpleRealPointChecker.java
        SimpleScalarValueChecker.java
        SimpleVectorialPointChecker.java
        SimpleVectorialValueChecker.java
        VectorialConvergenceChecker.java
        VectorialPointValuePair.java
        direct
        AbstractSimplex.java
        BaseAbstractScalarOptimizer.java
        BaseAbstractVectorialOptimizer.java
        CMAESOptimizer.java
        MultiDirectionalSimplex.java
        NelderMeadSimplex.java
        PowellOptimizer.java
        SimplexOptimizer.java
        fitting
        CurveFitter.java
        GaussianFitter.java
        HarmonicFitter.java
        PolynomialFitter.java
        WeightedObservedPoint.java
        general
        AbstractLeastSquaresOptimizer.java
        AbstractScalarDifferentiableOptimizer.java
        ConjugateGradientFormula.java
        GaussNewtonOptimizer.java
        LevenbergMarquardtOptimizer.java
        NonLinearConjugateGradientOptimizer.java
        Preconditioner.java
        linear
        AbstractLinearOptimizer.java
        LinearConstraint.java
        LinearObjectiveFunction.java
        LinearOptimizer.java
        NoFeasibleSolutionException.java
        Relationship.java
        SimplexSolver.java
        SimplexTableau.java
        UnboundedSolutionException.java
        univariate
        AbstractUnivariateRealOptimizer.java
        BaseUnivariateRealOptimizer.java
        BracketFinder.java
        BrentOptimizer.java
        MultiStartUnivariateRealOptimizer.java
        UnivariateRealOptimizer.java
        UnivariateRealPointValuePair.java
        random
        AbstractRandomGenerator.java
        AbstractWell.java
        BitsStreamGenerator.java
        CorrelatedRandomVectorGenerator.java
        EmpiricalDistribution.java
        EmpiricalDistributionImpl.java
        GaussianRandomGenerator.java
        JDKRandomGenerator.java
        MersenneTwister.java
        NormalizedRandomGenerator.java
        RandomAdaptor.java
        RandomData.java
        RandomDataImpl.java
        RandomGenerator.java
        RandomVectorGenerator.java
        UncorrelatedRandomVectorGenerator.java
        UniformRandomGenerator.java
        UnitSphereRandomVectorGenerator.java
        ValueServer.java
        Well1024a.java
        Well19937a.java
        Well19937c.java
        Well44497a.java
        Well44497b.java
        Well512a.java
        special
        Beta.java
        Erf.java
        Gamma.java
        stat
        Frequency.java
        StatUtils.java
        clustering
        Cluster.java
        Clusterable.java
        EuclideanIntegerPoint.java
        KMeansPlusPlusClusterer.java
        correlation
        Covariance.java
        PearsonsCorrelation.java
        SpearmansCorrelation.java
        descriptive
        AbstractStorelessUnivariateStatistic.java
        AbstractUnivariateStatistic.java
        AggregateSummaryStatistics.java
        DescriptiveStatistics.java
        MultivariateSummaryStatistics.java
        StatisticalMultivariateSummary.java
        StatisticalSummary.java
        StatisticalSummaryValues.java
        StorelessUnivariateStatistic.java
        SummaryStatistics.java
        SynchronizedDescriptiveStatistics.java
        SynchronizedMultivariateSummaryStatistics.java
        SynchronizedSummaryStatistics.java
        UnivariateStatistic.java
        WeightedEvaluation.java
        moment
        FirstMoment.java
        FourthMoment.java
        GeometricMean.java
        Kurtosis.java
        Mean.java
        SecondMoment.java
        SemiVariance.java
        Skewness.java
        StandardDeviation.java
        ThirdMoment.java
        Variance.java
        VectorialCovariance.java
        VectorialMean.java
        rank
        Max.java
        Median.java
        Min.java
        Percentile.java
        summary
        Product.java
        Sum.java
        SumOfLogs.java
        SumOfSquares.java
        inference
        ChiSquareTest.java
        ChiSquareTestImpl.java
        MannWhitneyUTest.java
        MannWhitneyUTestImpl.java
        OneWayAnova.java
        OneWayAnovaImpl.java
        TTest.java
        TTestImpl.java
        TestUtils.java
        UnknownDistributionChiSquareTest.java
        WilcoxonSignedRankTest.java
        WilcoxonSignedRankTestImpl.java
        ranking
        NaNStrategy.java
        NaturalRanking.java
        RankingAlgorithm.java
        TiesStrategy.java
        regression
        AbstractMultipleLinearRegression.java
        GLSMultipleLinearRegression.java
        MultipleLinearRegression.java
        OLSMultipleLinearRegression.java
        SimpleRegression.java
        transform
        FastCosineTransformer.java
        FastFourierTransformer.java
        FastHadamardTransformer.java
        FastSineTransformer.java
        RealTransformer.java
        util
        BigReal.java
        BigRealField.java
        CompositeFormat.java
        ContinuedFraction.java
        DefaultTransformer.java
        DoubleArray.java
        FastMath.java
        Incrementor.java
        MathUtils.java
        MultidimensionalCounter.java
        NumberTransformer.java
        OpenIntToDoubleHashMap.java
        OpenIntToFieldHashMap.java
        Pair.java
        ResizableDoubleArray.java
        TransformerMap.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.math.stat.clustering;

import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Random;

import org.apache.commons.math.exception.ConvergenceException;
import org.apache.commons.math.exception.MathIllegalArgumentException;
import org.apache.commons.math.exception.NumberIsTooSmallException;
import org.apache.commons.math.exception.util.LocalizedFormats;
import org.apache.commons.math.stat.descriptive.moment.Variance;
import org.apache.commons.math.util.MathUtils;

/**
 * Clustering algorithm based on the k-means++ algorithm of David Arthur and Sergei Vassilvitskii.
 * @param <T> type of the points to cluster
 * @see <a href="http://en.wikipedia.org/wiki/K-means%2B%2B">K-means++ (wikipedia)</a>
 * @version $Id: KMeansPlusPlusClusterer.java 1131229 2011-06-03 20:49:25Z luc $
 * @since 2.0
 */
public class KMeansPlusPlusClusterer<T extends Clusterable<T>> {

    /** Strategies to use for replacing an empty cluster. */
    public enum EmptyClusterStrategy {

        /** Split the cluster with largest distance variance. */
        LARGEST_VARIANCE,

        /** Split the cluster with largest number of points. */
        LARGEST_POINTS_NUMBER,

        /** Create a cluster around the point farthest from its centroid. */
        FARTHEST_POINT,

        /** Generate an error. */
        ERROR

    }

    /** Random generator for choosing initial centers. */
    private final Random random;

    /** Selected strategy for empty clusters. */
    private final EmptyClusterStrategy emptyStrategy;

    /** Build a clusterer.
     * <p>
     * The default strategy for handling empty clusters that may appear during
     * algorithm iterations is to split the cluster with largest distance variance.
     * </p>
     * @param random random generator to use for choosing initial centers
     */
    public KMeansPlusPlusClusterer(final Random random) {
        this(random, EmptyClusterStrategy.LARGEST_VARIANCE);
    }

    /** Build a clusterer.
     * @param random random generator to use for choosing initial centers
     * @param emptyStrategy strategy to use for handling empty clusters that
     * may appear during algorithm iterations
     * @since 2.2
     */
    public KMeansPlusPlusClusterer(final Random random, final EmptyClusterStrategy emptyStrategy) {
        this.random        = random;
        this.emptyStrategy = emptyStrategy;
    }

    /**
     * Runs the K-means++ clustering algorithm.
     * <p>
     * The initial centers are chosen among the data points, then the centers are
     * updated and the points reassigned until either an iteration changes no
     * assignment (and met no empty cluster) or {@code maxIterations} iterations
     * have been performed. In the latter case the clusters of the last iteration
     * are returned.
     * </p>
     *
     * @param points the points to cluster
     * @param k the number of clusters to split the data into, must be at least 1
     * @param maxIterations the maximum number of iterations to run the algorithm
     *     for.  If negative, no maximum will be used
     * @return a list of clusters containing the points
     * @throws MathIllegalArgumentException if the data points are null, the number
     *     of clusters is smaller than 1 or the number of clusters is larger than
     *     the number of data points
     * @throws ConvergenceException if an empty cluster is encountered and it cannot
     *     be replaced according to the selected {@link EmptyClusterStrategy}
     */
    public List<Cluster<T>> cluster(final Collection<T> points, final int k,
                                    final int maxIterations)
        throws MathIllegalArgumentException {

        // sanity checks
        MathUtils.checkNotNull(points);

        // at least one cluster must be requested: the initial center selection
        // always picks a first center, so k <= 0 would silently yield one cluster
        if (k < 1) {
            throw new NumberIsTooSmallException(k, 1, true);
        }

        // number of clusters has to be smaller than or equal to the number of data points
        if (points.size() < k) {
            throw new NumberIsTooSmallException(points.size(), k, false);
        }

        // create the initial clusters
        List<Cluster<T>> clusters = chooseInitialCenters(points, k, random);

        // create an array containing the latest assignment of a point to a cluster
        // no need to initialize the array, as it will be filled with the first assignment
        final int[] assignments = new int[points.size()];
        assignPointsToClusters(clusters, points, assignments);

        // iterate through updating the centers until we're done
        final int max = (maxIterations < 0) ? Integer.MAX_VALUE : maxIterations;
        for (int count = 0; count < max; count++) {
            boolean emptyCluster = false;
            final List<Cluster<T>> newClusters = new ArrayList<Cluster<T>>(clusters.size());
            for (final Cluster<T> cluster : clusters) {
                final T newCenter;
                if (cluster.getPoints().isEmpty()) {
                    // the replacement point is removed from the cluster it is taken from
                    newCenter = chooseCenterForEmptyCluster(clusters);
                    emptyCluster = true;
                } else {
                    newCenter = cluster.getCenter().centroidOf(cluster.getPoints());
                }
                newClusters.add(new Cluster<T>(newCenter));
            }
            final int changes = assignPointsToClusters(newClusters, points, assignments);
            clusters = newClusters;

            // if there were no more changes in the point-to-cluster assignment
            // and there are no empty clusters left, return the current clusters
            if (changes == 0 && !emptyCluster) {
                return clusters;
            }
        }
        return clusters;
    }

    /**
     * Selects the center that replaces an empty cluster, according to the
     * strategy chosen at construction time.
     *
     * @param clusters the current {@link Cluster}s, one of which donates a point
     * @return the point to use as the new center
     * @throws ConvergenceException if the strategy is
     *     {@link EmptyClusterStrategy#ERROR} or if no point can be selected
     */
    private T chooseCenterForEmptyCluster(final Collection<Cluster<T>> clusters) {
        switch (emptyStrategy) {
            case LARGEST_VARIANCE :
                return getPointFromLargestVarianceCluster(clusters);
            case LARGEST_POINTS_NUMBER :
                return getPointFromLargestNumberCluster(clusters);
            case FARTHEST_POINT :
                return getFarthestPoint(clusters);
            default :
                throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
        }
    }

    /**
     * Adds the given points to the closest {@link Cluster}.
     * <p>
     * The entry of {@code assignments} at index {@code i} is the index (in
     * {@code clusters}) of the cluster that received the {@code i}-th point
     * met while iterating over {@code points}; the array is updated in place.
     * </p>
     *
     * @param <T> type of the points to cluster
     * @param clusters the {@link Cluster}s to add the points to
     * @param points the points to add to the given {@link Cluster}s
     * @param assignments points assignments to clusters
     * @return the number of points assigned to different clusters as the iteration before
     */
    private static <T extends Clusterable<T>> int
        assignPointsToClusters(final List<Cluster<T>> clusters, final Collection<T> points,
                               final int[] assignments) {
        int assignedDifferently = 0;
        int pointIndex = 0;
        for (final T p : points) {
            final int clusterIndex = getNearestCluster(clusters, p);
            if (clusterIndex != assignments[pointIndex]) {
                assignedDifferently++;
            }

            clusters.get(clusterIndex).addPoint(p);
            assignments[pointIndex++] = clusterIndex;
        }

        return assignedDifferently;
    }

    /**
     * Use K-means++ to choose the initial centers.
     * <p>
     * The first center is drawn uniformly among the points; each following center
     * is drawn among the remaining points using the cumulative sums of the squared
     * distances to the nearest center already chosen. The caller is expected to
     * have checked that {@code 1 <= k <= points.size()}.
     * </p>
     *
     * @param <T> type of the points to cluster
     * @param points the points to choose the initial centers from
     * @param k the number of centers to choose
     * @param random random generator to use
     * @return the initial centers
     */
    private static <T extends Clusterable<T>> List<Cluster<T>>
        chooseInitialCenters(final Collection<T> points, final int k, final Random random) {

        final List<T> pointSet = new ArrayList<T>(points);
        final List<Cluster<T>> resultSet = new ArrayList<Cluster<T>>();

        // Choose one center uniformly at random from among the data points.
        final T firstPoint = pointSet.remove(random.nextInt(pointSet.size()));
        resultSet.add(new Cluster<T>(firstPoint));

        // cumulative sums of D(x)^2; only the first pointSet.size() entries are
        // meaningful in a given round, as pointSet shrinks by one point per round
        final double[] dx2 = new double[pointSet.size()];
        while (resultSet.size() < k) {
            final int remaining = pointSet.size();

            // For each data point x, compute D(x), the distance between x and
            // the nearest center that has already been chosen.
            double sum = 0;
            for (int i = 0; i < remaining; i++) {
                final T p = pointSet.get(i);
                final int nearestClusterIndex = getNearestCluster(resultSet, p);
                final Cluster<T> nearest = resultSet.get(nearestClusterIndex);
                final double d = p.distanceFrom(nearest.getCenter());
                sum += d * d;
                dx2[i] = sum;
            }

            // Add one new data point as a center. Each point x is chosen with
            // probability proportional to D(x)^2
            final double r = random.nextDouble() * sum;

            // Fall back to the last remaining point when no cumulative sum
            // reaches r (the comparison below never succeeds if sum or r is NaN);
            // without this fallback no center would be added and the enclosing
            // loop would not terminate.
            int selected = remaining - 1;
            for (int i = 0; i < remaining; i++) {
                if (dx2[i] >= r) {
                    selected = i;
                    break;
                }
            }
            resultSet.add(new Cluster<T>(pointSet.remove(selected)));
        }

        return resultSet;

    }

    /**
     * Get a random point from the {@link Cluster} with the largest distance variance.
     * <p>
     * The returned point is removed from the selected cluster.
     * </p>
     *
     * @param clusters the {@link Cluster}s to search
     * @return a random point from the selected cluster
     * @throws ConvergenceException if no cluster could be selected
     */
    private T getPointFromLargestVarianceCluster(final Collection<Cluster<T>> clusters) {

        double maxVariance = Double.NEGATIVE_INFINITY;
        Cluster<T> selected = null;
        for (final Cluster<T> cluster : clusters) {
            if (!cluster.getPoints().isEmpty()) {

                // compute the distance variance of the current cluster
                final T center = cluster.getCenter();
                final Variance stat = new Variance();
                for (final T point : cluster.getPoints()) {
                    stat.increment(point.distanceFrom(center));
                }
                final double variance = stat.getResult();

                // select the cluster with the largest variance
                if (variance > maxVariance) {
                    maxVariance = variance;
                    selected = cluster;
                }

            }
        }

        // did we find at least one non-empty cluster ?
        if (selected == null) {
            throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
        }

        // extract a random point from the cluster
        final List<T> selectedPoints = selected.getPoints();
        return selectedPoints.remove(random.nextInt(selectedPoints.size()));

    }

    /**
     * Get a random point from the {@link Cluster} with the largest number of points.
     * <p>
     * The returned point is removed from the selected cluster.
     * </p>
     *
     * @param clusters the {@link Cluster}s to search
     * @return a random point from the selected cluster
     * @throws ConvergenceException if all clusters are empty
     */
    private T getPointFromLargestNumberCluster(final Collection<Cluster<T>> clusters) {

        int maxNumber = 0;
        Cluster<T> selected = null;
        for (final Cluster<T> cluster : clusters) {

            // get the number of points of the current cluster
            final int number = cluster.getPoints().size();

            // select the cluster with the largest number of points
            if (number > maxNumber) {
                maxNumber = number;
                selected = cluster;
            }

        }

        // did we find at least one non-empty cluster ?
        if (selected == null) {
            throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
        }

        // extract a random point from the cluster
        final List<T> selectedPoints = selected.getPoints();
        return selectedPoints.remove(random.nextInt(selectedPoints.size()));

    }

    /**
     * Get the point farthest to its cluster center.
     * <p>
     * The returned point is removed from the cluster it belongs to.
     * </p>
     *
     * @param clusters the {@link Cluster}s to search
     * @return point farthest to its cluster center
     * @throws ConvergenceException if no point could be selected
     */
    private T getFarthestPoint(final Collection<Cluster<T>> clusters) {

        double maxDistance = Double.NEGATIVE_INFINITY;
        Cluster<T> selectedCluster = null;
        int selectedPoint = -1;
        for (final Cluster<T> cluster : clusters) {

            // get the farthest point
            final T center = cluster.getCenter();
            final List<T> points = cluster.getPoints();
            for (int i = 0; i < points.size(); ++i) {
                final double distance = points.get(i).distanceFrom(center);
                if (distance > maxDistance) {
                    maxDistance     = distance;
                    selectedCluster = cluster;
                    selectedPoint   = i;
                }
            }

        }

        // did we find at least one non-empty cluster ?
        if (selectedCluster == null) {
            throw new ConvergenceException(LocalizedFormats.EMPTY_CLUSTER_IN_K_MEANS);
        }

        return selectedCluster.getPoints().remove(selectedPoint);

    }

    /**
     * Returns the nearest {@link Cluster} to the given point.
     * <p>
     * Index 0 is returned when no cluster center is at a distance strictly
     * smaller than {@link Double#MAX_VALUE} from the point.
     * </p>
     *
     * @param <T> type of the points to cluster
     * @param clusters the {@link Cluster}s to search
     * @param point the point to find the nearest {@link Cluster} for
     * @return the index of the nearest {@link Cluster} to the given point
     */
    private static <T extends Clusterable<T>> int
        getNearestCluster(final Collection<Cluster<T>> clusters, final T point) {
        double minDistance = Double.MAX_VALUE;
        int clusterIndex = 0;
        int minCluster = 0;
        for (final Cluster<T> c : clusters) {
            final double distance = point.distanceFrom(c.getCenter());
            if (distance < minDistance) {
                minDistance = distance;
                minCluster = clusterIndex;
            }
            clusterIndex++;
        }
        return minCluster;
    }

}