/******************************************************************************* * Copyright (c) 2010 Haifeng Li * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ package smile.clustering; import org.junit.After; import org.junit.AfterClass; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import smile.data.AttributeDataset; import smile.data.NominalAttribute; import smile.data.parser.DelimitedTextParser; import smile.stat.distribution.MultivariateGaussianDistribution; import smile.validation.AdjustedRandIndex; import smile.validation.RandIndex; import static org.junit.Assert.*; /** * * @author Haifeng Li */ public class KMeansTest { double[] mu1 = {1.0, 1.0, 1.0}; double[][] sigma1 = {{1.0, 0.0, 0.0}, {0.0, 1.0, 0.0}, {0.0, 0.0, 1.0}}; double[] mu2 = {-2.0, -2.0, -2.0}; double[][] sigma2 = {{1.0, 0.3, 0.8}, {0.3, 1.0, 0.5}, {0.8, 0.5, 1.0}}; double[] mu3 = {4.0, 2.0, 3.0}; double[][] sigma3 = {{1.0, 0.8, 0.3}, {0.8, 1.0, 0.5}, {0.3, 0.5, 1.0}}; double[] mu4 = {3.0, 5.0, 1.0}; double[][] sigma4 = {{1.0, 0.5, 0.5}, {0.5, 1.0, 0.5}, {0.5, 0.5, 1.0}}; double[][] data = new double[100000][]; int[] label = new int[100000]; public KMeansTest() { MultivariateGaussianDistribution g1 = new MultivariateGaussianDistribution(mu1, sigma1); for (int i = 0; i < 20000; i++) { data[i] = g1.rand(); label[i] = 0; } MultivariateGaussianDistribution g2 = new MultivariateGaussianDistribution(mu2, sigma2); for (int i = 0; i < 30000; i++) { data[20000 + i] = g2.rand(); label[i] = 1; } MultivariateGaussianDistribution g3 = new MultivariateGaussianDistribution(mu3, sigma3); for (int i = 0; i < 30000; i++) { data[50000 + i] = g3.rand(); label[i] = 2; } MultivariateGaussianDistribution g4 = new MultivariateGaussianDistribution(mu4, sigma4); for (int i = 0; i < 20000; i++) { data[80000 + i] = g4.rand(); label[i] = 3; } } @BeforeClass public static void setUpClass() throws Exception { } @AfterClass public static void tearDownClass() throws Exception { } @Before public void setUp() { } @After public void tearDown() { } /** * Test of learn method, of class KMeans. */ @Test public void testBBD4() { System.out.println("BBD 4"); KMeans kmeans = new KMeans(data, 4, 100); AdjustedRandIndex ari = new AdjustedRandIndex(); RandIndex rand = new RandIndex(); double r = rand.measure(label, kmeans.getClusterLabel()); double r2 = ari.measure(label, kmeans.getClusterLabel()); System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2); } /** * Test of lloyd method, of class KMeans. */ @Test public void testLloyd4() { System.out.println("Lloyd 4"); KMeans kmeans = KMeans.lloyd(data, 4, 100); AdjustedRandIndex ari = new AdjustedRandIndex(); RandIndex rand = new RandIndex(); double r = rand.measure(label, kmeans.getClusterLabel()); double r2 = ari.measure(label, kmeans.getClusterLabel()); System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2); } /** * Test of learn method, of class KMeans. */ @Test public void testBBD64() { System.out.println("BBD 64"); KMeans kmeans = new KMeans(data, 64, 100); AdjustedRandIndex ari = new AdjustedRandIndex(); RandIndex rand = new RandIndex(); double r = rand.measure(label, kmeans.getClusterLabel()); double r2 = ari.measure(label, kmeans.getClusterLabel()); System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2); } /** * Test of lloyd method, of class KMeans. */ @Test public void testLloyd64() { System.out.println("Lloyd 64"); KMeans kmeans = KMeans.lloyd(data, 64, 100); AdjustedRandIndex ari = new AdjustedRandIndex(); RandIndex rand = new RandIndex(); double r = rand.measure(label, kmeans.getClusterLabel()); double r2 = ari.measure(label, kmeans.getClusterLabel()); System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2); } /** * Test of learn method, of class KMeans. */ @Test public void testUSPS() { System.out.println("USPS"); DelimitedTextParser parser = new DelimitedTextParser(); parser.setResponseIndex(new NominalAttribute("class"), 0); try { AttributeDataset train = parser.parse("USPS Train", smile.data.parser.IOUtils.getTestDataFile("usps/zip.train")); AttributeDataset test = parser.parse("USPS Test", smile.data.parser.IOUtils.getTestDataFile("usps/zip.test")); double[][] x = train.toArray(new double[train.size()][]); int[] y = train.toArray(new int[train.size()]); double[][] testx = test.toArray(new double[test.size()][]); int[] testy = test.toArray(new int[test.size()]); KMeans kmeans = new KMeans(x, 10, 100, 4); AdjustedRandIndex ari = new AdjustedRandIndex(); RandIndex rand = new RandIndex(); double r = rand.measure(y, kmeans.getClusterLabel()); double r2 = ari.measure(y, kmeans.getClusterLabel()); System.out.format("Training rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2); assertTrue(r > 0.85); assertTrue(r2 > 0.45); int[] p = new int[testx.length]; for (int i = 0; i < testx.length; i++) { p[i] = kmeans.predict(testx[i]); } r = rand.measure(testy, p); r2 = ari.measure(testy, p); System.out.format("Testing rand index = %.2f%%\tadjusted rand index = %.2f%%%n", 100.0 * r, 100.0 * r2); assertTrue(r > 0.85); assertTrue(r2 > 0.45); } catch (Exception ex) { System.err.println(ex); } } }