/*
 * Apache License
 * Version 2.0, January 2004
 * http://www.apache.org/licenses/
 *
 *    Copyright 2013 Aurelian Tutuianu
 *    Copyright 2014 Aurelian Tutuianu
 *    Copyright 2015 Aurelian Tutuianu
 *    Copyright 2016 Aurelian Tutuianu
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 *
 */

package rapaio.core.stat;

import org.junit.Assert;
import org.junit.Test;
import rapaio.core.CoreTools;
import rapaio.data.*;
import rapaio.io.Csv;
import rapaio.sys.WS;

import java.io.IOException;
import java.util.Arrays;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static rapaio.core.CoreTools.*;

/**
 * Tests for the statistical shortcuts exposed by {@link CoreTools} (mean, variance,
 * quantiles, modes, covariance) and for a few statistics classes used directly
 * ({@code Variance}, {@code Minimum}, {@code Maximum}, {@code GeometricMean}).
 *
 * @author <a href="mailto:padreati@yahoo.com">Aurelian Tutuianu</a>
 */
public class CoreToolsTest {

    /** Frame read from the {@code core_stat.csv} test resource, all columns numeric. */
    private final Frame df;

    /**
     * Loads the {@code core_stat.csv} resource, with header, reading every column as numeric.
     *
     * @throws IOException if the resource cannot be read
     */
    public CoreToolsTest() throws IOException {
        this.df = new Csv()
                .withHeader(true)
                .withDefaultTypes(VarType.NUMERIC)
                .read(getClass(), "core_stat.csv");
    }

    /**
     * Compares mean, standard deviation, minimum and maximum of the first column of the
     * fixture against hard-coded reference values (per the test name, obtained from R).
     */
    @Test
    public void testRReference() throws IOException {
        mean(df.var(0)).printSummary();
        var(df.var(0)).printSummary();

        assertEquals(Double.valueOf("999.98132402093892779"), mean(df.var(0)).value(), 1e-12);
        assertEquals(Double.valueOf("1.0012615815492349469"),
                Math.sqrt(Variance.from(df.var(0)).value()), 1e-12);
        Assert.assertEquals(996.343866540788, Minimum.from(df.var(0)).value(), 1e-12);
        Assert.assertEquals(1004.24956126934, Maximum.from(df.var(0)).value(), 1e-12);
    }

    /**
     * Checks that the mean of a variable holding only NaN values is NaN, and pins the
     * exact text of the mean and variance summaries for a small complete variable.
     */
    @Test
    public void testEmptyMean() {
        Numeric num1 = Numeric.copy(Double.NaN, Double.NaN, Double.NaN);
        double mean = mean(num1).value();
        assertTrue(Double.isNaN(mean));

        Numeric num2 = Numeric.wrap(1, 2, 3, 4);

        assertEquals("\n> mean[?]\n" +
                        "total rows: 4 (complete: 4, missing: 0)\n" +
                        "mean: 2.5\n",
                mean(num2).summary());

        assertEquals("\n> variance[?]\n" +
                        "total rows: 4 (complete: 4, missing: 0)\n" +
                        "variance: 1.6666667\n" +
                        "sd: 1.2909944\n",
                var(num2).summary());

        mean(num2).printSummary();
        var(num2).printSummary();
    }

    /**
     * Exercises quantile estimation on a dense sequence, on a variable created with
     * {@code Numeric.empty(10)}, on a copy of it with a single value set, and with the
     * {@code R8} estimation type.
     */
    @Test
    public void testQuantiles() {
        // Quantiles of 0, 0.001, ..., 1 at the same probabilities must reproduce the input.
        Numeric v = Numeric.seq(0, 1, 0.001);
        Quantiles q1 = quantiles(v, Numeric.seq(0, 1, 0.001));
        assertTrue(v.deepEquals(Numeric.wrap(q1.values())));

        Numeric vEmpty = Numeric.empty(10);
        Numeric vOne = vEmpty.solidCopy();
        vOne.setValue(3, 10);

        // With no value set, every requested quantile is expected to be NaN.
        Quantiles q2 = quantiles(vEmpty, Numeric.seq(0, 1, 0.1));
        Assert.assertEquals(11, q2.values().length);
        for (double quantile : q2.values()) {
            Assert.assertTrue(Double.isNaN(quantile));
        }

        // With exactly one value set, every requested quantile is expected to equal it.
        Quantiles q3 = quantiles(vOne, Numeric.seq(0, 1, 0.1));
        Assert.assertEquals(11, q3.values().length);
        for (double quantile : q3.values()) {
            Assert.assertEquals(10, quantile, 1e-20);
        }

        Quantiles q4 = quantiles(v, Quantiles.Type.R8, Numeric.seq(0, 1, 0.1));
        Arrays.stream(q4.values()).forEach(val -> WS.println(WS.formatLong(val)));
        Numeric v4 = Numeric.copy(
                0,
                0.09946666666666674,
                0.19960000000000017,
                0.2997333333333336,
                0.399866666666667,
                0.5000000000000003,
                0.6001333333333337,
                0.7002666666666671,
                0.8004000000000006,
                0.900533333333334,
                1.0000000000000007);
        q4.printSummary();
        assertTrue(v4.deepEquals(Numeric.wrap(q4.values())));
    }

    /**
     * Checks the modes reported for nominal variables: ties, a single value, a unique
     * winner, all-distinct values and an empty variable.
     */
    @Test
    public void testMode() {
        assertEquals("[a, b]",
                Arrays.deepToString(modes(Nominal.copy("a", "a", "b", "a", "b", "c", "b")).values()));
        assertEquals("[a]",
                Arrays.deepToString(modes(Nominal.copy("a")).values()));
        assertEquals("[a]",
                Arrays.deepToString(modes(Nominal.copy("a", "a", "a", "b", "c", "b")).values()));
        assertEquals("[a, c, b]",
                Arrays.deepToString(modes(Nominal.copy("a", "c", "b")).values()));
        assertEquals("[]",
                Arrays.deepToString(modes(Nominal.copy()).values()));
    }

    /**
     * Checks that the covariance of a variable with itself equals its variance, and pins
     * the covariance of two linearly related sequences to a known value.
     */
    @Test
    public void testCovariance() {
        Numeric v1 = Numeric.seq(0, 200, 0.1);
        assertEquals(cov(v1, v1).value(), var(v1).value(), 1e-12);

        Numeric x = Numeric.copy(1, 2, 3, 4);
        assertEquals(cov(x, x).value(), var(x).value(), 1e-12);

        Numeric norm = distNormal().sample(20_000);
        assertEquals(cov(norm, norm).value(), var(norm).value(), 1e-12);

        Var x1 = Numeric.seq(0, 200, 1);
        Var x2 = Numeric.seq(0, 50, 0.25);
        assertEquals(845.875, cov(x1, x2).value(), 1e-12);
    }

    /**
     * Checks geometric means of small positive samples and that a sample containing a
     * negative value yields NaN.
     */
    @Test
    public void testGeometricMean() {
        assertEquals(4, GeometricMean.from(Numeric.copy(2, 8)).value(), 1e-20);
        assertEquals(0.5, GeometricMean.from(Numeric.copy(4, 1, 1 / 32.)).value(), 1e-16);
        assertEquals(42.42640687119286, GeometricMean.from(Numeric.copy(6, 50, 9, 1200)).value(), 1e-20);

        GeometricMean.from(Numeric.copy(6, 50, 9, 1200)).printSummary();

        // NaN never compares equal with ==, so the check must go through Double.isNaN;
        // an ==-based assertion would pass regardless of the computed value.
        assertTrue(Double.isNaN(GeometricMean.from(Numeric.copy(1, -1)).value()));
        GeometricMean.from(Numeric.wrap(1, -1)).printSummary();
    }

    /**
     * Checks that mean and variance work on index and binary variables and that entries
     * marked as missing are left out of the computation.
     */
    @Test
    public void testToolsOnNonNumeric() {
        // The three Integer.MIN_VALUE entries are expected to be counted as missing,
        // leaving the same seven values as idx2.
        Var idx1 = Index.wrap(1, 2, Integer.MIN_VALUE, 3, Integer.MIN_VALUE, 4, 5, 6, Integer.MIN_VALUE, 7);
        Var idx2 = Index.wrap(1, 2, 3, 4, 5, 6, 7);

        Assert.assertEquals(4, CoreTools.mean(idx1).value(), 1e-20);
        Assert.assertEquals(CoreTools.var(idx2).value(), CoreTools.var(idx1).value(), 1e-20);
        Assert.assertEquals(7, CoreTools.var(idx1).completeCount());
        Assert.assertEquals(3, CoreTools.var(idx1).missingCount());

        // NOTE(review): -1 presumably encodes a missing binary value, which would make
        // bin1 equivalent to bin2 (three true, two false) - confirm against Binary.copy.
        Var bin1 = Binary.copy(1, 0, 1, -1, 1, -1, 0);
        Var bin2 = Binary.copy(true, false, true, true, false);

        Assert.assertEquals(0.6, CoreTools.mean(bin1).value(), 1e-20);
        Assert.assertEquals(CoreTools.var(bin2).value(), CoreTools.var(bin1).value(), 1e-20);
    }
}