/*
 * Joinery -- Data frames for Java
 * Copyright (c) 2014, 2015 IBM Corp.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
 */

package joinery;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;

import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.List;
import java.util.Map;

import joinery.DataFrame.Aggregate;
import joinery.DataFrame.KeyFunction;

import org.junit.Before;
import org.junit.Test;

/**
 * Tests for the grouping operations of {@link DataFrame}: grouping by column
 * index, by column name, by a {@link KeyFunction}, exploding groups into
 * separate frames, and applying aggregates to grouped data.
 */
public class DataFrameGroupByTest {
    /** Fixture frame read from the {@code grouping.csv} classpath resource. */
    private DataFrame<Object> df;

    /**
     * Loads the shared fixture frame from {@code grouping.csv} before each test.
     *
     * @throws IOException if reading or closing the resource stream fails
     */
    @Before
    public final void setUp()
    throws IOException {
        // Close the resource stream deterministically once the frame is read,
        // whether or not readCsv closes it itself.
        try (InputStream csv = ClassLoader.getSystemResourceAsStream("grouping.csv")) {
            df = DataFrame.readCsv(csv);
        }
    }

    /**
     * Groups a two-column frame by its first column and checks the row count,
     * the index, and the values of column 1 of the counted result.
     */
    @Test
    public final void testGroupBy() {
        final DataFrame<Object> df = new DataFrame<>();
        df.add("name", Arrays.<Object>asList("one", "two", "three", "four", "one", "two"));
        df.add("value", Arrays.<Object>asList(1, 2, 3, 4, 5, 6));

        final DataFrame<Object> grouped = df.groupBy(0).count();

        assertEquals(
                "group by result has correct number of rows",
                4,
                grouped.length()
            );
        assertArrayEquals(
                "group by result has correct names",
                new Object[] { "one", "two", "three", "four" },
                grouped.index().toArray()
            );
        assertArrayEquals(
                "group by result has correct values",
                new Object[] { 2, 2, 1, 1 },
                grouped.col(1).toArray()
            );
    }

    /**
     * Expects {@link IllegalArgumentException} when {@code sum()} is applied
     * to a grouped frame whose values are all strings.
     */
    @Test(expected=IllegalArgumentException.class)
    public final void testGroupByInvalid() {
        new DataFrame<String>()
            .add("name", Arrays.<String>asList("one", "two", "three", "four", "one", "two"))
            .add("value", Arrays.<String>asList("1", "2", "3", "4", "1", "6"))
            .groupBy(0)
            .sum();
    }

    /**
     * Groups by two named columns and checks every row of the counted result
     * against the expected {@code category, name, count} triples.
     */
    @Test
    public final void testGroupByMultiple() {
        final DataFrame<Object> df = new DataFrame<>();
        df.add("name", Arrays.<Object>asList("one", "two", "three", "four", "one", "two"));
        df.add("category", Arrays.<Object>asList("alpha", "beta", "alpha", "beta", "alpha", "beta"));
        df.add("value", Arrays.<Object>asList(1, 2, 3, 4, 5, 6));

        final Object[][] expected = new Object[][] {
                new Object[] { "alpha", "one",   2 },
                new Object[] { "beta",  "two",   2 },
                new Object[] { "alpha", "three", 1 },
                new Object[] { "beta",  "four",  1 }
            };

        // The grouped counts do not depend on the loop index, so compute them
        // once instead of regrouping the frame for every expected row.
        final DataFrame<Object> counts = df.groupBy("category", "name").count();

        for (int i = 0; i < expected.length; i++) {
            assertArrayEquals(
                    "group by result has correct values",
                    expected[i],
                    counts.row(i).toArray()
                );
        }
    }

    /**
     * Groups the fixture frame by column {@code "b"}, explodes it into one
     * frame per key, and checks the contents of the frames for the keys
     * {@code "one"}, {@code "two"} and {@code "three"}.
     */
    @Test
    public void testGroups() {
        final Map<Object, DataFrame<Object>> groups = df.groupBy("b").explode();

        assertArrayEquals(
                new Object[] {
                    "alpha", "bravo", "one", "one",
                    10L, 20L, 10.0, 20.0
                },
                groups.get("one").toArray()
            );
        assertArrayEquals(
                new Object[] {
                    "charlie", "delta", "two", "two",
                    30L, 40L, 30.0, 40.0
                },
                groups.get("two").toArray()
            );
        assertArrayEquals(
                new Object[] {
                    "echo", "foxtrot", "golf", "three", "three", "three",
                    50L, 60L, 70L, 50.0, 60.0, 70.0
                },
                groups.get("three").toArray()
            );
    }

    /**
     * Applies a custom {@link Aggregate} that returns the size of the list it
     * receives to the fixture frame grouped by column {@code "b"}.
     */
    @Test
    public void testGroupApply() {
        assertArrayEquals(
                new Object[] {
                    "one", "two", "three",
                    2, 2, 3,
                    2, 2, 3,
                    2, 2, 3
                },
                df.groupBy("b").aggregate(new Aggregate<Object, Object>() {
                    @Override
                    public Integer apply(final List<Object> value) {
                        return value.size();
                    }
                }).toArray()
            );
    }

    /**
     * Groups the fixture frame with a {@link KeyFunction} that uses element 1
     * of the list it is given as the group key (presumably the row's values;
     * confirm against {@code DataFrame.groupBy}), then checks the sums.
     */
    @Test
    public void testKeyFunction() {
        assertArrayEquals(
                new Object[] {
                    30.0, 70.0, 180.0,
                    30.0, 70.0, 180.0
                },
                df.groupBy(new KeyFunction<Object>() {
                    @Override
                    public Object apply(final List<Object> value) {
                        return value.get(1);
                    }
                }).sum().toArray()
            );
    }
}