package com.linkedin.thirdeye.dataframe;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import org.testng.Assert;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
public class DataFrameTest {
final static byte TRUE = BooleanSeries.TRUE;
final static byte FALSE = BooleanSeries.FALSE;
final static double DNULL = DoubleSeries.NULL;
final static long LNULL = LongSeries.NULL;
final static String SNULL = StringSeries.NULL;
final static byte BNULL = BooleanSeries.NULL;
final static double COMPARE_DOUBLE_DELTA = 0.001;
final static long[] INDEX = new long[] { -1, 1, -2, 4, 3 };
final static double[] VALUES_DOUBLE = new double[] { -2.1, -0.1, 0.0, 0.5, 1.3 };
final static long[] VALUES_LONG = new long[] { -2, 1, 0, 1, 2 };
final static String[] VALUES_STRING = new String[] { "-2.3", "-1", "0.0", "0.5", "0.13e1" };
final static byte[] VALUES_BOOLEAN = new byte[] { 1, 1, 0, 1, 1 };
// TODO test double batch function
// TODO test string batch function
// TODO test boolean batch function
// TODO string test head, tail, accessors
// TODO boolean test head, tail, accessors
// TODO shift double, long, boolean
// TODO fill double, long, boolean
DataFrame df;
@BeforeMethod
public void before() {
df = new DataFrame(INDEX)
.addSeries("double", VALUES_DOUBLE)
.addSeries("long", VALUES_LONG)
.addSeries("string", VALUES_STRING)
.addSeries("boolean", VALUES_BOOLEAN);
}
@Test
public void testEnforceSeriesLengthPass() {
df.addSeries("series", VALUES_DOUBLE);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testEnforceSeriesLengthFail() {
df.addSeries("series", 0.1, 3.2);
}
@Test
public void testSeriesName() {
df.addSeries("ab", VALUES_DOUBLE);
df.addSeries("_a", VALUES_DOUBLE);
df.addSeries("a1", VALUES_DOUBLE);
}
@Test
public void testChainedEqualsSeparate() {
DataFrame dfChained = new DataFrame()
.addSeries("test", 1, 2, 3)
.addSeries("drop", 1, 2, 3)
.renameSeries("test", "checkme")
.dropSeries("drop");
DataFrame dfSeparate = new DataFrame();
dfSeparate.addSeries("test", 1, 2, 3);
dfSeparate.addSeries("drop", 1, 2, 3);
dfSeparate.renameSeries("test", "checkme");
dfSeparate.dropSeries("drop");
Assert.assertEquals(dfChained.getSeriesNames().size(), 1);
Assert.assertEquals(dfSeparate.getSeriesNames().size(), 1);
Assert.assertEquals(dfChained, dfSeparate);
}
@Test(expectedExceptions = IllegalArgumentException.class, dataProvider = "testSeriesNameFailProvider")
public void testSeriesNameFail(String name) {
df.addSeries(name, VALUES_DOUBLE);
}
@DataProvider(name = "testSeriesNameFailProvider")
public Object[][] testSeriesNameFailProvider() {
return new Object[][] { { null }, { "" }, { "1a" }, { "a,b" }, { "a-b" }, { "a+b" }, { "a*b" }, { "a/b" }, { "a=b" }, { "a>b" } };
}
@Test
public void testIndexColumn() {
DataFrame dfEmpty = new DataFrame();
Assert.assertTrue(dfEmpty.getSeriesNames().isEmpty());
DataFrame dfIndexRange = new DataFrame(0);
Assert.assertEquals(dfIndexRange.getSeriesNames(), Collections.singleton("index"));
}
@Test
public void testDoubleNoDataDuplication() {
DoubleSeries first = DataFrame.toSeries(VALUES_DOUBLE);
DoubleSeries second = DataFrame.toSeries(VALUES_DOUBLE);
Assert.assertSame(first.values(), second.values());
}
@Test
public void testDoubleToDouble() {
assertEquals(DataFrame.toSeries(VALUES_DOUBLE).getDoubles(), VALUES_DOUBLE);
}
@Test
public void testDoubleToLong() {
assertEquals(DataFrame.toSeries(VALUES_DOUBLE).getLongs(), -2, 0, 0, 0, 1);
}
@Test
public void testDoubleToBoolean() {
assertEquals(DataFrame.toSeries(VALUES_DOUBLE).getBooleans(), TRUE, TRUE, FALSE, TRUE, TRUE);
}
@Test
public void testDoubleToString() {
assertEquals(DataFrame.toSeries(VALUES_DOUBLE).getStrings(), "-2.1", "-0.1", "0.0", "0.5", "1.3");
}
@Test
public void testLongToDouble() {
assertEquals(DataFrame.toSeries(VALUES_LONG).getDoubles(), -2.0, 1.0, 0.0, 1.0, 2.0);
}
@Test
public void testLongToLong() {
assertEquals(DataFrame.toSeries(VALUES_LONG).getLongs(), VALUES_LONG);
}
@Test
public void testLongToBoolean() {
assertEquals(DataFrame.toSeries(VALUES_LONG).getBooleans(), TRUE, TRUE, FALSE, TRUE, TRUE);
}
@Test
public void testLongToString() {
assertEquals(DataFrame.toSeries(VALUES_LONG).getStrings(), "-2", "1", "0", "1", "2");
}
@Test
public void testBooleanToDouble() {
assertEquals(DataFrame.toSeries(VALUES_BOOLEAN).getDoubles(), 1.0, 1.0, 0.0, 1.0, 1.0);
}
@Test
public void testBooleanToLong() {
assertEquals(DataFrame.toSeries(VALUES_BOOLEAN).getLongs(), TRUE, TRUE, FALSE, TRUE, TRUE);
}
@Test
public void testBooleanToBoolean() {
assertEquals(DataFrame.toSeries(VALUES_BOOLEAN).getBooleans(), VALUES_BOOLEAN);
}
@Test
public void testBooleanToString() {
assertEquals(DataFrame.toSeries(VALUES_BOOLEAN).getStrings(), "true", "true", "false", "true", "true");
}
@Test
public void testStringToDouble() {
assertEquals(DataFrame.toSeries(VALUES_STRING).getDoubles(), -2.3, -1.0, 0.0, 0.5, 1.3);
}
@Test
public void testStringToDoubleNulls() {
Series s = DataFrame.toSeries("", null, "-2.1e1");
assertEquals(s.getDoubles(), DNULL, DNULL, -21.0d);
}
@Test
public void testStringToLong() {
// NOTE: transparent conversion via double
assertEquals(DataFrame.toSeries(VALUES_STRING).getLongs(), -2, -1, 0, 0, 1);
}
@Test
public void testStringToLongNulls() {
// NOTE: transparent conversion via double
Series s = DataFrame.toSeries("", null, "-1.0");
assertEquals(s.getLongs(), LNULL, LNULL, -1);
}
@Test
public void testStringToBoolean() {
// NOTE: transparent conversion via double
assertEquals(DataFrame.toSeries(VALUES_STRING).getBooleans(), TRUE, TRUE, FALSE, TRUE, TRUE);
}
@Test
public void testStringToBooleanNulls() {
// NOTE: transparent conversion via double
Series s = DataFrame.toSeries("", null, "true");
assertEquals(s.getBooleans(), BNULL, BNULL, TRUE);
}
@Test
public void testStringToString() {
assertEquals(DataFrame.toSeries(VALUES_STRING).getStrings(), VALUES_STRING);
}
@Test
public void testDoubleBuilderNull() {
assertEquals(DoubleSeries.builder().addValues((Double)null).build(), DNULL);
}
@Test
public void testLongBuilderNull() {
assertEquals(LongSeries.builder().addValues((Long)null).build(), LNULL);
}
@Test
public void testStringBuilderNull() {
assertEquals(StringSeries.builder().addValues((String)null).build(), SNULL);
}
@Test
public void testBooleanBuilderNull() {
assertEquals(BooleanSeries.builder().addValues((Byte)null).build(), BNULL);
}
@Test
public void testBooleanBuilderNullBoolean() {
assertEquals(BooleanSeries.builder().addBooleanValues((Boolean)null).build(), BNULL);
}
@Test
public void testDataFrameBuilderDynamicTyping() {
DataFrame.Builder builder = DataFrame.builder("double", "long", "string", "boolean");
builder.append(4.0d, 1, null, "true");
builder.append(null, 2, "2", "true");
builder.append(2.3d, "", "hi", "false");
builder.append(1.0d, 4, "4", "");
DataFrame df = builder.build();
Assert.assertEquals(df.get("double").type(), Series.SeriesType.DOUBLE);
Assert.assertEquals(df.get("long").type(), Series.SeriesType.LONG);
Assert.assertEquals(df.get("string").type(), Series.SeriesType.STRING);
Assert.assertEquals(df.get("boolean").type(), Series.SeriesType.BOOLEAN);
assertEquals(df.getDoubles("double"), 4, DNULL, 2.3, 1);
assertEquals(df.getLongs("long"), 1, 2, LNULL, 4);
assertEquals(df.getStrings("string"), SNULL, "2", "hi", "4");
assertEquals(df.getBooleans("boolean"),TRUE, TRUE, FALSE, BNULL);
}
@Test
public void testDataFrameBuilderStaticTyping() {
DataFrame.Builder builder = DataFrame.builder("double:DOUBLE", "long:LONG", "string:STRING", "boolean:BOOLEAN");
builder.append(4.0d, 1, null, "true");
builder.append(null, 2.34, "2", "1");
builder.append("2", "", "3", "false");
builder.append(1.0d, 4, "4", "");
DataFrame df = builder.build();
Assert.assertEquals(df.get("double").type(), Series.SeriesType.DOUBLE);
Assert.assertEquals(df.get("long").type(), Series.SeriesType.LONG);
Assert.assertEquals(df.get("string").type(), Series.SeriesType.STRING);
Assert.assertEquals(df.get("boolean").type(), Series.SeriesType.BOOLEAN);
assertEquals(df.getDoubles("double"), 4, DNULL, 2, 1);
assertEquals(df.getLongs("long"), 1, 2, LNULL, 4);
assertEquals(df.getStrings("string"), SNULL, "2", "3", "4");
assertEquals(df.getBooleans("boolean"),TRUE, TRUE, FALSE, BNULL);
}
@Test(expectedExceptions = NumberFormatException.class)
public void testDataFrameBuilderStaticTypingFailDouble() {
DataFrame.builder("double:DOUBLE").append("true").build();
}
@Test(expectedExceptions = NumberFormatException.class)
public void testDataFrameBuilderStaticTypingFailLong() {
DataFrame.builder("long:LONG").append("true").build();
}
@Test
public void testDoubleNull() {
Series s = DataFrame.toSeries(1.0, DNULL, 2.0);
assertEquals(s.getDoubles(), 1.0, DNULL, 2.0);
assertEquals(s.getLongs(), 1, LNULL, 2);
assertEquals(s.getBooleans(), TRUE, BNULL, TRUE);
assertEquals(s.getStrings(), "1.0", SNULL, "2.0");
}
@Test
public void testLongNull() {
Series s = DataFrame.toSeries(1, LNULL, 2);
assertEquals(s.getDoubles(), 1.0, DNULL, 2.0);
assertEquals(s.getLongs(), 1, LNULL, 2);
assertEquals(s.getBooleans(), TRUE, BNULL, TRUE);
assertEquals(s.getStrings(), "1", SNULL, "2");
}
@Test
public void testBooleanNull() {
Series s = DataFrame.toSeries(TRUE, BNULL, FALSE);
assertEquals(s.getDoubles(), 1.0, DNULL, 0.0);
assertEquals(s.getLongs(), 1, LNULL, 0);
assertEquals(s.getBooleans(), TRUE, BNULL, FALSE);
assertEquals(s.getStrings(), "true", SNULL, "false");
}
@Test
public void testStringNull() {
Series s = DataFrame.toSeries("1.0", SNULL, "2.0");
assertEquals(s.getDoubles(), 1.0, DNULL, 2.0);
assertEquals(s.getLongs(), 1, LNULL, 2);
assertEquals(s.getBooleans(), TRUE, BNULL, TRUE);
assertEquals(s.getStrings(), "1.0", SNULL, "2.0");
}
@Test
public void testDoubleInfinity() {
Series s = DataFrame.toSeries(DoubleSeries.POSITIVE_INFINITY, DoubleSeries.NEGATIVE_INFINITY);
assertEquals(s.getDoubles(), Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY);
assertEquals(s.getLongs(), LongSeries.MAX_VALUE, LongSeries.MIN_VALUE);
assertEquals(s.getBooleans(), BooleanSeries.TRUE, BooleanSeries.TRUE);
assertEquals(s.getStrings(), "Infinity", "-Infinity");
assertEquals(DataFrame.toSeries("Infinity", "-Infinity").getDoubles(),
Double.POSITIVE_INFINITY, Double.NEGATIVE_INFINITY);
}
@Test
public void testMapDoubleToDouble() {
DoubleSeries in = DataFrame.toSeries(VALUES_DOUBLE);
DoubleSeries out = in.map(new DoubleSeries.DoubleFunction() {
public double apply(double... values) {
return values[0] * 2;
}
});
assertEquals(out, -4.2, -0.2, 0.0, 1.0, 2.6);
}
@Test
public void testMapDoubleToBoolean() {
DoubleSeries in = DataFrame.toSeries(VALUES_DOUBLE);
BooleanSeries out = in.map(new DoubleSeries.DoubleConditional() {
public boolean apply(double... values) {
return values[0] <= 0.3;
}
});
assertEquals(out, TRUE, TRUE, TRUE, FALSE, FALSE);
}
@Test
public void testMapDataFrameAsDouble() {
DoubleSeries out = df.map(new Series.DoubleFunction() {
public double apply(double[] values) {
return values[0] + values[1] + values[2];
}
}, "long", "string", "boolean");
assertEquals(out, -3.3, 1.0, 0.0, 2.5, 4.3);
}
@Test
public void testOverrideWithGeneratedSeries() {
DoubleSeries out = df.getDoubles("double").map(new DoubleSeries.DoubleFunction() {
public double apply(double... values) {
return values[0] * 2;
}
});
df = df.addSeries("double", out);
assertEquals(df.getDoubles("double"), -4.2, -0.2, 0.0, 1.0, 2.6);
}
@Test
public void testSortDouble() {
DoubleSeries in = DataFrame.toSeries(3, 1.5, 1.3, 5, 1.9, DNULL);
assertEquals(in.sorted(), DNULL, 1.3, 1.5, 1.9, 3, 5);
}
@Test
public void testSortLong() {
LongSeries in = DataFrame.toSeries(3, 15, 13, 5, 19, LNULL);
assertEquals(in.sorted(), LNULL, 3, 5, 13, 15, 19);
}
@Test
public void testSortString() {
StringSeries in = DataFrame.toSeries("b", "a", "ba", "ab", "aa", SNULL);
assertEquals(in.sorted(), SNULL, "a", "aa", "ab", "b", "ba");
}
@Test
public void testSortBoolean() {
BooleanSeries in = DataFrame.toSeries(TRUE, FALSE, FALSE, TRUE, FALSE, BNULL);
assertEquals(in.sorted(), BNULL, FALSE, FALSE, FALSE, TRUE, TRUE);
}
@Test
public void testProject() {
int[] fromIndex = new int[] { 1, -1, 4, 0 };
DataFrame ndf = df.project(fromIndex);
assertEquals(ndf.getLongs("index"), 1, LNULL, 3, -1);
assertEquals(ndf.getDoubles("double"), -0.1, DNULL, 1.3, -2.1);
assertEquals(ndf.getLongs("long"), 1, LNULL, 2, -2);
assertEquals(ndf.getStrings("string"), "-1", SNULL, "0.13e1", "-2.3");
assertEquals(ndf.getBooleans("boolean"), TRUE, BNULL, TRUE, TRUE);
}
@Test
public void testSortByIndex() {
df = df.sortedBy("index");
// NOTE: internal logic uses reorder() for all sorting
assertEquals(df.getLongs("index"), -2, -1, 1, 3, 4);
assertEquals(df.getDoubles("double"), 0.0, -2.1, -0.1, 1.3, 0.5);
assertEquals(df.getLongs("long"), 0, -2, 1, 2, 1);
assertEquals(df.getStrings("string"), "0.0", "-2.3", "-1", "0.13e1", "0.5");
assertEquals(df.getBooleans("boolean"), FALSE, TRUE, TRUE, TRUE, TRUE);
}
@Test
public void testSortByDouble() {
df = df.addSeries("myseries", 0.1, -2.1, 3.3, 4.6, -7.8 );
df = df.sortedBy("myseries");
assertEquals(df.getLongs("index"), 3, 1, -1, -2, 4);
assertEquals(df.getLongs("long"), 2, 1, -2, 0, 1);
}
@Test
public void testSortByLong() {
df = df.addSeries("myseries", 1, -21, 33, 46, -78 );
df = df.sortedBy("myseries");
assertEquals(df.getLongs("index"), 3, 1, -1, -2, 4);
assertEquals(df.getLongs("long"), 2, 1, -2, 0, 1);
}
@Test
public void testSortByString() {
df = df.addSeries("myseries", "b", "aa", "bb", "c", "a" );
df = df.sortedBy("myseries");
assertEquals(df.getLongs("index"), 3, 1, -1, -2, 4);
assertEquals(df.getLongs("long"), 2, 1, -2, 0, 1);
}
@Test
public void testSortByBoolean() {
// NOTE: boolean sorted should be stable
df = df.addSeries("myseries", true, true, false, false, true );
df = df.sortedBy("myseries");
assertEquals(df.getLongs("index"), -2, 4, -1, 1, 3);
assertEquals(df.getLongs("long"), 0, 1, -2, 1, 2);
}
@Test
public void testReverse() {
// NOTE: uses separate reverse() implementation by each series
df = df.reverse();
assertEquals(df.getLongs("index"), 3, 4, -2, 1, -1);
assertEquals(df.getDoubles("double"), 1.3, 0.5, 0.0, -0.1, -2.1);
assertEquals(df.getLongs("long"), 2, 1, 0, 1, -2);
assertEquals(df.getStrings("string"), "0.13e1", "0.5", "0.0", "-1", "-2.3");
assertEquals(df.getBooleans("boolean"), TRUE, TRUE, FALSE, TRUE, TRUE);
}
@Test
public void testAppendLongDouble() {
Series s = df.get("long").append(df.get("double"));
Assert.assertEquals(s.type(), Series.SeriesType.LONG);
assertEquals(s.getLongs(), -2, 1, 0, 1, 2, -2, 0, 0, 0, 1);
}
@Test
public void testAppendLongBoolean() {
Series s = df.get("long").append(df.get("boolean"));
Assert.assertEquals(s.type(), Series.SeriesType.LONG);
assertEquals(s.getLongs(), -2, 1, 0, 1, 2, 1, 1, 0, 1, 1);
}
@Test
public void testAppendLongString() {
Series s = df.get("long").append(df.get("string"));
Assert.assertEquals(s.type(), Series.SeriesType.LONG);
assertEquals(s.getLongs(), -2, 1, 0, 1, 2, -2, -1, 0, 0, 1);
}
@Test
public void testLongGroupByIntervalEmpty() {
Assert.assertTrue(DataFrame.toSeries(new long[0]).groupByInterval(1).isEmpty());
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testLongGroupByIntervalFailZero() {
DataFrame.toSeries(-1).groupByInterval(0);
}
@Test
public void testLongGroupByInterval() {
LongSeries in = DataFrame.toSeries(3, 15, 13, 5, 19, 20);
Series.SeriesGrouping grouping = in.groupByInterval(4);
Assert.assertEquals(grouping.size(), 6);
Assert.assertEquals(grouping.buckets.get(0).fromIndex, new int[] { 0 });
Assert.assertEquals(grouping.buckets.get(1).fromIndex, new int[] { 3 });
Assert.assertEquals(grouping.buckets.get(2).fromIndex, new int[] {});
Assert.assertEquals(grouping.buckets.get(3).fromIndex, new int[] { 1, 2 });
Assert.assertEquals(grouping.buckets.get(4).fromIndex, new int[] { 4 });
Assert.assertEquals(grouping.buckets.get(5).fromIndex, new int[] { 5 });
}
@Test
public void testLongGroupByCountEmpty() {
Assert.assertTrue(DataFrame.toSeries(new long[0]).groupByCount(1).isEmpty());
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testLongGroupByCountFailZero() {
DataFrame.toSeries(-1).groupByCount(0);
}
@Test
public void testLongGroupByCountAligned() {
LongSeries in = DataFrame.toSeries(3, 15, 13, 5, 19, 20);
Series.SeriesGrouping grouping = in.groupByCount(3);
Assert.assertEquals(grouping.size(), 2);
Assert.assertEquals(grouping.buckets.get(0).fromIndex, new int[] { 0, 1, 2 });
Assert.assertEquals(grouping.buckets.get(1).fromIndex, new int[] { 3, 4, 5 });
}
@Test
public void testLongBucketsByCountUnaligned() {
LongSeries in = DataFrame.toSeries(3, 15, 13, 5, 19, 11, 12, 9);
Series.SeriesGrouping grouping = in.groupByCount(3);
Assert.assertEquals(grouping.size(), 3);
Assert.assertEquals(grouping.buckets.get(0).fromIndex, new int[] { 0, 1, 2 });
Assert.assertEquals(grouping.buckets.get(1).fromIndex, new int[] { 3, 4, 5 });
Assert.assertEquals(grouping.buckets.get(2).fromIndex, new int[] { 6, 7 });
}
@Test
public void testLongGroupByPartitionsEmpty() {
Assert.assertTrue(DataFrame.toSeries(new long[0]).groupByPartitions(1).isEmpty());
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testLongGroupByPartitionsFailZero() {
DataFrame.toSeries(-1).groupByPartitions(0);
}
@Test
public void testLongGroupByPartitionsAligned() {
LongSeries in = DataFrame.toSeries(3, 15, 13, 5, 19, 20, 5, 5, 8, 1);
Series.SeriesGrouping grouping = in.groupByPartitions(5);
Assert.assertEquals(grouping.size(), 5);
Assert.assertEquals(grouping.buckets.get(0).fromIndex, new int[] { 0, 1 });
Assert.assertEquals(grouping.buckets.get(1).fromIndex, new int[] { 2, 3 });
Assert.assertEquals(grouping.buckets.get(2).fromIndex, new int[] { 4, 5 });
Assert.assertEquals(grouping.buckets.get(3).fromIndex, new int[] { 6, 7 });
Assert.assertEquals(grouping.buckets.get(4).fromIndex, new int[] { 8, 9 });
}
@Test
public void testLongGroupByPartitionsUnaligned() {
LongSeries in = DataFrame.toSeries(3, 15, 13, 5, 19, 20, 5, 5, 8, 1);
Series.SeriesGrouping grouping = in.groupByPartitions(3);
Assert.assertEquals(grouping.size(), 3);
Assert.assertEquals(grouping.buckets.get(0).fromIndex, new int[] { 0, 1, 2 });
Assert.assertEquals(grouping.buckets.get(1).fromIndex, new int[] { 3, 4, 5, 6 });
Assert.assertEquals(grouping.buckets.get(2).fromIndex, new int[] { 7, 8, 9 });
}
@Test
public void testLongGroupByPartitionsUnalignedSmall() {
LongSeries in = DataFrame.toSeries(3, 15, 1);
Series.SeriesGrouping grouping = in.groupByPartitions(7);
Assert.assertEquals(grouping.size(), 7);
Assert.assertEquals(grouping.buckets.get(0).fromIndex, new int[] {});
Assert.assertEquals(grouping.buckets.get(1).fromIndex, new int[] { 0 });
Assert.assertEquals(grouping.buckets.get(2).fromIndex, new int[] {});
Assert.assertEquals(grouping.buckets.get(3).fromIndex, new int[] { 1 });
Assert.assertEquals(grouping.buckets.get(4).fromIndex, new int[] {});
Assert.assertEquals(grouping.buckets.get(5).fromIndex, new int[] { 2 });
Assert.assertEquals(grouping.buckets.get(6).fromIndex, new int[] {});
}
@Test
public void testLongGroupByValueEmpty() {
Assert.assertTrue(DataFrame.toSeries(new long[0]).groupByValue().isEmpty());
}
@Test
public void testLongGroupByValue() {
LongSeries in = DataFrame.toSeries(3, 4, 5, 5, 3, 1, 5);
Series.SeriesGrouping grouping = in.groupByValue();
Assert.assertEquals(grouping.size(), 4);
Assert.assertEquals(grouping.buckets.get(0).fromIndex, new int[] { 5 });
Assert.assertEquals(grouping.buckets.get(1).fromIndex, new int[] { 0, 4 });
Assert.assertEquals(grouping.buckets.get(2).fromIndex, new int[] { 1 });
Assert.assertEquals(grouping.buckets.get(3).fromIndex, new int[] { 2, 3, 6 });
}
@Test
public void testLongGroupByMovingWindow() {
LongSeries in = DataFrame.toSeries(3, 4, 5, 5, 3, 1, 5);
Series.SeriesGrouping grouping = in.groupByMovingWindow(3);
Assert.assertEquals(grouping.size(), 5);
Assert.assertEquals(grouping.buckets.get(0).fromIndex, new int[] { 0, 1, 2 });
Assert.assertEquals(grouping.buckets.get(1).fromIndex, new int[] { 1, 2, 3 });
Assert.assertEquals(grouping.buckets.get(2).fromIndex, new int[] { 2, 3, 4 });
Assert.assertEquals(grouping.buckets.get(3).fromIndex, new int[] { 3, 4, 5 });
Assert.assertEquals(grouping.buckets.get(4).fromIndex, new int[] { 4, 5, 6 });
}
@Test
public void testLongGroupByMovingWindowTooLarge() {
LongSeries in = DataFrame.toSeries(3, 4, 5, 5, 3, 1, 5);
Series.SeriesGrouping grouping = in.groupByMovingWindow(8);
Assert.assertEquals(grouping.size(), 0);
}
@Test
public void testLongGroupByMovingWindowAggregation() {
LongSeries in = DataFrame.toSeries(3, 4, 5, 5, 3, 1, 5);
Series.SeriesGrouping grouping = in.groupByMovingWindow(3);
DataFrame out = grouping.aggregate(LongSeries.SUM);
Assert.assertEquals(out.size(), 5);
assertEquals(out.getLongs(Series.GROUP_KEY), 0, 1, 2, 3, 4);
assertEquals(out.getLongs(Series.GROUP_VALUE), 12, 14, 13, 9, 9);
}
@Test
public void testBooleanGroupByValueEmpty() {
Assert.assertTrue(DataFrame.toSeries(new boolean[0]).groupByValue().isEmpty());
}
@Test
public void testBooleanGroupByValue() {
BooleanSeries in = DataFrame.toSeries(true, false, false, true, false, true, false);
Series.SeriesGrouping grouping = in.groupByValue();
Assert.assertEquals(grouping.size(), 2);
Assert.assertEquals(grouping.buckets.get(0).fromIndex, new int[] { 1, 2, 4, 6 });
Assert.assertEquals(grouping.buckets.get(1).fromIndex, new int[] { 0, 3, 5 });
}
@Test
public void testBooleanGroupByValueTrueOnly() {
BooleanSeries in = DataFrame.toSeries(true, true, true);
Series.SeriesGrouping grouping = in.groupByValue();
Assert.assertEquals(grouping.size(), 1);
Assert.assertEquals(grouping.buckets.get(0).fromIndex, new int[] { 0, 1, 2 });
}
@Test
public void testBooleanGroupByValueFalseOnly() {
BooleanSeries in = DataFrame.toSeries(false, false, false);
Series.SeriesGrouping grouping = in.groupByValue();
Assert.assertEquals(grouping.size(), 1);
Assert.assertEquals(grouping.buckets.get(0).fromIndex, new int[] { 0, 1, 2 });
}
@Test
public void testLongAggregateSum() {
Series keys = DataFrame.toSeries(3, 5, 7);
LongSeries in = DataFrame.toSeries(3, 15, 13, 5, 19 );
List<Series.Bucket> buckets = new ArrayList<>();
buckets.add(new Series.Bucket(new int[] { 1, 3, 4 }));
buckets.add(new Series.Bucket(new int[] {}));
buckets.add(new Series.Bucket(new int[] { 0, 2 }));
Series.SeriesGrouping grouping = new Series.SeriesGrouping(keys, in, buckets);
DataFrame out = grouping.aggregate(new LongSeries.LongSum());
assertEquals(out.getLongs("key"), 3, 5, 7);
assertEquals(out.getLongs("value"), 39, LNULL, 16);
}
@Test
public void testLongAggregateLast() {
Series keys = DataFrame.toSeries(3, 5, 7);
LongSeries in = DataFrame.toSeries(3, 15, 13, 5, 19 );
List<Series.Bucket> buckets = new ArrayList<>();
buckets.add(new Series.Bucket(new int[] { 1, 3, 4 }));
buckets.add(new Series.Bucket(new int[] {}));
buckets.add(new Series.Bucket(new int[] { 0, 2 }));
Series.SeriesGrouping grouping = new Series.SeriesGrouping(keys, in, buckets);
DataFrame out = grouping.aggregate(new LongSeries.LongLast());
assertEquals(out.getLongs("key"), 3, 5, 7);
assertEquals(out.getLongs("value"), 19, LNULL, 13);
}
@Test
public void testLongGroupByAggregateEndToEnd() {
LongSeries in = DataFrame.toSeries(0, 3, 12, 2, 4, 8, 5, 1, 7, 9, 6, 10, 11);
Series.SeriesGrouping grouping = in.groupByInterval(4);
Assert.assertEquals(grouping.size(), 4);
DataFrame out = grouping.aggregate(new LongSeries.LongSum());
assertEquals(out.getLongs("key"), 0, 4, 8, 12);
assertEquals(out.getLongs("value"), 6, 22, 38, 12);
}
@Test
public void testAggregateWithoutData() {
DoubleSeries s = DataFrame.toSeries(new double[0]);
Assert.assertEquals(s.sum(), DNULL);
}
@Test
public void testDoubleAggregateWithNull() {
DoubleSeries s = DataFrame.toSeries(1.0, 2.0, DNULL, 4.0);
Assert.assertEquals(s.sum(), DNULL);
Assert.assertEquals(s.fillNull().sum(), 7.0);
Assert.assertEquals(s.dropNull().sum(), 7.0);
}
@Test
public void testLongAggregateWithNull() {
LongSeries s = DataFrame.toSeries(1, 2, LNULL, 4);
Assert.assertEquals(s.sum(), LNULL);
Assert.assertEquals(s.fillNull().sum(), 7);
Assert.assertEquals(s.dropNull().sum(), 7);
}
@Test
public void testStringAggregateWithNull() {
StringSeries s = DataFrame.toSeries("a", "b", SNULL, "d");
Assert.assertEquals(s.join(), SNULL);
Assert.assertEquals(s.fillNull().join(), "abd");
Assert.assertEquals(s.dropNull().join(), "abd");
}
@Test
public void testBooleanAggregateWithNull() {
BooleanSeries s = DataFrame.toSeries(TRUE, FALSE, BNULL, TRUE);
Assert.assertEquals(s.aggregate(BooleanSeries.HAS_TRUE).value(), BNULL);
Assert.assertEquals(s.fillNull().aggregate(BooleanSeries.HAS_TRUE).value(), 1);
Assert.assertEquals(s.dropNull().aggregate(BooleanSeries.HAS_TRUE).value(), 1);
}
@Test
public void testDataFrameGroupBy() {
DataFrame.DataFrameGrouping grouping = df.groupBy("boolean");
DoubleSeries ds = grouping.aggregate("double", new DoubleSeries.DoubleSum()).getDoubles("double");
assertEquals(ds, 0.0, -0.4);
LongSeries ls = grouping.aggregate("long", new LongSeries.LongSum()).get("long").getLongs();
assertEquals(ls, 0, 2);
StringSeries ss = grouping.aggregate("string", new StringSeries.StringConcat("|")).get("string").getStrings();
assertEquals(ss, "0.0", "-2.3|-1|0.5|0.13e1");
}
@Test
public void testResampleEndToEnd() {
df = df.resampledBy("index", 2, new DataFrame.ResampleLast());
Assert.assertEquals(df.size(), 4);
Assert.assertEquals(df.getSeriesNames().size(), 5);
assertEquals(df.getLongs("index"), -2, 0, 2, 4);
assertEquals(df.getDoubles("double"), -2.1, -0.1, 1.3, 0.5);
assertEquals(df.getLongs("long"), -2, 1, 2, 1);
assertEquals(df.getStrings("string"), "-2.3", "-1", "0.13e1", "0.5");
assertEquals(df.getBooleans("boolean"), TRUE, TRUE, TRUE, TRUE);
}
@Test
public void testStableMultiSortDoubleLong() {
DataFrame mydf = new DataFrame(new long[] { 1, 2, 3, 4, 5, 6, 7, 8 })
.addSeries("double", 1.0, 1.0, 2.0, 2.0, 1.0, 1.0, 2.0, 2.0)
.addSeries("long", 2, 2, 2, 2, 1, 1, 1, 1);
DataFrame sdfa = mydf.sortedBy("double", "long");
assertEquals(sdfa.getLongs("index"), 5, 6, 1, 2, 7, 8, 3, 4);
DataFrame sdfb = mydf.sortedBy("long", "double");
assertEquals(sdfb.getLongs("index"), 3, 4, 7, 8, 1, 2, 5, 6);
}
@Test
public void testStableMultiSortStringBoolean() {
DataFrame mydf = new DataFrame(new long[] { 1, 2, 3, 4, 5, 6, 7, 8 })
.addSeries("string", "a", "a", "b", "b", "a", "a", "b", "b")
.addSeries("boolean", true, true, true, true, false, false, false, false);
DataFrame sdfa = mydf.sortedBy("string", "boolean");
assertEquals(sdfa.getLongs("index"), 5, 6, 1, 2, 7, 8, 3, 4);
DataFrame sdfb = mydf.sortedBy("boolean", "string");
assertEquals(sdfb.getLongs("index"), 3, 4, 7, 8, 1, 2, 5, 6);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testFilterUnequalLengthFail() {
df.filter(DataFrame.toSeries(false, true));
}
@Test
public void testFilter() {
df = df.filter(DataFrame.toSeries(true, false, true, true, false));
Assert.assertEquals(df.size(), 5);
df = df.dropNull();
Assert.assertEquals(df.size(), 3);
assertEquals(df.getLongs("index"),-1, -2, 4);
assertEquals(df.getDoubles("double"), -2.1, 0.0, 0.5);
assertEquals(df.getLongs("long"), -2, 0, 1);
assertEquals(df.getStrings("string"),"-2.3", "0.0", "0.5");
assertEquals(df.getBooleans("boolean"), TRUE, FALSE, TRUE);
}
@Test
public void testFilterAll() {
df = df.filter(DataFrame.toSeries(true, true, true, true, true));
Assert.assertEquals(df.size(), 5);
Assert.assertEquals(df.dropNull().size(), 5);
}
@Test
public void testFilterNone() {
df = df.filter(DataFrame.toSeries(false, false, false, false, false));
Assert.assertEquals(df.size(), 5);
Assert.assertEquals(df.dropNull().size(), 0);
}
@Test
public void testFilterNull() {
df = df.filter(DataFrame.toSeries(BNULL, FALSE, TRUE, BNULL, FALSE));
Assert.assertEquals(df.size(), 5);
Assert.assertEquals(df.dropNull().size(), 1);
}
@Test
public void testRenameSeries() {
df = df.renameSeries("double", "new");
df.getDoubles("new");
try {
df.getDoubles("double");
Assert.fail();
} catch(IllegalArgumentException e) {
// left blank
}
}
@Test
public void testRenameSeriesOverride() {
df = df.renameSeries("double", "long");
assertEquals(df.getDoubles("long"), VALUES_DOUBLE);
}
@Test
public void testContains() {
Assert.assertTrue(df.contains("double"));
Assert.assertFalse(df.contains("NOT_VALID"));
}
@Test
public void testCopy() {
DataFrame ndf = df.copy();
ndf.getDoubles("double").values()[0] = 100.0;
Assert.assertNotEquals(df.getDoubles("double").first(), ndf.getDoubles("double").first());
ndf.getLongs("long").values()[0] = 100;
Assert.assertNotEquals(df.getLongs("long").first(), ndf.getLongs("long").first());
ndf.getStrings("string").values()[0] = "other string";
Assert.assertNotEquals(df.getStrings("string").first(), ndf.getStrings("string").first());
ndf.getBooleans("boolean").values()[0] = 0;
Assert.assertNotEquals(df.getBooleans("boolean").first(), ndf.getBooleans("boolean").first());
}
@Test
public void testDoubleHead() {
DoubleSeries s = DataFrame.toSeries(VALUES_DOUBLE);
assertEquals(s.head(0), new double[0]);
assertEquals(s.head(3), Arrays.copyOfRange(VALUES_DOUBLE, 0, 3));
assertEquals(s.head(6), Arrays.copyOfRange(VALUES_DOUBLE, 0, 5));
}
@Test
public void testDoubleTail() {
DoubleSeries s = DataFrame.toSeries(VALUES_DOUBLE);
assertEquals(s.tail(0), new double[0]);
assertEquals(s.tail(3), Arrays.copyOfRange(VALUES_DOUBLE, 2, 5));
assertEquals(s.tail(6), Arrays.copyOfRange(VALUES_DOUBLE, 0, 5));
}
@Test
public void testDoubleAccessorsEmpty() {
DoubleSeries s = DoubleSeries.empty();
Assert.assertTrue(DoubleSeries.isNull(s.sum()));
Assert.assertTrue(DoubleSeries.isNull(s.min()));
Assert.assertTrue(DoubleSeries.isNull(s.max()));
Assert.assertTrue(DoubleSeries.isNull(s.mean()));
Assert.assertTrue(DoubleSeries.isNull(s.std()));
try {
s.first();
Assert.fail();
} catch(IllegalStateException expected) {
// left blank
}
try {
s.last();
Assert.fail();
} catch(IllegalStateException expected) {
// left blank
}
try {
s.value();
Assert.fail();
} catch(IllegalStateException expected) {
// left blank
}
}
@Test
public void testLongHead() {
LongSeries s = DataFrame.toSeries(VALUES_LONG);
assertEquals(s.head(0), new long[0]);
assertEquals(s.head(3), Arrays.copyOfRange(VALUES_LONG, 0, 3));
assertEquals(s.head(6), Arrays.copyOfRange(VALUES_LONG, 0, 5));
}
@Test
public void testLongTail() {
LongSeries s = DataFrame.toSeries(VALUES_LONG);
assertEquals(s.tail(0), new long[0]);
assertEquals(s.tail(3), Arrays.copyOfRange(VALUES_LONG, 2, 5));
assertEquals(s.tail(6), Arrays.copyOfRange(VALUES_LONG, 0, 5));
}
@Test
public void testLongAccessorsEmpty() {
LongSeries s = LongSeries.empty();
Assert.assertTrue(LongSeries.isNull(s.sum()));
Assert.assertTrue(LongSeries.isNull(s.min()));
Assert.assertTrue(LongSeries.isNull(s.max()));
try {
s.first();
Assert.fail();
} catch(IllegalStateException expected) {
// left blank
}
try {
s.last();
Assert.fail();
} catch(IllegalStateException expected) {
// left blank
}
try {
s.value();
Assert.fail();
} catch(IllegalStateException expected) {
// left blank
}
}
@Test
public void testLongUnique() {
LongSeries s1 = DataFrame.toSeries(new long[0]);
assertEquals(s1.unique(), new long[0]);
LongSeries s2 = DataFrame.toSeries(4, 5, 2, 1);
assertEquals(s2.unique(), 1, 2, 4, 5);
LongSeries s3 = DataFrame.toSeries(9, 1, 2, 3, 6, 1, 2, 9, 2, 7);
assertEquals(s3.unique(), 1, 2, 3, 6, 7, 9);
}
@Test
public void testDoubleUnique() {
DoubleSeries s1 = DataFrame.toSeries(new double[] {});
assertEquals(s1.unique(), new double[0]);
DoubleSeries s2 = DataFrame.toSeries(4.1, 5.2, 2.3, 1.4);
assertEquals(s2.unique(), 1.4, 2.3, 4.1, 5.2);
DoubleSeries s3 = DataFrame.toSeries(9.0, 1.1, 2.2, 3.0, 6.0, 1.1, 2.3, 9.0, 2.3, 7.0);
assertEquals(s3.unique(), 1.1, 2.2, 2.3, 3.0, 6.0, 7.0, 9.0);
}
@Test
public void testStringUnique() {
StringSeries s1 = DataFrame.toSeries(new String[] {});
assertEquals(s1.unique(), new String[0]);
StringSeries s2 = DataFrame.toSeries("a", "A", "b", "Cc");
Assert.assertEquals(new HashSet<>(s2.unique().toList()), new HashSet<>(Arrays.asList("a", "A", "b", "Cc")));
StringSeries s3 = DataFrame.toSeries("a", "A", "b", "Cc", "A", "cC", "a", "cC");
Assert.assertEquals(new HashSet<>(s3.unique().toList()), new HashSet<>(Arrays.asList("a", "A", "b", "Cc", "cC")));
}
@Test
public void testStringFillNull() {
StringSeries s = DataFrame.toSeries("a", SNULL, SNULL, "b", SNULL);
assertEquals(s.fillNull("N"), "a", "N", "N", "b", "N");
}
@Test
public void testStringShift() {
StringSeries s1 = DataFrame.toSeries(VALUES_STRING);
assertEquals(s1.shift(0), VALUES_STRING);
StringSeries s2 = DataFrame.toSeries(VALUES_STRING);
assertEquals(s2.shift(2), SNULL, SNULL, "-2.3", "-1", "0.0");
StringSeries s3 = DataFrame.toSeries(VALUES_STRING);
assertEquals(s3.shift(4), SNULL, SNULL, SNULL, SNULL, "-2.3");
StringSeries s4 = DataFrame.toSeries(VALUES_STRING);
assertEquals(s4.shift(-4), "0.13e1", SNULL, SNULL, SNULL, SNULL);
StringSeries s5 = DataFrame.toSeries(VALUES_STRING);
assertEquals(s5.shift(100), SNULL, SNULL, SNULL, SNULL, SNULL);
StringSeries s6 = DataFrame.toSeries(VALUES_STRING);
assertEquals(s6.shift(-100), SNULL, SNULL, SNULL, SNULL, SNULL);
}
@Test
public void testDoubleMapNullConditional() {
DoubleSeries in = DataFrame.toSeries(1.0, DNULL, 2.0);
BooleanSeries out = in.map(new Series.DoubleConditional() {
@Override
public boolean apply(double... values) {
return true;
}
});
assertEquals(out, TRUE, BNULL, TRUE);
}
@Test
public void testLongMapNullConditional() {
LongSeries in = DataFrame.toSeries(1, LNULL, 2);
BooleanSeries out = in.map(new Series.LongConditional() {
@Override
public boolean apply(long... values) {
return true;
}
});
assertEquals(out, TRUE, BNULL, TRUE);
}
@Test
public void testStringMapNullConditional() {
StringSeries in = DataFrame.toSeries("1.0", SNULL, "2.0");
BooleanSeries out = in.map(new Series.StringConditional() {
@Override
public boolean apply(String... values) {
return true;
}
});
assertEquals(out, TRUE, BNULL, TRUE);
}
@Test
public void testDoubleMapNullFunction() {
DoubleSeries in = DataFrame.toSeries(1.0, DNULL, 2.0);
DoubleSeries out = in.map(new DoubleSeries.DoubleFunction() {
@Override
public double apply(double... values) {
return values[0] + 1.0;
}
});
assertEquals(out, 2.0, DNULL, 3.0);
}
@Test
public void testLongMapNullFunction() {
LongSeries in = DataFrame.toSeries(1, LNULL, 2);
LongSeries out = in.map(new LongSeries.LongFunction() {
@Override
public long apply(long... values) {
return values[0] + 1;
}
});
assertEquals(out, 2, LNULL, 3);
}
@Test
public void testStringMapNullFunction() {
StringSeries in = DataFrame.toSeries("1.0", SNULL, "2.0");
StringSeries out = in.map(new StringSeries.StringFunction() {
@Override
public String apply(String... values) {
return values[0] + "+";
}
});
assertEquals(out, "1.0+", SNULL, "2.0+");
}
@Test
public void testDropNullRows() {
DataFrame mdf = new DataFrame(new long[] { 1, 2, 3, 4, 5, 6 })
.addSeries("double", 1.0, 2.0, DNULL, 4.0, 5.0, 6.0)
.addSeries("long", LNULL, 2, 3, 4, 5, 6)
.addSeries("string", "1.0", "2", "bbb", "true", SNULL, "aaa")
.addSeries("boolean", true, true, false, false, false, false);
DataFrame ddf = mdf.dropNull();
Assert.assertEquals(ddf.size(), 3);
assertEquals(ddf.getLongs("index"), 2, 4, 6);
assertEquals(ddf.getDoubles("double"), 2.0, 4.0, 6.0);
assertEquals(ddf.getLongs("long"), 2, 4, 6);
assertEquals(ddf.getStrings("string"), "2", "true", "aaa");
assertEquals(ddf.getBooleans("boolean"), TRUE, FALSE, FALSE);
}
@Test
public void testDropNullRowsIdentity() {
Assert.assertEquals(df.dropNull().size(), df.size());
}
@Test
public void testDropNullColumns() {
DataFrame mdf = new DataFrame()
.addSeries("double_null", 1.0, 2.0, DNULL)
.addSeries("double", 1.0, 2.0, 3.0)
.addSeries("long_null", LNULL, 2, 3)
.addSeries("long", 1, 2, 3)
.addSeries("string_null", "true", SNULL, "aaa")
.addSeries("string", "true", "this", "aaa")
.addSeries("boolean", true, true, false);
DataFrame ddf = mdf.dropNullColumns();
Assert.assertEquals(ddf.size(), 3);
Assert.assertEquals(new HashSet<>(ddf.getSeriesNames()), new HashSet<>(Arrays.asList("double", "long", "string", "boolean")));
}
@Test
public void testMapExpression() {
DoubleSeries s = df.map("(double * 2 + long + boolean) / 2");
assertEquals(s, -2.6, 0.9, 0.0, 1.5, 2.8);
}
@Test
public void testMapExpressionNull() {
DataFrame mdf = new DataFrame(VALUES_LONG)
.addSeries("null", 1.0, 1.0, DNULL, 1.0, 1.0);
DoubleSeries out = mdf.map("null + 1");
assertEquals(out, 2.0, 2.0, DNULL, 2.0, 2.0);
}
@Test
public void testMapExpressionOtherNullPass() {
DataFrame mdf = new DataFrame(VALUES_LONG)
.addSeries("null", 1.0, 1.0, DNULL, 1.0, 1.0)
.addSeries("notnull", 1.0, 1.0, 1.0, 1.0, 1.0);
mdf.map("notnull + 1");
}
@Test
public void testMapExpressionWithNull() {
DataFrame mdf = new DataFrame(VALUES_LONG)
.addSeries("null", 1.0, 1.0, DNULL, 1.0, 1.0);
DoubleSeries s = mdf.map("null + 1");
assertEquals(s, 2.0, 2.0, DNULL, 2.0, 2.0);
}
@Test
public void testSeriesEquals() {
Assert.assertTrue(DataFrame.toSeries(0.0, 3.0, 4.0).equals(DataFrame.toSeries(0.0, 3.0, 4.0)));
Assert.assertTrue(DataFrame.toSeries(0, 3, 4).equals(DataFrame.toSeries(0, 3, 4)));
Assert.assertTrue(DataFrame.toSeries(false, true, true).equals(DataFrame.toSeries(false, true, true)));
Assert.assertTrue(DataFrame.toSeries("1", "3", "4").equals(DataFrame.toSeries("1", "3", "4")));
Assert.assertFalse(DataFrame.toSeries(0.0, 3.0, 4.0).equals(DataFrame.toSeries(0, 3, 4)));
Assert.assertFalse(DataFrame.toSeries(0, 3, 4).equals(DataFrame.toSeries(0.0, 3.0, 4.0)));
Assert.assertFalse(DataFrame.toSeries(false, true, true).equals(DataFrame.toSeries("0", "1", "1")));
Assert.assertFalse(DataFrame.toSeries("1", "3", "4").equals(DataFrame.toSeries(1, 3, 4)));
Assert.assertTrue(DataFrame.toSeries(0.0, 3.0, 4.0).equals(DataFrame.toSeries(0, 3, 4).getDoubles()));
Assert.assertTrue(DataFrame.toSeries(0, 3, 4).equals(DataFrame.toSeries(0.0, 3.0, 4.0).getLongs()));
Assert.assertTrue(DataFrame.toSeries(false, true, true).equals(DataFrame.toSeries("0", "1", "1").getBooleans()));
Assert.assertTrue(DataFrame.toSeries("1", "3", "4").equals(DataFrame.toSeries(1, 3, 4).getStrings()));
}
@Test
public void testLongJoinInner() {
Series sLeft = DataFrame.toSeries(4, 3, 1, 2);
Series sRight = DataFrame.toSeries(5, 4, 3, 3, 0);
List<Series.JoinPair> pairs = sLeft.join(sRight, Series.JoinType.INNER);
Assert.assertEquals(pairs.size(), 3);
Assert.assertEquals(pairs.get(0), new Series.JoinPair(1, 2));
Assert.assertEquals(pairs.get(1), new Series.JoinPair(1, 3));
Assert.assertEquals(pairs.get(2), new Series.JoinPair(0, 1));
}
@Test
public void testLongJoinLeft() {
Series sLeft = DataFrame.toSeries(4, 3, 1, 2);
Series sRight = DataFrame.toSeries(5, 4, 3, 3, 0);
List<Series.JoinPair> pairs = sLeft.join(sRight, Series.JoinType.LEFT);
Assert.assertEquals(pairs.size(), 5);
Assert.assertEquals(pairs.get(0), new Series.JoinPair(2, -1));
Assert.assertEquals(pairs.get(1), new Series.JoinPair(3, -1));
Assert.assertEquals(pairs.get(2), new Series.JoinPair(1, 2));
Assert.assertEquals(pairs.get(3), new Series.JoinPair(1, 3));
Assert.assertEquals(pairs.get(4), new Series.JoinPair(0, 1));
}
@Test
public void testLongJoinRight() {
Series sLeft = DataFrame.toSeries(4, 3, 1, 2);
Series sRight = DataFrame.toSeries(5, 4, 3, 3, 0);
List<Series.JoinPair> pairs = sLeft.join(sRight, Series.JoinType.RIGHT);
Assert.assertEquals(pairs.size(), 5);
Assert.assertEquals(pairs.get(0), new Series.JoinPair(-1, 4));
Assert.assertEquals(pairs.get(1), new Series.JoinPair(1, 2));
Assert.assertEquals(pairs.get(2), new Series.JoinPair(1, 3));
Assert.assertEquals(pairs.get(3), new Series.JoinPair(0, 1));
Assert.assertEquals(pairs.get(4), new Series.JoinPair(-1, 0));
}
@Test
public void testLongJoinOuter() {
Series sLeft = DataFrame.toSeries(4, 3, 1, 2);
Series sRight = DataFrame.toSeries(5, 4, 3, 3, 0);
List<Series.JoinPair> pairs = sLeft.join(sRight, Series.JoinType.OUTER);
Assert.assertEquals(pairs.size(), 7);
Assert.assertEquals(pairs.get(0), new Series.JoinPair(-1, 4));
Assert.assertEquals(pairs.get(1), new Series.JoinPair(2, -1));
Assert.assertEquals(pairs.get(2), new Series.JoinPair(3, -1));
Assert.assertEquals(pairs.get(3), new Series.JoinPair(1, 2));
Assert.assertEquals(pairs.get(4), new Series.JoinPair(1, 3));
Assert.assertEquals(pairs.get(5), new Series.JoinPair(0, 1));
Assert.assertEquals(pairs.get(6), new Series.JoinPair(-1, 0));
}
@Test
public void testLongDoubleJoinInner() {
Series sLeft = DataFrame.toSeries(4, 3, 1, 2);
Series sRight = DataFrame.toSeries(5.0, 4.0, 3.0, 3.0, 0.0);
List<Series.JoinPair> pairs = sLeft.join(sRight, Series.JoinType.INNER);
Assert.assertEquals(pairs.size(), 3);
Assert.assertEquals(pairs.get(0), new Series.JoinPair(1, 2));
Assert.assertEquals(pairs.get(1), new Series.JoinPair(1, 3));
Assert.assertEquals(pairs.get(2), new Series.JoinPair(0, 1));
}
@Test
public void testStringJoinInner() {
Series sLeft = DataFrame.toSeries("4", "3", "1", "2");
Series sRight = DataFrame.toSeries("5", "4", "3", "3", "0");
List<Series.JoinPair> pairs = sLeft.join(sRight, Series.JoinType.INNER);
Assert.assertEquals(pairs.size(), 3);
Assert.assertEquals(pairs.get(0), new Series.JoinPair(1, 2));
Assert.assertEquals(pairs.get(1), new Series.JoinPair(1, 3));
Assert.assertEquals(pairs.get(2), new Series.JoinPair(0, 1));
}
@Test
public void testBooleanJoinInner() {
Series sLeft = DataFrame.toSeries(true, false, false);
Series sRight = DataFrame.toSeries(false, true, true);
List<Series.JoinPair> pairs = sLeft.join(sRight, Series.JoinType.INNER);
Assert.assertEquals(pairs.size(), 4);
Assert.assertEquals(pairs.get(0), new Series.JoinPair(1, 0));
Assert.assertEquals(pairs.get(1), new Series.JoinPair(2, 0));
Assert.assertEquals(pairs.get(2), new Series.JoinPair(0, 1));
Assert.assertEquals(pairs.get(3), new Series.JoinPair(0, 2));
}
@Test
public void testJoinInner() {
DataFrame left = new DataFrame()
.addSeries("leftKey", 4, 2, 1, 3)
.addSeries("leftValue", "a", "d", "c", "b");
DataFrame right = new DataFrame()
.addSeries("rightKey", 5.0, 2.0, 1.0, 3.0, 1.0, 0.0)
.addSeries("rightValue", "v", "z", "w", "x", "y", "u");
DataFrame joined = left.joinInner(right, "leftKey", "rightKey");
Assert.assertEquals(joined.size(), 4);
Assert.assertEquals(joined.get("leftKey").type(), Series.SeriesType.LONG);
Assert.assertEquals(joined.get("leftValue").type(), Series.SeriesType.STRING);
Assert.assertEquals(joined.get("rightKey").type(), Series.SeriesType.DOUBLE);
Assert.assertEquals(joined.get("rightValue").type(), Series.SeriesType.STRING);
assertEquals(joined.getLongs("leftKey"), 1, 1, 2, 3);
assertEquals(joined.getDoubles("rightKey"),1.0, 1.0, 2.0, 3.0);
assertEquals(joined.getStrings("leftValue"), "c", "c", "d", "b");
assertEquals(joined.getStrings("rightValue"), "w", "y", "z", "x");
}
@Test
public void testJoinOuter() {
DataFrame left = new DataFrame()
.addSeries("leftKey", 4, 2, 1, 3)
.addSeries("leftValue", "a", "d", "c", "b");
DataFrame right = new DataFrame()
.addSeries("rightKey", 5.0, 2.0, 1.0, 3.0, 1.0, 0.0)
.addSeries("rightValue", "v", "z", "w", "x", "y", "u");
DataFrame joined = left.joinOuter(right, "leftKey", "rightKey");
Assert.assertEquals(joined.size(), 7);
Assert.assertEquals(joined.get("leftKey").type(), Series.SeriesType.LONG);
Assert.assertEquals(joined.get("leftValue").type(), Series.SeriesType.STRING);
Assert.assertEquals(joined.get("rightKey").type(), Series.SeriesType.DOUBLE);
Assert.assertEquals(joined.get("rightValue").type(), Series.SeriesType.STRING);
assertEquals(joined.getLongs("leftKey"), LNULL, 1, 1, 2, 3, 4, LNULL);
assertEquals(joined.getDoubles("rightKey"), 0.0, 1.0, 1.0, 2.0, 3.0, DNULL, 5.0);
assertEquals(joined.getStrings("leftValue"), SNULL, "c", "c", "d", "b", "a", SNULL);
assertEquals(joined.getStrings("rightValue"), "u", "w", "y", "z", "x", SNULL, "v");
}
@Test
public void testJoinSameName() {
DataFrame left = new DataFrame()
.addSeries("name", 1, 2, 3, 4)
.addSeries("value", 1, 2, 3, 4)
.addSeries("left", 1, 2, 3, 4);
DataFrame right = new DataFrame()
.addSeries("name", 3, 4, 5, 6)
.addSeries("value", 3, 4, 5, 6)
.addSeries("right", 1, 2, 3, 4);
DataFrame df = left.joinInner(right, "name", "name");
Assert.assertEquals(df.getSeriesNames().size(), 5);
Assert.assertTrue(df.contains("name"));
Assert.assertFalse(df.contains("name" + DataFrame.COLUMN_JOIN_LEFT));
Assert.assertFalse(df.contains("name" + DataFrame.COLUMN_JOIN_RIGHT));
Assert.assertFalse(df.contains("value"));
Assert.assertTrue(df.contains("value" + DataFrame.COLUMN_JOIN_LEFT));
Assert.assertTrue(df.contains("value" + DataFrame.COLUMN_JOIN_RIGHT));
Assert.assertTrue(df.contains("left"));
Assert.assertFalse(df.contains("left" + DataFrame.COLUMN_JOIN_LEFT));
Assert.assertFalse(df.contains("left" + DataFrame.COLUMN_JOIN_RIGHT));
Assert.assertTrue(df.contains("right"));
Assert.assertFalse(df.contains("right" + DataFrame.COLUMN_JOIN_LEFT));
Assert.assertFalse(df.contains("right" + DataFrame.COLUMN_JOIN_RIGHT));
}
@Test
public void testJoinDifferentName() {
DataFrame left = new DataFrame()
.addSeries("name", 1, 2, 3, 4);
DataFrame right = new DataFrame()
.addSeries("key", 3, 4, 5, 6);
DataFrame df = left.joinInner(right, "name", "key");
Assert.assertEquals(df.getSeriesNames().size(), 2);
Assert.assertTrue(df.contains("name"));
Assert.assertTrue(df.contains("key"));
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testJoinIndexFailNoIndex() {
DataFrame dfIndex = new DataFrame(5);
DataFrame dfNoIndex = new DataFrame().addSeries(DataFrame.COLUMN_INDEX_DEFAULT, DataFrame.toSeries(VALUES_DOUBLE));
dfIndex.joinOuter(dfNoIndex);
}
@Test
public void testJoinIndex() {
DataFrame dfLeft = new DataFrame(5).addSeries("one", 5, 4, 3, 2, 1);
DataFrame dfRight = new DataFrame(3).addSeries("two", "A", "B", "C");
DataFrame joined = dfLeft.joinLeft(dfRight);
assertEquals(joined.getStrings("one"), "5", "4", "3", "2", "1");
assertEquals(joined.getStrings("two"), "A", "B", "C", null, null);
}
@Test
public void testBooleanHasTrueFalseNull() {
BooleanSeries s1 = DataFrame.toSeries(new boolean[0]);
Assert.assertFalse(s1.hasFalse());
Assert.assertFalse(s1.hasTrue());
Assert.assertFalse(s1.hasNull());
BooleanSeries s2 = DataFrame.toSeries(true, true);
Assert.assertFalse(s2.hasFalse());
Assert.assertTrue(s2.hasTrue());
Assert.assertFalse(s2.hasNull());
BooleanSeries s3 = DataFrame.toSeries(false, false);
Assert.assertTrue(s3.hasFalse());
Assert.assertFalse(s3.hasTrue());
Assert.assertFalse(s3.hasNull());
BooleanSeries s4 = DataFrame.toSeries(true, false);
Assert.assertTrue(s4.hasFalse());
Assert.assertTrue(s4.hasTrue());
Assert.assertFalse(s4.hasNull());
BooleanSeries s5 = DataFrame.toSeries(TRUE, FALSE, BNULL);
Assert.assertFalse(s5.hasFalse());
Assert.assertFalse(s5.hasTrue());
Assert.assertTrue(s5.hasNull());
}
@Test
public void testBooleanAllTrueFalse() {
BooleanSeries s1 = BooleanSeries.empty();
Assert.assertFalse(s1.allTrue());
Assert.assertFalse(s1.allFalse());
BooleanSeries s2 = DataFrame.toSeries(true, true);
Assert.assertFalse(s2.allFalse());
Assert.assertTrue(s2.allTrue());
BooleanSeries s3 = DataFrame.toSeries(false, false);
Assert.assertTrue(s3.allFalse());
Assert.assertFalse(s3.allTrue());
BooleanSeries s4 = DataFrame.toSeries(true, false);
Assert.assertFalse(s4.allFalse());
Assert.assertFalse(s4.allTrue());
BooleanSeries s5 = DataFrame.toSeries(TRUE, TRUE, BNULL);
Assert.assertFalse(s5.allFalse());
Assert.assertFalse(s5.allTrue());
BooleanSeries s6 = DataFrame.toSeries(FALSE, FALSE, BNULL);
Assert.assertFalse(s6.allFalse());
Assert.assertFalse(s6.allTrue());
BooleanSeries s7 = DataFrame.toSeries(TRUE, FALSE, BNULL);
Assert.assertFalse(s7.allFalse());
Assert.assertFalse(s7.allTrue());
}
@Test
public void testStringInferSeriesTypeDoubleDot() {
Series.SeriesType t = StringSeries.buildFrom("1", "2", "3.", "", null).inferType();
Assert.assertEquals(t, Series.SeriesType.DOUBLE);
}
@Test
public void testStringInferSeriesTypeDoubleExp() {
Series.SeriesType t = StringSeries.buildFrom("1", "2e1", "3", "", null).inferType();
Assert.assertEquals(t, Series.SeriesType.DOUBLE);
}
@Test
public void testStringInferSeriesTypeLong() {
Series.SeriesType t = StringSeries.buildFrom("2", "-4", "-0", "", null).inferType();
Assert.assertEquals(t, Series.SeriesType.LONG);
}
@Test
public void testStringInferSeriesTypeBoolean() {
Series.SeriesType t = StringSeries.buildFrom("true", "False", "false", "", null).inferType();
Assert.assertEquals(t, Series.SeriesType.BOOLEAN);
}
@Test
public void testStringInferSeriesTypeString() {
Series.SeriesType t = StringSeries.buildFrom("true", "", "-0.2e1", null).inferType();
Assert.assertEquals(t, Series.SeriesType.STRING);
}
@Test
public void testCompareInversion() {
StringSeries string = StringSeries.buildFrom("0", "", "true");
BooleanSeries bool = BooleanSeries.buildFrom(FALSE, BNULL, TRUE);
Assert.assertTrue(string.compare(bool, 0, 0) < 0); // "0" < "false"
Assert.assertTrue(bool.compare(string, 0, 0) == 0);
Assert.assertTrue(string.compare(bool, 1, 1) > 0); // "" > null
Assert.assertTrue(bool.compare(string, 1, 1) == 0);
Assert.assertTrue(string.compare(bool, 2, 2) == 0);
Assert.assertTrue(bool.compare(string, 2, 2) == 0);
}
@Test
public void testDataFrameFromCsv() throws IOException {
Reader in = new InputStreamReader(this.getClass().getResourceAsStream("test.csv"));
DataFrame df = DataFrame.fromCsv(in);
Assert.assertEquals(df.getSeriesNames().size(), 3);
Assert.assertEquals(df.size(), 6);
Series a = df.get("header_A");
Assert.assertEquals(a.type(), Series.SeriesType.STRING);
assertEquals(a.getStrings(), "a1", "A2", "two words", "", "with comma, semicolon; and more", "");
Series b = df.get("_1headerb");
Assert.assertEquals(b.type(), Series.SeriesType.LONG);
assertEquals(b.getLongs(), 1, 2, 3, 4, 5, 6);
Series c = df.get("Header_C");
Assert.assertEquals(c.type(), Series.SeriesType.BOOLEAN);
assertEquals(c.getBooleans(), BNULL, TRUE, FALSE, FALSE, BNULL, TRUE);
}
@Test
public void testDoubleFunctionConversion() {
Series out = df.map(new Series.DoubleFunction() {
@Override
public double apply(double... values) {
return values[0] + 1;
}
}, "long");
Assert.assertEquals(out.type(), Series.SeriesType.DOUBLE);
}
@Test
public void testLongFunctionConversion() {
Series out = df.map(new Series.LongFunction() {
@Override
public long apply(long... values) {
return values[0] + 1;
}
}, "double");
Assert.assertEquals(out.type(), Series.SeriesType.LONG);
}
@Test
public void testStringFunctionConversion() {
Series out = df.map(new Series.StringFunction() {
@Override
public String apply(String... values) {
return values[0] + "-";
}
}, "long");
Assert.assertEquals(out.type(), Series.SeriesType.STRING);
}
@Test
public void testBooleanFunctionConversion() {
Series out = df.map(new Series.BooleanFunction() {
@Override
public boolean apply(boolean... values) {
return !values[0];
}
}, "long");
Assert.assertEquals(out.type(), Series.SeriesType.BOOLEAN);
}
@Test
public void testBooleanFunctionExConversion() {
Series out = df.map(new Series.BooleanFunctionEx() {
@Override
public byte apply(byte... values) {
return TRUE;
}
}, "long");
Assert.assertEquals(out.type(), Series.SeriesType.BOOLEAN);
}
@Test
public void testDoubleConditionalConversion() {
Series out = df.map(new Series.DoubleConditional() {
@Override
public boolean apply(double... values) {
return true;
}
}, "long");
Assert.assertEquals(out.type(), Series.SeriesType.BOOLEAN);
}
@Test
public void testLongConditionalConversion() {
Series out = df.map(new Series.LongConditional() {
@Override
public boolean apply(long... values) {
return true;
}
}, "double");
Assert.assertEquals(out.type(), Series.SeriesType.BOOLEAN);
}
@Test
public void testStringConditionalConversion() {
Series out = df.map(new Series.StringConditional() {
@Override
public boolean apply(String... values) {
return true;
}
}, "long");
Assert.assertEquals(out.type(), Series.SeriesType.BOOLEAN);
}
@Test
public void testBooleanConditionalConversion() {
Series out = df.map(new Series.BooleanConditional() {
@Override
public boolean apply(boolean... values) {
return true;
}
}, "long");
Assert.assertEquals(out.type(), Series.SeriesType.BOOLEAN);
}
@Test
public void testFillForward() {
// must pass
LongSeries.empty().fillNullForward();
// must pass
LongSeries.buildFrom(LNULL).fillNullForward();
LongSeries in = LongSeries.buildFrom(LNULL, 1, LNULL, 2, 3, LNULL);
assertEquals(in.fillNullForward(), LNULL, 1, 1, 2, 3, 3);
}
@Test
public void testFillBackward() {
// must pass
LongSeries.empty().fillNullBackward();
// must pass
LongSeries.buildFrom(LNULL).fillNullBackward();
LongSeries in = LongSeries.buildFrom(LNULL, 1, LNULL, 2, 3, LNULL);
assertEquals(in.fillNullBackward(), 1, 1, 2, 2, 3, LNULL);
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testIndexNone() {
DataFrame df = new DataFrame();
Assert.assertFalse(df.hasIndex());
df.getIndex();
}
@Test
public void testIndexDefault() {
Assert.assertTrue(new DataFrame(0).hasIndex());
Assert.assertTrue(new DataFrame(1, 2, 3).hasIndex());
Assert.assertTrue(new DataFrame(DataFrame.toSeries(VALUES_STRING)).hasIndex());
}
@Test
public void testIndexCopy() {
DataFrame df = new DataFrame(5)
.addSeries("test", DataFrame.toSeries(VALUES_BOOLEAN))
.setIndex("test");
Assert.assertEquals(df.copy().getIndexName(), "test");
}
@Test(expectedExceptions = IllegalArgumentException.class)
public void testIndexSetInvalid() {
DataFrame df = new DataFrame(0);
df.setIndex("test");
}
@Test
public void testIndexRename() {
DataFrame df = new DataFrame(0);
Series index = df.getIndex();
df.renameSeries(df.getIndexName(), "test");
df.addSeries(DataFrame.COLUMN_INDEX_DEFAULT, DataFrame.toSeries(new double[0]));
Assert.assertEquals(df.getIndexName(), "test");
Assert.assertEquals(df.getIndex(), index);
}
@Test
public void testDoubleNormalize() {
DoubleSeries s = DataFrame.toSeries(1.5, 2.0, 3.5).normalize();
assertEquals(s, 0, 0.25, 1.0);
}
@Test
public void testDoubleNormalizeFailInvalid() {
DoubleSeries s = DataFrame.toSeries(1.5, 1.5, 1.5).normalize();
assertEquals(s, DoubleSeries.nulls(3));
}
@Test
public void testDoubleZScore() {
DoubleSeries s = DataFrame.toSeries(0.0, 1.0, 2.0).zscore();
assertEquals(s, -0.707, 0.0, 0.707);
}
@Test
public void testDoubleZScoreFailInvalid() {
DoubleSeries s = DataFrame.toSeries(1.5, 1.5, 1.5).zscore();
assertEquals(s, DoubleSeries.nulls(3));
}
@Test
public void testDoubleOperationsSeries() {
DoubleSeries base = DataFrame.toSeries(DNULL, 0, 1, 1.5, 0.003);
DoubleSeries mod = DataFrame.toSeries(1, 1, 1, 0, DNULL);
assertEquals(base.add(mod), DNULL, 1, 2, 1.5, DNULL);
assertEquals(base.subtract(mod), DNULL, -1, 0, 1.5, DNULL);
assertEquals(base.multiply(mod), DNULL, 0, 1, 0, DNULL);
assertEquals(base.divide(mod.replace(0, 1)), DNULL, 0, 1, 1.5, DNULL);
assertEquals(base.eq(mod), BNULL, FALSE, TRUE, FALSE, BNULL);
try {
base.divide(mod);
Assert.fail();
} catch(ArithmeticException expected) {
// left blank
}
}
@Test
public void testDoubleOperationsSeriesMisaligned() {
DoubleSeries base = DataFrame.toSeries(DNULL, 0, 1, 1.5, 0.003);
DoubleSeries mod = DataFrame.toSeries(1, 1, 1, DNULL);
try {
base.add(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.subtract(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.multiply(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.divide(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.eq(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
}
@Test
public void testDoubleOperationAddConstant() {
DoubleSeries base = DataFrame.toSeries(DNULL, 0, 1, 1.5, 0.003);
assertEquals(base.add(1), DNULL, 1, 2, 2.5, 1.003);
assertEquals(base.add(0), DNULL, 0, 1, 1.5, 0.003);
assertEquals(base.add(-1), DNULL, -1, 0, 0.5, -0.997);
assertEquals(base.add(DNULL), DoubleSeries.nulls(5));
}
@Test
public void testDoubleOperationSubtractConstant() {
DoubleSeries base = DataFrame.toSeries(DNULL, 0, 1, 1.5, 0.003);
assertEquals(base.subtract(1), DNULL, -1, 0, 0.5, -0.997);
assertEquals(base.subtract(0), DNULL, 0, 1, 1.5, 0.003);
assertEquals(base.subtract(-1), DNULL, 1, 2, 2.5, 1.003);
assertEquals(base.subtract(DNULL), DoubleSeries.nulls(5));
}
@Test
public void testDoubleOperationMultiplyConstant() {
DoubleSeries base = DataFrame.toSeries(DNULL, 0, 1, 1.5, 0.003);
assertEquals(base.multiply(1), DNULL, 0, 1, 1.5, 0.003);
assertEquals(base.multiply(0), DNULL, 0, 0, 0, 0);
assertEquals(base.multiply(-1), DNULL, 0, -1, -1.5, -0.003);
assertEquals(base.multiply(DNULL), DoubleSeries.nulls(5));
}
@Test
public void testDoubleOperationDivideConstant() {
DoubleSeries base = DataFrame.toSeries(DNULL, 0, 1, 1.5, 0.003);
assertEquals(base.divide(1), DNULL, 0, 1, 1.5, 0.003);
assertEquals(base.divide(-1), DNULL, 0, -1, -1.5, -0.003);
assertEquals(base.divide(DNULL), DoubleSeries.nulls(5));
try {
base.divide(0);
Assert.fail();
} catch(ArithmeticException expected) {
// left blank
}
}
@Test
public void testDoubleOperationEqConstant() {
DoubleSeries base = DataFrame.toSeries(DNULL, 0, 1, 1.5, 0.003);
assertEquals(base.eq(1), BNULL, FALSE, TRUE, FALSE, FALSE);
assertEquals(base.eq(0), BNULL, TRUE, FALSE, FALSE, FALSE);
assertEquals(base.eq(-1), BNULL, FALSE, FALSE, FALSE, FALSE);
assertEquals(base.eq(DNULL), BooleanSeries.nulls(5));
}
@Test
public void testDoubleCount() {
DoubleSeries base = DataFrame.toSeries(DNULL, 1, 1, 1.5, 0.003);
Assert.assertEquals(base.count(1), 2);
Assert.assertEquals(base.count(2), 0);
Assert.assertEquals(base.count(DNULL), 1);
}
@Test
public void testDoubleContains() {
DoubleSeries base = DataFrame.toSeries(DNULL, 1, 1, 1.5, 0.003);
Assert.assertTrue(base.contains(1));
Assert.assertFalse(base.contains(2));
Assert.assertTrue(base.contains(DNULL));
}
@Test
public void testDoubleReplace() {
DoubleSeries base = DataFrame.toSeries(DNULL, 1, 1, 1.5, 0.003);
assertEquals(base.replace(1, 2), DNULL, 2, 2, 1.5, 0.003);
assertEquals(base.replace(1, DNULL), DNULL, DNULL, DNULL, 1.5, 0.003);
assertEquals(base.replace(2, 1), DNULL, 1, 1, 1.5, 0.003);
assertEquals(base.replace(1.5, DNULL), DNULL, 1, 1, DNULL, 0.003);
assertEquals(base.replace(DNULL, 1), 1, 1, 1, 1.5, 0.003);
}
@Test
public void testDoubleFilterSeries() {
DoubleSeries base = DataFrame.toSeries(DNULL, 1, 1, 1.5, 0.003);
BooleanSeries mod = DataFrame.toSeries(TRUE, TRUE, TRUE, FALSE, BNULL);
assertEquals(base.filter(mod), DNULL, 1, 1, DNULL, DNULL);
}
@Test
public void testDoubleFilterConditional() {
DoubleSeries base = DataFrame.toSeries(DNULL, 1, 1, 1.5, 0.003);
BooleanSeries mod = DataFrame.toSeries(TRUE, TRUE, TRUE, FALSE, BNULL);
assertEquals(base.filter(new Series.DoubleConditional() {
@Override
public boolean apply(double... values) {
return (values[0] >= 1 && values[0] < 1.5) || values[0] == 0.003;
}
}), DNULL, 1, 1, DNULL, 0.003);
}
@Test
public void testLongOperationsSeries() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 1, 5, 10);
LongSeries mod = DataFrame.toSeries(1, 1, 1, 0, LNULL);
assertEquals(base.add(mod), LNULL, 1, 2, 5, LNULL);
assertEquals(base.subtract(mod), LNULL, -1, 0, 5, LNULL);
assertEquals(base.multiply(mod), LNULL, 0, 1, 0, LNULL);
assertEquals(base.divide(mod.replace(0, 1)), LNULL, 0, 1, 5, LNULL);
assertEquals(base.eq(mod), BNULL, FALSE, TRUE, FALSE, BNULL);
try {
base.divide(mod);
Assert.fail();
} catch(ArithmeticException expected) {
// left blank
}
}
@Test
public void testLongOperationsSeriesMisaligned() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 1, 5, 10);
LongSeries mod = DataFrame.toSeries(1, 1, 1, LNULL);
try {
base.add(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.subtract(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.multiply(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.divide(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.eq(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
}
@Test
public void testLongOperationAddConstant() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 1, 5, 10);
assertEquals(base.add(1), LNULL, 1, 2, 6, 11);
assertEquals(base.add(0), LNULL, 0, 1, 5, 10);
assertEquals(base.add(-1), LNULL, -1, 0, 4, 9);
assertEquals(base.add(LNULL), LongSeries.nulls(5));
}
@Test
public void testLongOperationSubtractConstant() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 1, 5, 10);
assertEquals(base.subtract(1), LNULL, -1, 0, 4, 9);
assertEquals(base.subtract(0), LNULL, 0, 1, 5, 10);
assertEquals(base.subtract(-1), LNULL, 1, 2, 6, 11);
assertEquals(base.subtract(LNULL), LongSeries.nulls(5));
}
@Test
public void testLongOperationMultiplyConstant() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 1, 5, 10);
assertEquals(base.multiply(1), LNULL, 0, 1, 5, 10);
assertEquals(base.multiply(0), LNULL, 0, 0, 0, 0);
assertEquals(base.multiply(-1), LNULL, 0, -1, -5, -10);
assertEquals(base.multiply(LNULL), LongSeries.nulls(5));
}
@Test
public void testLongOperationDivideConstant() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 1, 5, 10);
assertEquals(base.divide(1), LNULL, 0, 1, 5, 10);
assertEquals(base.divide(-1), LNULL, 0, -1, -5, -10);
assertEquals(base.divide(LNULL), LongSeries.nulls(5));
try {
base.divide(0);
Assert.fail();
} catch(ArithmeticException expected) {
// left blank
}
}
@Test
public void testLongOperationEqConstant() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 1, 5, 10);
assertEquals(base.eq(1), BNULL, FALSE, TRUE, FALSE, FALSE);
assertEquals(base.eq(0), BNULL, TRUE, FALSE, FALSE, FALSE);
assertEquals(base.eq(-1), BNULL, FALSE, FALSE, FALSE, FALSE);
assertEquals(base.eq(LNULL), BooleanSeries.nulls(5));
}
@Test
public void testLongCount() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 0, 5, 10);
Assert.assertEquals(base.count(0), 2);
Assert.assertEquals(base.count(2), 0);
Assert.assertEquals(base.count(LNULL), 1);
}
@Test
public void testLongContains() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 0, 5, 10);
Assert.assertTrue(base.contains(0));
Assert.assertFalse(base.contains(2));
Assert.assertTrue(base.contains(LNULL));
}
@Test
public void testLongReplace() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 0, 5, 10);
assertEquals(base.replace(0, 1), LNULL, 1, 1, 5, 10);
assertEquals(base.replace(0, LNULL), LNULL, LNULL, LNULL, 5, 10);
assertEquals(base.replace(2, 1), LNULL, 0, 0, 5, 10);
assertEquals(base.replace(5, LNULL), LNULL, 0, 0, LNULL, 10);
assertEquals(base.replace(LNULL, 1), 1, 0, 0, 5, 10);
}
@Test
public void testLongFilterSeries() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 0, 5, 10);
BooleanSeries mod = DataFrame.toSeries(TRUE, TRUE, TRUE, FALSE, BNULL);
assertEquals(base.filter(mod), LNULL, 0, 0, LNULL, LNULL);
}
@Test
public void testLongFilterConditional() {
LongSeries base = DataFrame.toSeries(LNULL, 0, 0, 5, 10);
BooleanSeries mod = DataFrame.toSeries(TRUE, TRUE, TRUE, FALSE, BNULL);
assertEquals(base.filter(new Series.LongConditional() {
@Override
public boolean apply(long... values) {
return values[0] >= 0 && values[0] <= 5;
}
}), LNULL, 0, 0, 5, LNULL);
}
@Test
public void testStringOperationsSeries() {
StringSeries base = DataFrame.toSeries(SNULL, "a", "b", "c", "d");
StringSeries mod = DataFrame.toSeries("A", "A", "b", "B", SNULL);
assertEquals(base.concat(mod), SNULL, "aA", "bb", "cB", SNULL);
assertEquals(base.eq(mod), BNULL, FALSE, TRUE, FALSE, BNULL);
}
@Test
public void testStringOperationsSeriesMisaligned() {
StringSeries base = DataFrame.toSeries(SNULL, "a", "b", "c", "d");
StringSeries mod = DataFrame.toSeries("A", "A", "b", SNULL);
try {
base.concat(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.eq(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
}
@Test
public void testStringOperationConcatConstant() {
StringSeries base = DataFrame.toSeries(SNULL, "a", "b", "c", "d");
assertEquals(base.concat("X"), SNULL, "aX", "bX", "cX", "dX");
assertEquals(base.concat(""), SNULL, "a", "b", "c", "d");
assertEquals(base.concat(SNULL), StringSeries.nulls(5));
}
@Test
public void testStringOperationEqConstant() {
StringSeries base = DataFrame.toSeries(SNULL, "a", "b", "c", "d");
assertEquals(base.eq("a"), BNULL, TRUE, FALSE, FALSE, FALSE);
assertEquals(base.eq("b"), BNULL, FALSE, TRUE, FALSE, FALSE);
assertEquals(base.eq(""), BNULL, FALSE, FALSE, FALSE, FALSE);
assertEquals(base.eq(SNULL), BooleanSeries.nulls(5));
}
@Test
public void testStringCount() {
StringSeries base = DataFrame.toSeries(SNULL, "a", "a", "b", "A");
Assert.assertEquals(base.count("a"), 2);
Assert.assertEquals(base.count("d"), 0);
Assert.assertEquals(base.count(SNULL), 1);
}
@Test
public void testStringContains() {
StringSeries base = DataFrame.toSeries(SNULL, "a", "a", "b", "A");
Assert.assertTrue(base.contains("a"));
Assert.assertFalse(base.contains(""));
Assert.assertTrue(base.contains(SNULL));
}
@Test
public void testStringReplace() {
StringSeries base = DataFrame.toSeries(SNULL, "a", "a", "b", "A");
assertEquals(base.replace("a", "AA"), SNULL, "AA", "AA", "b", "A");
assertEquals(base.replace("a", SNULL), SNULL, SNULL, SNULL, "b", "A");
assertEquals(base.replace("b", "B"), SNULL, "a", "a", "B", "A");
assertEquals(base.replace("", "X"), SNULL, "a", "a", "b", "A");
assertEquals(base.replace(SNULL, "N"), "N", "a", "a", "b", "A");
}
@Test
public void testStringFilterSeries() {
StringSeries base = DataFrame.toSeries(SNULL, "a", "a", "b", "A");
BooleanSeries mod = DataFrame.toSeries(TRUE, TRUE, TRUE, FALSE, BNULL);
assertEquals(base.filter(mod), SNULL, "a", "a", SNULL, SNULL);
}
@Test
public void testStringFilterConditional() {
StringSeries base = DataFrame.toSeries(SNULL, "a", "a", "b", "A");
BooleanSeries mod = DataFrame.toSeries(TRUE, TRUE, TRUE, FALSE, BNULL);
assertEquals(base.filter(new Series.StringConditional() {
@Override
public boolean apply(String... values) {
return values[0].equals("a") || values[0].equals("A");
}
}), SNULL, "a", "a", SNULL, "A");
}
@Test
public void testBooleanOperationsSeries() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
BooleanSeries mod = DataFrame.toSeries(TRUE, TRUE, TRUE, FALSE, BNULL);
assertEquals(base.and(mod), BNULL, TRUE, FALSE, FALSE, BNULL);
assertEquals(base.or(mod), BNULL, TRUE, TRUE, TRUE, BNULL);
assertEquals(base.xor(mod), BNULL, FALSE, TRUE, TRUE, BNULL);
assertEquals(base.implies(mod), BNULL, TRUE, TRUE, FALSE, BNULL);
assertEquals(base.eq(mod), BNULL, TRUE, FALSE, FALSE, BNULL);
}
@Test
public void testBooleanOperationsSeriesMisaligned() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
BooleanSeries mod = DataFrame.toSeries(BNULL, TRUE, FALSE, BNULL);
try {
base.and(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.or(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.xor(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.implies(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
try {
base.eq(mod);
Assert.fail();
} catch(IllegalArgumentException expected) {
// left blank
}
}
@Test
public void testBooleanOperationAndConstant() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
assertEquals(base.and(true), BNULL, TRUE, FALSE, TRUE, FALSE);
assertEquals(base.and(false), BNULL, FALSE, FALSE, FALSE, FALSE);
assertEquals(base.and(BNULL), BooleanSeries.nulls(5));
}
@Test
public void testBooleanOperationOrConstant() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
assertEquals(base.or(true), BNULL, TRUE, TRUE, TRUE, TRUE);
assertEquals(base.or(false), BNULL, TRUE, FALSE, TRUE, FALSE);
assertEquals(base.or(BNULL), BooleanSeries.nulls(5));
}
@Test
public void testBooleanOperationXorConstant() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
assertEquals(base.xor(true), BNULL, FALSE, TRUE, FALSE, TRUE);
assertEquals(base.xor(false), BNULL, TRUE, FALSE, TRUE, FALSE);
assertEquals(base.xor(BNULL), BooleanSeries.nulls(5));
}
@Test
public void testBooleanOperationImpliesConstant() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
assertEquals(base.implies(true), BNULL, TRUE, TRUE, TRUE, TRUE);
assertEquals(base.implies(false), BNULL, FALSE, TRUE, FALSE, TRUE);
assertEquals(base.implies(BNULL), BooleanSeries.nulls(5));
}
@Test
public void testBooleanOperationEqConstant() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
assertEquals(base.eq(true), BNULL, TRUE, FALSE, TRUE, FALSE);
assertEquals(base.eq(false), BNULL, FALSE, TRUE, FALSE, TRUE);
assertEquals(base.eq(BNULL), BooleanSeries.nulls(5));
}
@Test
public void testBooleanCount() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
Assert.assertEquals(base.count(TRUE), 2);
Assert.assertEquals(base.count(FALSE), 2);
Assert.assertEquals(base.count(BNULL), 1);
}
@Test
public void testBooleanContains() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
Assert.assertTrue(base.contains(TRUE));
Assert.assertTrue(base.contains(FALSE));
Assert.assertTrue(base.contains(BNULL));
}
@Test
public void testBooleanReplace() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
assertEquals(base.replace(TRUE, FALSE), BNULL, FALSE, FALSE, FALSE, FALSE);
assertEquals(base.replace(TRUE, BNULL), BNULL, BNULL, FALSE, BNULL, FALSE);
assertEquals(base.replace(FALSE, TRUE), BNULL, TRUE, TRUE, TRUE, TRUE);
assertEquals(base.replace(FALSE, BNULL), BNULL, TRUE, BNULL, TRUE, BNULL);
assertEquals(base.replace(BNULL, TRUE), TRUE, TRUE, FALSE, TRUE, FALSE);
}
@Test
public void testBooleanFilterSeries() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
BooleanSeries mod = DataFrame.toSeries(TRUE, TRUE, TRUE, FALSE, BNULL);
assertEquals(base.filter(mod), BNULL, TRUE, FALSE, BNULL, BNULL);
}
public void testBooleanFilterConditional() {
BooleanSeries base = DataFrame.toSeries(BNULL, TRUE, FALSE, TRUE, FALSE);
assertEquals(base.filter(new Series.BooleanConditional() {
@Override
public boolean apply(boolean... values) {
return values[0];
}
}), BNULL, TRUE, BNULL, TRUE, BNULL);
}
@Test
public void testAppend() {
DataFrame base = new DataFrame();
base.addSeries("A", 1, 2, 3, 4);
base.addSeries("B", "a", "b", "c", "d");
base.setIndex("B");
DataFrame other = new DataFrame();
other.addSeries("A", 5.0d, 6.3d, 7.1d);
other.addSeries("C", true, true, false);
DataFrame another = new DataFrame();
another.addSeries("C", false, false);
DataFrame res = base.append(other, another);
Assert.assertEquals(res.getSeriesNames(), new HashSet<>(Arrays.asList("A", "B")));
Assert.assertEquals(res.get("A").type(), Series.SeriesType.LONG);
Assert.assertEquals(res.get("B").type(), Series.SeriesType.STRING);
assertEquals(res.getLongs("A"), 1, 2, 3, 4, 5, 6, 7, LongSeries.NULL, LongSeries.NULL);
assertEquals(res.getStrings("B"), "a", "b", "c", "d", null, null, null, null, null);
}
/* **************************************************************************
* Helpers
***************************************************************************/
static void assertEquals(Series actual, Series expected) {
Assert.assertEquals(actual, expected);
}
static void assertEquals(DoubleSeries actual, double... expected) {
assertEquals(actual.getDoubles().values(), expected);
}
static void assertEquals(double[] actual, double... expected) {
if(actual.length != expected.length)
Assert.fail(String.format("expected array length [%d] but found [%d]", actual.length, expected.length));
for(int i=0; i<actual.length; i++) {
if(Double.isNaN(actual[i]) && Double.isNaN(expected[i]))
continue;
Assert.assertEquals(actual[i], expected[i], COMPARE_DOUBLE_DELTA, "index=" + i);
}
}
static void assertEquals(LongSeries actual, long... expected) {
assertEquals(actual.getLongs().values(), expected);
}
static void assertEquals(long[] actual, long... expected) {
if(actual.length != expected.length)
Assert.fail(String.format("expected array length [%d] but found [%d]", actual.length, expected.length));
for(int i=0; i<actual.length; i++) {
Assert.assertEquals(actual[i], expected[i], "index=" + i);
}
}
static void assertEquals(StringSeries actual, String... expected) {
assertEquals(actual.getStrings().values(), expected);
}
static void assertEquals(String[] actual, String... expected) {
if(actual.length != expected.length)
Assert.fail(String.format("expected array length [%d] but found [%d]", actual.length, expected.length));
for(int i=0; i<actual.length; i++) {
Assert.assertEquals(actual[i], expected[i], "index=" + i);
}
}
static void assertEquals(BooleanSeries actual, byte... expected) {
assertEquals(actual.getBooleans().values(), expected);
}
static void assertEquals(BooleanSeries actual, boolean... expected) {
BooleanSeries s = actual.getBooleans();
if(s.hasNull())
Assert.fail("Encountered NULL when comparing against booleans");
assertEquals(s.valuesBoolean(), expected);
}
static void assertEquals(byte[] actual, byte... expected) {
if(actual.length != expected.length)
Assert.fail(String.format("expected array length [%d] but found [%d]", actual.length, expected.length));
for(int i=0; i<actual.length; i++) {
Assert.assertEquals(actual[i], expected[i], "index=" + i);
}
}
static void assertEquals(boolean[] actual, boolean... expected) {
if(actual.length != expected.length)
Assert.fail(String.format("expected array length [%d] but found [%d]", actual.length, expected.length));
for(int i=0; i<actual.length; i++) {
Assert.assertEquals(actual[i], expected[i], "index=" + i);
}
}
}