package water.udf;
import com.google.common.io.Files;
import org.junit.Test;
import water.util.fp.Function;
import water.util.fp.Predicate;
import water.util.fp.PureFunctions;
import water.udf.specialized.Enums;
import water.util.StringUtils;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.*;
import static org.junit.Assert.*;
import static water.udf.specialized.Dates.*;
import static water.udf.specialized.Doubles.*;
import static water.udf.specialized.Strings.*;
import static water.util.FileUtils.*;
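/**
* Tests for UDF columns: typed data columns (doubles, strings, enums, dates),
* function-derived columns (FunColumn, Fun2Column, Fun3Column), folding columns,
* and unfolding columns/frames.
*/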
public class UdfTest extends UdfTestBase {
/**
 * Cloud size requested by this test class.
 * NOTE(review): presumably consulted by UdfTestBase before tests run - confirm.
 *
 * @return always 2
 */
int requiredCloudSize() {
  final int nodes = 2;
  return nodes;
}
/**
 * Builds a column of {@code 1 << 20} rows where row i holds sin(i),
 * except rows 11 through 19, for which the generator returns null.
 * The new column is passed through willDrop before being returned.
 */
private DataColumn<Double> sines() throws java.io.IOException {
  final Function<Long, Double> sineWithGap = new Function<Long, Double>() {
    public Double apply(Long row) {
      // Rows outside the open interval (10, 20) carry a real value.
      if (row <= 10 || row >= 20) {
        return Math.sin(row);
      }
      return null;
    }
  };
  return willDrop(Doubles.newColumn(1 << 20, sineWithGap));
}
/**
 * Same generator as a sine column with a null gap at rows 11 through 19,
 * but built with 1001590 rows. The new column is passed through willDrop.
 */
private DataColumn<Double> sinesShort() throws java.io.IOException {
  final int rows = 1001590;
  return willDrop(Doubles.newColumn(rows, new Function<Long, Double>() {
    public Double apply(Long index) {
      final boolean inGap = index > 10 && index < 20;
      if (inGap) {
        return null;
      }
      return Math.sin(index);
    }
  }));
}
/**
 * Builds a column of {@code 1 << 20} rows where row i holds 5 * i.
 * The new column is passed through willDrop before being returned.
 */
private DataColumn<Double> five_x() throws java.io.IOException {
  final Function<Long, Double> timesFive = new Function<Long, Double>() {
    public Double apply(Long row) {
      return 5.0 * row;
    }
  };
  return willDrop(Doubles.newColumn(1 << 20, timesFive));
}
/**
 * Checks the sines() column around its null gap: rows 11 and 19 are expected
 * to read back as NaN and to be reported by isNA, rows 10 and 20 are not.
 */
@Test
public void testIsNA() throws Exception {
  final Column<Double> column = sines();
  final int[] probes = {10, 11, 19, 20};

  // First pass: the value returned by apply().
  for (int row : probes) {
    final Double value = column.apply(row);
    if (row > 10 && row < 20) {
      assertTrue(value.isNaN());
    } else {
      assertFalse(value.isNaN());
    }
  }

  // Second pass: the isNA() flag for the same rows.
  for (int row : probes) {
    if (row > 10 && row < 20) {
      assertTrue(column.isNA(row));
    } else {
      assertFalse(column.isNA(row));
    }
  }
}
/**
 * Spot-checks the five_x() column: row i is expected to hold 5 * i.
 */
@Test
public void testOfDoubles() throws Exception {
  final double tolerance = 0.000001;
  final int[] rows = {0, 42, 20000};
  final double[] expected = {0.0, 210.0, 100000.0};

  final Column<Double> fiveTimesIndex = five_x();
  for (int k = 0; k < rows.length; k++) {
    assertEquals(expected[k], fiveTimesIndex.apply(rows[k]), tolerance);
  }
}
/**
 * Builds a string column of {@code 1 << 20} rows ("<<i>>" for row i, null at row 42),
 * spot-checks it, then materializes it and compares the first 100000 rows.
 */
@Test
public void testOfStrings() throws Exception {
  Column<String> c = willDrop(Strings.newColumn(1 << 20, new Function<Long, String>() {
    public String apply(Long i) {
      return i == 42 ? null : "<<" + i + ">>";
    }
  }));
  assertEquals("<<0>>", c.apply(0));
  assertNull(c.apply(42));
  assertEquals("<<2016>>", c.apply(2016));

  // Register the materialized copy for dropping, the same way testFun2 does for its materialized column.
  Column<String> materialized = willDrop(Strings.materialize(c));
  for (int i = 0; i < 100000; i++) {
    // Include the row index so a mismatch can be located.
    assertEquals("At " + i + ":", c.apply(i), materialized.apply(i));
  }
}
/**
 * Builds an enum column over {"Red", "White", "Blue"} where row i holds i % 3,
 * spot-checks it, then materializes it and compares the first 100000 rows.
 */
@Test
public void testOfEnums() throws Exception {
  // One domain array for both factories instead of two identical literals.
  final String[] domain = {"Red", "White", "Blue"};
  Column<Integer> c = willDrop(Enums.enums(domain)
      .newColumn(1 << 20, new Function<Long, Integer>() {
        public Integer apply(Long i) { return (int) (i % 3); }
      }));
  assertEquals(0, c.apply(0).intValue());
  assertEquals(0, c.apply(42).intValue());
  assertEquals(1, c.apply(100).intValue());
  assertEquals(2, c.apply(20000).intValue());

  // Register the materialized copy for dropping, the same way testFun2 does for its materialized column.
  Column<Integer> materialized = willDrop(Enums.enums(domain).materialize(c));
  for (int i = 0; i < 100000; i++) {
    assertEquals("At " + i + ":", c.apply(i), materialized.apply(i));
  }
}
/**
 * Builds a date column of {@code 1 << 20} rows where row i holds the date i days
 * after the epoch (i * 24h in milliseconds), spot-checks it, then materializes it
 * and compares the first 100000 rows.
 */
@Test
public void testOfDates() throws Exception {
  Column<Date> c = willDrop(Dates.newColumn(1 << 20, new Function<Long, Date>() {
    public Date apply(Long i) {
      return new Date(i * 3600000L * 24);
    }
  }));
  assertEquals(new Date(0), c.apply(0));
  assertEquals(new Date(258 * 24 * 3600 * 1000L), c.apply(258));

  // Register the materialized copy for dropping, the same way testFun2 does for its materialized column.
  Column<Date> materialized = willDrop(Dates.materialize(c));
  for (int i = 0; i < 100000; i++) {
    assertEquals("At " + i + ":", c.apply(i), materialized.apply(i));
  }
}
//// All UUID functionality is currently disabled
// @Test
// public void testOfUUIDs() throws Exception {
// Column<UUID> c = willDrop(UUIDs.newColumn(1 << 20, new Function<Long, UUID>() {
// public UUID apply(Long i) {
// return new UUID(i * 7, i * 13);
// }
// }));
// assertEquals(new UUID(0, 0), c.apply(0));
// assertEquals(new UUID(258*7, 258*13), c.apply(258));
//
// Column<UUID> materialized = UUIDs.materialize(c);
//
// for (int i = 0; i < 100000; i++) {
// assertEquals(c.apply(i), materialized.apply(i));
// }
// }
/**
 * Maps an enum column (row i holds i % 3) to its label through a FunColumn
 * and spot-checks the resulting strings.
 */
@Test
public void testOfEnumFun() throws Exception {
  final String[] colors = {"Red", "White", "Blue"};

  final Column<Integer> codes = willDrop(Enums.enums(colors)
      .newColumn(1 << 20, new Function<Long, Integer>() {
        public Integer apply(Long row) {
          return (int) (row % 3);
        }
      }));

  final Function<Integer, String> toLabel = new Function<Integer, String>() {
    public String apply(Integer code) {
      return colors[code];
    }
  };
  final Column<String> labels = new FunColumn<>(toLabel, codes);

  final int[] rows = {0, 42, 100, 20000};
  final String[] expected = {"Red", "Red", "White", "Blue"};
  for (int k = 0; k < rows.length; k++) {
    assertEquals(expected[k], labels.apply(rows[k]));
  }
}
/**
 * Applies PureFunctions.SQUARE to the five_x() column through a FunColumn
 * and spot-checks that row i yields (5 * i)^2.
 */
@Test
public void testOfSquares() throws Exception {
  final double tolerance = 0.000001;
  final int[] rows = {0, 42, 20000};
  final double[] expected = {0.0, 44100.0, 10000000000.0};

  final Column<Double> base = five_x();
  final Column<Double> squared = new FunColumn<>(PureFunctions.SQUARE, base);
  for (int k = 0; k < rows.length; k++) {
    assertEquals(expected[k], squared.apply(rows[k]), tolerance);
  }
}
/**
 * Checks that a FunColumn built over sines() reports NA for rows 11 and 19
 * (inside the source column's null gap) and not for rows 10 and 20,
 * and that row 10 evaluates to sin(10)^2.
 */
@Test
public void testIsFunNA() throws Exception {
  final Column<Double> source = sines();
  final Column<Double> squared = new FunColumn<>(PureFunctions.SQUARE, source);

  for (int row : new int[] {10, 11, 19, 20}) {
    if (row > 10 && row < 20) {
      assertTrue(squared.isNA(row));
    } else {
      assertFalse(squared.isNA(row));
    }
  }

  final double sinTenSquared = 0.295958969093304;
  assertEquals(sinTenSquared, squared.apply(10), 0.0001);
}
/**
 * Combines five_x() and sines() through two-argument function columns
 * (x + y^2 via PLUS over a squared column, and X2_PLUS_Y2 directly),
 * spot-checks both, then materializes the second and compares the first
 * 100000 rows, including NA flags.
 */
@Test
public void testFun2() throws Exception {
  Column<Double> x = five_x();
  Column<Double> y = sines();
  Column<Double> y2 = willDrop(new FunColumn<>(PureFunctions.SQUARE, y));
  Column<Double> z1 = willDrop(new Fun2Column<>(PureFunctions.PLUS, x, y2));
  Column<Double> z2 = willDrop(new Fun2Column<>(PureFunctions.X2_PLUS_Y2, x, y));

  assertEquals(0.0, z1.apply(0), 0.000001);
  assertEquals(210.84001174779368, z1.apply(42), 0.000001);
  assertEquals(100000.3387062632, z1.apply(20000), 0.000001);
  assertEquals(0.0, z2.apply(0), 0.000001);
  assertEquals(44100.840011747794, z2.apply(42), 0.000001);
  assertEquals(10000000000.3387062632, z2.apply(20000), 0.000001);

  Column<Double> materialized = willDrop(Doubles.materialize(z2));
  for (int i = 0; i < 100000; i++) {
    Double expected = z2.apply(i);
    // assertEquals with the row index reports which row and which flag differed,
    // unlike a bare assertTrue on a boolean comparison.
    assertEquals("NA flag mismatch at " + i + ":", z2.isNA(i), materialized.isNA(i));
    // the following exposes a problem. nulls being returned.
    if (expected == null) assertTrue("At " + i + ":", materialized.isNA(i));
    Double actual = materialized.apply(i);
    if (!z2.isNA(i)) assertEquals("At " + i + ":", expected, actual, 0.0001);
  }
}
/**
 * Verifies that Fun2Column and Fun3Column reject incompatible input columns.
 * x and z are built with {@code 1 << 20} rows; y comes from sinesShort(), which is
 * built with a different row count. Each constructor call is expected to throw
 * an AssertionError.
 *
 * The outcome is recorded in a flag and asserted outside the try block:
 * calling fail() inside a try guarded by catch (AssertionError) would have its
 * own AssertionError swallowed, so the test could never fail.
 */
@Test
public void testFun2Compatibility() throws Exception {
  Column<Double> x = five_x();
  Column<Double> y = sinesShort();
  Column<Double> z = willDrop(Doubles.newColumn(1 << 20, new Function<Long, Double>() {
    public Double apply(Long i) { return Math.sin(i * 0.0001); }
  }));

  boolean rejected = false;
  try {
    new Fun2Column<>(PureFunctions.PLUS, x, y);
  } catch (AssertionError expected) {
    // as designed
    rejected = true;
  }
  assertTrue("Column incompatibility should be detected", rejected);

  rejected = false;
  try {
    new Fun3Column<>(PureFunctions.X2_PLUS_Y2_PLUS_Z2, x, y, z);
  } catch (AssertionError expected) {
    // as designed
    rejected = true;
  }
  assertTrue("Column incompatibility should be detected", rejected);

  rejected = false;
  try {
    new Fun3Column<>(PureFunctions.X2_PLUS_Y2_PLUS_Z2, x, z, y);
  } catch (AssertionError expected) {
    // as designed
    rejected = true;
  }
  assertTrue("Column incompatibility should be detected", rejected);
}
/**
 * Builds Fun2Column / Fun3Column instances that mix regular columns with a
 * constant column created by Doubles.constColumn(42.0, 1 << 20).
 *
 * NOTE(review): as written this test cannot fail on the compatibility check.
 * fail() throws an AssertionError, and every call to it sits inside a try block
 * whose catch clause catches AssertionError, so both outcomes (constructor throws,
 * constructor succeeds) end in the catch block.
 * NOTE(review): it is not visible from this file whether a constant column of the
 * same length is meant to be compatible with x and z. Confirm the intended contract,
 * then either assert rejection outside the try block or drop the try/catch.
 */
@Test
public void testFun2CompatibilityWithConst() throws Exception {
Column<Double> x = five_x();
// Unlike z below, the constant column is not passed through willDrop.
Column<Double> y = Doubles.constColumn(42.0, 1 << 20);
Column<Double> z = willDrop(Doubles.newColumn(1 << 20, new Function<Long, Double>() {
public Double apply(Long i) { return Math.sin(i*0.0001); }
}));
// Two-argument function over (x, const).
try {
Column<Double> z1 = new Fun2Column<>(PureFunctions.PLUS, x, y);
fail("Column incompatibility should be detected");
} catch (AssertionError ae) {
// as designed
}
// Three-argument function with the constant column in the middle position.
try {
Column<Double> r = new Fun3Column<>(PureFunctions.X2_PLUS_Y2_PLUS_Z2, x, y, z);
fail("Column incompatibility should be detected");
} catch (AssertionError ae) {
// as designed
}
// Three-argument function with the constant column in the last position.
try {
Column<Double> r = new Fun3Column<>(PureFunctions.X2_PLUS_Y2_PLUS_Z2, x, z, y);
fail("Column incompatibility should be detected");
} catch (AssertionError ae) {
// as designed
}
}
/**
 * Builds three columns whose squares sum to 1 for every row
 * (cos(a)cos(b), cos(a)sin(b), sin(a)), combines them with X2_PLUS_Y2_PLUS_Z2
 * through a Fun3Column, and checks the result both directly and after
 * materialization.
 */
@Test
public void testFun3() throws Exception {
  Column<Double> x = willDrop(Doubles.newColumn(1 << 20, new Function<Long, Double>() {
    public Double apply(Long i) { return Math.cos(i * 0.0001) * Math.cos(i * 0.0000001); }
  }));
  Column<Double> y = willDrop(Doubles.newColumn(1 << 20, new Function<Long, Double>() {
    public Double apply(Long i) { return Math.cos(i * 0.0001) * Math.sin(i * 0.0000001); }
  }));
  Column<Double> z = willDrop(Doubles.newColumn(1 << 20, new Function<Long, Double>() {
    public Double apply(Long i) { return Math.sin(i * 0.0001); }
  }));

  Column<Double> r = new Fun3Column<>(PureFunctions.X2_PLUS_Y2_PLUS_Z2, x, y, z);
  for (int i = 0; i < 100000; i++) {
    assertEquals("At " + (i * 10) + ":", 1.00, r.apply(i * 10), 0.0001);
  }

  // Register the materialized copy for dropping, the same way testFun2 does for its materialized column.
  Column<Double> materialized = willDrop(Doubles.materialize(r));
  for (int i = 0; i < 100000; i++) {
    assertEquals("At " + i + ":", r.apply(i), materialized.apply(i), 0.0001);
  }
}
/**
 * Exercises FoldingColumn with SUM_OF_SQUARES:
 * over three columns whose squares sum to 1, over a single column (expects x^2),
 * and over no columns at all (expects an AssertionError). Finally compares the
 * three-column folding against its materialized copy.
 *
 * The empty-folding outcome is recorded in a flag and asserted outside the try
 * block: calling fail() inside a try guarded by catch (AssertionError) would have
 * its own AssertionError swallowed, so the check could never fail.
 */
@Test
public void testFoldingColumn() throws Exception {
  Column<Double> x = willDrop(Doubles.newColumn(1 << 20, new Function<Long, Double>() {
    public Double apply(Long i) { return Math.cos(i * 0.0001) * Math.cos(i * 0.0000001); }
  }));
  Column<Double> y = willDrop(Doubles.newColumn(1 << 20, new Function<Long, Double>() {
    public Double apply(Long i) { return Math.cos(i * 0.0001) * Math.sin(i * 0.0000001); }
  }));
  Column<Double> z = willDrop(Doubles.newColumn(1 << 20, new Function<Long, Double>() {
    public Double apply(Long i) { return Math.sin(i * 0.0001); }
  }));

  Column<Double> r = new FoldingColumn<>(PureFunctions.SUM_OF_SQUARES, x, y, z);
  for (int i = 0; i < 100000; i++) {
    assertEquals(1.00, r.apply(i * 10), 0.0001);
  }

  Column<Double> x1 = new FoldingColumn<>(PureFunctions.SUM_OF_SQUARES, x);
  for (int i = 0; i < 100000; i++) {
    double xi = x.apply(i);
    assertEquals(xi * xi, x1.apply(i), 0.0001);
  }

  boolean rejected = false;
  try {
    new FoldingColumn<>(PureFunctions.SUM_OF_SQUARES);
  } catch (AssertionError expected) {
    // good, good!
    rejected = true;
  }
  assertTrue("This should have failed - no empty foldings", rejected);

  // Register the materialized copy for dropping, the same way testFun2 does for its materialized column.
  Column<Double> materialized = willDrop(Doubles.materialize(r));
  for (int i = 0; i < 100000; i++) {
    assertEquals(r.apply(i), materialized.apply(i), 0.0001);
  }
}
/**
 * Verifies that FoldingColumn rejects incompatible input columns.
 * x and y are built with {@code 1 << 20} rows; z comes from sinesShort(), which is
 * built with a different row count. The constructor is expected to throw an
 * AssertionError.
 *
 * The outcome is recorded in a flag and asserted outside the try block:
 * calling fail() inside a try guarded by catch (AssertionError) would have its
 * own AssertionError swallowed, so the test could never fail.
 */
@Test
public void testFoldingColumnCompatibility() throws Exception {
  Column<Double> x = willDrop(Doubles.newColumn(1 << 20, new Function<Long, Double>() {
    public Double apply(Long i) { return Math.cos(i * 0.0001) * Math.cos(i * 0.0000001); }
  }));
  Column<Double> y = willDrop(Doubles.newColumn(1 << 20, new Function<Long, Double>() {
    public Double apply(Long i) { return Math.cos(i * 0.0001) * Math.sin(i * 0.0000001); }
  }));
  Column<Double> z = sinesShort();

  boolean rejected = false;
  try {
    new FoldingColumn<>(PureFunctions.SUM_OF_SQUARES, x, y, z);
  } catch (AssertionError expected) {
    // as expected
    rejected = true;
  }
  assertTrue("Should have failed on incompatibility", rejected);
}
/**
 * Shows how the lines of a file can be unfolded into multiple values per row:
 * every line of the CSV is stored in a string column, split on "," by an
 * UnfoldingColumn of width 10, and the non-null pieces are re-joined and
 * compared with the original line.
 */
@Test public void testUnfoldingColumn() throws IOException {
  final File weatherCsv = getFile("smalldata/chicago/chicagoAllWeather.csv");
  final List<String> lines = Files.readLines(weatherCsv, Charset.defaultCharset());

  // Typed wrapper around the stored lines.
  final Column<String> rawRows = willDrop(Strings.newColumn(lines));

  // Virtual column: each row is the list of comma-separated pieces of the line.
  final Column<List<String>> pieces = new UnfoldingColumn<>(PureFunctions.splitBy(","), rawRows, 10);

  int row = 0;
  for (String line : lines) {
    // Rows are padded with nulls up to the fixed width (10); drop the padding before joining.
    final List<String> present = Predicate.NOT_NULL.filter(pieces.apply(row));
    final String rejoined = StringUtils.join(" ", present);
    final String expected = line.replaceAll("\\,", " ").trim();
    assertEquals(expected, rejoined);
    row++;
  }
}
/**
 * Splits every line of the CSV on "," via an UnfoldingColumn, materializes the
 * result as separate string columns through an UnfoldingFrame, and checks that
 * re-joining the non-null cells of the first 10 columns reproduces each line.
 * Also checks that a materialized column is compatible with the source column.
 *
 * NOTE(review): the frame is created with 11 while the unfolding column uses 10
 * and only columns 0..9 are read back - confirm the 11 is intentional.
 */
@Test
public void testUnfoldingFrame() throws IOException {
  final File weatherCsv = getFile("smalldata/chicago/chicagoAllWeather.csv");
  final List<String> lines = Files.readLines(weatherCsv, Charset.defaultCharset());
  final Column<String> source = willDrop(Strings.newColumn(lines));
  final Column<List<String>> split = new UnfoldingColumn<>(PureFunctions.splitBy(","), source, 10);

  final UnfoldingFrame<String> frame = new UnfoldingFrame<>(Strings, split.size(), split, 11);
  final List<DataColumn<String>> columns = frame.materialize();

  int row = 0;
  for (String line : lines) {
    final List<String> cells = new ArrayList<>(10);
    for (int col = 0; col < 10; col++) {
      final String cell = columns.get(col).get(row);
      if (cell == null) {
        continue;
      }
      cells.add(cell);
    }
    final String rejoined = StringUtils.join(" ", cells);
    assertEquals(line.replaceAll("\\,", " ").trim(), rejoined);
    row++;
  }

  assertTrue("Need to align the result", columns.get(5).isCompatibleWith(source));
}
}