/*
* Joinery -- Data frames for Java
* Copyright (c) 2014, 2015 IBM Corp.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package joinery;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Arrays;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import joinery.impl.Conversion;
import org.junit.Before;
import org.junit.Test;
public class DataFrameConversionTest {
private DataFrame<Object> df;
/**
 * Rebuilds the shared fixture before each test.
 *
 * The frame has row labels {@code row1}..{@code row6} and six named
 * columns. Every cell is either a string (some of which look like
 * numbers, dates or booleans) or {@code null}, which gives the
 * conversion tests raw text to work on.
 */
@Before
public void setUp()
throws Exception {
    final List<Object> rowLabels =
        Arrays.<Object>asList("row1", "row2", "row3", "row4", "row5", "row6");
    final List<Object> columnLabels =
        Arrays.<Object>asList("string", "long", "double", "date", "bool", "null");
    // One inner list per entry of columnLabels, in the same order.
    final List<List<Object>> cells = Arrays.<List<Object>>asList(
        Arrays.<Object>asList("one", "two", "three", "four", "five", "six"),
        Arrays.<Object>asList("1", "2", "3", "4", "5", "6"),
        Arrays.<Object>asList("1.1", "2.2", "3.3", "4.4", "5.5", "6.6"),
        Arrays.<Object>asList("2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05", "2014-01-06"),
        Arrays.<Object>asList("t", "true", "f", "false", "yes", "no"),
        Arrays.<Object>asList(null, null, null, null, null, null)
    );
    df = new DataFrame<>(rowLabels, columnLabels, cells);
}
/**
 * Casting the fixture to {@code String} must leave all cells as they
 * were; the flattened result is compared against the fixture's cells
 * listed one column after another.
 */
@Test
public void testCast() {
    final String[] expectedCells = {
        "one", "two", "three", "four", "five", "six",
        "1", "2", "3", "4", "5", "6",
        "1.1", "2.2", "3.3", "4.4", "5.5", "6.6",
        "2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05", "2014-01-06",
        "t", "true", "f", "false", "yes", "no",
        null, null, null, null, null, null
    };
    final DataFrame<String> asStrings = df.cast(String.class);
    assertArrayEquals(expectedCells, asStrings.toArray());
}
/**
 * Viewing the string-only fixture as a frame of {@code Date} and reading
 * a cell as a {@code Date} must raise {@link ClassCastException}.
 */
@Test(expected=ClassCastException.class)
public void testCastFails() {
    // The assignment to a Date-typed local is deliberate: with erased
    // generics this is where the compiler emits the checked cast.
    @SuppressWarnings("unused")
    final Date firstCell = df.cast(Date.class).get(0, 0);
}
/**
 * After an argument-less {@code convert()}, the first row of each
 * non-null column must hold a value of the expected Java type.
 */
@Test
public void testConvert() {
    final String[] columnNames = { "string", "long", "double", "date", "bool" };
    final Class<?>[] expectedTypes = {
        String.class, Long.class, Double.class, Date.class, Boolean.class
    };

    df.convert();

    // columnNames[i] pairs with expectedTypes[i]; checked in declaration order.
    for (int i = 0; i < columnNames.length; i++) {
        assertEquals(
            expectedTypes[i],
            df.get("row1", columnNames[i]).getClass()
        );
    }
}
/**
 * Converts a frame that is statically typed as {@code DataFrame<String>}
 * and then reads the "long" column back as a {@code String}; the test
 * expects that read to raise {@link ClassCastException}.
 */
@Test(expected=ClassCastException.class)
public void testConvertFails() {
    final List<Object> rowLabels =
        Arrays.<Object>asList("row1", "row2", "row3", "row4", "row5", "row6");
    final List<Object> columnLabels =
        Arrays.<Object>asList("string", "long", "double", "date");
    final List<List<String>> cells = Arrays.<List<String>>asList(
        Arrays.<String>asList("one", "two", "three", "four", "five", "six"),
        Arrays.<String>asList("1", "2", "3", "4", "5", "6"),
        Arrays.<String>asList("1.1", "2.2", "3.3", "4.4", "5.5", "6.6"),
        Arrays.<String>asList("2014-01-01", "2014-01-02", "2014-01-03", "2014-01-04", "2014-01-05", "2014-01-06")
    );
    final DataFrame<String> stringTyped = new DataFrame<>(rowLabels, columnLabels, cells);

    stringTyped.convert();

    // Kept as a String-typed assignment so the compiler-inserted cast runs here.
    @SuppressWarnings("unused")
    final String shouldNotBeReadable = stringTyped.get("row1", "long");
}
/**
 * {@code isnull()} must flag exactly the cells of the fixture's last
 * column: 30 {@code false} entries followed by 6 {@code true} entries
 * in the flattened result.
 */
@Test
public void testIsNull() {
    final Boolean[] flags = new Boolean[36];
    Arrays.fill(flags, 0, 30, Boolean.FALSE);   // five populated columns
    Arrays.fill(flags, 30, 36, Boolean.TRUE);   // the all-null column
    final Object[] expected = flags;

    final DataFrame<Boolean> nullMask = df.isnull();
    assertArrayEquals(expected, nullMask.toArray());
}
/**
 * {@code notnull()} must be the mirror image of the null mask: 30
 * {@code true} entries followed by 6 {@code false} entries in the
 * flattened result.
 */
@Test
public void testNotNull() {
    final Boolean[] flags = new Boolean[36];
    Arrays.fill(flags, 0, 30, Boolean.TRUE);    // five populated columns
    Arrays.fill(flags, 30, 36, Boolean.FALSE);  // the all-null column
    final Object[] expected = flags;

    final DataFrame<Boolean> presentMask = df.notnull();
    assertArrayEquals(expected, presentMask.toArray());
}
/**
 * Calls {@code convert} with explicit per-column targets
 * ({@code null}, {@code Long}, {@code Number}) and checks the runtime
 * type found in the first row of each column afterwards. The columns
 * given {@code null} or no target at all are expected to stay strings.
 */
@Test
public void testConvertColumns() {
    final String[] columnNames = { "string", "long", "double", "date", "bool" };
    final Class<?>[] expectedTypes = {
        String.class, Long.class, Double.class, String.class, String.class
    };

    df.convert(null, Long.class, Number.class);

    // columnNames[i] pairs with expectedTypes[i]; checked in declaration order.
    for (int i = 0; i < columnNames.length; i++) {
        assertEquals(
            expectedTypes[i],
            df.get("row1", columnNames[i]).getClass()
        );
    }
}
/**
 * Converts the fixture and copies it into a caller-supplied
 * {@code Object[][]}, expecting one inner array per row holding the
 * converted string, long, double, date, boolean and null cells.
 *
 * @throws ParseException if one of the expected date literals cannot be parsed
 */
@Test
public void testTwoDimensionalToArray()
throws ParseException {
    final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
    final Object[][] expected = {
        new Object[] { "one", 1L, 1.1, sdf.parse("2014-01-01"), true, null },
        new Object[] { "two", 2L, 2.2, sdf.parse("2014-01-02"), true, null },
        new Object[] { "three", 3L, 3.3, sdf.parse("2014-01-03"), false, null },
        new Object[] { "four", 4L, 4.4, sdf.parse("2014-01-04"), false, null },
        new Object[] { "five", 5L, 5.5, sdf.parse("2014-01-05"), true, null },
        new Object[] { "six", 6L, 6.6, sdf.parse("2014-01-06"), false, null }
    };
    // JUnit takes the expected value first; passing it second would make
    // a failure message report the two values the wrong way round.
    assertArrayEquals(
        expected,
        df.convert().toArray(new Object[df.length()][df.size()])
    );
}
/**
 * Converts the fixture and requests it as {@code Object[][]} by class
 * token, expecting one inner array per row holding the converted
 * string, long, double, date, boolean and null cells.
 *
 * @throws ParseException if one of the expected date literals cannot be parsed
 */
@Test
public void testToArrayType()
throws ParseException {
    final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
    final Object[][] expected = {
        new Object[] { "one", 1L, 1.1, sdf.parse("2014-01-01"), true, null },
        new Object[] { "two", 2L, 2.2, sdf.parse("2014-01-02"), true, null },
        new Object[] { "three", 3L, 3.3, sdf.parse("2014-01-03"), false, null },
        new Object[] { "four", 4L, 4.4, sdf.parse("2014-01-04"), false, null },
        new Object[] { "five", 5L, 5.5, sdf.parse("2014-01-05"), true, null },
        new Object[] { "six", 6L, 6.6, sdf.parse("2014-01-06"), false, null }
    };
    // JUnit takes the expected value first; passing it second would make
    // a failure message report the two values the wrong way round.
    assertArrayEquals(
        expected,
        df.convert().toArray(Object[][].class)
    );
}
/**
 * Requesting the unconverted fixture as {@code Double[][]} is expected
 * to be rejected with {@link IllegalArgumentException}.
 */
@Test(expected=IllegalArgumentException.class)
public void testToArrayTypeInvalid() {
    final Class<Double[][]> requestedType = Double[][].class;
    df.toArray(requestedType);
}
/**
 * Converts the fixture, keeps its numeric view and requests it as a
 * primitive {@code double[][]}. The expected matrix has one row per
 * fixture row containing the long value (widened to double) and the
 * double value.
 */
@Test
public void testToArrayPrimitiveType() {
    final double[][] expected = {
        new double[] { 1.0, 1.1 },
        new double[] { 2.0, 2.2 },
        new double[] { 3.0, 3.3 },
        new double[] { 4.0, 4.4 },
        new double[] { 5.0, 5.5 },
        new double[] { 6.0, 6.6 }
    };
    // JUnit takes the expected value first; passing it second would make
    // a failure message report the two values the wrong way round.
    assertArrayEquals(
        expected,
        df.convert().numeric().toArray(double[][].class)
    );
}
/**
 * Builds a model matrix with an intercept from {@code serialization.csv}
 * and checks every cell: an all-ones intercept column, the dummy
 * columns, and finally the untouched numeric value column.
 *
 * @throws IOException if the CSV resource cannot be read
 */
@Test
public void testToModelMatrixWithIntercept() throws IOException {
    final DataFrame<Object> csv = DataFrame.readCsv(
        ClassLoader.getSystemResourceAsStream("serialization.csv"));
    assertEquals(3, csv.columns().size());

    final DataFrame<Number> mm = Conversion.toModelMatrixDataFrame(csv, null, true, null, null);

    // intercept + 2 category dummies + 5 name dummies + value column
    final int expectedColNos = 1 + 2 + 5 + 1;
    assertEquals(expectedColNos, mm.columns().size());

    // One entry per matrix column (left to right), each listing rows 0..5.
    final double[][] leadingColumns = {
        { 1.0, 1.0, 1.0, 1.0, 1.0, 1.0 },  // intercept
        { 1.0, 1.0, 0.0, 0.0, 0.0, 0.0 },  // category "a"
        { 0.0, 0.0, 1.0, 1.0, 0.0, 0.0 },  // category "b"
        { 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 },  // name "alpha"
        { 0.0, 1.0, 0.0, 0.0, 0.0, 0.0 },  // name "bravo"
        { 0.0, 0.0, 1.0, 0.0, 0.0, 0.0 },  // name "charlie"
        { 0.0, 0.0, 0.0, 1.0, 0.0, 0.0 },  // name "delta"
        { 0.0, 0.0, 0.0, 0.0, 1.0, 0.0 }   // name "echo"
    };
    for (int col = 0; col < leadingColumns.length; col++) {
        for (int row = 0; row < leadingColumns[col].length; row++) {
            assertEquals(leadingColumns[col][row], mm.get(row, col));
        }
    }

    // The value column is compared as longs, not doubles.
    final long[] values = { 1L, 2L, 3L, 4L, 5L, 6L };
    for (int row = 0; row < values.length; row++) {
        assertEquals(values[row], mm.get(row, 8));
    }
}
/**
 * Builds a model matrix without intercept and without explicit
 * reference levels from {@code serialization.csv} and checks every
 * cell: the dummy columns first, then the numeric value column.
 *
 * @throws IOException if the CSV resource cannot be read
 */
@Test
public void testToModelMatrix() throws IOException {
    final DataFrame<Object> csv = DataFrame.readCsv(
        ClassLoader.getSystemResourceAsStream("serialization.csv"));
    assertEquals(3, csv.columns().size());

    final DataFrame<Number> mm = Conversion.toModelMatrixDataFrame(csv, null, false, null, null);

    // 2 category dummies + 5 name dummies + value column
    final int expectedColNos = 2 + 5 + 1;
    assertEquals(expectedColNos, mm.columns().size());

    // One entry per matrix column (left to right), each listing rows 0..5.
    final double[][] dummyColumns = {
        { 1.0, 1.0, 0.0, 0.0, 0.0, 0.0 },  // category "a"
        { 0.0, 0.0, 1.0, 1.0, 0.0, 0.0 },  // category "b"
        { 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 },  // name "alpha"
        { 0.0, 1.0, 0.0, 0.0, 0.0, 0.0 },  // name "bravo"
        { 0.0, 0.0, 1.0, 0.0, 0.0, 0.0 },  // name "charlie"
        { 0.0, 0.0, 0.0, 1.0, 0.0, 0.0 },  // name "delta"
        { 0.0, 0.0, 0.0, 0.0, 1.0, 0.0 }   // name "echo"
    };
    for (int col = 0; col < dummyColumns.length; col++) {
        for (int row = 0; row < dummyColumns[col].length; row++) {
            assertEquals(dummyColumns[col][row], mm.get(row, col));
        }
    }

    // The value column is compared as longs, not doubles.
    final long[] values = { 1L, 2L, 3L, 4L, 5L, 6L };
    for (int row = 0; row < values.length; row++) {
        assertEquals(values[row], mm.get(row, 7));
    }
}
/**
 * Builds a model matrix from {@code serialization.csv} with a reference
 * level named for both factor columns ("a" for category, "bravo" for
 * name) and checks every cell of the result. The reference levels are
 * expected to have no dummy column of their own.
 *
 * @throws IOException if the CSV resource cannot be read
 */
@Test
public void testToModelMatrixWithReferenceFactorOnAll() throws IOException {
    final DataFrame<Object> csv = DataFrame.readCsv(
        ClassLoader.getSystemResourceAsStream("serialization.csv"));
    assertEquals(3, csv.columns().size());

    final Map<String,String> references = new TreeMap<>();
    references.put("category", "a");
    references.put("name", "bravo");
    final DataFrame<Number> mm = Conversion.toModelMatrixDataFrame(csv, null, false, references, null);

    // 2 category dummies + 5 name dummies + value column
    final int expectedColNos = 2 + 5 + 1;
    assertEquals(expectedColNos, mm.columns().size());

    // One entry per matrix column (left to right), each listing rows 0..5.
    final double[][] dummyColumns = {
        { 0.0, 0.0, 1.0, 1.0, 0.0, 0.0 },  // category "b" ("a" is the reference)
        { 0.0, 0.0, 0.0, 0.0, 1.0, 1.0 },  // category "c"
        { 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 },  // name "alpha"
        { 0.0, 0.0, 1.0, 0.0, 0.0, 0.0 },  // name "charlie" ("bravo" is the reference)
        { 0.0, 0.0, 0.0, 1.0, 0.0, 0.0 },  // name "delta"
        { 0.0, 0.0, 0.0, 0.0, 1.0, 0.0 },  // name "echo"
        { 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 }   // name "foxtrot"
    };
    for (int col = 0; col < dummyColumns.length; col++) {
        for (int row = 0; row < dummyColumns[col].length; row++) {
            assertEquals(dummyColumns[col][row], mm.get(row, col));
        }
    }

    // The value column is compared as longs, not doubles.
    final long[] values = { 1L, 2L, 3L, 4L, 5L, 6L };
    for (int row = 0; row < values.length; row++) {
        assertEquals(values[row], mm.get(row, 7));
    }
}
/**
 * Builds a model matrix from {@code serialization.csv} with a reference
 * level given only for the name column ("bravo") and checks every cell
 * of the result.
 *
 * @throws IOException if the CSV resource cannot be read
 */
@Test
public void testToModelMatrixWithReferenceFactorOnOne() throws IOException {
    final DataFrame<Object> csv = DataFrame.readCsv(
        ClassLoader.getSystemResourceAsStream("serialization.csv"));
    assertEquals(3, csv.columns().size());

    final Map<String,String> references = new TreeMap<>();
    references.put("name", "bravo");
    final DataFrame<Number> mm = Conversion.toModelMatrixDataFrame(csv, null, false, references, null);

    // 2 category dummies + 5 name dummies + value column
    final int expectedColNos = 2 + 5 + 1;
    assertEquals(expectedColNos, mm.columns().size());

    // One entry per matrix column (left to right), each listing rows 0..5.
    final double[][] dummyColumns = {
        { 1.0, 1.0, 0.0, 0.0, 0.0, 0.0 },  // category "a"
        { 0.0, 0.0, 1.0, 1.0, 0.0, 0.0 },  // category "b"
        { 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 },  // name "alpha"
        { 0.0, 0.0, 1.0, 0.0, 0.0, 0.0 },  // name "charlie" ("bravo" is the reference)
        { 0.0, 0.0, 0.0, 1.0, 0.0, 0.0 },  // name "delta"
        { 0.0, 0.0, 0.0, 0.0, 1.0, 0.0 },  // name "echo"
        { 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 }   // name "foxtrot"
    };
    for (int col = 0; col < dummyColumns.length; col++) {
        for (int row = 0; row < dummyColumns[col].length; row++) {
            assertEquals(dummyColumns[col][row], mm.get(row, col));
        }
    }

    // The value column is compared as longs, not doubles.
    final long[] values = { 1L, 2L, 3L, 4L, 5L, 6L };
    for (int row = 0; row < values.length; row++) {
        assertEquals(values[row], mm.get(row, 7));
    }
}
/**
 * Asks for "gustav" as the reference level of the name column and
 * expects the model-matrix construction to be rejected with
 * {@link IllegalArgumentException} (presumably because no such level
 * exists in {@code serialization.csv}).
 *
 * @throws IOException if the CSV resource cannot be read
 */
@Test(expected=IllegalArgumentException.class)
public void testToModelMatrixWithWrongReference() throws IOException {
    final DataFrame<Object> csv = DataFrame.readCsv(
        ClassLoader.getSystemResourceAsStream("serialization.csv"));
    assertEquals(3, csv.columns().size());

    final Map<String,String> references = new TreeMap<>();
    references.put("name", "gustav");
    Conversion.toModelMatrixDataFrame(csv, null, false, references, null);
}
/**
 * Builds a model matrix from {@code serialization_missing_data.csv}
 * with "charlie" as the reference level of the name column and checks
 * every cell. The missing name is expected to surface as its own "NA"
 * dummy column ahead of the other name dummies.
 *
 * @throws IOException if the CSV resource cannot be read
 */
@Test
public void testToModelMatrixWithReferenceFactorOnOneMissingData() throws IOException {
    final DataFrame<Object> csv = DataFrame.readCsv(
        ClassLoader.getSystemResourceAsStream("serialization_missing_data.csv"));
    assertEquals(3, csv.columns().size());

    final Map<String,String> references = new TreeMap<>();
    references.put("name", "charlie");
    final DataFrame<Number> mm = Conversion.toModelMatrixDataFrame(csv, null, false, references, null);

    // 2 category dummies + 5 name dummies + value column
    final int expectedColNos = 2 + 5 + 1;
    assertEquals(expectedColNos, mm.columns().size());

    // One entry per matrix column (left to right), each listing rows 0..5.
    final double[][] dummyColumns = {
        { 1.0, 1.0, 0.0, 0.0, 0.0, 0.0 },  // category "a"
        { 0.0, 0.0, 1.0, 1.0, 0.0, 0.0 },  // category "b"
        { 0.0, 1.0, 0.0, 0.0, 0.0, 0.0 },  // name "NA" (the missing entry)
        { 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 },  // name "alpha"
        { 0.0, 0.0, 0.0, 1.0, 0.0, 0.0 },  // name "delta" ("charlie" is the reference)
        { 0.0, 0.0, 0.0, 0.0, 1.0, 0.0 },  // name "echo"
        { 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 }   // name "foxtrot"
    };
    for (int col = 0; col < dummyColumns.length; col++) {
        for (int row = 0; row < dummyColumns[col].length; row++) {
            assertEquals(dummyColumns[col][row], mm.get(row, col));
        }
    }

    // The value column is compared as longs, not doubles.
    final long[] values = { 1L, 2L, 3L, 4L, 5L, 6L };
    for (int row = 0; row < values.length; row++) {
        assertEquals(values[row], mm.get(row, 7));
    }
}
/**
 * Builds a model matrix from {@code serialization_missing_data.csv}
 * with "NA" as the reference level of the name column and checks every
 * cell. With "NA" as the reference, the row whose name is missing is
 * expected to be all zeros across the name dummies.
 *
 * @throws IOException if the CSV resource cannot be read
 */
@Test
public void testToModelMatrixWithReferenceFactorNAOnOneMissingData() throws IOException {
    final DataFrame<Object> csv = DataFrame.readCsv(
        ClassLoader.getSystemResourceAsStream("serialization_missing_data.csv"));
    assertEquals(3, csv.columns().size());

    final Map<String,String> references = new TreeMap<>();
    references.put("name", "NA");
    final DataFrame<Number> mm = Conversion.toModelMatrixDataFrame(csv, null, false, references, null);

    // 2 category dummies + 5 name dummies + value column
    final int expectedColNos = 2 + 5 + 1;
    assertEquals(expectedColNos, mm.columns().size());

    // One entry per matrix column (left to right), each listing rows 0..5.
    final double[][] dummyColumns = {
        { 1.0, 1.0, 0.0, 0.0, 0.0, 0.0 },  // category "a"
        { 0.0, 0.0, 1.0, 1.0, 0.0, 0.0 },  // category "b"
        { 1.0, 0.0, 0.0, 0.0, 0.0, 0.0 },  // name "alpha"
        { 0.0, 0.0, 1.0, 0.0, 0.0, 0.0 },  // name "charlie" ("NA" is the reference)
        { 0.0, 0.0, 0.0, 1.0, 0.0, 0.0 },  // name "delta"
        { 0.0, 0.0, 0.0, 0.0, 1.0, 0.0 },  // name "echo"
        { 0.0, 0.0, 0.0, 0.0, 0.0, 1.0 }   // name "foxtrot"
    };
    for (int col = 0; col < dummyColumns.length; col++) {
        for (int row = 0; row < dummyColumns[col].length; row++) {
            assertEquals(dummyColumns[col][row], mm.get(row, col));
        }
    }

    // The value column is compared as longs, not doubles.
    final long[] values = { 1L, 2L, 3L, 4L, 5L, 6L };
    for (int row = 0; row < values.length; row++) {
        assertEquals(values[row], mm.get(row, 7));
    }
}
}