/*
* Joinery -- Data frames for Java
* Copyright (c) 2014, 2015 IBM Corp.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package joinery;
import static org.hamcrest.CoreMatchers.containsString;
import static org.hamcrest.CoreMatchers.not;
import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertThat;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.Arrays;
import java.util.Date;
import org.junit.Before;
import org.junit.Test;
public class DataFrameSerializationTest {
private DataFrame<Object> df;
@Before
public void setUp()
throws Exception {
df = DataFrame.readCsv(ClassLoader.getSystemResourceAsStream("serialization.csv"));
}
@Test(expected=FileNotFoundException.class)
public void testReadCsvString()
throws IOException {
DataFrame.readCsv("does-not-exist.csv");
}
@Test
public void testReadCsvInputStream() {
final Object[][] expected = new Object[][] {
new Object[] { "a", "a", "b", "b", "c", "c" },
new Object[] { "alpha", "bravo", "charlie", "delta", "echo", "foxtrot" },
new Object[] { 1L, 2L, 3L, 4L, 5L, 6L }
};
for (int i = 0; i < expected.length; i++) {
assertArrayEquals(
expected[i],
df.col(i).toArray()
);
}
}
@Test
public void testReadCsvNAInputStream() throws IOException {
DataFrame<Object> nadf = DataFrame.readCsv(ClassLoader.getSystemResourceAsStream("serialization_wNA.csv"), ",", "NA");
final Object[][] expected = new Object[][] {
new Object[] { "a", "a", "b", "b", "c", "c" },
new Object[] { "alpha", null, "charlie", "delta", "echo", "foxtrot" },
new Object[] { 1L, 2L, 3L, null, 5L, 6L }
};
for (int i = 0; i < expected.length; i++) {
assertArrayEquals(
expected[i],
nadf.col(i).toArray()
);
}
}
@Test
public void testReadCsvNoHeaderInputStream() throws IOException {
DataFrame<Object> df_noHeader = DataFrame.readCsv(ClassLoader.getSystemResourceAsStream("serialization_no_header.csv"), ",", "NA", false);
final Object[][] expected = new Object[][] {
new Object[] { "a", "a", "b", "b", "c", "c" },
new Object[] { "alpha", "bravo", "charlie", "delta", "echo", "foxtrot" },
new Object[] { 1L, 2L, 3L, 4L, 5L, 6L }
};
for (int i = 0; i < expected.length; i++) {
assertArrayEquals(
expected[i],
df_noHeader.col(i).toArray()
);
}
}
@Test
public void testReadCsvSemicolonInputStream() throws IOException {
DataFrame<Object> cdf = DataFrame.readCsv(ClassLoader.getSystemResourceAsStream("serialization_semicolon.csv"), ";");
final Object[][] expected = new Object[][] {
new Object[] { "a", "a", "b", "b", "c", "c" },
new Object[] { "alpha", "bravo", "charlie", "delta", "echo", "foxtrot" },
new Object[] { 1L, 2L, 3L, 4L, 5L, 6L }
};
for (int i = 0; i < expected.length; i++) {
assertArrayEquals(
expected[i],
cdf.col(i).toArray()
);
}
}
@Test
public void testReadCsvTabInputStream() throws IOException {
DataFrame<Object> cdf = DataFrame.readCsv(ClassLoader.getSystemResourceAsStream("serialization_tab.csv"), "\\t");
final Object[][] expected = new Object[][] {
new Object[] { "a", "a", "b", "b", "c", "c" },
new Object[] { "alpha", "bravo", "charlie", "delta", "echo", "foxtrot" },
new Object[] { 1L, 2L, 3L, 4L, 5L, 6L }
};
for (int i = 0; i < expected.length; i++) {
assertArrayEquals(
expected[i],
cdf.col(i).toArray()
);
}
}
@Test
public void testWriteCsvString()
throws IOException {
final File tmp = File.createTempFile(getClass().getName(), ".csv");
tmp.deleteOnExit();
df.writeCsv(tmp.getPath());
assertTrue(tmp.length() > 64);
}
@Test
public void testWriteCsvInputStream()
throws IOException {
final File tmp = File.createTempFile(getClass().getName(), ".csv");
tmp.deleteOnExit();
df.writeCsv(new FileOutputStream(tmp));
assertTrue(tmp.length() > 64);
}
@Test
public void testReadWriteCsvTypes()
throws IOException {
final File tmp = File.createTempFile(getClass().getName(), ".csv");
tmp.deleteOnExit();
final DataFrame<Object> original = new DataFrame<>("date", "long", "double", "bool", "string");
original.append(Arrays.asList(new Date(), 1L, 1.0, true, "test"));
original.writeCsv(tmp.getPath());
assertArrayEquals(
original.types().toArray(),
DataFrame.readCsv(tmp.getPath()).types().toArray()
);
}
@Test
public void testWriteCsvNonStringIndex()
throws IOException {
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final DataFrame<Object> df = new DataFrame<>(Arrays.asList(1L, 2L, 3L, 4L));
df.append(Arrays.asList(1, 2, 3, 4));
df.writeCsv(out);
assertTrue("writeCsv does not throw due to non-string indices", true);
}
@Test(expected=FileNotFoundException.class)
public void testReadXlsString()
throws IOException {
DataFrame.readXls("does-not-exist.xls");
}
@Test
public void testReadXlsInputStream()
throws IOException {
final DataFrame<Object> df = DataFrame.readXls(ClassLoader.getSystemResourceAsStream("serialization.xls"));
final Object[][] expected = new Object[][] {
new Object[] { "a", "a", "b", "b", "c", "c" },
new Object[] { "alpha", "bravo", "charlie", "delta", "echo", "foxtrot" },
new Object[] { 1.0, 2.0, 3.0, 4.0, 5.0, 6.0 }
};
for (int i = 0; i < expected.length; i++) {
assertArrayEquals(
expected[i],
df.col(i).toArray()
);
}
}
@Test
public void testWriteXlsString()
throws IOException {
final DataFrame<Object> df = DataFrame.readXls(ClassLoader.getSystemResourceAsStream("serialization.xls"));
final File tmp = File.createTempFile(getClass().getName(), ".xls");
tmp.deleteOnExit();
df.writeXls(tmp.getPath());
assertTrue(tmp.length() > 1024);
}
@Test
public void testWriteXlsInputStream()
throws IOException {
final DataFrame<Object> df = DataFrame.readXls(ClassLoader.getSystemResourceAsStream("serialization.xls"));
final File tmp = File.createTempFile(getClass().getName(), ".xls");
tmp.deleteOnExit();
df.writeXls(new FileOutputStream(tmp));
assertTrue(tmp.length() > 1024);
}
@Test
public void testReadWriteXlsTypes()
throws IOException {
final File tmp = File.createTempFile(getClass().getName(), ".xls");
tmp.deleteOnExit();
final DataFrame<Object> original = new DataFrame<>("date", "double", "bool", "string");
original.append(Arrays.asList(new Date(), 1.0, true, "test"));
original.writeXls(tmp.getPath());
assertArrayEquals(
original.types().toArray(),
DataFrame.readXls(tmp.getPath()).types().toArray()
);
}
@Test
public void testWriteXlsNonStringIndex()
throws IOException {
final ByteArrayOutputStream out = new ByteArrayOutputStream();
final DataFrame<Object> df = new DataFrame<>(Arrays.asList(1L, 2L, 3L, 4L));
df.append(Arrays.asList(1, 2, 3, 4));
df.writeXls(out);
assertTrue("writeXls does not throw due to non-string indices", true);
}
@Test
public void testToStringInt() {
assertThat(
df.toString(2),
containsString(String.format("... %d rows skipped ...", df.length() - 2))
);
assertEquals(
6,
df.toString(2).split("\n").length
);
}
@Test
public void testToString() {
assertThat(
df.toString(),
not(containsString("..."))
);
assertEquals(
7,
df.toString().split("\n").length
);
}
@Test
public void testToStringEmptyHeader() throws IOException {
DataFrame<Object> dfEmptyHeader = DataFrame.readCsv(ClassLoader.getSystemResourceAsStream("serialization_empty_header.csv"));
dfEmptyHeader.transpose().toString();
}
}