/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.studio.io.data.internal.file.csv;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.text.SimpleDateFormat;
import java.util.NoSuchElementException;
import org.junit.BeforeClass;
import org.junit.Test;
import com.rapidminer.core.io.data.DataSetException;
import com.rapidminer.core.io.data.DataSetRow;
import com.rapidminer.core.io.data.ParseException;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.nio.model.CSVResultSetConfiguration;
import com.rapidminer.operator.nio.model.DataResultSet;
import com.rapidminer.studio.io.data.internal.ResultSetAdapter;
import com.rapidminer.studio.io.data.internal.file.FileDataSourceTestUtils;
import com.rapidminer.studio.io.data.internal.file.csv.CSVDataSource;
import com.rapidminer.studio.io.data.internal.file.csv.CSVResultSetAdapter;
/**
 * A test case for the {@link CSVResultSetAdapter}.
 * <p>
 * All tests read one of two CSV resources located next to this class ({@code resultSetTest.csv}
 * and {@code missingsResultSetTest.csv}), wrap them into a {@link CSVResultSetAdapter} via
 * {@link #makeResultSet(CSVResultSetConfiguration, int, int)} and then check row indices and cell
 * contents while iterating.
 *
 * @author Nils Woehler, Gisa Schaefer
 *
 */
public class CSVResultSetAdapterTest {

    /** CSV resource with six columns used by most tests. */
    private static File simpleTestFile;

    /** CSV resource with four columns that contains missing values. */
    private static File missingsTestFile;

    /**
     * Resolves both CSV test resources relative to this class.
     *
     * @throws URISyntaxException
     *             if a resource URL cannot be converted to a URI
     */
    @BeforeClass
    public static void setup() throws URISyntaxException {
        simpleTestFile = new File(CSVResultSetAdapterTest.class.getResource("resultSetTest.csv").toURI());
        missingsTestFile = new File(CSVResultSetAdapterTest.class.getResource("missingsResultSetTest.csv").toURI());
    }

    /**
     * Reads the simple test file without an end row, checks the row index after every
     * {@code nextRow()} call, the content of rows 0 and 10, the final row index and that
     * {@code reset()} puts the adapter back in front of the first row.
     */
    @Test
    public void testSimpleImport() throws DataSetException, OperatorException, URISyntaxException, ParseException {
        try (CSVResultSetConfiguration configuration = new CSVResultSetConfiguration()) {

            // configure data import
            configuration.setCsvFile(simpleTestFile.toString());
            configuration.setEncoding(StandardCharsets.UTF_8);

            try (CSVResultSetAdapter csvResultSet = makeResultSet(configuration, 0, ResultSetAdapter.NO_END_ROW)) {
                int index = -1;

                // NOTE(review): -1 presumably signals an unknown number of rows - confirm
                assertEquals(-1, csvResultSet.getNumberOfRows());
                assertEquals(6, csvResultSet.getNumberOfColumns());
                assertEquals(index, csvResultSet.getCurrentRowIndex());
                assertTrue(csvResultSet.hasNext());

                while (csvResultSet.hasNext()) {
                    DataSetRow row = csvResultSet.nextRow();

                    // check if index has changed
                    ++index;
                    assertEquals(index, csvResultSet.getCurrentRowIndex());

                    // check data content of 0th and 10th row
                    if (index == 0) {
                        assertFirstSheetFirstRowContent(row);
                    } else if (index == 10) {
                        assertEquals(4.9, row.getDouble(0), 1e-10);
                        assertEquals(3.1, row.getDouble(1), 1e-10);
                        assertEquals(1.5, row.getDouble(2), 1e-10);
                        assertEquals(.1, row.getDouble(3), 1e-10);
                        assertEquals("id_10", row.getString(4));
                        assertEquals("Iris-setosa", row.getString(5));
                    }
                }

                assertEquals(150, csvResultSet.getCurrentRowIndex());

                // check reset
                csvResultSet.reset();
                assertEquals(-1, csvResultSet.getCurrentRowIndex());
                assertTrue(csvResultSet.hasNext());
                assertFirstSheetFirstRowContent(csvResultSet.nextRow());
            }
        }
    }

    /**
     * Asserts the string content of all six cells of the first row of the simple test file.
     *
     * @param row
     *            the row expected to be the first row of the simple test file
     * @throws ParseException
     *             if a cell cannot be read as string
     */
    private void assertFirstSheetFirstRowContent(DataSetRow row) throws ParseException {
        assertEquals("a1", row.getString(0));
        assertEquals("a2", row.getString(1));
        assertEquals("a3133333333333333331311313", row.getString(2));
        assertEquals("a4", row.getString(3));
        assertEquals("id", row.getString(4));
        assertEquals(FileDataSourceTestUtils.getUtf8Label(), row.getString(5));
    }

    /**
     * Reads the missings test file with a data source whose date format is {@code dd.MM.yyyy}
     * and checks that column 3 yields parsed dates for rows 1 and 4 and a missing value for row 3.
     */
    @Test
    public void testDateImport()
            throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
        try (CSVResultSetConfiguration configuration = new CSVResultSetConfiguration()) {
            String datePattern = "dd.MM.yyyy";

            // configure data import
            configuration.setCsvFile(missingsTestFile.toString());
            CSVDataSource dataSource = new CSVDataSource();
            dataSource.getMetadata().setDateFormat(new SimpleDateFormat(datePattern));

            // separate formatter instance used only to build the expected dates
            SimpleDateFormat dateFormat = new SimpleDateFormat(datePattern);

            try (CSVResultSetAdapter csvResultSet = makeResultSet(configuration, 0, 10, dataSource)) {
                int index = -1;

                assertEquals(4, csvResultSet.getNumberOfColumns());
                assertEquals(index, csvResultSet.getCurrentRowIndex());
                assertTrue(csvResultSet.hasNext());

                while (csvResultSet.hasNext()) {
                    DataSetRow row = csvResultSet.nextRow();

                    // check if index has changed
                    ++index;
                    assertEquals(index, csvResultSet.getCurrentRowIndex());

                    if (index == 1) {
                        assertFalse(row.isMissing(3));
                        assertEquals(dateFormat.parse("01.01.1876"), row.getDate(3));
                    } else if (index == 3) {
                        assertTrue(row.isMissing(3));
                        assertEquals(null, row.getDate(3));
                    } else if (index == 4) {
                        assertFalse(row.isMissing(3));
                        assertEquals(dateFormat.parse("02.01.1923"), row.getDate(3));
                        // all rows of interest have been checked
                        return;
                    }
                }

                // without this guard the test would pass silently if row 4 was never delivered
                throw new AssertionError("Row with index 4 was never reached");
            }
        }
    }

    /**
     * Reads a cell of the simple test file as date using a default {@link CSVDataSource} (no date
     * format configured by this test) and expects a {@link ParseException}.
     */
    @Test(expected = ParseException.class)
    public void testDateImportWithoutDatePattern()
            throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
        try (CSVResultSetConfiguration configuration = new CSVResultSetConfiguration()) {

            // configure data import
            configuration.setCsvFile(simpleTestFile.toString());

            String datePattern = "dd.MM.yyyy";
            SimpleDateFormat dateFormat = new SimpleDateFormat(datePattern);

            try (CSVResultSetAdapter csvResultSet = makeResultSet(configuration, 0, 10)) {
                int index = -1;

                assertEquals(6, csvResultSet.getNumberOfColumns());
                assertEquals(index, csvResultSet.getCurrentRowIndex());
                assertTrue(csvResultSet.hasNext());

                while (csvResultSet.hasNext()) {
                    DataSetRow row = csvResultSet.nextRow();

                    // check if index has changed
                    ++index;
                    assertEquals(index, csvResultSet.getCurrentRowIndex());

                    if (index == 1) {
                        assertFalse(row.isMissing(3));
                        // dateFormat.parse could only throw java.text.ParseException, which is a
                        // different type than the expected one; the expected exception is
                        // therefore presumably raised by row.getDate(3)
                        assertEquals(dateFormat.parse("01.01.1876"), row.getDate(3));
                        return;
                    }
                }
            }
        }
    }

    /**
     * Reads the date column (column 3) of the missings test file as plain string and checks the
     * value of row 1.
     */
    @Test
    public void testDateImportAsString()
            throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
        try (CSVResultSetConfiguration configuration = new CSVResultSetConfiguration()) {

            // configure data import
            configuration.setCsvFile(missingsTestFile.toString());

            try (CSVResultSetAdapter csvResultSet = makeResultSet(configuration, 0, 10)) {
                int index = -1;

                assertEquals(4, csvResultSet.getNumberOfColumns());
                assertEquals(index, csvResultSet.getCurrentRowIndex());
                assertTrue(csvResultSet.hasNext());

                while (csvResultSet.hasNext()) {
                    DataSetRow row = csvResultSet.nextRow();

                    // check if index has changed
                    ++index;
                    assertEquals(index, csvResultSet.getCurrentRowIndex());

                    if (index == 1) {
                        assertFalse(row.isMissing(3));
                        assertEquals("01.01.1876", row.getString(3));
                        return;
                    }
                }

                // without this guard the test would pass silently if row 1 was never delivered
                throw new AssertionError("Row with index 1 was never reached");
            }
        }
    }

    /**
     * Checks how missing values of the missings test file are reported: a missing number is read
     * as {@code NaN}, a missing date and a missing string are read as {@code null}.
     */
    @Test
    public void testImportMissings()
            throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
        try (CSVResultSetConfiguration configuration = new CSVResultSetConfiguration()) {

            // configure data import
            configuration.setCsvFile(missingsTestFile.toString());

            try (CSVResultSetAdapter csvResultSet = makeResultSet(configuration, 0, 10)) {
                int index = -1;

                assertEquals(4, csvResultSet.getNumberOfColumns());
                assertEquals(index, csvResultSet.getCurrentRowIndex());
                assertTrue(csvResultSet.hasNext());

                while (csvResultSet.hasNext()) {
                    DataSetRow row = csvResultSet.nextRow();

                    // check if index has changed
                    ++index;
                    assertEquals(index, csvResultSet.getCurrentRowIndex());

                    if (index == 3) {
                        // missing numerical
                        assertTrue(row.isMissing(0));
                        assertEquals(Double.NaN, row.getDouble(0), 1e-10);

                        // missing date
                        assertTrue(row.isMissing(3));
                        assertEquals(null, row.getDate(3));
                    } else if (index == 6) {
                        // missing string
                        assertTrue(row.isMissing(2));
                        assertEquals(null, row.getString(2));
                    }
                }
            }
        }
    }

    /**
     * Reads the simple test file with start row 4 and end row 10 and checks that adapter row 0
     * carries the id {@code id_4} and adapter row 6 carries the id {@code id_10}.
     */
    @Test
    public void testFithRowAsStartRow()
            throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
        try (CSVResultSetConfiguration configuration = new CSVResultSetConfiguration()) {

            // configure data import
            configuration.setCsvFile(simpleTestFile.toString());

            try (CSVResultSetAdapter csvResultSet = makeResultSet(configuration, 4, 10)) {
                int index = -1;

                assertEquals(6, csvResultSet.getNumberOfColumns());
                assertEquals(index, csvResultSet.getCurrentRowIndex());
                assertTrue(csvResultSet.hasNext());

                while (csvResultSet.hasNext()) {
                    DataSetRow row = csvResultSet.nextRow();

                    // check if index has changed
                    ++index;
                    assertEquals(index, csvResultSet.getCurrentRowIndex());

                    if (index == 0) {
                        assertFalse(row.isMissing(0));
                        assertEquals(4.6, row.getDouble(0), 1e-10);
                        // check the same column that is read on the next line
                        assertFalse(row.isMissing(4));
                        assertEquals("id_4", row.getString(4));
                    } else if (index == 6) {
                        assertFalse(row.isMissing(0));
                        assertEquals(4.9, row.getDouble(0), 1e-10);
                        assertFalse(row.isMissing(4));
                        assertEquals("id_10", row.getString(4));
                    }
                }
            }
        }
    }

    /**
     * Reads the simple test file with start row 4 and end row 8 and checks that adapter row 0
     * carries the id {@code id_4} and adapter row 4 carries the id {@code id_8}.
     */
    @Test
    public void testFithRowAsStartAndNinthRowAsEndRow()
            throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
        try (CSVResultSetConfiguration configuration = new CSVResultSetConfiguration()) {

            // configure data import
            configuration.setCsvFile(simpleTestFile.toString());

            try (CSVResultSetAdapter csvResultSet = makeResultSet(configuration, 4, 8)) {
                int index = -1;

                assertEquals(6, csvResultSet.getNumberOfColumns());
                assertEquals(index, csvResultSet.getCurrentRowIndex());
                assertTrue(csvResultSet.hasNext());

                while (csvResultSet.hasNext()) {
                    DataSetRow row = csvResultSet.nextRow();

                    // check if index has changed
                    ++index;
                    assertEquals(index, csvResultSet.getCurrentRowIndex());

                    if (index == 0) {
                        assertFalse(row.isMissing(0));
                        assertEquals(4.6, row.getDouble(0), 1e-10);
                        assertFalse(row.isMissing(4));
                        assertEquals("id_4", row.getString(4));
                    } else if (index == 4) {
                        assertFalse(row.isMissing(0));
                        assertEquals(5.0, row.getDouble(0), 1e-10);
                        assertFalse(row.isMissing(4));
                        assertEquals("id_8", row.getString(4));
                    }
                }
            }
        }
    }

    /**
     * Requests end row 20 on the missings test file and checks that iteration stops at row index
     * 10, i.e. that no more rows are delivered than the file provides.
     */
    @Test
    public void testEndRowBehindActualData()
            throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
        try (CSVResultSetConfiguration configuration = new CSVResultSetConfiguration()) {

            // configure data import
            configuration.setCsvFile(missingsTestFile.toString());

            try (CSVResultSetAdapter csvResultSet = makeResultSet(configuration, 0, 20)) {
                int index = -1;

                assertEquals(4, csvResultSet.getNumberOfColumns());
                assertEquals(index, csvResultSet.getCurrentRowIndex());
                assertTrue(csvResultSet.hasNext());

                while (csvResultSet.hasNext()) {
                    DataSetRow row = csvResultSet.nextRow();

                    // check if index has changed
                    ++index;
                    assertEquals(index, csvResultSet.getCurrentRowIndex());

                    if (index == 1) {
                        assertFalse(row.isMissing(0));
                        assertEquals(5.1, row.getDouble(0), 1e-10);
                        assertFalse(row.isMissing(1));
                        assertEquals("id_1", row.getString(1));
                    } else if (index == 10) {
                        assertFalse(row.isMissing(0));
                        assertEquals(4.9, row.getDouble(0), 1e-10);
                        assertFalse(row.isMissing(1));
                        assertEquals("id_2", row.getString(1));
                    }
                }

                // cannot read more data than available
                assertEquals(10, index);
            }
        }
    }

    /**
     * Accesses column index 6 of the six-column simple test file and expects an
     * {@link IndexOutOfBoundsException}.
     */
    @Test(expected = IndexOutOfBoundsException.class)
    public void testAccessOutOfColumnBoundsImport()
            throws DataSetException, OperatorException, URISyntaxException, ParseException {
        try (CSVResultSetConfiguration configuration = new CSVResultSetConfiguration()) {

            // configure data import
            configuration.setCsvFile(simpleTestFile.toString());

            try (CSVResultSetAdapter csvResultSet = makeResultSet(configuration, 0, 100)) {
                assertTrue(csvResultSet.hasNext());
                while (csvResultSet.hasNext()) {
                    csvResultSet.nextRow().getString(6);
                }
            }
        }
    }

    /**
     * Consumes all rows up to end row 100 and expects a {@link NoSuchElementException} when one
     * more row is requested afterwards.
     */
    @Test(expected = NoSuchElementException.class)
    public void testAccessNextRowOutOfBoundsImport()
            throws DataSetException, OperatorException, URISyntaxException, ParseException {
        try (CSVResultSetConfiguration configuration = new CSVResultSetConfiguration()) {

            // configure data import
            configuration.setCsvFile(simpleTestFile.toString());

            try (CSVResultSetAdapter csvResultSet = makeResultSet(configuration, 0, 100)) {
                assertTrue(csvResultSet.hasNext());
                while (csvResultSet.hasNext()) {
                    csvResultSet.nextRow();
                }

                // hasNext() returned false, so this call is expected to throw
                csvResultSet.nextRow();
            }
        }
    }

    /**
     * Creates an adapter for the given configuration using a fresh default {@link CSVDataSource}.
     *
     * @param configuration
     *            the CSV configuration pointing to the file to read
     * @param startRow
     *            the start row passed on to the adapter
     * @param endRow
     *            the end row passed on to the adapter
     * @return the new adapter; the caller is responsible for closing it
     */
    private CSVResultSetAdapter makeResultSet(CSVResultSetConfiguration configuration, int startRow, int endRow)
            throws DataSetException, OperatorException {
        return makeResultSet(configuration, startRow, endRow, new CSVDataSource());
    }

    /**
     * Creates an adapter for the given configuration and data source.
     *
     * @param configuration
     *            the CSV configuration pointing to the file to read
     * @param startRow
     *            the start row passed on to the adapter
     * @param endRow
     *            the end row passed on to the adapter
     * @param dataSource
     *            the data source handed to the adapter (e.g. carrying a date format in its metadata)
     * @return the new adapter; the caller is responsible for closing it
     */
    private CSVResultSetAdapter makeResultSet(CSVResultSetConfiguration configuration, int startRow, int endRow,
            CSVDataSource dataSource) throws DataSetException, OperatorException {
        // the tests pass no operator context, hence null
        DataResultSet dataResultSet = configuration.makeDataResultSet(null);
        return new CSVResultSetAdapter(dataSource, dataResultSet, startRow, endRow);
    }
}