/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.studio.io.data.internal.file.excel;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.File;
import java.net.URISyntaxException;
import java.text.SimpleDateFormat;
import java.util.NoSuchElementException;
import org.junit.BeforeClass;
import org.junit.Test;
import com.rapidminer.core.io.data.DataSetException;
import com.rapidminer.core.io.data.DataSetRow;
import com.rapidminer.core.io.data.ParseException;
import com.rapidminer.operator.OperatorException;
import com.rapidminer.operator.nio.model.DataResultSet;
import com.rapidminer.operator.nio.model.ExcelResultSetConfiguration;
import com.rapidminer.studio.io.data.internal.ResultSetAdapter;
import com.rapidminer.studio.io.data.internal.file.FileDataSourceTestUtils;
import com.rapidminer.studio.io.data.internal.file.excel.ExcelResultSetAdapter;
/**
* A test case for the {@link ExcelResultSetAdapter}.
*
* @author Nils Woehler
*
*/
public class ExcelResultSetAdapterTest {
private static File testFile;
@BeforeClass
public static void setup() throws URISyntaxException {
testFile = new File(ExcelResultSetAdapterTest.class.getResource("resultSetTest.xlsx").toURI());
}
@Test
public void testSimpleImport() throws DataSetException, OperatorException, URISyntaxException, ParseException {
try (ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration()) {
// configure data import
configuration.setWorkbookFile(testFile);
configuration.setSheet(0);
try (ExcelResultSetAdapter excelResultSet = makeResultSet(configuration, 0, ResultSetAdapter.NO_END_ROW)) {
int index = -1;
assertEquals(6, excelResultSet.getNumberOfColumns());
assertEquals(index, excelResultSet.getCurrentRowIndex());
assertTrue(excelResultSet.hasNext());
while (excelResultSet.hasNext()) {
DataSetRow row = excelResultSet.nextRow();
// check if index has changed
++index;
assertEquals(excelResultSet.getCurrentRowIndex(), index);
// check data content of first, 10th, and last row
if (index == 0) {
assertFirstSheetFirstRowContent(row);
} else if (index == 10) {
assertEquals(4.9, row.getDouble(0), 1e-10);
assertEquals(3.1, row.getDouble(1), 1e-10);
assertEquals(1.5, row.getDouble(2), 1e-10);
assertEquals(.1, row.getDouble(3), 1e-10);
assertEquals("id_10", row.getString(4));
assertEquals("Iris-setosa", row.getString(5));
} else if (index == 150) {
assertEquals(5.9, row.getDouble(0), 1e-10);
assertEquals(3.0, row.getDouble(1), 1e-10);
assertEquals(5.1, row.getDouble(2), 1e-10);
assertEquals(1.8, row.getDouble(3), 1e-10);
assertEquals("id_150", row.getString(4));
assertEquals("Iris-virginica", row.getString(5));
}
}
assertEquals(150, excelResultSet.getCurrentRowIndex());
// check reset
excelResultSet.reset();
assertEquals(excelResultSet.getCurrentRowIndex(), -1);
assertTrue(excelResultSet.hasNext());
assertFirstSheetFirstRowContent(excelResultSet.nextRow());
}
}
}
private void assertFirstSheetFirstRowContent(DataSetRow row) throws ParseException {
assertEquals("a1", row.getString(0));
assertEquals("a2", row.getString(1));
assertEquals("a3133333333333333331311313", row.getString(2));
assertEquals("a4", row.getString(3));
assertEquals("id", row.getString(4));
assertEquals(FileDataSourceTestUtils.getUtf8Label(), row.getString(5));
}
@Test
public void testSecondSheetImport() throws DataSetException, OperatorException, URISyntaxException, ParseException {
try (ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration()) {
// configure data import
configuration.setWorkbookFile(testFile);
configuration.setSheet(1);
try (ExcelResultSetAdapter excelResultSet = makeResultSet(configuration, 0, 100)) {
int index = -1;
assertEquals(3, excelResultSet.getNumberOfColumns());
assertEquals(index, excelResultSet.getCurrentRowIndex());
assertTrue(excelResultSet.hasNext());
while (excelResultSet.hasNext()) {
DataSetRow row = excelResultSet.nextRow();
// check if index has changed
++index;
assertEquals(excelResultSet.getCurrentRowIndex(), index);
if (index == 0) {
assertFalse(row.isMissing(0));
assertEquals("a1", row.getString(0));
assertFalse(row.isMissing(1));
assertEquals("id", row.getString(1));
assertFalse(row.isMissing(2));
assertEquals("label", row.getString(2));
} else if (index == 10) {
assertFalse(row.isMissing(0));
assertEquals(4.9, row.getDouble(0), 1e-10);
assertFalse(row.isMissing(1));
assertEquals("id_10", row.getString(1));
assertFalse(row.isMissing(2));
assertEquals("Iris-setosa", row.getString(2));
}
}
}
}
}
@Test
public void testDateImport()
throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
try (ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration()) {
String datePattern = "dd.MM.yyyy";
// configure data import
configuration.setWorkbookFile(testFile);
configuration.setSheet(2);
configuration.setDatePattern(datePattern);
SimpleDateFormat dateFormat = new SimpleDateFormat(datePattern);
try (ExcelResultSetAdapter excelResultSet = makeResultSet(configuration, 0, 10)) {
int index = -1;
assertEquals(4, excelResultSet.getNumberOfColumns());
assertEquals(index, excelResultSet.getCurrentRowIndex());
assertTrue(excelResultSet.hasNext());
while (excelResultSet.hasNext()) {
DataSetRow row = excelResultSet.nextRow();
// check if index has changed
++index;
assertEquals(excelResultSet.getCurrentRowIndex(), index);
if (index == 1) {
assertFalse(row.isMissing(3));
assertEquals(dateFormat.parse("01.01.1876"), row.getDate(3));
} else if (index == 3) {
assertTrue(row.isMissing(3));
assertEquals(null, row.getDate(3));
} else if (index == 4) {
assertFalse(row.isMissing(3));
assertEquals(dateFormat.parse("02.01.1923"), row.getDate(3));
return;
}
}
}
}
}
@Test(expected = ParseException.class)
public void testDateImportWithoutDatePattern()
throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
try (ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration()) {
// configure data import
configuration.setWorkbookFile(testFile);
configuration.setSheet(2);
String datePattern = "dd.MM.YYYY";
SimpleDateFormat dateFormat = new SimpleDateFormat(datePattern);
try (ExcelResultSetAdapter excelResultSet = makeResultSet(configuration, 0, 10)) {
int index = -1;
assertEquals(4, excelResultSet.getNumberOfColumns());
assertEquals(index, excelResultSet.getCurrentRowIndex());
assertTrue(excelResultSet.hasNext());
while (excelResultSet.hasNext()) {
DataSetRow row = excelResultSet.nextRow();
// check if index has changed
++index;
assertEquals(excelResultSet.getCurrentRowIndex(), index);
if (index == 1) {
assertFalse(row.isMissing(3));
assertEquals(dateFormat.parse("01.01.1876"), row.getDate(3));
return;
}
}
}
}
}
@Test
public void testDateImportAsString()
throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
try (ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration()) {
// configure data import
configuration.setWorkbookFile(testFile);
configuration.setSheet(2);
try (ExcelResultSetAdapter excelResultSet = makeResultSet(configuration, 0, 10)) {
int index = -1;
assertEquals(4, excelResultSet.getNumberOfColumns());
assertEquals(index, excelResultSet.getCurrentRowIndex());
assertTrue(excelResultSet.hasNext());
while (excelResultSet.hasNext()) {
DataSetRow row = excelResultSet.nextRow();
// check if index has changed
++index;
assertEquals(excelResultSet.getCurrentRowIndex(), index);
if (index == 1) {
assertFalse(row.isMissing(3));
assertEquals("01.01.1876", row.getString(3));
return;
}
}
}
}
}
@Test
public void testImportMissings()
throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
try (ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration()) {
// configure data import
configuration.setWorkbookFile(testFile);
configuration.setSheet(2);
try (ExcelResultSetAdapter excelResultSet = makeResultSet(configuration, 0, 10)) {
int index = -1;
assertEquals(4, excelResultSet.getNumberOfColumns());
assertEquals(index, excelResultSet.getCurrentRowIndex());
assertTrue(excelResultSet.hasNext());
while (excelResultSet.hasNext()) {
DataSetRow row = excelResultSet.nextRow();
// check if index has changed
++index;
assertEquals(excelResultSet.getCurrentRowIndex(), index);
if (index == 3) {
// missing numerical
assertTrue(row.isMissing(0));
assertEquals(Double.NaN, row.getDouble(0), 1e-10);
// missing date
assertTrue(row.isMissing(3));
assertEquals(null, row.getDate(3));
} else if (index == 6) {
// missing string
assertTrue(row.isMissing(2));
assertEquals(null, row.getString(2));
}
}
}
}
}
@Test
public void testFithRowAsStartRow()
throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
try (ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration()) {
// configure data import
configuration.setWorkbookFile(testFile);
configuration.setSheet(2);
try (ExcelResultSetAdapter excelResultSet = makeResultSet(configuration, 4, 10)) {
int index = -1;
assertEquals(4, excelResultSet.getNumberOfColumns());
assertEquals(index, excelResultSet.getCurrentRowIndex());
assertTrue(excelResultSet.hasNext());
while (excelResultSet.hasNext()) {
DataSetRow row = excelResultSet.nextRow();
// check if index has changed
++index;
assertEquals(excelResultSet.getCurrentRowIndex(), index);
if (index == 0) {
assertFalse(row.isMissing(0));
assertEquals(4.6, row.getDouble(0), 1e-10);
assertFalse(row.isMissing(1));
assertEquals("id_4", row.getString(1));
} else if (index == 5) {
assertTrue(row.isMissing(0));
assertFalse(row.isMissing(1));
assertEquals("id_4", row.getString(1));
}
}
}
}
}
@Test
public void testFithRowAsStartAndNinthRowAsEndRow()
throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
try (ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration()) {
// configure data import
configuration.setWorkbookFile(testFile);
configuration.setSheet(2);
try (ExcelResultSetAdapter excelResultSet = makeResultSet(configuration, 4, 8)) {
int index = -1;
assertEquals(4, excelResultSet.getNumberOfColumns());
assertEquals(index, excelResultSet.getCurrentRowIndex());
assertTrue(excelResultSet.hasNext());
while (excelResultSet.hasNext()) {
DataSetRow row = excelResultSet.nextRow();
// check if index has changed
++index;
assertEquals(excelResultSet.getCurrentRowIndex(), index);
if (index == 0) {
assertFalse(row.isMissing(0));
assertEquals(4.6, row.getDouble(0), 1e-10);
assertFalse(row.isMissing(1));
assertEquals("id_4", row.getString(1));
} else if (index == 4) {
assertFalse(row.isMissing(0));
assertEquals(4.7, row.getDouble(0), 1e-10);
assertFalse(row.isMissing(1));
assertEquals("id_3", row.getString(1));
}
}
}
}
}
@Test
public void testEndRowBehindActualData()
throws DataSetException, OperatorException, URISyntaxException, ParseException, java.text.ParseException {
try (ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration()) {
// configure data import
configuration.setWorkbookFile(testFile);
configuration.setSheet(2);
try (ExcelResultSetAdapter excelResultSet = makeResultSet(configuration, 0, 20)) {
int index = -1;
assertEquals(4, excelResultSet.getNumberOfColumns());
assertEquals(index, excelResultSet.getCurrentRowIndex());
assertTrue(excelResultSet.hasNext());
while (excelResultSet.hasNext()) {
DataSetRow row = excelResultSet.nextRow();
// check if index has changed
++index;
assertEquals(excelResultSet.getCurrentRowIndex(), index);
if (index == 1) {
assertFalse(row.isMissing(0));
assertEquals(5.1, row.getDouble(0), 1e-10);
assertFalse(row.isMissing(1));
assertEquals("id_1", row.getString(1));
} else if (index == 10) {
assertFalse(row.isMissing(0));
assertEquals(4.9, row.getDouble(0), 1e-10);
assertFalse(row.isMissing(1));
assertEquals("id_2", row.getString(1));
}
}
// cannot read more data than available
assertEquals(10, index);
}
}
}
@Test(expected = IndexOutOfBoundsException.class)
public void testAccessOutOfColumnBoundsImport()
throws DataSetException, OperatorException, URISyntaxException, ParseException {
try (ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration()) {
// configure data import
configuration.setWorkbookFile(testFile);
configuration.setSheet(1);
try (ExcelResultSetAdapter excelResultSet = makeResultSet(configuration, 0, 100)) {
assertTrue(excelResultSet.hasNext());
while (excelResultSet.hasNext()) {
excelResultSet.nextRow().getString(5);
}
}
}
}
@Test(expected = NoSuchElementException.class)
public void testAccessNextRowOutOfBoundsImport()
throws DataSetException, OperatorException, URISyntaxException, ParseException {
try (ExcelResultSetConfiguration configuration = new ExcelResultSetConfiguration()) {
// configure data import
configuration.setWorkbookFile(testFile);
configuration.setSheet(1);
try (ExcelResultSetAdapter excelResultSet = makeResultSet(configuration, 0, 100)) {
assertTrue(excelResultSet.hasNext());
while (excelResultSet.hasNext()) {
excelResultSet.nextRow();
}
excelResultSet.nextRow();
}
}
}
private ExcelResultSetAdapter makeResultSet(ExcelResultSetConfiguration configuration, int startRow, int endRow)
throws DataSetException, OperatorException {
DataResultSet dataResultSet = configuration.makeDataResultSet(null);
return new ExcelResultSetAdapter(dataResultSet, startRow, endRow);
}
}