/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.studio.io.data.internal.file.excel; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.File; import java.text.SimpleDateFormat; import org.junit.Test; import com.rapidminer.core.io.data.ColumnMetaData; import com.rapidminer.core.io.data.ColumnMetaData.ColumnType; import com.rapidminer.core.io.data.DataSet; import com.rapidminer.core.io.data.DataSetException; import com.rapidminer.core.io.data.DataSetRow; import com.rapidminer.core.io.data.ParseException; import com.rapidminer.studio.io.data.HeaderRowBehindStartRowException; import com.rapidminer.studio.io.data.HeaderRowNotFoundException; import com.rapidminer.studio.io.data.StartRowNotFoundException; import com.rapidminer.studio.io.data.internal.ResultSetAdapter; import com.rapidminer.studio.io.data.internal.file.FileDataSourceTestUtils; import com.rapidminer.tools.Tools; /** * Abstract super class for all Excel tests * * @author Nils Woehler * */ public abstract class AbstractExcelDataSourceDataTest { static File testFile; static File nominalDateTestFile; static File dateDateTestFile; @Test public void defaultMetaDataTest() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.setHeaderRowIndex(0); configureDataSource(dataSource); // use default guessed meta data dataSource.createMetaData(); assertFalse(dataSource.getMetadata().isFaultTolerant()); assertEquals(Tools.DATE_TIME_FORMAT.get(), dataSource.getMetadata().getDateFormat()); assertEquals(6, dataSource.getMetadata().getColumnMetaData().size()); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(0), "a1", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(1), "a2", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(2), "a3133333333333333331311313", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(3), "a4", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(4), "id", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(5), FileDataSourceTestUtils.getUtf8Label(), ColumnType.CATEGORICAL); try (DataSet data = dataSource.getData()) { assertEquals(-1, data.getNumberOfRows()); assertTrue(data.hasNext()); int index = -1; assertEquals(index, data.getCurrentRowIndex()); while (data.hasNext()) { DataSetRow row = data.nextRow(); ++index; assertEquals(index, data.getCurrentRowIndex()); // check data content of 0th and 10th row if (index == 0) { assertFirstSheetRowContent(row); } else if (index == 9) { assertEquals(4.9, row.getDouble(0), 1e-10); assertEquals(3.1, row.getDouble(1), 1e-10); assertEquals(1.5, row.getDouble(2), 1e-10); assertEquals(.1, row.getDouble(3), 1e-10); assertEquals("id_10", row.getString(4)); assertEquals("Iris-setosa", row.getString(5)); } else if (index == 149) { assertEquals(5.9, row.getDouble(0), 1e-10); assertEquals(3.0, row.getDouble(1), 1e-10); assertEquals(5.1, row.getDouble(2), 1e-10); assertEquals(1.8, row.getDouble(3), 1e-10); assertEquals("id_150", row.getString(4)); assertEquals("Iris-virginica", row.getString(5)); } } assertEquals(149, data.getCurrentRowIndex()); // check reset data.reset(); assertEquals(data.getCurrentRowIndex(), -1); assertTrue(data.hasNext()); assertFirstSheetRowContent(data.nextRow()); } } } @Test(expected = HeaderRowBehindStartRowException.class) public void headerRowBehindStartRow() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); configureDataSource(dataSource); // set header row behind the start row dataSource.getResultSetConfiguration().setSheet(0); dataSource.setHeaderRowIndex(1); // configure meta data dataSource.createMetaData(); } } @Test(expected = HeaderRowBehindStartRowException.class) public void headerRowBehindStartRow2() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); configureDataSource(dataSource); // set header row behind data start row dataSource.getResultSetConfiguration().setRowOffset(10); dataSource.setHeaderRowIndex(15); // configure the meta data dataSource.createMetaData(); } } @Test(expected = StartRowNotFoundException.class) public void startRowNotAvailable() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); configureDataSource(dataSource); // set start row behind actual data content dataSource.getResultSetConfiguration().setRowOffset(151); dataSource.setHeaderRowIndex(150); // configure the meta data dataSource.createMetaData(); } } @Test(expected = HeaderRowNotFoundException.class) public void headerRowNotFound() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.getResultSetConfiguration().setRowOffset(155); dataSource.setHeaderRowIndex(155); dataSource.createMetaData(); } } @Test public void dataContentStartsAtFithRow() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.getResultSetConfiguration().setRowOffset(4); dataSource.setHeaderRowIndex(0); configureDataSource(dataSource); // use default guessed meta data dataSource.createMetaData(); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(0), "a1", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(1), "a2", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(2), "a3133333333333333331311313", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(3), "a4", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(4), "id", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(5), FileDataSourceTestUtils.getUtf8Label(), ColumnType.CATEGORICAL); try (DataSet data = dataSource.getData()) { assertTrue(data.hasNext()); int index = -1; assertEquals(index, data.getCurrentRowIndex()); while (data.hasNext()) { DataSetRow row = data.nextRow(); ++index; assertEquals(index, data.getCurrentRowIndex()); // check data content of first, 10th, last row if (index == 0) { assertEquals(4.6, row.getDouble(0), 1e-10); assertEquals(3.1, row.getDouble(1), 1e-10); assertEquals(1.5, row.getDouble(2), 1e-10); assertEquals(.2, row.getDouble(3), 1e-10); assertEquals("id_4", row.getString(4)); assertEquals("Iris-setosa", row.getString(5)); } else if (index == 9) { assertEquals(4.8, row.getDouble(0), 1e-10); assertEquals(3.0, row.getDouble(1), 1e-10); assertEquals(1.4, row.getDouble(2), 1e-10); assertEquals(.1, row.getDouble(3), 1e-10); assertEquals("id_13", row.getString(4)); assertEquals("Iris-setosa", row.getString(5)); } else if (index == 146) { assertEquals(5.9, row.getDouble(0), 1e-10); assertEquals(3.0, row.getDouble(1), 1e-10); assertEquals(5.1, row.getDouble(2), 1e-10); assertEquals(1.8, row.getDouble(3), 1e-10); assertEquals("id_150", row.getString(4)); assertEquals("Iris-virginica", row.getString(5)); } } assertEquals(146, data.getCurrentRowIndex()); // check reset data.reset(); assertEquals(data.getCurrentRowIndex(), -1); assertTrue(data.hasNext()); } } } @Test public void noHeaderRowDefined() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.getResultSetConfiguration().setRowOffset(0); dataSource.setHeaderRowIndex(ResultSetAdapter.NO_HEADER_ROW); configureDataSource(dataSource); // use default guessed meta data dataSource.createMetaData(); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(0), "A", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(1), "B", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(2), "C", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(3), "D", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(4), "E", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(5), "F", ColumnType.CATEGORICAL); try (DataSet data = dataSource.getData()) { assertEquals(-1, data.getNumberOfRows()); assertTrue(data.hasNext()); int index = -1; assertEquals(index, data.getCurrentRowIndex()); while (data.hasNext()) { DataSetRow row = data.nextRow(); ++index; assertEquals(index, data.getCurrentRowIndex()); // check data content of first, 10th, last row if (index == 0) { assertEquals("a1", row.getString(0)); assertEquals("a2", row.getString(1)); assertEquals("a3133333333333333331311313", row.getString(2)); assertEquals("a4", row.getString(3)); assertEquals("id", row.getString(4)); assertEquals(FileDataSourceTestUtils.getUtf8Label(), row.getString(5)); } else if (index == 9) { assertEquals("2.9", row.getString(1)); assertEquals("1.4", row.getString(2)); assertEquals(getCategoricalBelowOne(), row.getString(3)); assertEquals("id_9", row.getString(4)); assertEquals("Iris-setosa", row.getString(5)); } else if (index == 150) { assertEquals("5.9", row.getString(0)); assertEquals(getCategoricalInteger(), row.getString(1)); assertEquals("1.8", row.getString(3)); assertEquals("id_150", row.getString(4)); assertEquals("Iris-virginica", row.getString(5)); } } assertEquals(150, data.getCurrentRowIndex()); // check reset data.reset(); assertEquals(data.getCurrentRowIndex(), -1); assertTrue(data.hasNext()); } } } /** * @return result for categorical integer number */ protected abstract String getCategoricalInteger(); /** * @return result for categorical number below 1.0 */ protected abstract String getCategoricalBelowOne(); @Test public void missingInHeaderRow() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(3); dataSource.getResultSetConfiguration().setRowOffset(0); dataSource.setHeaderRowIndex(0); configureDataSource(dataSource); // use default guessed meta data dataSource.createMetaData(); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(0), "a1", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(1), "id", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(2), "label", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(3), "D", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(4), "a4", ColumnType.REAL); } } @Test public void dataContentStartsAtFithRowHeaderRowAsSecondRow() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); configureDataSource(dataSource); // set content start row to 5th row and header row to second row dataSource.getResultSetConfiguration().setRowOffset(4); dataSource.setHeaderRowIndex(1); // use default guessed meta data dataSource.createMetaData(); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(0), "5.1", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(1), "3.5", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(2), "1.4", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(3), "0.2", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(4), "id_1", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(5), "Iris-setosa", ColumnType.CATEGORICAL); try (DataSet data = dataSource.getData()) { assertEquals(-1, data.getNumberOfRows()); assertTrue(data.hasNext()); int index = -1; assertEquals(index, data.getCurrentRowIndex()); while (data.hasNext()) { DataSetRow row = data.nextRow(); ++index; assertEquals(index, data.getCurrentRowIndex()); // check data content of first, 10th and last row if (index == 0) { assertEquals(4.6, row.getDouble(0), 1e-10); assertEquals(3.1, row.getDouble(1), 1e-10); assertEquals(1.5, row.getDouble(2), 1e-10); assertEquals(.2, row.getDouble(3), 1e-10); assertEquals("id_4", row.getString(4)); assertEquals("Iris-setosa", row.getString(5)); } else if (index == 9) { assertEquals(4.8, row.getDouble(0), 1e-10); assertEquals(3.0, row.getDouble(1), 1e-10); assertEquals(1.4, row.getDouble(2), 1e-10); assertEquals(.1, row.getDouble(3), 1e-10); assertEquals("id_13", row.getString(4)); assertEquals("Iris-setosa", row.getString(5)); } else if (index == 146) { assertEquals(5.9, row.getDouble(0), 1e-10); assertEquals(3.0, row.getDouble(1), 1e-10); assertEquals(5.1, row.getDouble(2), 1e-10); assertEquals(1.8, row.getDouble(3), 1e-10); assertEquals("id_150", row.getString(4)); assertEquals("Iris-virginica", row.getString(5)); } } assertEquals(146, data.getCurrentRowIndex()); // check reset data.reset(); assertEquals(data.getCurrentRowIndex(), -1); assertTrue(data.hasNext()); } } } @Test public void firstColumnSkipped() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.setHeaderRowIndex(0); configureDataSource(dataSource); // skip first column dataSource.getResultSetConfiguration().setColumnOffset(1); // use default guessed meta data dataSource.createMetaData(); assertEquals(5, dataSource.getMetadata().getColumnMetaData().size()); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(0), "a2", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(1), "a3133333333333333331311313", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(2), "a4", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(3), "id", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(4), FileDataSourceTestUtils.getUtf8Label(), ColumnType.CATEGORICAL); try (DataSet data = dataSource.getData()) { assertTrue(data.hasNext()); int index = -1; assertEquals(index, data.getCurrentRowIndex()); while (data.hasNext()) { DataSetRow row = data.nextRow(); ++index; assertEquals(index, data.getCurrentRowIndex()); // check data content of first, 10th and last row if (index == 0) { assertFirstSheetRowContent(row, 1); } else if (index == 9) { assertEquals(3.1, row.getDouble(0), 1e-10); assertEquals(1.5, row.getDouble(1), 1e-10); assertEquals(.1, row.getDouble(2), 1e-10); assertEquals("id_10", row.getString(3)); assertEquals("Iris-setosa", row.getString(4)); } else if (index == 149) { assertEquals(3.0, row.getDouble(0), 1e-10); assertEquals(5.1, row.getDouble(1), 1e-10); assertEquals(1.8, row.getDouble(2), 1e-10); assertEquals("id_150", row.getString(3)); assertEquals("Iris-virginica", row.getString(4)); } } assertEquals(149, data.getCurrentRowIndex()); // check reset data.reset(); assertEquals(data.getCurrentRowIndex(), -1); assertTrue(data.hasNext()); assertFirstSheetRowContent(data.nextRow(), 1); } } } @Test public void firstAndSecondColumnSkipped() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.setHeaderRowIndex(0); configureDataSource(dataSource); // skip first two columns dataSource.getResultSetConfiguration().setColumnOffset(2); // use default guessed meta data dataSource.createMetaData(); assertEquals(4, dataSource.getMetadata().getColumnMetaData().size()); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(0), "a3133333333333333331311313", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(1), "a4", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(2), "id", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(3), FileDataSourceTestUtils.getUtf8Label(), ColumnType.CATEGORICAL); try (DataSet data = dataSource.getData()) { assertTrue(data.hasNext()); int index = -1; assertEquals(index, data.getCurrentRowIndex()); while (data.hasNext()) { DataSetRow row = data.nextRow(); ++index; assertEquals(index, data.getCurrentRowIndex()); // check data content of first, 10th and last row if (index == 0) { assertFirstSheetRowContent(row, 2); } else if (index == 9) { assertEquals(1.5, row.getDouble(0), 1e-10); assertEquals(.1, row.getDouble(1), 1e-10); assertEquals("id_10", row.getString(2)); assertEquals("Iris-setosa", row.getString(3)); } else if (index == 149) { assertEquals(5.1, row.getDouble(0), 1e-10); assertEquals(1.8, row.getDouble(1), 1e-10); assertEquals("id_150", row.getString(2)); assertEquals("Iris-virginica", row.getString(3)); } } assertEquals(149, data.getCurrentRowIndex()); // check reset data.reset(); assertEquals(data.getCurrentRowIndex(), -1); assertTrue(data.hasNext()); assertFirstSheetRowContent(data.nextRow(), 2); } } } @Test public void lastColumnSkipped() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.setHeaderRowIndex(0); configureDataSource(dataSource); // remove last column dataSource.getResultSetConfiguration().setColumnLast(4); // use default guessed meta data dataSource.createMetaData(); assertEquals(5, dataSource.getMetadata().getColumnMetaData().size()); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(0), "a1", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(1), "a2", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(2), "a3133333333333333331311313", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(3), "a4", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(4), "id", ColumnType.CATEGORICAL); try (DataSet data = dataSource.getData()) { assertEquals(-1, data.getNumberOfRows()); assertTrue(data.hasNext()); int index = -1; assertEquals(index, data.getCurrentRowIndex()); while (data.hasNext()) { DataSetRow row = data.nextRow(); ++index; assertEquals(index, data.getCurrentRowIndex()); // check data content of first, 10th and last row if (index == 0) { assertFirstSheetRowContent(row, 0, 4); } else if (index == 9) { assertEquals(4.9, row.getDouble(0), 1e-10); assertEquals(3.1, row.getDouble(1), 1e-10); assertEquals(1.5, row.getDouble(2), 1e-10); assertEquals(.1, row.getDouble(3), 1e-10); assertEquals("id_10", row.getString(4)); } else if (index == 149) { assertEquals(5.9, row.getDouble(0), 1e-10); assertEquals(3.0, row.getDouble(1), 1e-10); assertEquals(5.1, row.getDouble(2), 1e-10); assertEquals(1.8, row.getDouble(3), 1e-10); assertEquals("id_150", row.getString(4)); } } assertEquals(149, data.getCurrentRowIndex()); // check reset data.reset(); assertEquals(data.getCurrentRowIndex(), -1); assertTrue(data.hasNext()); assertFirstSheetRowContent(data.nextRow(), 0, 4); } } } @Test public void firstAndLastColumnSkipped() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.setHeaderRowIndex(0); configureDataSource(dataSource); // remove first column dataSource.getResultSetConfiguration().setColumnOffset(1); // remove last column dataSource.getResultSetConfiguration().setColumnLast(4); // use default guessed meta data dataSource.createMetaData(); assertEquals(4, dataSource.getMetadata().getColumnMetaData().size()); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(0), "a2", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(1), "a3133333333333333331311313", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(2), "a4", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(3), "id", ColumnType.CATEGORICAL); try (DataSet data = dataSource.getData()) { assertTrue(data.hasNext()); int index = -1; assertEquals(index, data.getCurrentRowIndex()); while (data.hasNext()) { DataSetRow row = data.nextRow(); ++index; assertEquals(index, data.getCurrentRowIndex()); // check data content of first, 10th and last row if (index == 0) { assertFirstSheetRowContent(row, 1, 4); } else if (index == 9) { assertEquals(3.1, row.getDouble(0), 1e-10); assertEquals(1.5, row.getDouble(1), 1e-10); assertEquals(.1, row.getDouble(2), 1e-10); assertEquals("id_10", row.getString(3)); } else if (index == 149) { assertEquals(3.0, row.getDouble(0), 1e-10); assertEquals(5.1, row.getDouble(1), 1e-10); assertEquals(1.8, row.getDouble(2), 1e-10); assertEquals("id_150", row.getString(3)); } } assertEquals(149, data.getCurrentRowIndex()); // check reset data.reset(); assertEquals(data.getCurrentRowIndex(), -1); assertTrue(data.hasNext()); assertFirstSheetRowContent(data.nextRow(), 1, 4); } } } @Test public void lastRowDefined() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.setHeaderRowIndex(0); configureDataSource(dataSource); // set last row to 100th data row // (index = 100 because column names are retrieved from index 0) dataSource.getResultSetConfiguration().setRowLast(100); // use default guessed meta data dataSource.createMetaData(); assertFalse(dataSource.getMetadata().isFaultTolerant()); assertEquals(Tools.DATE_TIME_FORMAT.get(), dataSource.getMetadata().getDateFormat()); assertEquals(6, dataSource.getMetadata().getColumnMetaData().size()); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(0), "a1", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(1), "a2", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(2), "a3133333333333333331311313", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(3), "a4", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(4), "id", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(5), FileDataSourceTestUtils.getUtf8Label(), ColumnType.CATEGORICAL); try (DataSet data = dataSource.getData()) { assertTrue(data.hasNext()); int index = -1; assertEquals(index, data.getCurrentRowIndex()); while (data.hasNext()) { DataSetRow row = data.nextRow(); ++index; assertEquals(index, data.getCurrentRowIndex()); // check data content of first, 10th and last row if (index == 0) { assertFirstSheetRowContent(row); } else if (index == 9) { assertEquals(4.9, row.getDouble(0), 1e-10); assertEquals(3.1, row.getDouble(1), 1e-10); assertEquals(1.5, row.getDouble(2), 1e-10); assertEquals(.1, row.getDouble(3), 1e-10); assertEquals("id_10", row.getString(4)); assertEquals("Iris-setosa", row.getString(5)); } else if (index == 99) { assertEquals(5.7, row.getDouble(0), 1e-10); assertEquals(2.8, row.getDouble(1), 1e-10); assertEquals(4.1, row.getDouble(2), 1e-10); assertEquals(1.3, row.getDouble(3), 1e-10); assertEquals("id_100", row.getString(4)); assertEquals("Iris-versicolor", row.getString(5)); } } assertEquals(99, data.getCurrentRowIndex()); // check reset data.reset(); assertEquals(data.getCurrentRowIndex(), -1); assertTrue(data.hasNext()); assertFirstSheetRowContent(data.nextRow()); } } } @Test public void firstAndLastDataRowDefined() throws DataSetException, ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.setHeaderRowIndex(0); configureDataSource(dataSource); // start with 50th data row dataSource.getResultSetConfiguration().setRowOffset(50); // set last row to 100th data row // (index = 100 because column names are retrieved from index 0) dataSource.getResultSetConfiguration().setRowLast(100); // use default guessed meta data dataSource.createMetaData(); assertFalse(dataSource.getMetadata().isFaultTolerant()); assertEquals(Tools.DATE_TIME_FORMAT.get(), dataSource.getMetadata().getDateFormat()); assertEquals(6, dataSource.getMetadata().getColumnMetaData().size()); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(0), "a1", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(1), "a2", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(2), "a3133333333333333331311313", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(3), "a4", ColumnType.REAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(4), "id", ColumnType.CATEGORICAL); checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(5), FileDataSourceTestUtils.getUtf8Label(), ColumnType.CATEGORICAL); try (DataSet data = dataSource.getData()) { assertTrue(data.hasNext()); int index = -1; assertEquals(index, data.getCurrentRowIndex()); while (data.hasNext()) { DataSetRow row = data.nextRow(); ++index; assertEquals(index, data.getCurrentRowIndex()); // check data content of first and last row if (index == 0) { assertEquals(5.0, row.getDouble(0), 1e-10); assertEquals(3.3, row.getDouble(1), 1e-10); assertEquals(1.4, row.getDouble(2), 1e-10); assertEquals(.2, row.getDouble(3), 1e-10); assertEquals("id_50", row.getString(4)); assertEquals("Iris-setosa", row.getString(5)); } else if (index == 50) { assertEquals(5.7, row.getDouble(0), 1e-10); assertEquals(2.8, row.getDouble(1), 1e-10); assertEquals(4.1, row.getDouble(2), 1e-10); assertEquals(1.3, row.getDouble(3), 1e-10); assertEquals("id_100", row.getString(4)); assertEquals("Iris-versicolor", row.getString(5)); } } assertEquals(50, data.getCurrentRowIndex()); // check reset data.reset(); assertEquals(data.getCurrentRowIndex(), -1); assertTrue(data.hasNext()); // check first row one more time DataSetRow row = data.nextRow(); assertEquals(5.0, row.getDouble(0), 1e-10); assertEquals(3.3, row.getDouble(1), 1e-10); assertEquals(1.4, row.getDouble(2), 1e-10); assertEquals(.2, row.getDouble(3), 1e-10); assertEquals("id_50", row.getString(4)); assertEquals("Iris-setosa", row.getString(5)); } } } @Test public void cachingTest() throws DataSetException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); DataSet firstPreviewSet = dataSource.getPreview(500); DataSet firstDataSet = dataSource.getData(); DataSet secondPreviewSet = dataSource.getPreview(500); DataSet secondDataSet = dataSource.getData(); assertTrue(firstPreviewSet == secondPreviewSet); assertTrue(firstDataSet == secondDataSet); dataSource.getResultSetConfiguration().setColumnOffset(3); DataSet thirdPreviewSet = dataSource.getPreview(500); DataSet thirdDataSet = dataSource.getData(); assertFalse(thirdPreviewSet == firstPreviewSet); assertFalse(thirdDataSet == firstDataSet); } } @Test public void onlyHeaderRowSelected() throws DataSetException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.setHeaderRowIndex(0); dataSource.getResultSetConfiguration().setRowLast(0); dataSource.getResultSetConfiguration().setRowOffset(0); DataSet firstDataSet = dataSource.getData(); assertFalse(firstDataSet.hasNext()); } } @Test public void lengthTest() throws DataSetException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(testFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); dataSource.getResultSetConfiguration().setRowOffset(3); configureDataSource(dataSource); DataSet preview = dataSource.getPreview(10); preview.reset(); assertEquals(-1, preview.getCurrentRowIndex()); while (preview.hasNext()) { preview.nextRow(); } assertEquals(9, preview.getCurrentRowIndex()); DataSet set = dataSource.getData(); set.reset(); assertEquals(-1, set.getCurrentRowIndex()); while (set.hasNext()) { set.nextRow(); } assertEquals(147, set.getCurrentRowIndex()); } } @Test public void simpleNominalToDateTest() throws DataSetException, ParseException, IndexOutOfBoundsException, java.text.ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(nominalDateTestFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); configureDataSource(dataSource); // use default guessed meta data dataSource.createMetaData(); int dateColumnIndex = 6; SimpleDateFormat dateFormat = new SimpleDateFormat("M/d/yy h:mm a"); // check meta data and set to date checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(dateColumnIndex), "date", ColumnType.CATEGORICAL); dataSource.getMetadata().getColumnMetaData().get(dateColumnIndex).setType(ColumnType.DATETIME); // set correct date format dataSource.getMetadata().setDateFormat(dateFormat); DataSet ds = dataSource.getData(); while (ds.hasNext()) { DataSetRow row = ds.nextRow(); if (ds.getCurrentRowIndex() != 65) { assertFalse(row.isMissing(dateColumnIndex)); } else { assertTrue(row.isMissing(dateColumnIndex)); } if (ds.getCurrentRowIndex() == 20) { assertEquals(dateFormat.parse("2/2/17 8:24 AM"), row.getDate(dateColumnIndex)); } else if (ds.getCurrentRowIndex() == 50) { assertEquals(dateFormat.parse("6/11/16 12:24 PM"), row.getDate(dateColumnIndex)); } } } } @Test(expected = ParseException.class) public void wrongDateFormatTest() throws DataSetException, ParseException, IndexOutOfBoundsException, java.text.ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(nominalDateTestFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); configureDataSource(dataSource); // use default guessed meta data dataSource.createMetaData(); int dateColumnIndex = 6; // check meta data and set to date checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(dateColumnIndex), "date", ColumnType.CATEGORICAL); dataSource.getMetadata().getColumnMetaData().get(dateColumnIndex).setType(ColumnType.DATETIME); DataSet ds = dataSource.getData(); while (ds.hasNext()) { DataSetRow row = ds.nextRow(); // will throw a parse exception row.getDate(dateColumnIndex); } } } @Test public void simpleDateToDateTest() throws DataSetException, ParseException, IndexOutOfBoundsException, java.text.ParseException { try (ExcelDataSource dataSource = new ExcelDataSource()) { dataSource.setLocation(dateDateTestFile.toPath()); dataSource.getResultSetConfiguration().setSheet(0); configureDataSource(dataSource); // use default guessed meta data dataSource.createMetaData(); int dateColumnIndex = 6; // check meta data and set to date checkColumnMetaData(dataSource.getMetadata().getColumnMetaData(dateColumnIndex), "date", ColumnType.DATETIME); DataSet ds = dataSource.getData(); while (ds.hasNext()) { DataSetRow row = ds.nextRow(); if (ds.getCurrentRowIndex() != 65) { assertFalse(row.isMissing(dateColumnIndex)); } else { assertTrue(row.isMissing(dateColumnIndex)); } if (ds.getCurrentRowIndex() == 20) { assertEquals(getDataToDateRow20(), row.getDate(dateColumnIndex).getTime()); } else if (ds.getCurrentRowIndex() == 50) { assertEquals(getDateToDateRow50(), row.getDate(dateColumnIndex).getTime()); } } } } /** * @return the date value at row 20 */ protected abstract long getDataToDateRow20(); /** * @return the date value at row 50 */ protected abstract long getDateToDateRow50(); /** * Called by every test that uses a {@link ExcelDataSource} to adapt to XLS and XLSX special * configurations * * @param dataSource * the data source to configure */ protected abstract void configureDataSource(ExcelDataSource dataSource); private void assertFirstSheetRowContent(DataSetRow row) throws ParseException { assertFirstSheetRowContent(row, 0, 5); } private void assertFirstSheetRowContent(DataSetRow row, int firstColumn) throws ParseException { assertFirstSheetRowContent(row, firstColumn, 5); } private void assertFirstSheetRowContent(DataSetRow row, int firstColumn, int lastColumn) throws ParseException { if (firstColumn < 1) { assertEquals(5.1, row.getDouble(0), 1e-10); } if (firstColumn < 2) { assertEquals(3.5, row.getDouble(1 - firstColumn), 1e-10); } if (firstColumn < 3 && lastColumn >= 2) { assertEquals(1.4, row.getDouble(2 - firstColumn), 1e-10); } if (firstColumn < 4 && lastColumn >= 3) { assertEquals(0.2, row.getDouble(3 - firstColumn), 1e-10); } if (firstColumn < 5 && lastColumn >= 4) { assertEquals("id_1", row.getString(4 - firstColumn)); } if (firstColumn < 6 && lastColumn >= 5) { assertEquals("Iris-setosa", row.getString(5 - firstColumn)); } } private void checkColumnMetaData(ColumnMetaData columnMetaData, String name, ColumnType type) { checkColumnMetaData(columnMetaData, name, null, type); } private void checkColumnMetaData(ColumnMetaData columnMetaData, String name, String role, ColumnType type) { assertEquals(name, columnMetaData.getName()); assertEquals(role, columnMetaData.getRole()); assertEquals(type, columnMetaData.getType()); } }