/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.studio.io.data; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import java.util.ArrayList; import java.util.Arrays; import java.util.Date; import java.util.List; import java.util.NoSuchElementException; import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import com.rapidminer.core.io.data.ColumnMetaData; import com.rapidminer.core.io.data.ColumnMetaData.ColumnType; import com.rapidminer.core.io.data.DataSet; import com.rapidminer.core.io.data.DataSetException; import com.rapidminer.core.io.data.DataSetRow; import com.rapidminer.core.io.data.ParseException; import com.rapidminer.example.Attribute; import com.rapidminer.example.Attributes; import com.rapidminer.example.ExampleSet; import com.rapidminer.operator.ProcessStoppedException; import com.rapidminer.operator.UserError; import com.rapidminer.studio.io.data.DataSetReader; import com.rapidminer.studio.io.data.DefaultColumnMetaData; /** * Tests the {@link DataSetReader}. * * @author Gisa Schaefer * */ public class DataSetReaderTest { private static int numberOfRows = 10; private static DataSetRow row; private static DataSet dataSet; private static Date testDate = new Date(); private DataSetReader reader; private List<ColumnMetaData> columnMetaData; @BeforeClass public static void setUpForAll() { // row with number, String, date, missing row = new DataSetRow() { @Override public Date getDate(int columnIndex) throws ParseException { switch (columnIndex) { case 0: case 1: throw new ParseException("not a date"); case 2: return testDate; } return null; } @Override public String getString(int columnIndex) throws ParseException { switch (columnIndex) { case 0: return "5.27"; case 1: return "Xxxx"; case 2: return "1.1.1900"; } return null; } @Override public double getDouble(int columnIndex) throws ParseException { switch (columnIndex) { case 0: return 5.27; case 1: case 2: throw new ParseException("not a number"); } return Double.NaN; } @Override public boolean isMissing(int columnIndex) { switch (columnIndex) { case 0: case 1: case 2: return false; } return true; } }; dataSet = new DataSet() { int counter = -1; @Override public boolean hasNext() { return counter < numberOfRows - 1; } @Override public DataSetRow nextRow() throws DataSetException, NoSuchElementException { counter++; return row; } @Override public int getCurrentRowIndex() { return counter; } @Override public void reset() throws DataSetException { counter = -1; } @Override public int getNumberOfColumns() { return 4; } @Override public int getNumberOfRows() { return numberOfRows; } @Override public void close() throws DataSetException {} }; } @Before public void setUp() { columnMetaData = new ArrayList<ColumnMetaData>( Arrays.asList(new ColumnMetaData[] { new DefaultColumnMetaData("att1", ColumnType.REAL), new DefaultColumnMetaData("att2", ColumnType.CATEGORICAL), new DefaultColumnMetaData("att3", ColumnType.DATE), new DefaultColumnMetaData("att4", ColumnType.INTEGER) })); reader = new DataSetReader(null, columnMetaData, false); } @Test public void attributeCreation() throws UserError, ProcessStoppedException, DataSetException, ParseException { ExampleSet result = reader.read(dataSet, null); Attributes attributes = result.getAttributes(); assertEquals(4, attributes.size()); assertTrue(attributes.get("att1").isNumerical()); assertTrue(attributes.get("att2").isNominal()); assertTrue(attributes.get("att3").isDateTime()); assertTrue(attributes.get("att4").isNumerical()); } @Test public void numberOfRows() throws UserError, ProcessStoppedException, DataSetException, ParseException { ExampleSet result = reader.read(dataSet, null); assertEquals(numberOfRows, result.size()); } @Test public void removingColumn() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.get(0).setRemoved(true); ExampleSet result = reader.read(dataSet, null); Attributes attributes = result.getAttributes(); assertEquals(null, attributes.get("att1")); assertEquals(3, attributes.size()); } @Test public void checkValue0() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.get(1).setRemoved(true); columnMetaData.get(2).setRemoved(true); columnMetaData.get(3).setRemoved(true); ExampleSet result = reader.read(dataSet, null); Attribute attribute = result.getAttributes().get("att1"); for (int i = 0; i < numberOfRows; i++) { assertEquals(5.27, result.getExample(i).getValue(attribute), 1e-15); } } @Test public void checkValue1() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.get(0).setRemoved(true); columnMetaData.get(2).setRemoved(true); columnMetaData.get(3).setRemoved(true); ExampleSet result = reader.read(dataSet, null); Attribute attribute = result.getAttributes().get("att2"); for (int i = 0; i < numberOfRows; i++) { assertEquals("Xxxx", result.getExample(0).getNominalValue(attribute)); } } @Test public void checkValue2() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.get(0).setRemoved(true); columnMetaData.get(1).setRemoved(true); columnMetaData.get(3).setRemoved(true); ExampleSet result = reader.read(dataSet, null); Attribute attribute = result.getAttributes().get("att3"); for (int i = 0; i < numberOfRows; i++) { assertEquals(testDate, result.getExample(i).getDateValue(attribute)); } } @Test public void checkValue3() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.get(0).setRemoved(true); columnMetaData.get(1).setRemoved(true); columnMetaData.get(2).setRemoved(true); ExampleSet result = reader.read(dataSet, null); Attribute attribute = result.getAttributes().get("att4"); for (int i = 0; i < numberOfRows; i++) { assertEquals(Double.NaN, result.getExample(i).getValue(attribute), 1e-15); } } @Test public void specialRole() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.get(0).setRole("label"); ExampleSet result = reader.read(dataSet, null); assertEquals("label", result.getAttributes().getRole("att1").getSpecialName()); } @Test(expected = ParseException.class) public void parseExceptionNominalInNumericColumn() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.get(1).setType(ColumnType.REAL); reader.read(dataSet, null); } @Test public void faultTolerantNominalInNumericColumn() throws UserError, ProcessStoppedException, DataSetException, ParseException { reader.setFaultTolerant(true); columnMetaData.get(1).setType(ColumnType.REAL); ExampleSet result = reader.read(dataSet, null); Attribute attribute = result.getAttributes().get("att2"); result.getExample(0).get(attribute); assertTrue(attribute.isNumerical()); assertEquals(Double.NaN, result.getExample(0).getNumericalValue(attribute), 1e-15); } @Test public void faultTolerantNominalInDateColumn() throws UserError, ProcessStoppedException, DataSetException, ParseException { reader.setFaultTolerant(true); columnMetaData.get(1).setType(ColumnType.DATE); ExampleSet result = reader.read(dataSet, null); Attribute attribute = result.getAttributes().get("att2"); result.getExample(0).get(attribute); assertTrue(attribute.isDateTime()); assertEquals(Double.NaN, result.getExample(0).getValue(attribute), 1e-15); } @Test(expected = UserError.class) public void moreColumnsInMetaDataThanInDataSet() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.add(new DefaultColumnMetaData("att5", ColumnType.BINARY)); reader.read(dataSet, null); } @Test(expected = UserError.class) public void moreColumnsInMetaDataThanInDataSetRemoveBefore() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.add(new DefaultColumnMetaData("att5", ColumnType.BINARY)); columnMetaData.get(3).setRemoved(true); reader.read(dataSet, null); } @Test public void moreColumnsInMetaDataThanInDataSetButRemoved() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.add(new DefaultColumnMetaData("att5", ColumnType.BINARY)); columnMetaData.get(4).setRemoved(true); reader.read(dataSet, null); } @Test(expected = UserError.class) public void twoColumnsWithSameName() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.get(3).setName("att1"); reader.read(dataSet, null); } @Test public void twoColumnsWithSameNameButRemoved() throws UserError, ProcessStoppedException, DataSetException, ParseException { columnMetaData.get(3).setName("att1"); columnMetaData.get(3).setRemoved(true); reader.read(dataSet, null); } }