/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.operator.preprocessing;

import static org.junit.Assert.assertEquals;

import java.util.Arrays;
import java.util.Collection;
import java.util.Random;

import org.junit.Before;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
import org.junit.runners.Parameterized.Parameters;

import com.rapidminer.RapidMiner;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.example.table.internal.ColumnarExampleTable;
import com.rapidminer.example.test.ExampleTestTools;
import com.rapidminer.test_utils.RapidAssert;
import com.rapidminer.tools.Ontology;
import com.rapidminer.tools.ParameterService;


/**
 * Tests the materialize data functionality. The whole test class is executed once per value
 * returned by {@link #params()}, i.e. once with the legacy data management property set to
 * {@code true} and once with it set to {@code false}.
 *
 * @author Marcel Michel
 */
@RunWith(value = Parameterized.class)
public class MaterializeDataInMemoryTest {

	/**
	 * Writes the given flag into the
	 * {@link RapidMiner#PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT} parameter before the tests of
	 * this instance run.
	 *
	 * @param legacyMode
	 *            the value to store for the legacy data management property
	 */
	public MaterializeDataInMemoryTest(boolean legacyMode) {
		String legacyFlag = Boolean.toString(legacyMode);
		ParameterService.setParameterValue(RapidMiner.PROPERTY_RAPIDMINER_SYSTEM_LEGACY_DATA_MGMT, legacyFlag);
	}

	/**
	 * Supplies the constructor arguments for the parameterized runs.
	 *
	 * @return the two legacy mode settings, {@code true} followed by {@code false}
	 */
	@Parameters(name = "legacyMode={0}")
	public static Collection<Object> params() {
		return Arrays.<Object> asList(Boolean.TRUE, Boolean.FALSE);
	}

	/**
	 * Initializes the RapidMiner asserters before every test.
	 */
	@Before
	public void setup() {
		RapidMiner.initAsserters();
	}

	/**
	 * Builds one random example set per data management type between
	 * {@link DataRowFactory#FIRST_TYPE_INDEX} and {@link DataRowFactory#LAST_TYPE_INDEX}, passes it
	 * to {@link MaterializeDataInMemory#materializeExampleSet(ExampleSet)} and checks that the
	 * result equals the source and uses the expected data row type.
	 */
	@Test
	public void dataMangementTest() {
		int type = DataRowFactory.FIRST_TYPE_INDEX;
		while (type <= DataRowFactory.LAST_TYPE_INDEX) {
			ExampleSet original = createMemoryExampleTable(100, type).createExampleSet();
			ExampleSet materialized = MaterializeDataInMemory.materializeExampleSet(original);
			RapidAssert.assertEquals("ExampleSets are not equal", original, materialized);

			int expectedType;
			if (materialized.getExampleTable() instanceof ColumnarExampleTable) {
				// a ColumnarExampleTable can only hand out rows of type
				// DataRowFactory.TYPE_COLUMN_VIEW
				expectedType = DataRowFactory.TYPE_COLUMN_VIEW;
			} else if (type == DataRowFactory.TYPE_COLUMN_VIEW) {
				// a DataRowFactory.TYPE_COLUMN_VIEW source is expected to end up as
				// DataRowFactory.TYPE_DOUBLE_ARRAY rows
				expectedType = DataRowFactory.TYPE_DOUBLE_ARRAY;
			} else {
				// every other type is expected to be kept as is
				expectedType = type;
			}
			assertEquals(expectedType, findDataRowType(materialized));

			type++;
		}
	}

	/**
	 * Fills a new {@link MemoryExampleTable} over the four test attributes with values drawn from
	 * a random generator seeded with {@code 0}.
	 *
	 * @param size
	 *            the number of rows
	 * @param dataManagement
	 *            the data management strategy (see {@link DataRowFactory} for more information)
	 * @return the populated {@link MemoryExampleTable}
	 */
	private static MemoryExampleTable createMemoryExampleTable(int size, int dataManagement) {
		Attribute[] columns = ExampleTestTools.createFourAttributes();
		MemoryExampleTable table = new MemoryExampleTable(columns);
		DataRowFactory factory = new DataRowFactory(dataManagement, '.');
		Random generator = new Random(0);

		for (int rowIndex = 0; rowIndex < size; rowIndex++) {
			DataRow dataRow = factory.create(columns.length);
			for (Attribute column : columns) {
				dataRow.set(column, nextRandomValue(column, generator));
			}
			table.addDataRow(dataRow);
		}
		return table;
	}

	/**
	 * Draws the next random value suitable for the given attribute: an index into the nominal
	 * mapping for nominal attributes, an integer in {@code [-100, 100)} for integer attributes and
	 * a double in {@code [-10.0, 10.0)} otherwise.
	 *
	 * @param attribute
	 *            the attribute the value is generated for
	 * @param generator
	 *            the random generator to draw from
	 * @return the generated value
	 */
	private static double nextRandomValue(Attribute attribute, Random generator) {
		if (attribute.isNominal()) {
			int numberOfValues = attribute.getMapping().getValues().size();
			return generator.nextInt(numberOfValues);
		}
		if (attribute.getValueType() == Ontology.INTEGER) {
			return generator.nextInt(200) - 100;
		}
		return 20.0 * generator.nextDouble() - 10.0;
	}

	/**
	 * Reports the type of the data row backing the first example of the given set.
	 *
	 * @param exampleSet
	 *            the set which should be inspected
	 * @return the type of the first data row, or {@link DataRowFactory#TYPE_DOUBLE_ARRAY} if the
	 *         set is empty or the first example has no data row
	 */
	private static int findDataRowType(ExampleSet exampleSet) {
		if (exampleSet.size() <= 0) {
			// nothing to inspect: fall back to the default type
			return DataRowFactory.TYPE_DOUBLE_ARRAY;
		}
		DataRow firstRow = exampleSet.getExample(0).getDataRow();
		return firstRow == null ? DataRowFactory.TYPE_DOUBLE_ARRAY : firstRow.getType();
	}
}