/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.example;
import java.util.ArrayList;
import java.util.List;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.tools.Ontology;
/**
* <p>This class can be used to easily create {@link ExampleSet}s and the underlying
* {@link ExampleTable} with simple method calls. Please note that it is often better
* to explicitly fill the data table yourself, or, if possible, to extend {@link ExampleTable}
* or {@link DataRow} to provide the necessary data to RapidMiner. For memory usage reasons,
* it is also often not recommended to create the data matrix from your existing data
* in an extra step and then use one of the factory methods. In these cases, it is better
* to directly fill an {@link ExampleTable} from your data source.</p>
*
* <p>However, in some cases it might be more convenient to use this class in order to
* create example sets from data matrices in a fast and simple way. The resulting example
* set will be backed up by a {@link MemoryExampleTable}. If the data set
* at hand is completely numerical, one can simply use one of the double matrix methods
* provided by this class. This will lead to an {@link ExampleSet} containing only numerical
* attributes. Otherwise, one has to use the Object matrix methods. Please
* note that only String objects and Number objects (Double, Integer) are allowed in this
* case. Otherwise an Exception will be thrown. In case of the Object matrix methods the
* method tries to identify the type itself and initializes the example set with the
* correct attribute types (nominal or numerical).</p>
*
* <p>Please note that the internal representation of the nominal attribute values
* depends on the order they appear in the data set. If this is not allowed (e.g.
* for the label attribute of different training and testing sets, where the
* internal representation should be the same in order to prevent label flips)
* one should definitely use the usual ExampleTable - ExampleSet construction where
* the nominal attribute value mapping can and should be performed beforehand. In these
* cases the usage of this class is definitely not recommended.</p>
*
* @author Ingo Mierswa
* @version $Id: ExampleSetFactory.java,v 1.3 2008/05/09 19:22:42 ingomierswa Exp $
*/
public class ExampleSetFactory {

    /** Message prefix identifying the {@code (double[][], int)} factory method. */
    private static final String SIG_DOUBLE_COLUMN = "ExampleSetFactory.createExampleSet(double[][], int)";

    /** Message prefix identifying the {@code (double[][], double[])} factory method. */
    private static final String SIG_DOUBLE_LABELS = "ExampleSetFactory.createExampleSet(double[][], double[])";

    /** Message prefix identifying the {@code (Object[][], int)} factory method. */
    private static final String SIG_OBJECT_COLUMN = "ExampleSetFactory.createExampleSet(Object[][], int)";

    /** Message prefix identifying the {@code (Object[][], Object[])} factory method. */
    private static final String SIG_OBJECT_LABELS = "ExampleSetFactory.createExampleSet(Object[][], Object[])";

    /** Message suffix used when the data matrix has no rows. */
    private static final String EMPTY_MATRIX = ": data matrix is not allowed to be empty.";

    /** Message suffix used when a value is neither a String nor a Number. */
    private static final String UNSUPPORTED_TYPE = ": only objects of type String or Number (Double, Integer) are allowed for the object data matrix.";

    /** Message suffix used when a column mixes String and Number values. */
    private static final String TYPE_CHANGED = ": type of objects did change in column. Only the same type of objects is allowed for complete columns.";

    /**
     * Creates a numerical example set from the given data matrix. The resulting example
     * set does not contain a label and consists of numerical attributes only.
     *
     * @param data the data matrix, one row per example; must contain at least one row
     * @return the example set built from the matrix
     * @throws IllegalArgumentException if the matrix is empty or its rows differ in length
     */
    public static ExampleSet createExampleSet(double[][] data) {
        return createExampleSet(data, (double[]) null);
    }

    /**
     * Creates a numerical example set from the given data matrix. The label of the
     * resulting example set is built from the column with the given index; all other
     * columns become regular numerical attributes.
     *
     * @param data the data matrix, one row per example; must contain at least one row
     * @param classColumn the zero-based index of the column holding the label values
     * @return the example set built from the matrix
     * @throws IllegalArgumentException if the matrix is empty, if {@code classColumn} is not a
     *         valid column index of the first row, or if the rows differ in length
     */
    public static ExampleSet createExampleSet(double[][] data, int classColumn) {
        requireNonEmpty(data.length, SIG_DOUBLE_COLUMN);
        int numberOfColumns = data[0].length;
        requireValidClassColumn(classColumn, numberOfColumns, SIG_DOUBLE_COLUMN);

        double[][] dataWithoutLabel = new double[data.length][numberOfColumns - 1];
        double[] labels = new double[data.length];
        for (int e = 0; e < data.length; e++) {
            double[] row = data[e];
            requireRowLength(row.length, numberOfColumns, e, SIG_DOUBLE_COLUMN);
            labels[e] = row[classColumn];
            // copy the values before and after the class column, skipping the class column itself
            System.arraycopy(row, 0, dataWithoutLabel[e], 0, classColumn);
            System.arraycopy(row, classColumn + 1, dataWithoutLabel[e], classColumn, numberOfColumns - classColumn - 1);
        }
        return createExampleSet(dataWithoutLabel, labels);
    }

    /**
     * Creates a numerical example set from the given data matrix. The label of the
     * resulting example set is built from the given double array. The regular attributes
     * are named {@code att1}, {@code att2}, ... and the label attribute is named {@code label}.
     * The example set is created from a {@link MemoryExampleTable}.
     *
     * @param data the data matrix, one row per example; must contain at least one row
     * @param labels the label value for each row, or {@code null} to create an example set
     *        without a label; entries beyond the number of rows are ignored
     * @return the example set built from the matrix
     * @throws IllegalArgumentException if the matrix is empty, if the rows differ in length,
     *         or if fewer labels than rows are given
     */
    public static ExampleSet createExampleSet(double[][] data, double[] labels) {
        requireNonEmpty(data.length, SIG_DOUBLE_LABELS);
        if (labels != null) {
            requireLabelForEachRow(labels.length, data.length, SIG_DOUBLE_LABELS);
        }

        // create attributes
        int numberOfAttributes = data[0].length;
        List<Attribute> attributeList = new ArrayList<Attribute>(numberOfAttributes + (labels != null ? 1 : 0));
        for (int a = 0; a < numberOfAttributes; a++) {
            attributeList.add(AttributeFactory.createAttribute("att" + (a + 1), Ontology.NUMERICAL));
        }
        Attribute labelAttribute = null;
        if (labels != null) {
            labelAttribute = AttributeFactory.createAttribute("label", Ontology.NUMERICAL);
            attributeList.add(labelAttribute);
        }

        // create table
        MemoryExampleTable table = new MemoryExampleTable(attributeList);
        for (int e = 0; e < data.length; e++) {
            requireRowLength(data[e].length, numberOfAttributes, e, SIG_DOUBLE_LABELS);
            // without a label the caller's row array is handed to the data row as-is (no copy is made here)
            double[] dataRow = data[e];
            if (labelAttribute != null) {
                // the label is stored as an additional last column
                dataRow = new double[numberOfAttributes + 1];
                System.arraycopy(data[e], 0, dataRow, 0, numberOfAttributes);
                dataRow[numberOfAttributes] = labels[e];
            }
            table.addDataRow(new DoubleArrayDataRow(dataRow));
        }
        return table.createExampleSet(labelAttribute);
    }

    /**
     * Creates a mixed-type example set from the given data matrix. The resulting example
     * set does not contain a label and might consist of numerical and nominal attributes.
     *
     * @param data the data matrix, one row per example; must contain at least one row and
     *        only String or Number objects
     * @return the example set built from the matrix
     * @throws IllegalArgumentException if the matrix is empty, if the rows differ in length,
     *         if a value is neither a String nor a Number, or if a column mixes both types
     */
    public static ExampleSet createExampleSet(Object[][] data) {
        return createExampleSet(data, (Object[]) null);
    }

    /**
     * Creates a mixed-type example set from the given data matrix. The label of the
     * resulting example set is built from the column with the given index. The example
     * set might consist of numerical and nominal attributes.
     *
     * @param data the data matrix, one row per example; must contain at least one row and
     *        only String or Number objects
     * @param classColumn the zero-based index of the column holding the label values
     * @return the example set built from the matrix
     * @throws IllegalArgumentException if the matrix is empty, if {@code classColumn} is not a
     *         valid column index of the first row, if the rows differ in length, if a value is
     *         neither a String nor a Number, or if a column mixes both types
     */
    public static ExampleSet createExampleSet(Object[][] data, int classColumn) {
        requireNonEmpty(data.length, SIG_OBJECT_COLUMN);
        int numberOfColumns = data[0].length;
        requireValidClassColumn(classColumn, numberOfColumns, SIG_OBJECT_COLUMN);

        Object[][] dataWithoutLabel = new Object[data.length][numberOfColumns - 1];
        Object[] labels = new Object[data.length];
        for (int e = 0; e < data.length; e++) {
            Object[] row = data[e];
            requireRowLength(row.length, numberOfColumns, e, SIG_OBJECT_COLUMN);
            labels[e] = row[classColumn];
            // copy the values before and after the class column, skipping the class column itself
            System.arraycopy(row, 0, dataWithoutLabel[e], 0, classColumn);
            System.arraycopy(row, classColumn + 1, dataWithoutLabel[e], classColumn, numberOfColumns - classColumn - 1);
        }
        return createExampleSet(dataWithoutLabel, labels);
    }

    /**
     * Creates a mixed-type example set from the given data matrix. The label of the
     * resulting example set is built from the given object array. The example
     * set might consist of numerical and nominal attributes.
     *
     * <p>The type of each column (and of the label) is determined from its first value:
     * a Number yields a numerical attribute, a String yields a nominal attribute. All
     * following values of that column must be of the same kind. Strings are converted to
     * their internal value via the mapping of the corresponding attribute. The regular
     * attributes are named {@code att1}, {@code att2}, ... and the label attribute is named
     * {@code label}. The example set is created from a {@link MemoryExampleTable}.</p>
     *
     * @param data the data matrix, one row per example; must contain at least one row and
     *        only String or Number objects
     * @param labels the label value for each row (String or Number objects), or {@code null}
     *        to create an example set without a label; entries beyond the number of rows
     *        are ignored
     * @return the example set built from the matrix
     * @throws IllegalArgumentException if the matrix is empty, if the rows differ in length,
     *         if fewer labels than rows are given, if a value is neither a String nor a
     *         Number (this includes {@code null}), or if a column mixes both types
     */
    public static ExampleSet createExampleSet(Object[][] data, Object[] labels) {
        requireNonEmpty(data.length, SIG_OBJECT_LABELS);
        if (labels != null) {
            requireLabelForEachRow(labels.length, data.length, SIG_OBJECT_LABELS);
        }

        // create attributes, deriving each column type from the first row
        int numberOfAttributes = data[0].length;
        int totalNumber = numberOfAttributes + (labels != null ? 1 : 0);
        boolean[] nominal = new boolean[totalNumber];
        List<Attribute> attributeList = new ArrayList<Attribute>(totalNumber);
        for (int a = 0; a < numberOfAttributes; a++) {
            nominal[a] = isNominalValue(data[0][a]);
            attributeList.add(AttributeFactory.createAttribute("att" + (a + 1), nominal[a] ? Ontology.NOMINAL : Ontology.NUMERICAL));
        }
        Attribute labelAttribute = null;
        if (labels != null) {
            // the label occupies the last column of the table
            nominal[totalNumber - 1] = isNominalValue(labels[0]);
            labelAttribute = AttributeFactory.createAttribute("label", nominal[totalNumber - 1] ? Ontology.NOMINAL : Ontology.NUMERICAL);
            attributeList.add(labelAttribute);
        }

        // create table
        MemoryExampleTable table = new MemoryExampleTable(attributeList);
        for (int e = 0; e < data.length; e++) {
            Object[] row = data[e];
            requireRowLength(row.length, numberOfAttributes, e, SIG_OBJECT_LABELS);
            double[] dataRow = new double[totalNumber];
            for (int a = 0; a < numberOfAttributes; a++) {
                dataRow[a] = toDoubleValue(row[a], attributeList.get(a), nominal[a]);
            }
            if (labelAttribute != null) {
                dataRow[totalNumber - 1] = toDoubleValue(labels[e], labelAttribute, nominal[totalNumber - 1]);
            }
            table.addDataRow(new DoubleArrayDataRow(dataRow));
        }
        return table.createExampleSet(labelAttribute);
    }

    /** Rejects a data matrix without rows. */
    private static void requireNonEmpty(int numberOfRows, String signature) {
        if (numberOfRows == 0) {
            throw new IllegalArgumentException(signature + EMPTY_MATRIX);
        }
    }

    /** Rejects a class column index that does not address a column of the first row. */
    private static void requireValidClassColumn(int classColumn, int numberOfColumns, String signature) {
        if (classColumn < 0 || classColumn >= numberOfColumns) {
            throw new IllegalArgumentException(signature + ": class column index " + classColumn
                    + " is out of range for a data matrix with " + numberOfColumns + " column(s).");
        }
    }

    /** Rejects a row whose length differs from the length of the first row. */
    private static void requireRowLength(int actual, int expected, int rowIndex, String signature) {
        if (actual != expected) {
            throw new IllegalArgumentException(signature + ": row " + rowIndex + " has " + actual
                    + " column(s) but the first row has " + expected
                    + ". All rows must have the same number of columns.");
        }
    }

    /** Rejects a label array that does not provide a label for every data row. */
    private static void requireLabelForEachRow(int numberOfLabels, int numberOfRows, String signature) {
        if (numberOfLabels < numberOfRows) {
            throw new IllegalArgumentException(signature + ": only " + numberOfLabels
                    + " label(s) given for " + numberOfRows + " data row(s).");
        }
    }

    /** Returns true for a String, false for a Number, and rejects every other value. */
    private static boolean isNominalValue(Object value) {
        if (value instanceof Number) {
            return false;
        }
        if (value instanceof String) {
            return true;
        }
        throw new IllegalArgumentException(SIG_OBJECT_LABELS + UNSUPPORTED_TYPE);
    }

    /** Converts one cell to its double value, rejecting values that do not match the column type. */
    private static double toDoubleValue(Object value, Attribute attribute, boolean nominal) {
        if (value instanceof Number) {
            if (nominal) {
                throw new IllegalArgumentException(SIG_OBJECT_LABELS + TYPE_CHANGED);
            }
            return ((Number) value).doubleValue();
        }
        if (value instanceof String) {
            if (!nominal) {
                throw new IllegalArgumentException(SIG_OBJECT_LABELS + TYPE_CHANGED);
            }
            return attribute.getMapping().mapString((String) value);
        }
        throw new IllegalArgumentException(SIG_OBJECT_LABELS + UNSUPPORTED_TYPE);
    }
}