/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.example.table;
import java.util.logging.Level;
import com.rapidminer.example.Attribute;
import com.rapidminer.tools.LogService;
import com.rapidminer.tools.Tools;
/**
* Factory class for DataRow objects. One factory should be used for one ExampleTable only. This
* class is necessary to customize implementations of DataRowReader to create DataRows of arbitrary
* type.
*
* @author Ingo Mierswa, Simon Fischer
*/
public class DataRowFactory {

    /**
     * Human-readable names of the data row types, indexed by the corresponding {@code TYPE_*}
     * constant. Note that this array contains 15 entries (indices 0 to 14) and therefore has no
     * entry for {@link #TYPE_COLUMN_VIEW}; do not index it with that constant.
     */
    public static final String[] TYPE_NAMES = { "double_array", "float_array", "long_array", "int_array", "short_array",
            "byte_array", "boolean_array", "double_sparse_array", "float_sparse_array", "long_sparse_array",
            "int_sparse_array", "short_sparse_array", "byte_sparse_array", "boolean_sparse_array", "sparse_map" };

    /** Smallest type index accepted by the constructors. */
    public static final int FIRST_TYPE_INDEX = 0;

    public static final int TYPE_DOUBLE_ARRAY = 0;

    public static final int TYPE_FLOAT_ARRAY = 1;

    public static final int TYPE_LONG_ARRAY = 2;

    public static final int TYPE_INT_ARRAY = 3;

    public static final int TYPE_SHORT_ARRAY = 4;

    public static final int TYPE_BYTE_ARRAY = 5;

    public static final int TYPE_BOOLEAN_ARRAY = 6;

    public static final int TYPE_DOUBLE_SPARSE_ARRAY = 7;

    public static final int TYPE_FLOAT_SPARSE_ARRAY = 8;

    public static final int TYPE_LONG_SPARSE_ARRAY = 9;

    public static final int TYPE_INT_SPARSE_ARRAY = 10;

    public static final int TYPE_SHORT_SPARSE_ARRAY = 11;

    public static final int TYPE_BYTE_SPARSE_ARRAY = 12;

    public static final int TYPE_BOOLEAN_SPARSE_ARRAY = 13;

    public static final int TYPE_SPARSE_MAP = 14;

    /** Rows of this type are created as {@link DoubleArrayDataRow}s by {@link #create(int)}. */
    public static final int TYPE_COLUMN_VIEW = 15;

    /** Largest type index accepted by the constructors. */
    public static final int LAST_TYPE_INDEX = 15;

    /**
     * Lies outside of [{@link #FIRST_TYPE_INDEX}, {@link #LAST_TYPE_INDEX}] and is therefore
     * rejected by the constructors of this class.
     */
    public static final int TYPE_SPECIAL = -1;

    /** The decimal point character expected by {@link Double#parseDouble(String)}. */
    public static final char POINT_AS_DECIMAL_CHARACTER = '.';

    /**
     * The type can be one out of TYPE_DOUBLE_ARRAY, TYPE_FLOAT_ARRAY, TYPE_LONG_ARRAY,
     * TYPE_INT_ARRAY, TYPE_SHORT_ARRAY, TYPE_BYTE_ARRAY, TYPE_BOOLEAN_ARRAY,
     * TYPE_DOUBLE_SPARSE_ARRAY, TYPE_FLOAT_SPARSE_ARRAY, TYPE_LONG_SPARSE_ARRAY,
     * TYPE_INT_SPARSE_ARRAY, TYPE_SHORT_SPARSE_ARRAY, TYPE_BYTE_SPARSE_ARRAY,
     * TYPE_BOOLEAN_SPARSE_ARRAY, TYPE_SPARSE_MAP or TYPE_COLUMN_VIEW.
     */
    private final int type;

    /** The decimal point character used when parsing numbers from strings. */
    private final char decimalPointCharacter;

    /**
     * Creates a factory which uses {@link #POINT_AS_DECIMAL_CHARACTER} as decimal point character.
     *
     * @param type
     *            must be one out of TYPE_DOUBLE_ARRAY, TYPE_FLOAT_ARRAY, TYPE_LONG_ARRAY,
     *            TYPE_INT_ARRAY, TYPE_SHORT_ARRAY, TYPE_BYTE_ARRAY, TYPE_BOOLEAN_ARRAY,
     *            TYPE_DOUBLE_SPARSE_ARRAY, TYPE_FLOAT_SPARSE_ARRAY, TYPE_LONG_SPARSE_ARRAY,
     *            TYPE_INT_SPARSE_ARRAY, TYPE_SHORT_SPARSE_ARRAY, TYPE_BYTE_SPARSE_ARRAY,
     *            TYPE_BOOLEAN_SPARSE_ARRAY, TYPE_SPARSE_MAP or TYPE_COLUMN_VIEW.
     * @throws IllegalArgumentException
     *             if the type is smaller than {@link #FIRST_TYPE_INDEX} or larger than
     *             {@link #LAST_TYPE_INDEX}
     * @deprecated Please do not use this constructor any longer. Use the constructor
     *             {@link #DataRowFactory(int, char)} instead.
     */
    @Deprecated
    public DataRowFactory(int type) {
        this(type, POINT_AS_DECIMAL_CHARACTER);
    }

    /**
     * @param type
     *            must be one out of TYPE_DOUBLE_ARRAY, TYPE_FLOAT_ARRAY, TYPE_LONG_ARRAY,
     *            TYPE_INT_ARRAY, TYPE_SHORT_ARRAY, TYPE_BYTE_ARRAY, TYPE_BOOLEAN_ARRAY,
     *            TYPE_DOUBLE_SPARSE_ARRAY, TYPE_FLOAT_SPARSE_ARRAY, TYPE_LONG_SPARSE_ARRAY,
     *            TYPE_INT_SPARSE_ARRAY, TYPE_SHORT_SPARSE_ARRAY, TYPE_BYTE_SPARSE_ARRAY,
     *            TYPE_BOOLEAN_SPARSE_ARRAY, TYPE_SPARSE_MAP or TYPE_COLUMN_VIEW.
     * @param decimalPointCharacter
     *            the letter for decimal points, usually '.'
     * @throws IllegalArgumentException
     *             if the type is smaller than {@link #FIRST_TYPE_INDEX} or larger than
     *             {@link #LAST_TYPE_INDEX}
     */
    public DataRowFactory(int type, char decimalPointCharacter) {
        if (type < FIRST_TYPE_INDEX || type > LAST_TYPE_INDEX) {
            throw new IllegalArgumentException("Illegal data row type: " + type);
        }
        this.type = type;
        this.decimalPointCharacter = decimalPointCharacter;
    }

    /**
     * Creates a new DataRow of this factory's type for the given size.
     * <p>
     * Dense array types are backed by a freshly allocated array of exactly {@code size} elements.
     * Sparse array types are constructed with a quarter of {@code size} as constructor argument
     * (presumably their initial capacity). {@link #TYPE_SPARSE_MAP} ignores {@code size}, and
     * {@link #TYPE_COLUMN_VIEW} yields the same row implementation as {@link #TYPE_DOUBLE_ARRAY}.
     *
     * @param size
     *            the number of values the row should be able to hold
     * @return the new data row
     */
    public DataRow create(int size) {
        switch (type) {
            case TYPE_DOUBLE_ARRAY:
            case TYPE_COLUMN_VIEW:
                return new DoubleArrayDataRow(new double[size]);
            case TYPE_FLOAT_ARRAY:
                return new FloatArrayDataRow(new float[size]);
            case TYPE_LONG_ARRAY:
                return new LongArrayDataRow(new long[size]);
            case TYPE_INT_ARRAY:
                return new IntArrayDataRow(new int[size]);
            case TYPE_SHORT_ARRAY:
                return new ShortArrayDataRow(new short[size]);
            case TYPE_BYTE_ARRAY:
                return new ByteArrayDataRow(new byte[size]);
            case TYPE_BOOLEAN_ARRAY:
                return new BooleanArrayDataRow(new boolean[size]);
            case TYPE_DOUBLE_SPARSE_ARRAY:
                // same sizing rule as all other sparse array types
                return new DoubleSparseArrayDataRow(size >> 2);
            case TYPE_FLOAT_SPARSE_ARRAY:
                return new FloatSparseArrayDataRow(size >> 2);
            case TYPE_LONG_SPARSE_ARRAY:
                return new LongSparseArrayDataRow(size >> 2);
            case TYPE_INT_SPARSE_ARRAY:
                return new IntSparseArrayDataRow(size >> 2);
            case TYPE_SHORT_SPARSE_ARRAY:
                return new ShortSparseArrayDataRow(size >> 2);
            case TYPE_BYTE_SPARSE_ARRAY:
                return new ByteSparseArrayDataRow(size >> 2);
            case TYPE_BOOLEAN_SPARSE_ARRAY:
                return new BooleanSparseArrayDataRow(size >> 2);
            case TYPE_SPARSE_MAP:
                return new SparseMapDataRow();
            default:
                // Cannot happen: the constructor validates the type and every valid type is
                // handled above. Fail loudly instead of handing out null if that ever changes.
                throw new IllegalStateException("Illegal data row type: " + type);
        }
    }

    /**
     * Creates a data row from an array of Strings. Each string is trimmed first (the given array
     * itself is not modified). Strings which are <code>null</code>, empty, or equal to "?" are
     * stored as <code>Double.NaN</code>. If the corresponding attribute is nominal, the string is
     * unescaped via <code>Tools.unescape(String)</code> and mapped to its index, otherwise it is
     * parsed using <code>Double.parseDouble(String)</code> after the decimal point character of
     * this factory has been replaced by '.'. Strings which cannot be parsed are logged and stored
     * as <code>Double.NaN</code>.
     *
     * @param strings
     *            the raw values, one per attribute
     * @param attributes
     *            the attributes; must contain at least as many entries as <code>strings</code>
     * @return the new data row, on which <code>trim()</code> has been called
     * @see FileDataRowReader
     */
    public DataRow create(String[] strings, Attribute[] attributes) {
        DataRow dataRow = create(strings.length);
        for (int i = 0; i < strings.length; i++) {
            // trim into a local so that the caller's array is left untouched
            String value = strings[i] == null ? null : strings[i].trim();
            Attribute attribute = attributes[i];
            if (value == null || value.isEmpty() || "?".equals(value)) {
                dataRow.set(attribute, Double.NaN);
            } else if (attribute.isNominal()) {
                dataRow.set(attribute, attribute.getMapping().mapString(Tools.unescape(value)));
            } else {
                dataRow.set(attribute, string2Double(value, decimalPointCharacter));
            }
        }
        dataRow.trim();
        return dataRow;
    }

    /**
     * Creates a data row from an Object array. The classes of the object must match the value type
     * of the corresponding {@link Attribute}. If the corresponding attribute is nominal,
     * <code>data[i]</code> will be cast to String, trimmed, and mapped to its index. Otherwise it
     * will be cast to Number. <code>null</code> entries are stored as <code>Double.NaN</code>.
     *
     * @param data
     *            the values, one per attribute
     * @param attributes
     *            the attributes; must contain at least as many entries as <code>data</code>
     * @return the new data row, on which <code>trim()</code> has been called
     * @throws ClassCastException
     *             if data class does not match attribute type
     * @see DatabaseDataRowReader
     */
    public DataRow create(Object[] data, Attribute[] attributes) {
        DataRow dataRow = create(data.length);
        for (int i = 0; i < data.length; i++) {
            Object value = data[i];
            Attribute attribute = attributes[i];
            if (value == null) {
                dataRow.set(attribute, Double.NaN);
            } else if (attribute.isNominal()) {
                dataRow.set(attribute, attribute.getMapping().mapString(((String) value).trim()));
            } else {
                dataRow.set(attribute, ((Number) value).doubleValue());
            }
        }
        dataRow.trim();
        return dataRow;
    }

    /**
     * Creates a data row from a Double array. If the corresponding attribute is nominal, the
     * string representation of <code>data[i]</code> (as returned by
     * <code>String.valueOf(Object)</code>) is mapped to its index. Otherwise the double value is
     * stored directly. <code>null</code> entries are stored as <code>Double.NaN</code>.
     *
     * @param data
     *            the values, one per attribute
     * @param attributes
     *            the attributes; must contain at least as many entries as <code>data</code>
     * @return the new data row, on which <code>trim()</code> has been called
     */
    public DataRow create(Double[] data, Attribute[] attributes) {
        DataRow dataRow = create(data.length);
        for (int i = 0; i < data.length; i++) {
            Double value = data[i];
            Attribute attribute = attributes[i];
            if (value == null) {
                dataRow.set(attribute, Double.NaN);
            } else if (attribute.isNominal()) {
                // Double.toString never yields surrounding whitespace, so no trimming is needed
                dataRow.set(attribute, attribute.getMapping().mapString(String.valueOf(value)));
            } else {
                dataRow.set(attribute, value.doubleValue());
            }
        }
        dataRow.trim();
        return dataRow;
    }

    /** Returns the type of the created data rows. */
    public int getType() {
        return type;
    }

    // --------------------------------------------------------------------------------

    /**
     * Parses the given string as a double after replacing every occurrence of the given decimal
     * point character by '.'.
     *
     * @param str
     *            the string to parse, may be <code>null</code>
     * @param decimalPointCharacter
     *            the character used as decimal point in <code>str</code>
     * @return the parsed value, or <code>Double.NaN</code> if <code>str</code> is
     *         <code>null</code> or not a valid number (the latter case is logged with level
     *         SEVERE)
     */
    private static double string2Double(String str, char decimalPointCharacter) {
        if (str == null) {
            return Double.NaN;
        }
        String normalized = str.replace(decimalPointCharacter, POINT_AS_DECIMAL_CHARACTER);
        try {
            return Double.parseDouble(normalized);
        } catch (NumberFormatException e) {
            // Deliberately best-effort: an unparsable value becomes a missing value, but the
            // (normalized) offending string is reported in the log.
            LogService.getRoot().log(Level.SEVERE,
                    "com.rapidminer.example.table.DataRowFactory.datarowfactory_is_not_a_valid_number", normalized);
            return Double.NaN;
        }
    }
}