/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.example.table;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.rapidminer.example.Attribute;
import com.rapidminer.tools.LogService;

import de.tud.inf.example.set.attributevalues.ComplexValue;
import de.tud.inf.example.table.ComplexAttribute;
import de.tud.inf.example.table.RelationalAttribute;

/**
 * Factory class for DataRow objects. One factory should be used for one
 * ExampleTable only. This class is necessary to customize implementations of
 * DataRowReader to create DataRows of arbitrary type.
 *
 * @author Ingo Mierswa, Simon Fischer
 * @version $Id: DataRowFactory.java,v 1.10 2008/07/13 23:25:24 ingomierswa Exp $
 */
public class DataRowFactory {

	/**
	 * Human readable names of the data row types, indexed by the type constants
	 * {@link #TYPE_DOUBLE_ARRAY} to {@link #TYPE_SPARSE_MAP}. Note that there is
	 * no entry for {@link #TYPE_RELATIONAL_EXTENDED}.
	 */
	public static final String[] TYPE_NAMES = {
		"double_array",
		"float_array",
		"long_array",
		"int_array",
		"short_array",
		"byte_array",
		"boolean_array",
		"double_sparse_array",
		"float_sparse_array",
		"long_sparse_array",
		"int_sparse_array",
		"short_sparse_array",
		"byte_sparse_array",
		"boolean_sparse_array",
		"sparse_map"
	};

	public static final int FIRST_TYPE_INDEX = 0;

	public static final int TYPE_DOUBLE_ARRAY = 0;

	public static final int TYPE_FLOAT_ARRAY = 1;

	public static final int TYPE_LONG_ARRAY = 2;

	public static final int TYPE_INT_ARRAY = 3;

	public static final int TYPE_SHORT_ARRAY = 4;

	public static final int TYPE_BYTE_ARRAY = 5;

	public static final int TYPE_BOOLEAN_ARRAY = 6;

	public static final int TYPE_DOUBLE_SPARSE_ARRAY = 7;

	public static final int TYPE_FLOAT_SPARSE_ARRAY = 8;

	public static final int TYPE_LONG_SPARSE_ARRAY = 9;

	public static final int TYPE_INT_SPARSE_ARRAY = 10;

	public static final int TYPE_SHORT_SPARSE_ARRAY = 11;

	public static final int TYPE_BYTE_SPARSE_ARRAY = 12;

	public static final int TYPE_BOOLEAN_SPARSE_ARRAY = 13;

	public static final int TYPE_SPARSE_MAP = 14;

	public static final int TYPE_RELATIONAL_EXTENDED = 15;

	public static final int LAST_TYPE_INDEX = 15;

	/** Separates the values of one tuple inside a relational attribute string. */
	private static final String VALUE_SEPARATOR = ",";

	/** Separates the tuples inside a relational attribute string. */
	private static final String TUPLE_SEPARATOR = "\n";

	/**
	 * The type can be one out of
	 * TYPE_DOUBLE_ARRAY, TYPE_FLOAT_ARRAY, TYPE_LONG_ARRAY, TYPE_INT_ARRAY, TYPE_SHORT_ARRAY, TYPE_BYTE_ARRAY, TYPE_BOOLEAN_ARRAY,
	 * TYPE_DOUBLE_SPARSE_ARRAY, TYPE_FLOAT_SPARSE_ARRAY, TYPE_LONG_SPARSE_ARRAY, TYPE_INT_SPARSE_ARRAY, TYPE_SHORT_SPARSE_ARRAY, TYPE_BYTE_SPARSE_ARRAY, TYPE_BOOLEAN_SPARSE_ARRAY,
	 * or TYPE_SPARSE_MAP. The constructor additionally accepts TYPE_RELATIONAL_EXTENDED.
	 */
	private final int type;

	/** The decimal point character. */
	private final char decimalPointCharacter;

	/**
	 * @param type
	 *            must be one out of
	 *            TYPE_DOUBLE_ARRAY, TYPE_FLOAT_ARRAY, TYPE_LONG_ARRAY, TYPE_INT_ARRAY, TYPE_SHORT_ARRAY, TYPE_BYTE_ARRAY, TYPE_BOOLEAN_ARRAY,
	 *            TYPE_DOUBLE_SPARSE_ARRAY, TYPE_FLOAT_SPARSE_ARRAY, TYPE_LONG_SPARSE_ARRAY, TYPE_INT_SPARSE_ARRAY, TYPE_SHORT_SPARSE_ARRAY, TYPE_BYTE_SPARSE_ARRAY, TYPE_BOOLEAN_SPARSE_ARRAY,
	 *            or TYPE_SPARSE_MAP.
	 * @throws IllegalArgumentException
	 *             if the type is outside of [FIRST_TYPE_INDEX, LAST_TYPE_INDEX]
	 * @deprecated Please do not use this constructor any longer. Use the constructor {@link #DataRowFactory(int, char)} instead.
	 */
	@Deprecated
	public DataRowFactory(int type) {
		this(type, '.');
	}

	/**
	 * @param type
	 *            must be one out of
	 *            TYPE_DOUBLE_ARRAY, TYPE_FLOAT_ARRAY, TYPE_LONG_ARRAY, TYPE_INT_ARRAY, TYPE_SHORT_ARRAY, TYPE_BYTE_ARRAY, TYPE_BOOLEAN_ARRAY,
	 *            TYPE_DOUBLE_SPARSE_ARRAY, TYPE_FLOAT_SPARSE_ARRAY, TYPE_LONG_SPARSE_ARRAY, TYPE_INT_SPARSE_ARRAY, TYPE_SHORT_SPARSE_ARRAY, TYPE_BYTE_SPARSE_ARRAY, TYPE_BOOLEAN_SPARSE_ARRAY,
	 *            or TYPE_SPARSE_MAP.
	 * @param decimalPointCharacter the letter for decimal points, usually '.'
	 * @throws IllegalArgumentException
	 *             if the type is outside of [FIRST_TYPE_INDEX, LAST_TYPE_INDEX]
	 */
	public DataRowFactory(int type, char decimalPointCharacter) {
		if ((type < FIRST_TYPE_INDEX) || (type > LAST_TYPE_INDEX)) {
			throw new IllegalArgumentException("Illegal data row type: " + type);
		}
		this.type = type;
		this.decimalPointCharacter = decimalPointCharacter;
	}

	/**
	 * Creates a new DataRow with the given initial capacity. Dense types
	 * allocate an array of exactly <code>size</code> entries, sparse array
	 * types are created with a quarter of <code>size</code>.
	 *
	 * @param size the number of values the row should be able to hold
	 * @return the new data row, or <code>null</code> if no implementation is
	 *         registered for the type of this factory (this is the case for
	 *         TYPE_RELATIONAL_EXTENDED)
	 */
	public DataRow create(int size) {
		switch (type) {
			case TYPE_DOUBLE_ARRAY:
				return new DoubleArrayDataRow(new double[size]);
			case TYPE_FLOAT_ARRAY:
				return new FloatArrayDataRow(new float[size]);
			case TYPE_LONG_ARRAY:
				return new LongArrayDataRow(new long[size]);
			case TYPE_INT_ARRAY:
				return new IntArrayDataRow(new int[size]);
			case TYPE_SHORT_ARRAY:
				return new ShortArrayDataRow(new short[size]);
			case TYPE_BYTE_ARRAY:
				return new ByteArrayDataRow(new byte[size]);
			case TYPE_BOOLEAN_ARRAY:
				return new BooleanArrayDataRow(new boolean[size]);
			case TYPE_DOUBLE_SPARSE_ARRAY:
				return new DoubleSparseArrayDataRow(size >> 2);
			case TYPE_FLOAT_SPARSE_ARRAY:
				return new FloatSparseArrayDataRow(size >> 2);
			case TYPE_LONG_SPARSE_ARRAY:
				return new LongSparseArrayDataRow(size >> 2);
			case TYPE_INT_SPARSE_ARRAY:
				return new IntSparseArrayDataRow(size >> 2);
			case TYPE_SHORT_SPARSE_ARRAY:
				return new ShortSparseArrayDataRow(size >> 2);
			case TYPE_BYTE_SPARSE_ARRAY:
				return new ByteSparseArrayDataRow(size >> 2);
			case TYPE_BOOLEAN_SPARSE_ARRAY:
				return new BooleanSparseArrayDataRow(size >> 2);
			case TYPE_SPARSE_MAP:
				return new SparseMapDataRow();
			default:
				// No row implementation for this type (e.g. TYPE_RELATIONAL_EXTENDED).
				return null;
		}
	}

	/**
	 * Creates a data row from an array of Strings. If the corresponding
	 * attribute is nominal, the string is mapped to its index. If it is
	 * relational, the string is parsed into a table of doubles (tuples
	 * separated by line breaks, values separated by commas). Otherwise it is
	 * parsed using <code>Double.parseDouble(String)</code>.
	 *
	 * Empty strings, <code>null</code> and "?" are treated as missing: nominal
	 * attributes receive the index of the empty string, all others NaN.
	 *
	 * Note that the entries of <code>strings</code> are trimmed in place.
	 *
	 * @param strings the raw values, one per attribute
	 * @param attributes the attributes describing the values, parallel to <code>strings</code>
	 * @return the filled and trimmed data row
	 * @see FileDataRowReader
	 */
	public DataRow create(String[] strings, Attribute[] attributes) {
		DataRow dataRow = create(strings.length);
		// stays null as long as no relational attribute with a value was seen
		Map<Integer, double[][]> relationalValues = null;
		for (int i = 0; i < strings.length; i++) {
			if (strings[i] != null) {
				strings[i] = strings[i].trim();
			}
			if ((strings[i] != null) && (strings[i].length() > 0) && (!strings[i].equals("?"))) {
				if (attributes[i].isNominal()) {
					dataRow.set(attributes[i], attributes[i].getMapping().mapString(strings[i]));
				} else if (attributes[i].isRelational()) {
					dataRow.initRelationalMap();
					if (relationalValues == null) {
						relationalValues = new HashMap<Integer, double[][]>();
					}
					// the position in the string array is used as key of the relational value
					relationalValues.put(i, parseRelationalValue(strings[i], (RelationalAttribute) attributes[i]));
				} else {
					dataRow.set(attributes[i], string2Double(strings[i], this.decimalPointCharacter));
				}
			} else {
				// missing value
				if (attributes[i].isRelational()) {
					dataRow.initRelationalMap();
				}
				if (attributes[i].isNominal()) {
					dataRow.set(attributes[i], attributes[i].getMapping().mapString(""));
				} else {
					dataRow.set(attributes[i], Double.NaN);
				}
			}
		}
		dataRow.trim();
		if (relationalValues != null) {
			dataRow.setRelationalValues(relationalValues);
		}
		return dataRow;
	}

	/**
	 * Parses the string representation of one relational value into a table
	 * with one row per tuple and one column per inner attribute. Nominal
	 * inner attributes are mapped to their index, all others are parsed as
	 * numbers.
	 *
	 * @throws ArrayIndexOutOfBoundsException
	 *             if a tuple contains fewer values than there are inner attributes
	 */
	private double[][] parseRelationalValue(String value, RelationalAttribute attribute) {
		String[] tuples = value.split(TUPLE_SEPARATOR);
		int innerCount = attribute.getInnerAttributes().size();

		// split every tuple only once instead of once per inner attribute
		String[][] cells = new String[tuples.length][];
		for (int t = 0; t < tuples.length; t++) {
			cells[t] = tuples[t].split(VALUE_SEPARATOR);
		}

		double[][] result = new double[tuples.length][innerCount];
		// column-wise iteration keeps the order in which nominal values are mapped
		for (int ia = 0; ia < innerCount; ia++) {
			Attribute innerAttribute = attribute.getInnerAttributes().get(ia);
			if (innerAttribute.isNominal()) {
				for (int t = 0; t < tuples.length; t++) {
					result[t][ia] = innerAttribute.getMapping().mapString(cells[t][ia]);
				}
			} else {
				for (int t = 0; t < tuples.length; t++) {
					result[t][ia] = string2Double(cells[t][ia], this.decimalPointCharacter);
				}
			}
		}
		return result;
	}

	/**
	 * Creates a data row from an Object array. The classes of the object must
	 * match the value type of the corresponding {@link Attribute}. If the
	 * corresponding attribute is nominal, <code>data[i]</code> will be cast
	 * to String. If it is numerical, it will be cast to Number. If it is
	 * complex, it will be cast to {@link ComplexValue}. <code>null</code>
	 * entries of non-complex attributes are stored as NaN.
	 *
	 * The size of the created row is the number of simple attributes plus, for
	 * each complex attribute, its inner attribute count and parameter count.
	 *
	 * @param data the values, one per attribute
	 * @param attributes the attributes describing the values, parallel to <code>data</code>
	 * @return the filled and trimmed data row
	 * @throws ClassCastException
	 *             if data class does not match attribute type
	 * @see DatabaseDataRowReader
	 */
	public DataRow create(Object[] data, Attribute[] attributes) {
		// split the attributes (and their values) in complex and simple ones
		List<ComplexAttribute> complexAttributes = new ArrayList<ComplexAttribute>();
		List<Object> complexObjects = new ArrayList<Object>();
		List<Attribute> simpleAttributes = new ArrayList<Attribute>();
		List<Object> simpleObjects = new ArrayList<Object>();
		int dataRowSize = 0;
		for (int i = 0; i < attributes.length; i++) {
			Attribute attribute = attributes[i];
			if (attribute.isComplex()) {
				ComplexAttribute complexAttribute = (ComplexAttribute) attribute;
				complexAttributes.add(complexAttribute);
				complexObjects.add(data[i]);
				dataRowSize += complexAttribute.getInnerAttributeCount() + complexAttribute.getParameterCount();
			} else {
				simpleAttributes.add(attribute);
				simpleObjects.add(data[i]);
				dataRowSize++;
			}
		}

		DataRow dataRow = create(dataRowSize);

		// set the simple attributes
		for (int i = 0; i < simpleObjects.size(); i++) {
			Attribute attribute = simpleAttributes.get(i);
			Object value = simpleObjects.get(i);
			if (value != null) {
				if (attribute.isNominal()) {
					dataRow.set(attribute, attribute.getMapping().mapString(((String) value).trim()));
				} else {
					dataRow.set(attribute, ((Number) value).doubleValue());
				}
			} else {
				dataRow.set(attribute, Double.NaN);
			}
		}

		// set the complex attributes
		for (int i = 0; i < complexAttributes.size(); i++) {
			dataRow.set(complexAttributes.get(i), (ComplexValue) complexObjects.get(i));
		}
		dataRow.trim();
		return dataRow;
	}

	/**
	 * Creates a data row from a Double array. If the corresponding attribute
	 * is nominal, the string representation of <code>data[i]</code> is mapped
	 * to its index, otherwise the double value is used directly.
	 * <code>null</code> entries are stored as NaN.
	 *
	 * @param data the values, one per attribute
	 * @param attributes the attributes describing the values, parallel to <code>data</code>
	 * @return the filled and trimmed data row
	 */
	public DataRow create(Double[] data, Attribute[] attributes) {
		DataRow dataRow = create(data.length);
		for (int i = 0; i < data.length; i++) {
			if (data[i] != null) {
				if (attributes[i].isNominal()) {
					dataRow.set(attributes[i], attributes[i].getMapping().mapString(String.valueOf(data[i]).trim()));
				} else {
					dataRow.set(attributes[i], data[i].doubleValue());
				}
			} else {
				dataRow.set(attributes[i], Double.NaN);
			}
		}
		dataRow.trim();
		return dataRow;
	}

	/** Returns the type of the created data rows. */
	public int getType() {
		return type;
	}

	// --------------------------------------------------------------------------------

	/**
	 * Parses the given string as double after replacing the given decimal
	 * point character by '.'. Returns NaN for <code>null</code>. Strings
	 * which are not valid numbers are logged as error and also yield NaN.
	 */
	private static final double string2Double(String str, char decimalPointCharacter) {
		if (str == null) {
			return Double.NaN;
		}
		try {
			str = str.replace(decimalPointCharacter, '.');
			return Double.parseDouble(str);
		} catch (NumberFormatException e) {
			LogService.getGlobal().log("DataRowFactory.string2Double(String): '" + str + "' is not a valid number!", LogService.ERROR);
			return Double.NaN;
		}
	}
}