/*
 *  RapidMiner
 *
 *  Copyright (C) 2001-2008 by Rapid-I and the contributors
 *
 *  Complete list of developers available at our web site:
 *
 *       http://rapid-i.com
 *
 *  This program is free software: you can redistribute it and/or modify
 *  it under the terms of the GNU Affero General Public License as published by
 *  the Free Software Foundation, either version 3 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Affero General Public License for more details.
 *
 *  You should have received a copy of the GNU Affero General Public License
 *  along with this program.  If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.datatable;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

import com.rapidminer.example.Attribute;
import com.rapidminer.example.AttributeRole;
import com.rapidminer.example.AttributeWeights;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.Tools;
import com.rapidminer.example.set.SplittedExampleSet;
import com.rapidminer.example.table.AttributeFactory;
import com.rapidminer.example.table.DoubleArrayDataRow;
import com.rapidminer.example.table.MemoryExampleTable;
import com.rapidminer.gui.RapidMinerGUI;
import com.rapidminer.tools.Ontology;

/**
 * This class can be used to use an example set as data table. The data is directly
 * read from the example set instead of building a copy. Please note that the method
 * for adding new rows is not supported by this type of data tables.
 *
 * <p>Columns are ordered as follows: first all attributes delivered by iterating over
 * {@code exampleSet.getAttributes()}, then all special attributes except the id
 * attribute. Column indices at or beyond {@code exampleSet.getAttributes().size()} are
 * reported as special columns.</p>
 *
 * @author Ingo Mierswa
 * @version $Id: DataTableExampleSetAdapter.java,v 1.11 2008/07/19 16:31:17 ingomierswa Exp $
 */
public class DataTableExampleSetAdapter extends AbstractDataTable {

    /**
     * Row limit used by {@link #sample(int)} when the system property
     * {@code RapidMinerGUI.PROPERTY_RAPIDMINER_GUI_MAX_STATISTICS_ROWS} is absent or not
     * a valid integer. Tables with fewer rows are sampled via a shuffled split, all
     * others via a linear subset copy.
     */
    private static final int DEFAULT_MAX_SIZE_FOR_SHUFFLED_SAMPLING = 100000;

    /** The wrapped example set; replaced by a sampled view/copy in {@link #sample(int)}. */
    private ExampleSet exampleSet;

    /** All columns of this table: iterated attributes first, then non-id special attributes. */
    private final List<Attribute> allAttributes = new ArrayList<Attribute>();

    /** Value of {@code exampleSet.getAttributes().size()} at construction time. */
    private final int numberOfRegularAttributes;

    /** Optional column weights; {@code null} if this table does not support weights. */
    private final AttributeWeights weights;

    /** The id attribute of the example set, may be {@code null}. It is not exposed as a column. */
    private final Attribute idAttribute;

    /**
     * Creates a data table view on the given example set.
     *
     * @param exampleSet the example set to read the data from
     * @param weights    the attribute weights used as column weights, or {@code null}
     *                   if column weights are not available
     */
    public DataTableExampleSetAdapter(ExampleSet exampleSet, AttributeWeights weights) {
        super("Data Table");
        this.exampleSet = exampleSet;
        this.weights = weights;

        for (Attribute attribute : exampleSet.getAttributes()) {
            allAttributes.add(attribute);
        }

        this.idAttribute = exampleSet.getAttributes().getId();

        // Append every special attribute except the id attribute (matched by name).
        Iterator<AttributeRole> specialRoles = exampleSet.getAttributes().specialAttributes();
        while (specialRoles.hasNext()) {
            Attribute specialAttribute = specialRoles.next().getAttribute();
            boolean isIdAttribute =
                    (idAttribute != null) && idAttribute.getName().equals(specialAttribute.getName());
            if (!isIdAttribute) {
                allAttributes.add(specialAttribute);
            }
        }

        this.numberOfRegularAttributes = exampleSet.getAttributes().size();
    }

    /** Returns the number of columns beyond the regular attributes. */
    public int getNumberOfSpecialColumns() {
        return allAttributes.size() - numberOfRegularAttributes;
    }

    /** Returns true if the column index lies at or beyond the number of regular attributes. */
    public boolean isSpecial(int index) {
        return index >= numberOfRegularAttributes;
    }

    /** Returns true if the value type of the column is a {@code Ontology.NOMINAL} type. */
    public boolean isNominal(int index) {
        return hasValueType(index, Ontology.NOMINAL);
    }

    /** Returns true if the value type of the column is a {@code Ontology.DATE} type. */
    public boolean isDate(int index) {
        return hasValueType(index, Ontology.DATE);
    }

    /** Returns true if the value type of the column is a {@code Ontology.TIME} type. */
    public boolean isTime(int index) {
        return hasValueType(index, Ontology.TIME);
    }

    /** Returns true if the value type of the column is a {@code Ontology.DATE_TIME} type. */
    public boolean isDateTime(int index) {
        return hasValueType(index, Ontology.DATE_TIME);
    }

    /** Returns true if the value type of the column is a {@code Ontology.NUMERICAL} type. */
    public boolean isNumerical(int index) {
        return hasValueType(index, Ontology.NUMERICAL);
    }

    /** Checks via the value type ontology whether the column's value type is a {@code valueType}. */
    private boolean hasValueType(int index, int valueType) {
        return Ontology.ATTRIBUTE_VALUE_TYPE.isA(allAttributes.get(index).getValueType(), valueType);
    }

    /** Delegates to the nominal mapping of the column to translate an index into its string. */
    public String mapIndex(int column, int value) {
        return allAttributes.get(column).getMapping().mapIndex(value);
    }

    /** Delegates to the nominal mapping of the column to translate a string into its index. */
    public int mapString(int column, String value) {
        return allAttributes.get(column).getMapping().mapString(value);
    }

    /** Returns the size of the nominal mapping of the column. */
    public int getNumberOfValues(int column) {
        return allAttributes.get(column).getMapping().size();
    }

    /** Returns the name of the attribute backing column {@code i}. */
    public String getColumnName(int i) {
        return allAttributes.get(i).getName();
    }

    /**
     * Returns the index of the first column whose attribute name equals the given
     * name, or -1 if there is no such column.
     */
    public int getColumnIndex(String name) {
        for (int i = 0; i < allAttributes.size(); i++) {
            if (allAttributes.get(i).getName().equals(name)) {
                return i;
            }
        }
        return -1;
    }

    /** Returns true if attribute weights were given at construction time. */
    public boolean isSupportingColumnWeights() {
        return weights != null;
    }

    /**
     * Returns the weight stored for the name of the given column, or
     * {@code Double.NaN} if no weights were given at construction time.
     */
    public double getColumnWeight(int column) {
        if (weights == null) {
            return Double.NaN;
        }
        return weights.getWeight(getColumnName(column));
    }

    /** Returns the total number of columns (regular and special). */
    public int getNumberOfColumns() {
        return this.allAttributes.size();
    }

    /**
     * Not supported: the rows of this table are read directly from the example set.
     *
     * @throws UnsupportedOperationException always
     */
    public void add(DataTableRow row) {
        // UnsupportedOperationException is a RuntimeException, so callers which caught
        // the formerly thrown plain RuntimeException keep working.
        throw new UnsupportedOperationException("DataTableExampleSetAdapter: adding new rows is not supported!");
    }

    /** Wraps the example at the given index into a data table row. */
    public DataTableRow getRow(int index) {
        return new Example2DataTableRowWrapper(exampleSet.getExample(index), allAttributes, idAttribute);
    }

    /** Returns an iterator wrapping the example set's iterator into data table rows. */
    public Iterator<DataTableRow> iterator() {
        return new Example2DataTableRowIterator(exampleSet.iterator(), allAttributes, idAttribute);
    }

    /** Returns the size of the underlying example set. */
    public int getNumberOfRows() {
        return this.exampleSet.size();
    }

    /**
     * Replaces the underlying example set by a sample of it. If the table has fewer
     * rows than the configured maximum (system property
     * {@code RapidMinerGUI.PROPERTY_RAPIDMINER_GUI_MAX_STATISTICS_ROWS}, falling back to
     * {@link #DEFAULT_MAX_SIZE_FOR_SHUFFLED_SAMPLING}), the first subset of a shuffled
     * split with ratio {@code newSize / numberOfRows} is used; otherwise a linear
     * subset copy of {@code newSize} examples starting at offset 0 is used.
     * Sampling an empty table is a no-op.
     *
     * @param newSize the desired number of rows after sampling
     */
    public void sample(int newSize) {
        int rowCount = getNumberOfRows();
        if (rowCount == 0) {
            // Nothing to sample; also avoids a NaN / infinite ratio from dividing by zero.
            return;
        }

        double ratio = (double) newSize / (double) rowCount;

        if (rowCount < getMaxRowsForShuffledSampling()) {
            // NOTE(review): the last argument (-1) is presumably the random seed setting - confirm.
            SplittedExampleSet shuffledSplit =
                    new SplittedExampleSet(exampleSet, ratio, SplittedExampleSet.SHUFFLED_SAMPLING, -1);
            shuffledSplit.selectSingleSubset(0);
            this.exampleSet = shuffledSplit;
        } else {
            this.exampleSet = Tools.getLinearSubsetCopy(this.exampleSet, newSize, 0);
        }
    }

    /**
     * Reads the row limit for shuffled sampling from the system properties and falls
     * back to the default if the property is missing or not an integer.
     */
    private static int getMaxRowsForShuffledSampling() {
        String maxString = System.getProperty(RapidMinerGUI.PROPERTY_RAPIDMINER_GUI_MAX_STATISTICS_ROWS);
        if (maxString == null) {
            return DEFAULT_MAX_SIZE_FOR_SHUFFLED_SAMPLING;
        }
        try {
            return Integer.parseInt(maxString);
        } catch (NumberFormatException ignored) {
            // Deliberately ignored: an unparsable setting simply means the default is used.
            return DEFAULT_MAX_SIZE_FOR_SHUFFLED_SAMPLING;
        }
    }

    /**
     * Creates a new memory based example set from the given data table. One attribute
     * is created per column (date, time, date-time, nominal, or real as fallback).
     * Values of nominal attributes are obtained by mapping the string representation
     * of the table cell; all other values are copied as they are.
     *
     * @param table the data table to convert
     * @return an example set created from a fresh {@link MemoryExampleTable}
     */
    public static ExampleSet createExampleSetFromDataTable(DataTable table) {
        List<Attribute> attributes = new ArrayList<Attribute>();
        for (int column = 0; column < table.getNumberOfColumns(); column++) {
            attributes.add(AttributeFactory.createAttribute(table.getColumnName(column), getValueTypeOfColumn(table, column)));
        }

        MemoryExampleTable exampleTable = new MemoryExampleTable(attributes);
        for (int rowIndex = 0; rowIndex < table.getNumberOfRows(); rowIndex++) {
            DataTableRow row = table.getRow(rowIndex);
            double[] values = new double[attributes.size()];
            for (int a = 0; a < values.length; a++) {
                Attribute attribute = attributes.get(a);
                // The date-time check is evaluated first, so date-time attributes are
                // never routed through the nominal mapping.
                boolean isDateTime = Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME);
                if (!isDateTime && attribute.isNominal()) {
                    values[a] = attribute.getMapping().mapString(table.getValueAsString(row, a));
                } else {
                    values[a] = row.getValue(a);
                }
            }
            exampleTable.addDataRow(new DoubleArrayDataRow(values));
        }

        return exampleTable.createExampleSet();
    }

    /**
     * Selects the value type for the attribute created from the given column. The
     * checks are made in the order date, time, date-time, nominal; everything else
     * becomes {@code Ontology.REAL}.
     */
    private static int getValueTypeOfColumn(DataTable table, int column) {
        if (table.isDate(column)) {
            return Ontology.DATE;
        }
        if (table.isTime(column)) {
            return Ontology.TIME;
        }
        if (table.isDateTime(column)) {
            return Ontology.DATE_TIME;
        }
        if (table.isNominal(column)) {
            return Ontology.NOMINAL;
        }
        return Ontology.REAL;
    }
}