/* * RapidMiner * * Copyright (C) 2001-2011 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.datatable; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import com.rapidminer.ObjectVisualizer; import com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeRole; import com.rapidminer.example.AttributeWeights; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.Tools; import com.rapidminer.example.set.SplittedExampleSet; import com.rapidminer.example.table.AttributeFactory; import com.rapidminer.example.table.DoubleArrayDataRow; import com.rapidminer.example.table.MemoryExampleTable; import com.rapidminer.gui.RapidMinerGUI; import com.rapidminer.operator.UserError; import com.rapidminer.tools.ObjectVisualizerService; import com.rapidminer.tools.Ontology; import com.rapidminer.tools.ParameterService; /** * This class can be used to use an example set as data table. The data is directly * read from the example set instead of building a copy. Please note that the method * for adding new rows is not supported by this type of data tables. * * @author Ingo Mierswa */ public class DataTableExampleSetAdapter extends AbstractDataTable { private static final int DEFAULT_MAX_SIZE_FOR_SHUFFLED_SAMPLING = 100000; private ExampleSet exampleSet; private List<Attribute> allAttributes = new ArrayList<Attribute>(); private int numberOfRegularAttributes = 0; private AttributeWeights weights = null; private Attribute idAttribute; public DataTableExampleSetAdapter(ExampleSet exampleSet, AttributeWeights weights) { super("Data Table"); this.exampleSet = exampleSet; this.weights = weights; for (Attribute attribute : exampleSet.getAttributes()) { allAttributes.add(attribute); } this.idAttribute = exampleSet.getAttributes().getId(); Iterator<AttributeRole> s = exampleSet.getAttributes().specialAttributes(); while (s.hasNext()) { Attribute specialAttribute = s.next().getAttribute(); if (idAttribute == null || !idAttribute.getName().equals(specialAttribute.getName())) { allAttributes.add(specialAttribute); } } this.numberOfRegularAttributes = exampleSet.getAttributes().size(); // TODO: Find another solution for this hack registerVisualizerForMe(exampleSet); } public DataTableExampleSetAdapter(DataTableExampleSetAdapter dataTableExampleSetAdapter) { super(dataTableExampleSetAdapter.getName()); this.exampleSet = dataTableExampleSetAdapter.exampleSet; // shallow clone this.allAttributes = dataTableExampleSetAdapter.allAttributes; // shallow clone this.numberOfRegularAttributes = dataTableExampleSetAdapter.numberOfRegularAttributes; this.weights = dataTableExampleSetAdapter.weights; // shallow clone this.idAttribute = dataTableExampleSetAdapter.idAttribute; // shallow clone // TODO: Find another solution for this hack registerVisualizerForMe(dataTableExampleSetAdapter); } @Override public int getNumberOfSpecialColumns() { return allAttributes.size() - numberOfRegularAttributes; } @Override public boolean isSpecial(int index) { return index >= numberOfRegularAttributes; } @Override public boolean isNominal(int index) { return Ontology.ATTRIBUTE_VALUE_TYPE.isA(allAttributes.get(index).getValueType(), Ontology.NOMINAL); } @Override public boolean isDate(int index) { return Ontology.ATTRIBUTE_VALUE_TYPE.isA(allAttributes.get(index).getValueType(), Ontology.DATE); } @Override public boolean isTime(int index) { return Ontology.ATTRIBUTE_VALUE_TYPE.isA(allAttributes.get(index).getValueType(), Ontology.TIME); } @Override public boolean isDateTime(int index) { return Ontology.ATTRIBUTE_VALUE_TYPE.isA(allAttributes.get(index).getValueType(), Ontology.DATE_TIME); } @Override public boolean isNumerical(int index) { return Ontology.ATTRIBUTE_VALUE_TYPE.isA(allAttributes.get(index).getValueType(), Ontology.NUMERICAL); } @Override public String mapIndex(int column, int value) { return allAttributes.get(column).getMapping().mapIndex(value); } @Override public int mapString(int column, String value) { return allAttributes.get(column).getMapping().mapString(value); } @Override public int getNumberOfValues(int column) { return allAttributes.get(column).getMapping().size(); } @Override public String getColumnName(int i) { return allAttributes.get(i).getName(); } @Override public int getColumnIndex(String name) { for (int i = 0; i < allAttributes.size(); i++) if (allAttributes.get(i).getName().equals(name)) return i; return -1; } @Override public boolean isSupportingColumnWeights() { return weights != null; } @Override public double getColumnWeight(int column) { if (weights == null) return Double.NaN; else return weights.getWeight(getColumnName(column)); } @Override public int getNumberOfColumns() { return this.allAttributes.size(); } @Override public void add(DataTableRow row) { throw new RuntimeException("DataTableExampleSetAdapter: adding new rows is not supported!"); } @Override public DataTableRow getRow(int index) { return new Example2DataTableRowWrapper(exampleSet.getExample(index), allAttributes, idAttribute); } @Override public Iterator<DataTableRow> iterator() { return new Example2DataTableRowIterator(exampleSet.iterator(), allAttributes, idAttribute); } @Override public int getNumberOfRows() { return this.exampleSet.size(); } @Override public DataTable sample(int newSize) { DataTableExampleSetAdapter result = new DataTableExampleSetAdapter(this); double ratio = (double)newSize / (double)getNumberOfRows(); int maxNumberBeforeSampling = DEFAULT_MAX_SIZE_FOR_SHUFFLED_SAMPLING; String maxString = ParameterService.getParameterValue(RapidMinerGUI.PROPERTY_RAPIDMINER_GUI_MAX_STATISTICS_ROWS); if (maxString != null) { try { maxNumberBeforeSampling = Integer.parseInt(maxString); } catch (NumberFormatException e) { // do nothing } } ExampleSet exampleSet = null; if (getNumberOfRows() < maxNumberBeforeSampling) { try { exampleSet = new SplittedExampleSet(this.exampleSet, ratio, SplittedExampleSet.SHUFFLED_SAMPLING, false, 0); } catch (UserError e) { // this exception is only thrown for Stratified Sampling } ((SplittedExampleSet)exampleSet).selectSingleSubset(0); } else { exampleSet = Tools.getLinearSubsetCopy(this.exampleSet, newSize, 0); } result.exampleSet = exampleSet; return result; } public static ExampleSet createExampleSetFromDataTable(DataTable table) { List<Attribute> attributes = new ArrayList<Attribute>(); for (int i = 0; i < table.getNumberOfColumns(); i++) { if (table.isDate(i)) { Attribute attribute = AttributeFactory.createAttribute(table.getColumnName(i), Ontology.DATE); attributes.add(attribute); } else if (table.isTime(i)) { Attribute attribute = AttributeFactory.createAttribute(table.getColumnName(i), Ontology.TIME); attributes.add(attribute); } else if (table.isDateTime(i)) { Attribute attribute = AttributeFactory.createAttribute(table.getColumnName(i), Ontology.DATE_TIME); attributes.add(attribute); } else if (table.isNominal(i)) { Attribute attribute = AttributeFactory.createAttribute(table.getColumnName(i), Ontology.NOMINAL); attributes.add(attribute); } else { Attribute attribute = AttributeFactory.createAttribute(table.getColumnName(i), Ontology.REAL); attributes.add(attribute); } } MemoryExampleTable exampleTable = new MemoryExampleTable(attributes); for (int i = 0; i < table.getNumberOfRows(); i++) { DataTableRow row = table.getRow(i); double[] values = new double[attributes.size()]; for (int a = 0; a < values.length; a++) { Attribute attribute = attributes.get(a); if (Ontology.ATTRIBUTE_VALUE_TYPE.isA(attribute.getValueType(), Ontology.DATE_TIME)) { values[a] = row.getValue(a); } else if (attribute.isNominal()) { values[a] = attribute.getMapping().mapString(table.getValueAsString(row, a)); } else { values[a] = row.getValue(a); } } exampleTable.addDataRow(new DoubleArrayDataRow(values)); } return exampleTable.createExampleSet(); } private void registerVisualizerForMe(Object father) { ObjectVisualizer visualizer = ObjectVisualizerService.getVisualizerForObject(father); ObjectVisualizerService.addObjectVisualizer(this, visualizer); } }