/* * RapidMiner * * Copyright (C) 2001-2008 by Rapid-I and the contributors * * Complete list of developers available at our web site: * * http://rapid-i.com * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.example.table; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; import com.rapidminer.example.Attribute; import com.rapidminer.example.AttributeRole; import com.rapidminer.example.Attributes; import com.rapidminer.example.ExampleSet; import com.rapidminer.example.set.SimpleExampleSet; import com.rapidminer.operator.OperatorException; import com.rapidminer.operator.UserError; import com.rapidminer.tools.Tools; import com.rapidminer.tools.att.AttributeSet; /** * This class is the core data supplier for example sets. Several example sets * can use the same data and access the attribute values by reference. * * @author Ingo Mierswa * @version $Id: AbstractExampleTable.java,v 1.7 2008/07/31 17:43:41 ingomierswa Exp $ */ public abstract class AbstractExampleTable implements ExampleTable { /** * */ private static final long serialVersionUID = -6996954528182122684L; /** * List of instances of {@link Attribute}. The <i>i</i>-th entry in the * list belongs to the <i>i</i>-th data column. Whenever attributes are * removed from the list of attributes (e.g. they were intermediate * predicted labels used only within a validation chain), the succeeding * entries do not move up, but the entry is replaced by a null entry and * this index is added to {@link AbstractExampleTable#unusedColumnList} as an * Integer. */ protected List<Attribute> attributes = new ArrayList<Attribute>(); /** * List of Integers referencing indices of columns that were removed, e.g. * predicted labels that were used within a validation chain but are not * needed any longer. Any of the columns in this list may be used when a new * attribute is created. The list is used as a queue. */ protected List<Integer> unusedColumnList = new LinkedList<Integer>(); /** * Creates a new ExampleTable. * * @param attributes * List of {@link Attribute}. The indices of the attibutes are * set to values reflecting their position in the list. */ public AbstractExampleTable(List<Attribute> attributes) { addAttributes(attributes); } // ------------------------------------------------------------ /** Returns a new array containing all {@link Attribute}s. */ public Attribute[] getAttributes() { Attribute[] attribute = new Attribute[attributes.size()]; attributes.toArray(attribute); return attribute; } /** * Returns the attribute of the column number <i>i</i>. Attention: This * value may return null if the column was marked unused. */ public Attribute getAttribute(int i) { return attributes.get(i); } /** Returns the attribute with the given name. */ public Attribute findAttribute(String name) throws OperatorException { if (name == null) return null; Iterator<Attribute> i = attributes.iterator(); while (i.hasNext()) { Attribute att = i.next(); if (att != null) { if (att.getName().equals(name)) return att; } } throw new UserError(null, 111, name); } /** * Adds all {@link Attribute}s in <code>newAttributes</code> to the end * of the list of attributes, creating new data columns if necessary. */ public void addAttributes(Collection<Attribute> newAttributes) { Iterator<Attribute> i = newAttributes.iterator(); while (i.hasNext()) addAttribute(i.next()); } /** * Adds the attribute to the list of attributes assigning it a free column * index. If the name is already in use, the attribute will be renamed. */ public int addAttribute(Attribute a) { int index = -1; if (unusedColumnList.size() > 0) { index = unusedColumnList.remove(0); attributes.set(index, a); } else { index = attributes.size(); attributes.add(a); } if (a != null) a.setTableIndex(index); return index; } /** * Equivalent to calling * <code>removeAttribute(attribute.getTableIndex())</code>. */ public void removeAttribute(Attribute attribute) { removeAttribute(attribute.getTableIndex()); } /** * Sets the attribute with the given index to null. Afterwards, this column * can be reused. Callers must make sure, that no other example set contains * a reference to this column. Otherwise its data will be messed up. * Ususally this is only possible if an operator generates intermediate * attributes, like a validation chain or a feature generator. If the * attribute already was removed, this method returns silently. */ public void removeAttribute(int index) { Attribute a = attributes.get(index); if (a == null) return; attributes.set(index, null); unusedColumnList.add(index); } /** * Returns the number of attributes. Attention: Callers that use a for-loop * and retrieving {@link Attribute}s by calling * {@link AbstractExampleTable#getAttribute(int)} must keep in mind, that some of * these attributes may be null. */ public int getNumberOfAttributes() { return attributes.size(); } /** * Returns the number of non null attributes. <b>Attention</b>: Since there * are null attributes in the list, the return value of this method must not * be used in a for-loop! * * @see ExampleTable#getNumberOfAttributes(). */ public int getAttributeCount() { return attributes.size() - unusedColumnList.size(); } // ------------------------------------------------------------ /** * Returns a new example set with all attributes switched on. The * given attribute will be used as a special label attribute for learning. */ public ExampleSet createExampleSet(Attribute labelAttribute) { return createExampleSet(labelAttribute, null, null); } /** * Returns a new example set with all attributes switched on. The * given attributes will be used as a special label attribute for learning, * as (example) weight attribute, and as id attribute. */ public ExampleSet createExampleSet(Attribute labelAttribute, Attribute weightAttribute, Attribute idAttribute) { Map<Attribute, String> specialAttributes = new HashMap<Attribute, String>(); if (labelAttribute != null) specialAttributes.put(labelAttribute, Attributes.LABEL_NAME); if (weightAttribute != null) specialAttributes.put(weightAttribute, Attributes.WEIGHT_NAME); if (idAttribute != null) specialAttributes.put(idAttribute, Attributes.ID_NAME); return new SimpleExampleSet(this, specialAttributes); } /** * Returns a new example set with all attributes switched on. The iterator * over the attribute roles will define the special attributes. */ public ExampleSet createExampleSet(Iterator<AttributeRole> newSpecialAttributes) { Map<Attribute, String> specialAttributes = new HashMap<Attribute, String>(); while (newSpecialAttributes.hasNext()) { AttributeRole role = newSpecialAttributes.next(); specialAttributes.put(role.getAttribute(), role.getSpecialName()); } return new SimpleExampleSet(this, specialAttributes); } /** Returns a new example set with all attributes of the given attribute set. */ public ExampleSet createExampleSet(AttributeSet attributeSet) { Map<Attribute, String> specialAttributes = new HashMap<Attribute, String>(); Iterator<String> i = attributeSet.getSpecialNames().iterator(); while (i.hasNext()) { String name = i.next(); specialAttributes.put(attributeSet.getSpecialAttribute(name), name); } return createExampleSet(specialAttributes); } /** * Returns a new example set with all attributes switched on. * All attributes given at creation time will be regular. */ public ExampleSet createExampleSet() { return createExampleSet(new HashMap<Attribute, String>()); } /** * Returns a new example set with all attributes switched on. The attributes * in the given map will be used as special attributes with the specified names, * all other attributes given at creation time will be regular. */ public ExampleSet createExampleSet(Map<Attribute, String> specialAttributes) { return new SimpleExampleSet(this, specialAttributes); } // ------------------------------------------------------------ public String toString() { return "ExampleTable, " + attributes.size() + " attributes, " + size() + " data rows,"+Tools.getLineSeparator()+"attributes: " + attributes; } public String toDataString() { StringBuffer result = new StringBuffer(toString() + Tools.getLineSeparator()); DataRowReader reader = getDataRowReader(); while (reader.hasNext()) { result.append(reader.next().toString() + Tools.getLineSeparator()); } return result.toString(); } }