/** * Copyright (C) 2001-2017 by RapidMiner and the contributors * * Complete list of developers available at our web site: * * http://rapidminer.com * * This program is free software: you can redistribute it and/or modify it under the terms of the * GNU Affero General Public License as published by the Free Software Foundation, either version 3 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License along with this program. * If not, see http://www.gnu.org/licenses/. */ package com.rapidminer.example.table.internal; import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.function.IntToDoubleFunction; import com.rapidminer.example.Attribute; import com.rapidminer.example.Attributes; import com.rapidminer.example.table.AbstractExampleTable; import com.rapidminer.example.table.DataRow; import com.rapidminer.example.table.DataRowFactory; import com.rapidminer.example.table.DataRowReader; import com.rapidminer.example.table.GrowingExampleTable; import com.rapidminer.example.utils.ExampleSetBuilder.DataManagement; import com.rapidminer.example.utils.ExampleSets; import com.rapidminer.tools.Ontology; /** * {@linkplain Column} oriented example table. <br/> * Caution: This class is not part of the official API. Please do not use it, instead use methods * provided in {@link ExampleSets}. * * @author Michael Knopf * @see Column * @since 7.3 */ public class ColumnarExampleTable extends AbstractExampleTable implements GrowingExampleTable { private static final long serialVersionUID = 1L; /** Non-empty tables will allocate at least memory for {@value} rows. */ private static final int MIN_NON_EMPTY_SIZE = 8; /** * Empty column as filler for non-existent attributes. Prevents {@link NullPointerException}s * and makes {@code null} checks unnecessary */ private static final Column NAN_COLUMN = new NaNColumn(); /** * View of a single data row. The view itself does not store any data. * * @author Michael Knopf */ private class RowView extends DataRow { private static final long serialVersionUID = 1L; private final int row; public RowView(int index) { row = index; } @Override protected double get(int column, double defaultValue) { return columns[column].get(row); } @Override protected void set(int column, double value, double defaultValue) { columns[column].set(row, value); } @Override protected void ensureNumberOfColumns(int columns) { // not necessary } @Override public void trim() { // no data to trim } @Override public int getType() { return DataRowFactory.TYPE_COLUMN_VIEW; } @Override public String toString() { StringBuffer result = new StringBuffer(); for (int i = 0; i < getNumberOfAttributes(); i++) { result.append((i == 0 ? "" : ",") + columns[i].get(row)); } return result.toString(); } } /** * Reader for data row views. * * @author Michael Knopf */ private class RowReader implements DataRowReader { private int current = 0; private final int last = size; @Override public boolean hasNext() { return current < last; } @Override public DataRow next() { return new RowView(current++); } } private Column[] columns; private int size; private int sizeLimit; private boolean completable; private DataManagement management = DataManagement.AUTO; /** * Creates a new, empty data table with the given attributes. * * @param attributes * the table's attributes */ public ColumnarExampleTable(List<Attribute> attributes) { this(attributes, DataManagement.AUTO, false); } /** * Creates a new, empty data table with the given attributes. * * @param attributes * the table's attributes * @param management * the data management optimization type to use * @param completable * whether {@link #complete()} will be called when the number of rows is final and * before the first reading of values */ public ColumnarExampleTable(List<Attribute> attributes, DataManagement management, boolean completable) { super(attributes); int attributeCount = super.getNumberOfAttributes(); columns = new Column[attributeCount]; size = 0; sizeLimit = 0; // must be set before updating columns this.completable = completable; this.management = management; for (int i = 0; i < attributes.size(); i++) { updateColumn(i, attributes.get(i)); } } /** * Constructor for a shallow clone of the table. The data columns are not cloned. * * <b>Warning:</b> If this called from an unsynchronized method, it can happen that the * {@link #columns} and the {@link AbstractExampleTable#attributes} are in an incompatible state * because of changes that are not yet visible to the current thread. * * @param table * the table to clone */ private ColumnarExampleTable(ColumnarExampleTable table) { super(table); this.columns = Arrays.copyOf(table.columns, table.columns.length); this.size = table.size; this.sizeLimit = table.sizeLimit; this.completable = table.completable; } @Override public synchronized int addAttribute(Attribute attribute) { int newIndex = super.addAttribute(attribute); if (columns != null) { ensureWidth(super.getNumberOfAttributes()); updateColumn(newIndex, attribute); } return newIndex; } @Override public synchronized void removeAttribute(int index) { super.removeAttribute(index); updateColumn(index, null); } @Override public int size() { return size; } @Override public DataRowReader getDataRowReader() { return new RowReader(); } @Override public DataRow getDataRow(int index) { return new RowView(index); } /** * Adds a copy of the given data row to the example table. Will throw an * {@link ArrayIndexOutOfBoundsException} if the data row does not fit the attributes of this * table. * * @param dataRow * the new data row * @throws RuntimeException * May be thrown if the data row does not fit the attributes of the underlying * table, depending on the data row implementation. */ @Override public void addDataRow(DataRow dataRow) { ensureHeight(size + 1); int numberOfAttributes = super.getNumberOfAttributes(); for (int i = 0; i < numberOfAttributes; i++) { Attribute attribute = getAttribute(i); columns[i].setLast(size, dataRow.get(attribute)); } size++; } /** * Adds a copy of the given row to the example table. * * @param row * the row as double array */ public void addRow(double[] row) { ensureHeight(size + 1); int min = Math.min(super.getNumberOfAttributes(), row.length); for (int i = 0; i < min; i++) { columns[i].setLast(size, row[i]); } size++; } /** * Adds numberOfRows blank rows to the table. These rows can be filled afterwards by * {@link #fillColumn} or using {@link #getDataRowReader}. * * @param numberOfRows * the number of empty rows to add */ public void addBlankRows(int numberOfRows) { if (numberOfRows > 0) { int newSize = size + numberOfRows; if (newSize > sizeLimit) { ensureHeight(newSize); } size = newSize; } } /** * Fills the column associated with the attribute using the function. * * @param attribute * the attribute whose column should be filled with values * @param function * the function providing the values to fill the column */ public void fillColumn(Attribute attribute, IntToDoubleFunction function) { Column column = columns[attribute.getTableIndex()]; for (int i = 0; i < size; i++) { column.setLast(i, function.applyAsDouble(i)); } } /** * Resets the column associated with the attribute. The reset is necessary if there were already * rows added in case auto columns are used because this overwrites the values, so the automatic * detection needs to be reset. * * @param attribute * the attribute whose column should be reset */ public void resetColumn(Attribute attribute) { updateColumn(attribute.getTableIndex(), attribute); columns[attribute.getTableIndex()].ensure(sizeLimit); } /** * Sets the expected number of rows. Use this if you know in advance how many rows will be added * by {@link #addRow} or {@link #addDataRow}. Using this method prevents unnecessary resizing if * the container for row values becomes to small. * * @param expectedNumberOfRows * the expected number of rows */ public void setExpectedSize(int expectedNumberOfRows) { if (expectedNumberOfRows <= sizeLimit) { return; } updateHeight(expectedNumberOfRows); } /** * Signals that the number of rows is final. Must be called when using the constructor * {@link #ColumnarExampleTable(List, boolean)} with completable {@code true} before the first * time that values are read. */ public void complete() { completable = false; for (Column column : columns) { column.complete(); } } /** * Creates a shallow clone of the table and removes all columns not contained in attributes. * * @param attributes * the attributes to determine which columns to keep * @return a new table with only column data when the column is associated to an attribute from * attributes */ public ColumnarExampleTable columnCleanupClone(Attributes attributes) { ColumnarExampleTable newTable = createClone(); // check which table indices are still in use int attributeCount = newTable.getNumberOfAttributes(); boolean[] usedIndices = new boolean[attributeCount]; for (Iterator<Attribute> allIterator = attributes.allAttributes(); allIterator.hasNext();) { Attribute attribute = allIterator.next(); usedIndices[attribute.getTableIndex()] = true; } // remove unused attributes and their columns for (int i = 0; i < attributeCount; i++) { if (!usedIndices[i]) { newTable.removeAttribute(i); } } return newTable; } /** * Creates a clone. Synchronized in order to prevent incompatible states when some changes are * not yet seen by the current thread. * * @return a clone of the current table */ private synchronized ColumnarExampleTable createClone() { return new ColumnarExampleTable(this); } /** * Ensures that the data table can store up to the given number of rows. Invoking this method * does not change the size of the table! * <p> * The implementation resizes the column sizes using the same strategy as the JRE's array list * implementation (enlarges arrays by ~50%). * * @param height * number of rows */ private void ensureHeight(int height) { if (height <= sizeLimit) { return; } int newHeight = Math.max(Math.max(MIN_NON_EMPTY_SIZE, height), sizeLimit + (sizeLimit >> 1)); updateHeight(newHeight); } /** * Uses the {@link Column#ensure(int) Column.ensure} method of each column to ensure the * newHeight. * * @param newHeight * the new height of the table */ private void updateHeight(int newHeight) { for (int i = 0; i < super.getNumberOfAttributes(); i++) { columns[i].ensure(newHeight); } sizeLimit = newHeight; } /** * Ensures that the data table can store up to the given number of columns. Invoking this method * does not create a new attribute. * * @param width * number of columns */ private void ensureWidth(int width) { if (width <= columns.length) { return; } int newWidth = Math.max(Math.max(width, MIN_NON_EMPTY_SIZE), columns.length + (columns.length >> 1)); columns = Arrays.copyOf(columns, newWidth); } /** * Updates the given column with respect to the type of the associated attribute. If an * attribute is removed, a reference to {@link #NAN_COLUMN} is set to prevent * {@link NullPointerException}s when iterating over all attribute indices. * * @param column * the column to update * @param attribute * the associated attribute */ private void updateColumn(final int column, Attribute attribute) { if (attribute == null) { columns[column] = NAN_COLUMN; return; } switch (attribute.getValueType()) { case Ontology.BINOMINAL: columns[column] = new ByteArrayColumn(sizeLimit); break; case Ontology.NOMINAL: case Ontology.POLYNOMINAL: if (management == DataManagement.SPEED_OPTIMIZED) { columns[column] = new IntegerArrayColumn(sizeLimit); } else { columns[column] = completable ? new IntegerAutoColumn(sizeLimit, management) : new IntegerIncompleteAutoColumn(sizeLimit, management); } break; default: if (management == DataManagement.SPEED_OPTIMIZED) { columns[column] = new DoubleArrayColumn(sizeLimit); } else { columns[column] = completable ? new DoubleAutoColumn(sizeLimit, management) : new DoubleIncompleteAutoColumn(sizeLimit, management); } break; } } }