/**
* Copyright (C) 2001-2017 by RapidMiner and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapidminer.com
*
* This program is free software: you can redistribute it and/or modify it under the terms of the
* GNU Affero General Public License as published by the Free Software Foundation, either version 3
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
* even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License along with this program.
* If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.example.utils;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.function.IntToDoubleFunction;
import com.rapidminer.example.Attribute;
import com.rapidminer.example.Attributes;
import com.rapidminer.example.ExampleSet;
import com.rapidminer.example.set.SimpleExampleSet;
import com.rapidminer.example.table.DataRow;
import com.rapidminer.example.table.DataRowFactory;
import com.rapidminer.example.table.DataRowReader;
import com.rapidminer.example.table.ExampleTable;
/**
* Builds an {@link ExampleSet} from the given data starting with the given attributes. The
* instructions to fill the underlying {@link ExampleTable} are applied in the following order:
* <ul>
* <li>rows in the order with which they are added by {@link #addDataRow} or {@link #addRow}</li>
* <li>rows given by the reader specified by {@link #withDataRowReader}</li>
* <li>the blank size {@link #withBlankSize}</li>
* <li>columns are filled as specified by {@link #withColumnFiller}</li>
* </ul>
*
* To prevent memory strain, the data rows added by {@link #addDataRow} and {@link #addRow} are
* immediately written to the underlying data table instead of being stored in the builder. The
* other data (set by {@link #withDataRowReader}, {@link #withBlankSize} or
* {@link #withColumnFiller}) that fills the table is stored and only used on {@link #build}.
*
* If a special data management optimization type should be used,
* {@link #withOptimizationHint(DataManagement)} must be called before adding any rows.
*
* @author Gisa Schaefer
* @since 7.3
*/
public abstract class ExampleSetBuilder {

    /**
     * Data management optimization options that can be used with the builder option
     * {@link ExampleSetBuilder#withOptimizationHint(DataManagement)}.
     */
    public enum DataManagement {
        /**
         * Always use the fastest data representation, regardless of memory usage.
         */
        SPEED_OPTIMIZED,

        /**
         * Automatically detects very sparse columns and compresses those.
         */
        AUTO,

        /**
         * Decreases memory usage by compressing sparse columns as much as possible.
         */
        MEMORY_OPTIMIZED;
    }

    /** all the attributes in the example set (always a builder-owned, mutable copy) */
    private final List<Attribute> attributes;

    /** roles for some of the attributes, kept in insertion order */
    private final Map<Attribute, String> specialAttributes = new LinkedHashMap<>();

    /**
     * Creates a builder based on the given attributes. The given attributes will be the attributes
     * in the {@link ExampleTable} constructed with this builder. If the given attributes are
     * {@code null}, the example set will have no attributes.
     *
     * @param attributes
     *            the {@link Attribute}s that the {@link ExampleSet} should contain, can be
     *            {@code null}
     */
    ExampleSetBuilder(List<Attribute> attributes) {
        // defensive copy: later changes to the caller's list must not affect the builder
        this.attributes = new ArrayList<>();
        if (attributes != null) {
            this.attributes.addAll(attributes);
        }
    }

    /**
     * Creates a builder based on the given attributes. The given attributes will be the attributes
     * in the {@link ExampleTable} constructed with this builder. If the given array is
     * {@code null}, the example set will have no attributes.
     *
     * @param attributes
     *            the {@link Attribute}s that the {@link ExampleSet} should contain, can be
     *            {@code null}
     */
    ExampleSetBuilder(Attribute... attributes) {
        // Copy instead of keeping the Arrays.asList view: the view is backed by the caller's
        // array, so later writes to that array would silently change the builder. This also
        // mirrors the null handling of the List based constructor.
        this.attributes = new ArrayList<>();
        if (attributes != null) {
            this.attributes.addAll(Arrays.asList(attributes));
        }
    }

    /**
     * Determines which of the attributes used to construct the builder should have a special role
     * in the {@link ExampleSet} that is built. Roles registered earlier for the same attribute are
     * overwritten.
     *
     * @param specialAttributes
     *            map from attributes to their role names; a {@code null} map is ignored
     * @return the builder
     */
    public ExampleSetBuilder withRoles(Map<Attribute, String> specialAttributes) {
        if (specialAttributes != null) {
            this.specialAttributes.putAll(specialAttributes);
        }
        return this;
    }

    /**
     * Sets the role for the given attribute, which is one of the attributes used to construct the
     * builder.
     *
     * @param attribute
     *            the attribute that has a special role, must not be {@code null}
     * @param role
     *            the role name for the attribute
     * @return the builder
     * @throws IllegalArgumentException
     *             if the attribute is {@code null}
     */
    public ExampleSetBuilder withRole(Attribute attribute, String role) {
        if (attribute == null) {
            throw new IllegalArgumentException("The attribute must not be null");
        }
        specialAttributes.put(attribute, role);
        return this;
    }

    /**
     * Sets the expected number of rows to numberOfRows. Does not construct any rows but ensures
     * that the container for the rows has the right size and does not need to be resized while rows
     * are added.
     *
     * @param numberOfRows
     *            the expected number of rows
     * @return the builder
     */
    public abstract ExampleSetBuilder withExpectedSize(int numberOfRows);

    /**
     * Adds the data row to the existing data rows of the data table. Will be applied before all the
     * other table fillers ({@link #withDataRowReader}, {@link #withBlankSize},
     * {@link #withColumnFiller}).
     *
     * @param dataRow
     *            the data row to add
     * @return the builder
     * @throws RuntimeException
     *             May be thrown if the data row does not fit the attributes of the underlying
     *             table, depending on the data row implementation.
     */
    public abstract ExampleSetBuilder addDataRow(DataRow dataRow);

    /**
     * Adds the data of the row as a new data row to the data table. Has the same effect as
     * {@code addDataRow(new DoubleArrayDataRow(row))} but might be more efficient if supported by
     * the underlying data structure. Will be applied before all the other table fillers
     * ({@link #withDataRowReader}, {@link #withBlankSize}, {@link #withColumnFiller}).
     *
     * @param row
     *            the data to add
     * @return the builder
     */
    public abstract ExampleSetBuilder addRow(double[] row);

    /**
     * Adds the rows supplied by the reader to the data table. Will be applied after any rows added
     * by {@link #addDataRow} and {@link #addRow} but before creating blank rows specified by
     * {@link #withBlankSize} and filling columns specified by {@link #withColumnFiller}. Calling
     * the method a second time will overwrite the previous reader.
     *
     * @param reader
     *            a {@link DataRowReader} providing rows for the data table
     * @return the builder
     */
    public abstract ExampleSetBuilder withDataRowReader(DataRowReader reader);

    /**
     * Constructs numberOfRows blank rows. Creates rows of type
     * {@link DataRowFactory#TYPE_DOUBLE_ARRAY} if supported by the underlying data structure.
     *
     * @param numberOfRows
     *            the number of blank rows to create
     * @return the builder
     */
    public abstract ExampleSetBuilder withBlankSize(int numberOfRows);

    /**
     * Fills the column in the data table associated with the given attribute with the values
     * generated by the valueForRow function.
     *
     * Only has an effect if there are rows already constructed by {@link #addRow},
     * {@link #addDataRow}, {@link #withDataRowReader} and {@link #withBlankSize}. Will be applied
     * after those so it overwrites the values already set.
     *
     * @param attribute
     *            the attribute for which the column should be filled by valueForRow
     * @param valueForRow
     *            the function to fill the column associated with the attribute
     * @return the builder
     */
    public abstract ExampleSetBuilder withColumnFiller(Attribute attribute, IntToDoubleFunction valueForRow);

    /**
     * A hint for optimization type to use in the data table. Must be set before
     * {@link #addDataRow(DataRow)} or {@link #addRow(double[])} is called. When this method is not
     * used, {@link DataManagement#AUTO} is selected by default. May be ignored if not supported by
     * the underlying data structure.
     *
     * @param management
     *            the data management optimization to use
     * @return the builder
     */
    public abstract ExampleSetBuilder withOptimizationHint(DataManagement management);

    /**
     * Builds the example set: wraps the table returned by {@link #getExampleTable()} together with
     * the builder's attributes and then assigns the registered roles.
     *
     * @return the {@link ExampleSet} built from the specified data
     */
    public ExampleSet build() {
        // Cannot provide map of special attributes directly to the constructor used below, because
        // we have to ensure that all attributes are being added to the regular attributes first so
        // that a bug in Attributes#getRole() does not take effect. This way,
        // Attributes#setSpecial() deletes the same attribute and not some other attribute (with
        // same role as first attribute's name).
        ExampleSet set = new SimpleExampleSet(getExampleTable(), attributes, null);

        // distinct local name so the builder's own attribute list (the field) is not shadowed
        Attributes setAttributes = set.getAttributes();
        for (Entry<Attribute, String> entry : specialAttributes.entrySet()) {
            setAttributes.setSpecialAttribute(entry.getKey(), entry.getValue());
        }
        return set;
    }

    /**
     * Returns the builder's internal attribute list (not a copy).
     *
     * @return the specified attributes
     */
    protected List<Attribute> getAttributes() {
        return attributes;
    }

    /**
     * Fetches the example table to use for building the example set. Will only be called once when
     * the example set is built.
     *
     * @return the example table to use for the example set
     */
    protected abstract ExampleTable getExampleTable();

    /**
     * Sets the table indices so that {@code dataRow.set(attribute,value)} can be used for the
     * attributes: the i-th attribute of {@link #getAttributes()} gets table index i, starting at 0.
     * Calling {@code new MemoryExampleTable(getAttributes())} or
     * {@code new ColumnarExampleTable(getAttributes())} has the same effect.
     */
    protected void setTableIndices() {
        int i = 0;
        for (Attribute attribute : getAttributes()) {
            attribute.setTableIndex(i++);
        }
    }
}