/**
 * Copyright (C) 2001-2017 by RapidMiner and the contributors
 *
 * Complete list of developers available at our web site:
 *
 * http://rapidminer.com
 *
 * This program is free software: you can redistribute it and/or modify it under the terms of the
 * GNU Affero General Public License as published by the Free Software Foundation, either version 3
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without
 * even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License along with this program.
 * If not, see http://www.gnu.org/licenses/.
 */
package com.rapidminer.example;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;

import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.operator.ResultObject;


/**
 * Contract shared by all example sets. An example set usually holds no data of its own; it is a
 * view on an example table (e.g. for sampling or feature selection). Views are meant to be
 * stackable on top of each other, which is where the term "multi-layered data view" comes from.
 * Implementations should support this layered view concept.
 *
 * @author Ingo Mierswa
 */
public interface ExampleSet extends ResultObject, Cloneable, Iterable<Example> {

	/** necessary since default method was added */
	long serialVersionUID = 4100925167567270064L;

	/* ============================== Misc ============================== */

	/**
	 * Creates a clone of this example set.
	 *
	 * @return the cloned example set
	 */
	Object clone();

	/**
	 * Equality check for example sets: true if all attributes are equal.
	 *
	 * @param o
	 *            the object to compare with
	 * @return {@code true} if all attributes are equal
	 */
	@Override
	boolean equals(Object o);

	/**
	 * Delivers the hash code of this example set. Example sets that are equal must deliver the
	 * same hash code.
	 *
	 * @return the hash code
	 */
	@Override
	int hashCode();

	/**
	 * Releases resources that are no longer used, provided the implementation supports it. The
	 * default implementation is a no-op.
	 *
	 * Call this only on freshly {@link #clone}ed {@link ExampleSet}s, so that the resources which
	 * were cleaned up are not requested afterwards.
	 *
	 * @since 7.3
	 */
	default void cleanup() {
		// intentionally empty: nothing to clean up by default
	}

	/* ============================ Attributes ============================ */

	/**
	 * Delivers the data structure that holds all attributes. NOTE: to iterate over all attributes
	 * of this example set, create an iterator from the returned {@link Attributes} object and use
	 * that iterator instead.
	 *
	 * @return the attributes of this example set
	 */
	Attributes getAttributes();

	/* ============================= Examples ============================= */

	/**
	 * Delivers the number of examples contained in this example set. Do not use this number to
	 * build for-loops that iterate through all examples.
	 *
	 * @return the number of examples
	 */
	int size();

	/**
	 * Delivers the underlying example table. Most operators should work on the example set and
	 * manipulate examples to change table data rather than using the table directly.
	 *
	 * @return the underlying example table
	 */
	ExampleTable getExampleTable();

	/**
	 * Looks up the example with the given id value. Should return null if the example set does
	 * not contain an id attribute. {@link #remapIds()} must be called before using this method.
	 *
	 * @param value
	 *            the id value to look up
	 * @return the example with the given id value
	 */
	Example getExampleFromId(double value);

	/**
	 * Looks up all examples which have the given id. Should return null if no example matches
	 * that id.
	 *
	 * @param value
	 *            the id value to look up
	 * @return the indices of all examples with the given id
	 */
	int[] getExampleIndicesFromId(double value);

	/**
	 * Delivers the i-th example. Access by index into the example table is not guaranteed to be
	 * implemented efficiently, so index-based for-loops are not an option for iteration; use an
	 * {@link ExampleReader} instead.
	 *
	 * @param index
	 *            the index of the requested example
	 * @return the example at the given index
	 */
	Example getExample(int index);

	/**
	 * Remaps all ids. Invoke this method before {@link #getExampleFromId(double)} is used.
	 */
	void remapIds();

	/* =========================== File Writing =========================== */

	/**
	 * Writes both the data and the attribute description to a file.
	 *
	 * @throws IOException
	 *             declared for failures while writing
	 */
	void writeDataFile(File dataFile, int fractionDigits, boolean quoteNominal, boolean zipped, boolean append,
			Charset encoding) throws IOException;

	/**
	 * Writes the attribute meta descriptions to a file. The data file is needed to determine the
	 * relative file positions and must not be null.
	 *
	 * @throws IOException
	 *             declared for failures while writing
	 */
	void writeAttributeFile(File attFile, File dataFile, Charset encoding) throws IOException;

	/**
	 * Writes both the data and the attribute description to a sparse data file.
	 *
	 * @param dataFile
	 *            the file to write the data to
	 * @param format
	 *            specified by {@link com.rapidminer.operator.io.SparseFormatExampleSource}
	 * @param fractionDigits
	 *            the number of fraction digits (-1 for all possible digits)
	 * @throws IOException
	 *             declared for failures while writing
	 */
	void writeSparseDataFile(File dataFile, int format, int fractionDigits, boolean quoteNominal, boolean zipped,
			boolean append, Charset encoding) throws IOException;

	/**
	 * Writes the attribute meta descriptions belonging to a sparse data file to a file. The data
	 * file is needed to determine the relative file positions and must not be null.
	 *
	 * @param format
	 *            specified by {@link com.rapidminer.operator.io.SparseFormatExampleSource}
	 * @throws IOException
	 *             declared for failures while writing
	 */
	void writeSparseAttributeFile(File attFile, File dataFile, int format, Charset encoding) throws IOException;

	/* ============================ Statistics ============================ */

	/** Recalculates the statistics of all attributes. */
	void recalculateAllAttributeStatistics();

	/**
	 * Recalculates the statistics of a single attribute.
	 *
	 * @param attribute
	 *            the attribute whose statistics are recalculated
	 */
	void recalculateAttributeStatistics(Attribute attribute);

	/**
	 * Delivers the requested statistic for the given attribute. Prefer this method over the
	 * deprecated method Attribute#getStatistics(String): it correctly calculates and keeps the
	 * statistics for the current example set and does not overwrite the statistics stored in the
	 * attribute.
	 *
	 * @param attribute
	 *            the attribute the statistic is requested for
	 * @param statisticsName
	 *            the name of the requested statistic
	 * @return the value of the requested statistic
	 */
	double getStatistics(Attribute attribute, String statisticsName);

	/**
	 * Delivers the requested statistic for the given attribute; this variant additionally takes a
	 * statistics parameter. Prefer this method over the deprecated method
	 * Attribute#getStatistics(String): it correctly calculates and keeps the statistics for the
	 * current example set and does not overwrite the statistics stored in the attribute.
	 *
	 * @param attribute
	 *            the attribute the statistic is requested for
	 * @param statisticsName
	 *            the name of the requested statistic
	 * @param statisticsParameter
	 *            the parameter passed along with the statistic name
	 * @return the value of the requested statistic
	 */
	double getStatistics(Attribute attribute, String statisticsName, String statisticsParameter);
}