/*
* RapidMiner
*
* Copyright (C) 2001-2008 by Rapid-I and the contributors
*
* Complete list of developers available at our web site:
*
* http://rapid-i.com
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see http://www.gnu.org/licenses/.
*/
package com.rapidminer.example;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import com.rapidminer.example.table.ExampleTable;
import com.rapidminer.operator.ResultObject;
/**
 * Common contract implemented by every example set. An example set normally
 * holds no data of its own; it is a view onto an {@link ExampleTable}, used
 * for instance to realize sampling or feature selection. Views are meant to
 * be stackable on top of each other (the "multi-layered data view"), and
 * implementations of this interface should support that concept.
 *
 * @author Ingo Mierswa
 * @version $Id: ExampleSet.java,v 1.9 2008/08/28 18:54:07 ingomierswa Exp $
 */
public interface ExampleSet extends ResultObject, Cloneable, Iterable<Example> {

	// ------------- Misc -----------------------------

	/**
	 * Creates a clone of this example set.
	 *
	 * @return the cloned example set
	 */
	Object clone();

	/**
	 * Compares this example set with another object.
	 *
	 * @param o the object to compare with
	 * @return true if all attributes are equal
	 */
	boolean equals(Object o);

	/**
	 * Delivers the hash code of this example set. Example sets which are
	 * equal must deliver the same hash code.
	 *
	 * @return the hash code
	 */
	int hashCode();

	// -------------------- attributes --------------------

	/**
	 * Gives access to the attributes of this example set.
	 *
	 * @return the data structure holding all attributes
	 */
	Attributes getAttributes();

	// -------------------- Examples --------------------

	/**
	 * Delivers the number of examples contained in this example set. Do not
	 * use this number to build index-based for-loops over all examples; this
	 * interface is {@link Iterable}, so iterate over the examples instead.
	 *
	 * @return the number of examples
	 */
	int size();

	/**
	 * Delivers the example table this example set is based on. Most operators
	 * should work on the example set and change table data by manipulating
	 * examples rather than accessing the table directly.
	 *
	 * @return the underlying example table
	 */
	ExampleTable getExampleTable();

	/**
	 * Looks up an example by the value of its id.
	 *
	 * @param value the id value of the desired example
	 * @return the example with the given id value; implementations should
	 *         return null if the example set does not contain an id attribute
	 * @see #remapIds()
	 */
	Example getExampleFromId(double value);

	/**
	 * Delivers the i-th example. Index-based access via the example table is
	 * not guaranteed to be implemented efficiently, so for-loops calling this
	 * method are not an option for iterating; an {@link ExampleReader} should
	 * be used for that purpose.
	 *
	 * @param index the index of the desired example
	 * @return the example at the given index
	 */
	Example getExample(int index);

	/**
	 * Remaps all ids. Invoke this method before
	 * {@link #getExampleFromId(double)} is used.
	 */
	void remapIds();

	// -------------------- File Writing --------------------

	/**
	 * Writes the data and the attribute description to a file.
	 *
	 * @throws IOException declared for failures while writing
	 */
	void writeDataFile(File dataFile, int fractionDigits, boolean quoteWhitespace, boolean zipped, boolean append, Charset encoding) throws IOException;

	/**
	 * Writes the meta descriptions of the attributes into a file.
	 *
	 * @param attFile  the file receiving the attribute meta descriptions
	 * @param dataFile needed in order to determine the relative file
	 *                 positions; must not be null
	 * @throws IOException declared for failures while writing
	 */
	void writeAttributeFile(File attFile, File dataFile, Charset encoding) throws IOException;

	/**
	 * Writes the data and the attribute description to a sparse data file.
	 *
	 * @param dataFile       the file to write the data to
	 * @param format         specified by {@link com.rapidminer.operator.io.SparseFormatExampleSource}
	 * @param fractionDigits the number of fraction digits (-1 for all possible digits)
	 * @throws IOException declared for failures while writing
	 */
	void writeSparseDataFile(File dataFile, int format, int fractionDigits, boolean quoteWhitespace, boolean zipped, boolean append, Charset encoding) throws IOException;

	/**
	 * Writes the meta descriptions of the attributes for a sparse data file
	 * into a file.
	 *
	 * @param attFile  the file receiving the attribute meta descriptions
	 * @param dataFile needed in order to determine the relative file
	 *                 positions; must not be null
	 * @param format   specified by {@link com.rapidminer.operator.io.SparseFormatExampleSource}
	 * @throws IOException declared for failures while writing
	 */
	void writeSparseAttributeFile(File attFile, File dataFile, int format, Charset encoding) throws IOException;

	// ------------------- Statistics ---------------

	/** Recalculates the statistics of all attributes. */
	void recalculateAllAttributeStatistics();

	/**
	 * Recalculates the statistics of a single attribute.
	 *
	 * @param attribute the attribute whose statistics should be recalculated
	 */
	void recalculateAttributeStatistics(Attribute attribute);

	/**
	 * Delivers the desired statistic for the given attribute. Prefer this
	 * method over the deprecated method {@code Attribute#getStatistics(String)}:
	 * it correctly calculates and keeps the statistics for the current
	 * example set and does not overwrite the statistics in the attribute.
	 *
	 * @param attribute      the attribute the statistic is requested for
	 * @param statisticsName the name of the desired statistic
	 * @return the value of the desired statistic
	 */
	double getStatistics(Attribute attribute, String statisticsName);

	/**
	 * Delivers the desired statistic for the given attribute, taking an
	 * additional statistics parameter. Prefer this method over the deprecated
	 * method {@code Attribute#getStatistics(String)}: it correctly calculates
	 * and keeps the statistics for the current example set and does not
	 * overwrite the statistics in the attribute.
	 *
	 * @param attribute           the attribute the statistic is requested for
	 * @param statisticsName      the name of the desired statistic
	 * @param statisticsParameter the parameter passed along with the statistics name
	 * @return the value of the desired statistic
	 */
	double getStatistics(Attribute attribute, String statisticsName, String statisticsParameter);
}