/* Copyright 2003, Carnegie Mellon, All Rights Reserved */ package edu.cmu.minorthird.classify; import java.util.Iterator; import java.util.Random; import edu.cmu.minorthird.util.gui.Visible; /** * A set of examples for learning. * * @author William Cohen */ public interface Dataset extends Visible{ /** Get the FeatureFactory associated with the dataset */ public FeatureFactory getFeatureFactory(); /** Get the schema associated with the dataset */ public ExampleSchema getSchema(); /** Add a new example to the dataset. */ public void add(Example example); /** Add a new example to the dataset. Specifying whether or not to compress it. */ public void add(Example example, boolean compress); /** * Return an iterator over all examples. This iterator must always * return examples in the order in which they were added, unless the * data has been shuffled. */ public Iterator<Example> iterator(); /** Return the number of examples. */ public int size(); // these operations are mostly to support train/testing experiments /** Randomly re-order the examples. */ public void shuffle(Random r); /** Randomly re-order the examples. */ public void shuffle(); /** Make a shallow copy of the dataset. Examples are shared, but not the * ordering of the examples. */ public Dataset shallowCopy(); /** Partition the dataset as required by the splitter. */ public Split split(Splitter<Example> splitter); /** * A partitioning of the dataset into a number of train/test partitions */ public interface Split{ /** Return the number of partitions */ public int getNumPartitions(); /** Return a dataset containing the training cases in the k-th split */ public Dataset getTrain(int k); /** Return a dataset containing the test cases in the k-th split */ public Dataset getTest(int k); } }