Dataset.java example

Explorer
MinorThird-master
/* Copyright 2003, Carnegie Mellon, All Rights Reserved */

package edu.cmu.minorthird.classify;

import java.util.Iterator;
import java.util.Random;

import edu.cmu.minorthird.util.gui.Visible;

/**
 * A set of examples for learning.
 *
 * @author William Cohen
 */

public interface Dataset extends Visible{
	
	/** Get the FeatureFactory associated with the dataset */
	public FeatureFactory getFeatureFactory();
	
	/** Get the schema associated with the dataset */
	public ExampleSchema getSchema();

	/** Add a new example to the dataset. */
	public void add(Example example);
	
	/** Add a new example to the dataset. Specifying whether or not to compress it. */
	public void add(Example example, boolean compress);

	/** 
	 * Return an iterator over all examples.  This iterator must always
	 * return examples in the order in which they were added, unless the
	 * data has been shuffled.
	 */
	public Iterator<Example> iterator(); 

	/** Return the number of examples. */
	public int size(); 

	// these operations are mostly to support train/testing experiments

	/** Randomly re-order the examples. */
	public void shuffle(Random r);

	/** Randomly re-order the examples. */
	public void shuffle();

	/** Make a shallow copy of the dataset. Examples are shared, but not the 
	 * ordering of the examples. */
	public Dataset shallowCopy();

	/** Partition the dataset as required by the splitter. */
	public Split split(Splitter<Example> splitter);

	/** 
	 * 	A partitioning of the dataset into a number of train/test partitions 
	 */
	public interface Split{
		/** Return the number of partitions */
		public int getNumPartitions();

		/** Return a dataset containing the training cases in the k-th split */
		public Dataset getTrain(int k);

		/** Return a dataset containing the test cases in the k-th split */
		public Dataset getTest(int k);
	}
}