/* Copyright 2003, Carnegie Mellon, All Rights Reserved */
package edu.cmu.minorthird.classify;
import java.awt.Component;
import java.io.File;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import java.util.TreeSet;
import javax.swing.JComponent;
import javax.swing.JList;
import javax.swing.JScrollPane;
import javax.swing.ListCellRenderer;
import edu.cmu.minorthird.util.Saveable;
import edu.cmu.minorthird.util.gui.ComponentViewer;
import edu.cmu.minorthird.util.gui.Viewer;
import edu.cmu.minorthird.util.gui.ViewerFrame;
import edu.cmu.minorthird.util.gui.Visible;
import edu.cmu.minorthird.util.gui.ZoomedViewer;
/**
* A set of examples for learning.
*
* @author William Cohen
*/
public class BasicDataset implements Dataset,Serializable,Visible,Saveable{

  static final long serialVersionUID=20080118L;

  /** Factory used to compress instances and examples as they are added. */
  protected FeatureFactory featureFactory;

  /** The labeled examples, in insertion order unless shuffled. */
  protected List<Example> examples;

  /** The unlabeled instances added through {@link #addUnlabeled(Instance)}. */
  protected List<Instance> unlabeledExamples;

  /**
   * Union of the possible labels of every example added so far; kept in a
   * sorted set so the schema built from it has a stable class order.
   */
  protected Set<String> classNameSet;

  /**
   * Create an empty dataset that compresses its contents with the given
   * factory.
   *
   * @param featureFactory the factory used to compress added instances and examples
   */
  public BasicDataset(FeatureFactory featureFactory){
    this.featureFactory=featureFactory;
    examples=new ArrayList<Example>();
    unlabeledExamples=new ArrayList<Instance>();
    classNameSet=new TreeSet<String>();
  }

  /** Create an empty dataset backed by a freshly created {@link FeatureFactory}. */
  public BasicDataset(){
    this(new FeatureFactory());
  }

  /**
   * Build a schema from the class names seen so far.
   *
   * @return {@link ExampleSchema#BINARY_EXAMPLE_SCHEMA} itself when the
   *         computed schema equals it, otherwise the newly built schema
   */
  @Override
  public ExampleSchema getSchema(){
    String[] classNames=classNameSet.toArray(new String[classNameSet.size()]);
    ExampleSchema schema=new ExampleSchema(classNames);
    // Hand back the shared constant rather than an equal copy of it.
    if(schema.equals(ExampleSchema.BINARY_EXAMPLE_SCHEMA)){
      return ExampleSchema.BINARY_EXAMPLE_SCHEMA;
    }
    return schema;
  }

  //
  // methods for semisupervised (unlabeled) data
  // NOTE(review): the original comment attributes these to the
  // SemiSupervisedDataset interface, which this class does not declare - confirm.
  //

  /**
   * Add an unlabeled instance; it is compressed by the feature factory first.
   *
   * @param instance the instance to store
   */
  public void addUnlabeled(Instance instance){
    unlabeledExamples.add(featureFactory.compress(instance));
  }

  /** @return an iterator over the stored unlabeled instances */
  public Iterator<Instance> iteratorOverUnlabeled(){
    return unlabeledExamples.iterator();
  }

  /** @return the number of stored unlabeled instances */
  public int sizeUnlabeled(){
    return unlabeledExamples.size();
  }

  /** @return true if at least one unlabeled instance has been added */
  public boolean hasUnlabeled(){
    return !unlabeledExamples.isEmpty();
  }

  /** @return the feature factory this dataset was constructed with */
  @Override
  public FeatureFactory getFeatureFactory(){
    return featureFactory;
  }

  //
  // methods for labeled data, part of the Dataset interface
  //

  /**
   * Add an example to the dataset. <br>
   * <br>
   * This method compresses the example before adding it to the dataset. If
   * you don't want/need the example to be compressed then call
   * {@link #add(Example, boolean)}.
   *
   * @param example The Example that you want to add to the dataset.
   */
  @Override
  public void add(Example example){
    this.add(example,true);
  }

  /**
   * Add an Example to the dataset. <br>
   * <br>
   * This method lets the caller specify whether or not to compress the example
   * before adding it to the dataset. In both cases the possible labels of the
   * example's label are merged into the set of known class names.
   *
   * @param example The example to add to the dataset
   * @param compress Boolean specifying whether or not to compress the example.
   */
  @Override
  public void add(Example example,boolean compress){
    if(compress){
      examples.add(featureFactory.compress(example));
    }else{
      examples.add(example);
    }
    classNameSet.addAll(example.getLabel().possibleLabels());
  }

  /** @return an iterator over the labeled examples */
  @Override
  public Iterator<Example> iterator(){
    return examples.iterator();
  }

  /** @return the number of labeled examples */
  @Override
  public int size(){
    return examples.size();
  }

  /**
   * Randomly reorder the labeled examples in place.
   *
   * @param r the source of randomness
   */
  @Override
  public void shuffle(Random r){
    Collections.shuffle(examples,r);
  }

  /** Randomly reorder the labeled examples using a new {@link Random}. */
  @Override
  public void shuffle(){
    shuffle(new Random());
  }

  /**
   * Copy the labeled examples into a new dataset.
   *
   * The copy is a new {@code BasicDataset} created with the no-argument
   * constructor, and each example is re-added through {@link #add(Example)}.
   * Unlabeled instances are not copied.
   *
   * @return a new dataset holding this dataset's labeled examples
   */
  @Override
  public Dataset shallowCopy(){
    return invertIteration(iterator());
  }

  //
  // Implement Saveable interface.
  //

  private static final String FORMAT_NAME="Minorthird Dataset";

  /** @return the single format name this class can be saved in */
  @Override
  public String[] getFormatNames(){
    return new String[]{FORMAT_NAME};
  }

  /**
   * @param s the format name (ignored)
   * @return the file extension, always {@code ".data"}
   */
  @Override
  public String getExtensionFor(String s){
    return ".data";
  }

  /**
   * Save this dataset to a file via {@link DatasetLoader}.
   *
   * @param file the destination file
   * @param format must be the name returned by {@link #getFormatNames()}
   * @throws IllegalArgumentException if {@code format} is null or is not the supported format
   * @throws IOException declared for failures while writing
   */
  @Override
  public void saveAs(File file,String format) throws IOException{
    // Constant-first comparison so a null format is reported as illegal
    // rather than surfacing as a NullPointerException.
    if(!FORMAT_NAME.equals(format)){
      throw new IllegalArgumentException("illegal format: "+format);
    }
    DatasetLoader.save(this,file);
  }

  /**
   * Load a dataset from a file via {@link DatasetLoader}.
   *
   * @param file the file to read
   * @return whatever {@code DatasetLoader.loadFile} returns for the file
   * @throws IllegalStateException if loading fails with a
   *         {@link NumberFormatException}; the original exception is kept as the cause
   * @throws IOException declared for failures while reading
   */
  @Override
  public Object restore(File file) throws IOException{
    try{
      return DatasetLoader.loadFile(file);
    }catch(NumberFormatException ex){
      // Chain the cause so the parse failure's stack trace is not lost.
      throw new IllegalStateException("error loading from "+file+": "+ex,ex);
    }
  }

  /** A string view of the dataset: one labeled example per line. */
  @Override
  public String toString(){
    StringBuilder buf=new StringBuilder();
    for(Iterator<Example> i=this.iterator();i.hasNext();){
      buf.append(i.next()).append("\n");
    }
    return buf.toString();
  }

  /**
   * A GUI view of the dataset: a list of the examples paired with a viewer
   * for a single example.
   */
  @Override
  public Viewer toGUI(){
    Viewer dbGui=new SimpleDatasetViewer();
    dbGui.setContent(this);
    Viewer instGui=GUI.newSourcedExampleViewer();
    return new ZoomedViewer(dbGui,instGui);
  }

  /** Viewer that shows the labeled examples of a {@link Dataset} in a scrollable list. */
  public static class SimpleDatasetViewer extends ComponentViewer{

    static final long serialVersionUID=20071015;

    /** Accepts any {@link Dataset}. */
    @Override
    public boolean canReceive(Object o){
      return o instanceof Dataset;
    }

    /**
     * Build the list component for a dataset.
     *
     * @param o the content to display; cast to {@link Dataset}
     * @return a scroll pane wrapping a list with one row per example
     */
    @Override
    public JComponent componentFor(Object o){
      final Dataset d=(Dataset)o;
      // Snapshot the examples so the renderer can address them by row index.
      final Example[] tmp=new Example[d.size()];
      int k=0;
      Iterator<Example> it=d.iterator();
      while(it.hasNext()){
        tmp[k++]=it.next();
      }
      final JList jList=new JList(tmp);
      jList.setCellRenderer(new ListCellRenderer(){
        @Override
        public Component getListCellRendererComponent(JList el,Object v,
            int index,boolean sel,boolean focus){
          return GUI.conciseExampleRendererComponent(tmp[index],60,sel);
        }
      });
      monitorSelections(jList);
      return new JScrollPane(jList);
    }
  }

  //
  // splitter
  //

  /**
   * Partition the labeled examples with the given splitter.
   *
   * The splitter is run once over the examples; the returned {@link Split}
   * builds a new dataset each time a train or test partition is requested.
   *
   * @param splitter the splitter to apply
   * @return a view of the splitter's partitions as datasets
   */
  @Override
  public Split split(final Splitter<Example> splitter){
    splitter.split(examples.iterator());
    return new Split(){
      @Override
      public int getNumPartitions(){
        return splitter.getNumPartitions();
      }
      @Override
      public Dataset getTrain(int k){
        return invertIteration(splitter.getTrain(k));
      }
      @Override
      public Dataset getTest(int k){
        return invertIteration(splitter.getTest(k));
      }
    };
  }

  /**
   * Collect the examples produced by an iterator into a new
   * {@code BasicDataset}, adding each through {@link #add(Example)}.
   */
  private Dataset invertIteration(Iterator<Example> i){
    BasicDataset copy=new BasicDataset();
    while(i.hasNext()){
      copy.add(i.next());
    }
    return copy;
  }

  //
  // test routine
  //

  /** Simple test routine: shows the "toy" sample dataset and prints its schema. */
  public static void main(String[] args){
    try{
      BasicDataset data=(BasicDataset)SampleDatasets.sampleData("toy",false);
      new ViewerFrame("Toy Dataset",data.toGUI());
      System.out.println(data.getSchema());
    }catch(Exception e){
      e.printStackTrace();
    }
  }
}