/* Copyright 2003, Carnegie Mellon, All Rights Reserved */
package edu.cmu.minorthird.classify.sequential;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Random;
import java.util.Set;
import edu.cmu.minorthird.classify.BasicDataset;
import edu.cmu.minorthird.classify.Dataset;
import edu.cmu.minorthird.classify.Example;
import edu.cmu.minorthird.classify.ExampleSchema;
import edu.cmu.minorthird.classify.FeatureFactory;
import edu.cmu.minorthird.classify.GUI;
import edu.cmu.minorthird.classify.Splitter;
import edu.cmu.minorthird.util.gui.Viewer;
import edu.cmu.minorthird.util.gui.ZoomedViewer;
/**
* A SequenceDataset that additionally includes examples for 'sliding
* windows' over the original data.
*
* @author William Cohen
*/
public class SegmentDataset implements Dataset {

    /** Window size shared by every group in the dataset; -1 until the first group is added. */
    int maxWindowSize = -1;

    /** The segment groups, stored compactly when {@link #compressGroups} is set. */
    private List<CandidateSegmentGroup> groupList = new ArrayList<CandidateSegmentGroup>();

    /** Every class label seen so far; used to build the {@link ExampleSchema}. */
    private Set<String> classNameSet = new HashSet<String>();

    /** Total number of examples, summed over all groups. */
    private int totalSize = 0;

    /** Shared factory so features are interned consistently across groups. */
    private FeatureFactory factory = new FeatureFactory();

    /** If true, groups added later are wrapped in a {@link CompactCandidateSegmentGroup}. */
    private boolean compressGroups = true;

    public SegmentDataset() {
    }

    /** Enable or disable compact storage for groups added after this call. */
    public void setDataCompression(boolean flag) {
        compressGroups = flag;
    }

    @Override
    public FeatureFactory getFeatureFactory() {
        return factory;
    }

    /** The common maximum window size, or -1 if no groups have been added yet. */
    public int getMaxWindowSize() {
        return maxWindowSize;
    }

    /** Total number of examples (summed over all groups). */
    @Override
    public int size() {
        return totalSize;
    }

    /** Number of segment groups added so far. */
    public int getNumberOfSegmentGroups() {
        return groupList.size();
    }

    /**
     * Add a new sequence of examples to the dataset.
     *
     * @param group the group to add; its max window size must match the
     *        dataset's (the first group added fixes that size)
     * @throws IllegalArgumentException if the group's max window size differs
     *         from the dataset's established window size
     */
    public void addCandidateSegmentGroup(CandidateSegmentGroup group) {
        if (maxWindowSize < 0) {
            // first group fixes the window size for the whole dataset
            maxWindowSize = group.getMaxWindowSize();
        } else if (group.getMaxWindowSize() != maxWindowSize) {
            throw new IllegalArgumentException("mismatched window sizes: "
                    + maxWindowSize + ", " + group.getMaxWindowSize());
        }
        if (compressGroups) {
            groupList.add(new CompactCandidateSegmentGroup(factory, group));
        } else {
            groupList.add(group);
        }
        classNameSet.addAll(group.classNameSet());
        totalSize += group.size();
    }

    @Override
    public ExampleSchema getSchema() {
        ExampleSchema schema =
                new ExampleSchema(classNameSet.toArray(new String[classNameSet.size()]));
        // canonicalize the common binary case so callers can compare by identity
        if (schema.equals(ExampleSchema.BINARY_EXAMPLE_SCHEMA)) {
            return ExampleSchema.BINARY_EXAMPLE_SCHEMA;
        }
        return schema;
    }

    /**
     * Add an example to the dataset <em>without</em> compressing it.
     * <br><br>
     * If you want the example compressed before it is stored, call
     * {@link #add(Example, boolean)} with {@code compress=true}.
     *
     * @param example the Example to add to the dataset
     */
    @Override
    public void add(Example example) {
        add(example, false);
    }

    /**
     * Add an Example to the dataset, optionally compressing it first.
     *
     * @param example  the example to add to the dataset
     * @param compress if true, the example's instance is compressed via this
     *                 dataset's {@link FeatureFactory} before being stored
     */
    @Override
    public void add(Example example, boolean compress) {
        // wrap the single example as a trivial group: sequence length 1, window size 1
        MutableCandidateSegmentGroup g = new MutableCandidateSegmentGroup(1, 1);
        if (compress) {
            g.setSubsequence(0, 1, factory.compress(example.asInstance()), example.getLabel());
        } else {
            g.setSubsequence(0, 1, example.asInstance(), example.getLabel());
        }
        addCandidateSegmentGroup(g);
    }

    /** Iterate over all examples: every non-null subsequence example of every group. */
    @Override
    public Iterator<Example> iterator() {
        List<Example> result = new ArrayList<Example>();
        for (CandidateSegmentGroup g : groupList) {
            for (int start = 0; start < g.getSequenceLength(); start++) {
                for (int len = 1; len <= g.getMaxWindowSize(); len++) {
                    Example e = g.getSubsequenceExample(start, start + len);
                    if (e != null) {
                        result.add(e);
                    }
                }
            }
        }
        return result.iterator();
    }

    /** Iterate over the underlying candidate segment groups. */
    public Iterator<CandidateSegmentGroup> candidateSegmentGroupIterator() {
        return groupList.iterator();
    }

    @Override
    public String toString() {
        // StringBuilder: this buffer is method-local, so no synchronization is needed
        StringBuilder buf = new StringBuilder();
        buf.append("size = ").append(size()).append("\n");
        for (CandidateSegmentGroup g : groupList) {
            buf.append(g).append("\n");
        }
        return buf.toString();
    }

    /** Randomly re-order the groups (examples within a group stay together). */
    @Override
    public void shuffle(Random r) {
        Collections.shuffle(groupList, r);
    }

    /** Re-order the groups with a fixed seed, for reproducible shuffles. */
    @Override
    public void shuffle() {
        shuffle(new Random(0));
    }

    /**
     * Make a shallow copy of the dataset: the group list is new but the groups
     * themselves are shared with this dataset.
     * <p>
     * NOTE(review): the copy is built with the default {@code compressGroups=true},
     * so groups that are already compact get wrapped again — same as the original
     * behavior; confirm {@link CompactCandidateSegmentGroup} tolerates that.
     */
    @Override
    public Dataset shallowCopy() {
        // same loop as invertIteration — reuse it for consistency
        return invertIteration(groupList.iterator());
    }

    //
    // split
    //

    /** Splitting by individual examples is unsupported; split by groups instead. */
    @Override
    public Split split(final Splitter<Example> splitter) {
        throw new UnsupportedOperationException();
    }

    /** Split the dataset at the granularity of whole segment groups. */
    public Split splitCandidateSegmentGroup(final Splitter<CandidateSegmentGroup> splitter) {
        splitter.split(groupList.iterator());
        return new Split() {
            @Override
            public int getNumPartitions() {
                return splitter.getNumPartitions();
            }
            @Override
            public Dataset getTrain(int k) {
                return invertIteration(splitter.getTrain(k));
            }
            @Override
            public Dataset getTest(int k) {
                return invertIteration(splitter.getTest(k));
            }
        };
    }

    /** Collect the iterated groups into a fresh SegmentDataset. */
    protected Dataset invertIteration(Iterator<CandidateSegmentGroup> i) {
        SegmentDataset copy = new SegmentDataset();
        while (i.hasNext()) {
            copy.addCandidateSegmentGroup(i.next());
        }
        return copy;
    }

    /** A GUI view of the dataset. */
    @Override
    public Viewer toGUI() {
        Viewer dbGui = new BasicDataset.SimpleDatasetViewer();
        dbGui.setContent(this);
        Viewer instGui = GUI.newSourcedExampleViewer();
        return new ZoomedViewer(dbGui, instGui);
    }

    /** Not tracked for segment data; always returns -1. */
    public int getNumPosExamples() {
        return -1;
    }
}