package edu.stanford.nlp.classify;
import java.io.File;
import java.io.IOException;
import java.util.Arrays;
import java.util.Iterator;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.util.CollectionUtils;
import junit.framework.Assert;
import junit.framework.TestCase;
/**
 * Unit tests for {@link RVFDataset}: merging datasets with {@code addAll} and
 * writing/reading a dataset in SVMLight format.
 */
public class RVFDatasetTest extends TestCase {

  /**
   * Adds two single-datum datasets into a third one and checks that iterating
   * the combined dataset yields the data in insertion order and nothing else.
   */
  public void testCombiningDatasets() {
    RVFDatum<String, String> datum1 = newRVFDatum(null, "a", "b", "a");
    RVFDatum<String, String> datum2 = newRVFDatum(null, "c", "c", "b");

    RVFDataset<String, String> data1 = new RVFDataset<String, String>();
    data1.add(datum1);

    RVFDataset<String, String> data2 = new RVFDataset<String, String>();
    // The second datum belongs in data2; adding it to data1 would leave data2
    // empty, so the test would never combine two non-empty datasets.
    data2.add(datum2);

    RVFDataset<String, String> data = new RVFDataset<String, String>();
    data.addAll(data1);
    data.addAll(data2);

    Iterator<RVFDatum<String, String>> iter = data.iterator();
    Assert.assertEquals(datum1, iter.next());
    Assert.assertEquals(datum2, iter.next());
    Assert.assertFalse(iter.hasNext());
  }

  /**
   * Writes a dataset with Boolean labels and Integer features to a temporary
   * SVMLight file, then checks that (a) reading it into a default-constructed
   * dataset throws a {@link RuntimeException}, and (b) reading it into a dataset
   * that shares the original feature and label indexes reproduces the original
   * data.
   *
   * @throws IOException if the temporary file cannot be created
   */
  public void testSVMLightIntegerFormat() throws IOException {
    RVFDataset<Boolean, Integer> dataset = new RVFDataset<Boolean, Integer>();
    dataset.add(newRVFDatum(true, 1, 2, 1, 0));
    dataset.add(newRVFDatum(false, 2, 2, 0, 0));
    dataset.add(newRVFDatum(true, 0, 1, 2, 2));

    File tempFile = File.createTempFile("testSVMLightIntegerFormat", ".svm");
    try {
      dataset.writeSVMLightFormat(tempFile);

      RVFDataset<Boolean, Integer> newDataset = new RVFDataset<Boolean, Integer>();
      try {
        newDataset.readSVMLightFormat(tempFile);
        Assert.fail("expected failure with empty indexes");
      } catch (RuntimeException expected) {
        // Intentionally ignored: this exception is the outcome under test.
        // Assert.fail throws an Error, so it is not swallowed here.
      }

      // Re-create the target dataset with the source dataset's indexes so the
      // file contents can be mapped back to the same features and labels.
      newDataset = new RVFDataset<Boolean, Integer>(
          dataset.size(), dataset.featureIndex(), dataset.labelIndex());
      newDataset.readSVMLightFormat(tempFile);
      Assert.assertEquals(CollectionUtils.toList(dataset), CollectionUtils.toList(newDataset));
    } finally {
      // Do not leave the temporary file behind, whether the test passed or not.
      if (!tempFile.delete()) {
        tempFile.deleteOnExit();
      }
    }
  }

  /**
   * Builds an {@link RVFDatum} whose feature counter is obtained by passing the
   * given items, as a list, to {@code Counters.asCounter}.
   *
   * @param label the datum's label; may be {@code null} (as used by
   *              {@link #testCombiningDatasets()})
   * @param items the feature occurrences; presumably a repeated item raises
   *              that feature's count (confirm against {@code Counters.asCounter})
   * @return a new datum with the resulting counter and the given label
   */
  private static <L, F> RVFDatum<L, F> newRVFDatum(L label, F ... items) {
    return new RVFDatum<L, F>(Counters.asCounter(Arrays.asList(items)), label);
  }
}