/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
/**
@author Aron Culotta <a href="mailto:culotta@cs.umass.edu">culotta@cs.umass.edu</a>
*/
package cc.mallet.pipe.tests;
import junit.framework.*;
import java.util.ArrayList;
import java.util.regex.*;
import cc.mallet.pipe.*;
import cc.mallet.pipe.iterator.*;
import cc.mallet.pipe.tsf.*;
import cc.mallet.types.*;
/**
 * JUnit test for {@link SGML2TokenSequence}.
 *
 * <p>Each marked-up string in {@link #dataWithTags} is run through a pipeline
 * of {@link Input2CharSequence} followed by {@link SGML2TokenSequence}. The
 * resulting data tokens are compared against {@link #data} and the resulting
 * target tokens against {@link #tags}.
 */
public class TestSGML2TokenSequence extends TestCase
{
	public TestSGML2TokenSequence (String name) {
		super (name);
	}

	/** Pipeline inputs; some words are wrapped in {@code <tag>} markup. */
	String[] dataWithTags = new String[] {
		"zeroth test string",
		"<tag>first</tag> test string",
		"second <tag>test</tag> string",
		"third test <tag>string</tag>",
	};

	/** Expected data tokens (whitespace separated), parallel to {@link #dataWithTags}. */
	String[] data = new String[] {
		"zeroth test string",
		"first test string",
		"second test string",
		"third test string",
	};

	/**
	 * Expected target tokens (whitespace separated), parallel to
	 * {@link #dataWithTags}: "tag" for a word inside the markup, "O" otherwise.
	 */
	String[] tags = new String[] {
		"O O O",
		"tag O O ",
		"O tag O",
		"O O tag",
	};

	/**
	 * Helper pipe that replaces the carrier's data, which must be an
	 * {@code Object[]}, with an {@link ArrayIterator} over that array.
	 * It is not used by the tests in this class.
	 */
	public static class Array2ArrayIterator extends Pipe
	{
		public Instance pipe (Instance carrier) {
			carrier.setData(new ArrayIterator ((Object[])carrier.getData()));
			return carrier;
		}
	}

	/**
	 * Pipes every entry of {@link #dataWithTags} and checks the produced data
	 * and target token sequences against the expected fixtures.
	 *
	 * <p>The checks use JUnit assertion methods rather than the Java
	 * {@code assert} keyword: {@code assert} statements are skipped unless the
	 * JVM is started with {@code -ea}, which would let this test pass without
	 * verifying anything.
	 */
	public void testOne ()
	{
		Pipe p = new SerialPipes (new Pipe[] {
			new Input2CharSequence (),
			new SGML2TokenSequence()
		});

		for (int i=0; i < dataWithTags.length; i++) {
			Instance inst = p.instanceFrom(new Instance (dataWithTags[i], null, null, null));
			TokenSequence input = (TokenSequence)inst.getData();
			TokenSequence target = (TokenSequence)inst.getTarget();
			String[] oginput = data[i].split("\\s+");
			String[] ogtags = tags[i].split("\\s+");

			assertEquals ("instance " + i + ": data and target lengths differ",
					input.size(), target.size());
			assertEquals ("instance " + i + ": unexpected number of tokens",
					oginput.length, input.size());

			for (int j=0; j < oginput.length; j++) {
				assertEquals ("instance " + i + ", token " + j + ": wrong text",
						oginput[j], input.get(j).getText());
				assertEquals ("instance " + i + ", token " + j + ": wrong tag",
						ogtags[j], target.get(j).getText());
			}
		}
	}

	/** Builds a suite containing every test method of this class. */
	public static Test suite ()
	{
		return new TestSuite (TestSGML2TokenSequence.class);
	}

	/** No per-test fixture is required. */
	protected void setUp ()
	{
	}

	/** Runs the suite with the JUnit text-mode runner. */
	public static void main (String[] args)
	{
		junit.textui.TestRunner.run (suite());
	}
}