package edu.stanford.nlp.tagger.io;
import junit.framework.TestCase;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;
import edu.stanford.nlp.ling.TaggedWord;
public class TSVTaggedFileReaderTest extends TestCase {
static final String TEST_FILE =
"A\t1\nB\t2\nC\t3\n\nD\t4\nE\t5\n\n\n\nF\t6\n\n\n";
File createFile(String data)
throws IOException
{
File file = File.createTempFile("TSVTaggedFileReaderTest", "txt");
FileWriter fout = new FileWriter(file);
fout.write(data);
fout.close();
return file;
}
File createTestFile()
throws IOException
{
return createFile(TEST_FILE);
}
File createBrokenFile()
throws IOException
{
// no tags
return createFile("A\nB\n\n");
}
TaggedFileRecord createRecord(File file, String extraArgs) {
String description = extraArgs + "format=TSV," + file;
Properties props = new Properties();
return TaggedFileRecord.createRecord(props, description);
}
public void testReadNormal()
throws IOException
{
File file = createTestFile();
TaggedFileRecord record = createRecord(file, "");
List<List<TaggedWord>> sentences = new ArrayList<List<TaggedWord>>();
for (List<TaggedWord> sentence : record.reader()) {
sentences.add(sentence);
}
assertEquals(3, sentences.size());
assertEquals(3, sentences.get(0).size());
assertEquals("A", sentences.get(0).get(0).word());
assertEquals("B", sentences.get(0).get(1).word());
assertEquals("C", sentences.get(0).get(2).word());
assertEquals("D", sentences.get(1).get(0).word());
assertEquals("E", sentences.get(1).get(1).word());
assertEquals("F", sentences.get(2).get(0).word());
assertEquals("1", sentences.get(0).get(0).tag());
assertEquals("2", sentences.get(0).get(1).tag());
assertEquals("3", sentences.get(0).get(2).tag());
assertEquals("4", sentences.get(1).get(0).tag());
assertEquals("5", sentences.get(1).get(1).tag());
assertEquals("6", sentences.get(2).get(0).tag());
}
public void testReadBackwards()
throws IOException
{
File file = createTestFile();
TaggedFileRecord record = createRecord(file, "tagColumn=0,wordColumn=1,");
List<List<TaggedWord>> sentences = new ArrayList<List<TaggedWord>>();
for (List<TaggedWord> sentence : record.reader()) {
sentences.add(sentence);
}
assertEquals(3, sentences.size());
assertEquals(3, sentences.get(0).size());
assertEquals("A", sentences.get(0).get(0).tag());
assertEquals("B", sentences.get(0).get(1).tag());
assertEquals("C", sentences.get(0).get(2).tag());
assertEquals("D", sentences.get(1).get(0).tag());
assertEquals("E", sentences.get(1).get(1).tag());
assertEquals("F", sentences.get(2).get(0).tag());
assertEquals("1", sentences.get(0).get(0).word());
assertEquals("2", sentences.get(0).get(1).word());
assertEquals("3", sentences.get(0).get(2).word());
assertEquals("4", sentences.get(1).get(0).word());
assertEquals("5", sentences.get(1).get(1).word());
assertEquals("6", sentences.get(2).get(0).word());
}
public void testError()
throws IOException
{
File file = createBrokenFile();
TaggedFileRecord record = createRecord(file, "tagColumn=0,wordColumn=1,");
try {
for (List<TaggedWord> sentence : record.reader()) {
throw new AssertionError("Should have thrown an error " +
" reading a file with no tags");
}
} catch (IllegalArgumentException e) {
// yay
}
}
}