package edu.stanford.nlp.ling; import edu.stanford.nlp.simple.Sentence; import junit.framework.TestCase; import java.util.ArrayList; import java.util.List; /** * Tests the static methods that turn sentences (lists of Labels) * into strings. * * @author John Bauer */ public class SentenceTest extends TestCase { private String[] words = {"This", "is", "a", "test", "."}; private String[] tags = {"A", "B", "C", "D", "E"}; private String expectedValueOnly = "This is a test ."; private String expectedTagged = "This_A is_B a_C test_D ._E"; private String separator = "_"; @Override public void setUp() { assertEquals(words.length, tags.length); } public void testCoreLabelListToString() { List<CoreLabel> clWords = new ArrayList<>(); List<CoreLabel> clValues = new ArrayList<>(); List<CoreLabel> clWordTags = new ArrayList<>(); List<CoreLabel> clValueTags = new ArrayList<>(); for (int i = 0; i < words.length; ++i) { CoreLabel cl = new CoreLabel(); cl.setWord(words[i]); clWords.add(cl); cl = new CoreLabel(); cl.setValue(words[i]); clValues.add(cl); cl = new CoreLabel(); cl.setWord(words[i]); cl.setTag(tags[i]); clWordTags.add(cl); cl = new CoreLabel(); cl.setValue(words[i]); cl.setTag(tags[i]); clValueTags.add(cl); } assertEquals(expectedValueOnly, SentenceUtils.listToString(clWords, true)); assertEquals(expectedValueOnly, SentenceUtils.listToString(clValues, true)); assertEquals(expectedTagged, SentenceUtils.listToString(clWordTags, false, separator)); assertEquals(expectedTagged, SentenceUtils.listToString(clValueTags, false, separator)); } public void testTaggedWordListToString() { List<TaggedWord> tagged = new ArrayList<>(); for (int i = 0; i < words.length; ++i) { tagged.add(new TaggedWord(words[i], tags[i])); } assertEquals(expectedValueOnly, SentenceUtils.listToString(tagged, true)); assertEquals(expectedTagged, SentenceUtils.listToString(tagged, false, separator)); } /** * Serializing a raw sentence shouldn't make it an order of magnitude larger than * the raw text. */ public void testTokenizedSentenceSize() { String text = "one two three four five"; byte[] sentenceArray = new Sentence(text).serialize().toByteArray(); byte[] textArray = text.getBytes(); assertTrue(sentenceArray.length < textArray.length * 10); } }