package edu.cmu.sphinx.alignment; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.equalTo; import java.util.List; import org.testng.annotations.BeforeMethod; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import edu.cmu.sphinx.util.Utilities; public class UsEnglishWordExpanderTest { private static final Object[][] TEST_DATA = { {"# . no, $ convertion.", ". no $ convertion"}, {"1, 2 3", "one two three"}, {"the answer is 42,", "the answer is forty two"}, {"587", "five hundred eighty seven"}, {"1903", "one thousand nine hundred three"}, {"12011", "twelve thousand eleven"}, {"126166", "one hundred twenty six thousand one hundred sixty six"}, {"9 3/4", "nine and three fourth 's"}, {"October 1st", "october first"}, {"May the 4th be with you", "may the fourth be with you"}, {"7-11", "seven to eleven"}, {"12, 35", "twelve thirty five"}, {"146%", "one hundred forty six percent"}, {"320'000", "three hundred twenty thousand"}, {"120,000", "one hundred twenty thousand"}, {"$35,000", "thirty five thousand dollars"}, {"$1000000", "one million dollars"}, {"U.S. economy", "u s economy"}, {"sweet home Greenbow, AL.", "sweet home greenbow alabama"}, {"Henry I", "henry the first"}, {"Chapter XVII", "chapter seventeen"}, {"don't, doesn't, won't, can't", "don't doesn't won't can't"}, {"I've we've", "i've we've"}, {"I've we've it's", "i've we've it's"}, {"Classics of 80s", "classics of eighties"}, {"In 1880s", "in eighteen eighties"}, {"Mulholland Dr.", "mulholland drive"}, {"dr. Jekyll and Mr. Hyde.", "doctor jekyll and mister hyde"}, {"Mr. & Mrs. smith", "mister and missus smith"}, {"St. Louis Cardinals", "saint louis cardinals"}, {"St. Elmo's fire", "saint elmo's fire"}, {"elm st.", "elm street"},}; private TextTokenizer expander; @BeforeMethod public void setupMethod() { expander = new USEnglishTokenizer(); } @DataProvider(name = "data") public Object[][] getData() { return TEST_DATA; } @Test(dataProvider = "data") public void textToWords(String text, String expanded) { List<String> tokens = expander.expand(text); assertThat(Utilities.join(tokens), equalTo(expanded)); } }