package edu.cmu.sphinx.linguist.language.ngram; import static edu.cmu.sphinx.util.LogMath.getLogMath; import static java.util.Arrays.asList; import static org.hamcrest.MatcherAssert.assertThat; import static org.hamcrest.Matchers.contains; import static org.hamcrest.Matchers.containsInAnyOrder; import static org.hamcrest.Matchers.equalTo; import java.io.File; import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.List; import java.util.Scanner; import org.testng.annotations.BeforeClass; import org.testng.annotations.Test; import edu.cmu.sphinx.linguist.WordSequence; import edu.cmu.sphinx.linguist.acoustic.UnitManager; import edu.cmu.sphinx.linguist.dictionary.Dictionary; import edu.cmu.sphinx.linguist.dictionary.TextDictionary; public class DynamicTrigramModelTest { private Dictionary dictionary; @BeforeClass public void setUp() throws IOException { URL dictUrl = getClass() .getResource( "/edu/cmu/sphinx/models/en-us/cmudict-en-us.dict"); URL noiseDictUrl = getClass().getResource( "/edu/cmu/sphinx/models/en-us/en-us/noisedict"); dictionary = new TextDictionary(dictUrl, noiseDictUrl, null, null, new UnitManager()); dictionary.allocate(); } @Test public void unigramModel() throws IOException { DynamicTrigramModel model = new DynamicTrigramModel(dictionary); model.setText(asList("one")); model.allocate(); assertThat(model.getVocabulary(), contains("one")); assertThat(model.getProbability(new WordSequence(dictionary .getWord("one"))), equalTo(getLogMath().linearToLog(1.0/3))); } @Test public void bigramModel() throws IOException { DynamicTrigramModel model = new DynamicTrigramModel(dictionary); model.setText(asList("one", "two")); model.allocate(); assertThat(model.getVocabulary(), containsInAnyOrder("one", "two")); } @Test public void trigramModel() throws IOException { DynamicTrigramModel model = new DynamicTrigramModel(dictionary); model.setText(asList("one", "two", "three")); model.allocate(); assertThat(model.getVocabulary(), containsInAnyOrder("one", "two", "three")); } @Test(enabled = false) public void compareWithPrecomputed() throws ClassNotFoundException, IOException { DynamicTrigramModel model = new DynamicTrigramModel(dictionary); URL url = getClass().getResource("npr.transcript"); Scanner scanner = new Scanner(new File("../words")); List<String> words = new ArrayList<String>(); while (scanner.hasNext()) { words.add(scanner.next()); } scanner.close(); model.setText(words); model.allocate(); url = getClass().getResource("npr.lm"); SimpleNGramModel simpleModel = new SimpleNGramModel(url.getPath(), dictionary, 1.f, -1); model.allocate(); simpleModel.allocate(); assertThat(model.getVocabulary(), equalTo(simpleModel.getVocabulary())); for (WordSequence wordSequence : simpleModel.getNGrams()) { if (wordSequence.size() < 3) continue; System.err.println(wordSequence); assertThat(model.getProbability(wordSequence), equalTo(simpleModel.getProbability(wordSequence))); } } }