/* This file is part of the Joshua Machine Translation System. * * Joshua is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA */ package joshua.corpus.lexprob; import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.util.Collections; import java.util.Date; import joshua.corpus.AlignedParallelCorpus; import joshua.corpus.Corpus; import joshua.corpus.CorpusArray; import joshua.corpus.LabeledSpan; import joshua.corpus.ParallelCorpus; import joshua.corpus.Span; import joshua.corpus.alignment.Alignments; import joshua.corpus.suffix_array.HierarchicalPhrase; import joshua.corpus.suffix_array.HierarchicalPhrases; import joshua.corpus.suffix_array.Pattern; import joshua.corpus.suffix_array.SuffixArray; import joshua.corpus.suffix_array.SuffixArrayFactory; import joshua.corpus.vocab.SymbolTable; import joshua.corpus.vocab.Vocabulary; import joshua.prefix_tree.PrefixTree; import joshua.util.Counts; import org.testng.Assert; import org.testng.annotations.Test; /** * Unit tests for LexProbs class. * * TODO This class needs to be extended to add more unit tests that test for proper NULL alignment behavior. * * @author Lane Schwartz */ public class BetterLexProbsTest { // ä == \u00E4 // ü == \u00FC LexProbs lexProbs; Vocabulary sourceVocab, targetVocab; Alignments alignmentArray; CorpusArray sourceCorpusArray; CorpusArray targetCorpusArray; ParallelCorpus parallelCorpus; @Test public void setupCorpus() throws IOException { // Set System.out and System.err to use the provided character encoding try { System.setOut(new PrintStream(System.out, true, "UTF8")); System.setErr(new PrintStream(System.err, true, "UTF8")); } catch (UnsupportedEncodingException e1) { System.err.println("UTF8 is not a valid encoding; using system default encoding for System.out and System.err."); } catch (SecurityException e2) { System.err.println("Security manager is configured to disallow changes to System.out or System.err; using system default encoding."); } String sourceCorpusString = "it makes him and it mars him , it sets him on yet it takes him off ." + "\n" + "resumption of the session ." + "\n" + "of the session" + "\n" + "of the session" + "\n" + "thunder ; lightning" + "\n" + "; blither blather ;"; String sourceFileName; { File sourceFile = File.createTempFile("source", new Date().toString()); PrintStream sourcePrintStream = new PrintStream(sourceFile, "UTF-8"); sourcePrintStream.println(sourceCorpusString); sourcePrintStream.close(); sourceFileName = sourceFile.getAbsolutePath(); } String targetCorpusString = "das macht ihn und es besch\u00E4digt ihn , es setzt ihn auf und es f\u00FChrt ihn aus ." + "\n" + "wiederaufnahme der sitzungsperiode ." + "\n" + "von dem sitzung" + "\n" + "von dem sitzung" + "\n" + "blitzen" + "\n" + "; ;"; String targetFileName; { File targetFile = File.createTempFile("target", new Date().toString()); PrintStream targetPrintStream = new PrintStream(targetFile, "UTF-8"); targetPrintStream.println(targetCorpusString); targetPrintStream.close(); targetFileName = targetFile.getAbsolutePath(); } String alignmentString = "0-0 1-1 2-2 3-3 4-4 5-5 6-6 7-7 8-8 9-9 10-10 11-11 12-12 13-13 14-14 15-15 16-16 17-17" + "\n" + "0-0 1-1 2-1 3-2 4-3" + "\n" + "0-0 1-1 2-2" + "\n" + "0-0 1-1 2-2" + "\n" + "0-0 2-0" + "\n" + "0-0 3-1"; String alignmentFileName; { File alignmentFile = File.createTempFile("alignment", new Date().toString()); PrintStream alignmentPrintStream = new PrintStream(alignmentFile); alignmentPrintStream.println(alignmentString); alignmentPrintStream.close(); alignmentFileName = alignmentFile.getAbsolutePath(); } this.sourceCorpusArray = SuffixArrayFactory.createCorpusArray(sourceFileName); this.sourceVocab = (Vocabulary) sourceCorpusArray.getVocabulary(); SuffixArray sourceSuffixArray = SuffixArrayFactory.createSuffixArray(sourceCorpusArray, SuffixArray.DEFAULT_CACHE_CAPACITY); this.targetCorpusArray = SuffixArrayFactory.createCorpusArray(targetFileName); this.targetVocab = (Vocabulary) targetCorpusArray.getVocabulary(); SuffixArray targetSuffixArray = SuffixArrayFactory.createSuffixArray(targetCorpusArray, SuffixArray.DEFAULT_CACHE_CAPACITY); this.alignmentArray = SuffixArrayFactory.createAlignments(alignmentFileName, sourceSuffixArray, targetSuffixArray); this.parallelCorpus = new AlignedParallelCorpus(sourceCorpusArray, targetCorpusArray, alignmentArray); // { // public Alignments getAlignments() { return alignmentArray; } // public int getNumSentences() { return sourceCorpusArray.getNumSentences(); } // public Corpus getSourceCorpus() { return sourceCorpusArray; } // public Corpus getTargetCorpus() { return targetCorpusArray; } // }; } @Test(dependsOnMethods={"setupCorpus"}) public void verifyCorpusCounts() { this.lexProbs = new LexProbs(parallelCorpus, Float.MIN_VALUE); Counts<Integer,Integer> counts = lexProbs.getCounts(); Assert.assertEquals(counts.getCount(sourceVocab.getID(";"), targetVocab.getID(";")), 2); Assert.assertEquals(counts.getCount(sourceVocab.getID(";"), null), 1); } @Test(dependsOnMethods={"setupCorpus","verifyCorpusCounts"}) public void setup() { // System.err.println("Completed setup!"); // SuffixArrayFactory.createLexicalProbabilities(parallelCorpus); // new SampledLexProbs(Integer.MAX_VALUE, sourceSuffixArray, targetSuffixArray, alignmentArray, Cache.DEFAULT_CAPACITY, false); } @Test(dependsOnMethods={"setup"}) public void verifyTargetVocabulary() { // "das macht ihn und es besch\u00E4digt ihn , es setzt ihn auf und es f\u00FChrt ihn aus ." + "\n" + //"wiederaufnahme der sitzungsperiode ." + "\n" + //"von dem sitzung" + "\n" + //"von dem sitzung"; Assert.assertEquals(targetVocab.getWord(targetVocab.getID("das")), "das"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("macht")), "macht"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("ihn")), "ihn"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("und")), "und"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("es")), "es"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("setzt")), "setzt"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID(",")), ","); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("auf")), "auf"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("und")), "und"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("aus")), "aus"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID(".")), "."); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("wiederaufnahme")), "wiederaufnahme"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("der")), "der"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("sitzungsperiode")), "sitzungsperiode"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("von")), "von"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("dem")), "dem"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("sitzung")), "sitzung"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("f\u00FChrt")), "f\u00FChrt"); Assert.assertEquals(targetVocab.getWord(targetVocab.getID("besch\u00E4digt")), "besch\u00E4digt"); } @Test(dependsOnMethods={"setup"}) public void testAlignmentPoints() { for (int i=0; i<18; i++) { int[] sourceIndices = alignmentArray.getAlignedSourceIndices(i); Assert.assertNotNull(sourceIndices); Assert.assertEquals(sourceIndices.length, 1); Assert.assertEquals(sourceIndices[0], i); int[] targetIndices = alignmentArray.getAlignedTargetIndices(i); Assert.assertNotNull(targetIndices); Assert.assertEquals(targetIndices.length, 1); Assert.assertEquals(targetIndices[0], i); } } @Test(dependsOnMethods={"setup"}) public void calculateLexProbs() { // Pair<Float,Float> results; // "it makes him and it mars him , it sets him on yet it takes him off ."; // "das macht ihn und es besch\u00E4digt ihn , es setzt ihn auf und es f\u00FChrt ihn aus ." int phraseIndex = 0; { HierarchicalPhrases phrases = getSourcePhrase("it", 0, 1); HierarchicalPhrase targetPhrase = getTargetPhrase("das", 0, 1); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); // lex P(it | das) Assert.assertEquals(targetGivenSource, 0.25f);// lex P(das | it) } { HierarchicalPhrases phrases = getSourcePhrase("makes", 1, 2); HierarchicalPhrase targetPhrase = getTargetPhrase("macht", 1, 2); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); // lex P(makes | macht) Assert.assertEquals(targetGivenSource, 1.0f);// lex P(macht | makes) } { HierarchicalPhrases phrases = getSourcePhrase("him", 2, 3); HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 2, 3); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); Assert.assertEquals(targetGivenSource, 1.0f); } { HierarchicalPhrases phrases = getSourcePhrase("and", 3, 4); HierarchicalPhrase targetPhrase = getTargetPhrase("und", 3, 4); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 0.5f); // P(and | und) Assert.assertEquals(targetGivenSource, 1.0f);// P(und | and) } { HierarchicalPhrases phrases = getSourcePhrase("it", 4, 5); HierarchicalPhrase targetPhrase = getTargetPhrase("es", 4, 5); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); // lex P(it | es) Assert.assertEquals(targetGivenSource, 0.75f);// lex P(es | it) } { HierarchicalPhrases phrases = getSourcePhrase("mars", 5, 6); HierarchicalPhrase targetPhrase = getTargetPhrase("besch\u00E4digt", 5, 6); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); Assert.assertEquals(targetGivenSource, 1.0f); } { HierarchicalPhrases phrases = getSourcePhrase("him", 6, 7); HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 6, 7); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); Assert.assertEquals(targetGivenSource, 1.0f); } { HierarchicalPhrases phrases = getSourcePhrase(",", 7, 8); HierarchicalPhrase targetPhrase = getTargetPhrase(",", 7, 8); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); Assert.assertEquals(targetGivenSource, 1.0f); } { HierarchicalPhrases phrases = getSourcePhrase("it", 8, 9); HierarchicalPhrase targetPhrase = getTargetPhrase("es", 8, 9); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); // lex P(it | es) Assert.assertEquals(targetGivenSource, 0.75f);// lex P(es | it) } { HierarchicalPhrases phrases = getSourcePhrase("sets", 9, 10); HierarchicalPhrase targetPhrase = getTargetPhrase("setzt", 9, 10); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); Assert.assertEquals(targetGivenSource, 1.0f); } { HierarchicalPhrases phrases = getSourcePhrase("him", 10, 11); HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 10, 11); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); Assert.assertEquals(targetGivenSource, 1.0f); } { HierarchicalPhrases phrases = getSourcePhrase("on", 11, 12); HierarchicalPhrase targetPhrase = getTargetPhrase("auf", 11, 12); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); Assert.assertEquals(targetGivenSource, 1.0f); } { HierarchicalPhrases phrases = getSourcePhrase("yet", 12, 13); HierarchicalPhrase targetPhrase = getTargetPhrase("und", 12, 13); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 0.5f); // P(yet | und) Assert.assertEquals(targetGivenSource, 1.0f);// P(und | yet) } { HierarchicalPhrases phrases = getSourcePhrase("it", 13, 14); HierarchicalPhrase targetPhrase = getTargetPhrase("es", 13, 14); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); // lex P(it | es) Assert.assertEquals(targetGivenSource, 0.75f);// lex P(es | it) } { HierarchicalPhrases phrases = getSourcePhrase("takes", 14, 15); HierarchicalPhrase targetPhrase = getTargetPhrase("f\u00FChrt", 14, 15); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); Assert.assertEquals(targetGivenSource, 1.0f); } { HierarchicalPhrases phrases = getSourcePhrase("him", 15, 16); HierarchicalPhrase targetPhrase = getTargetPhrase("ihn", 15, 16); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); Assert.assertEquals(targetGivenSource, 1.0f); } { HierarchicalPhrases phrases = getSourcePhrase("off", 16, 17); HierarchicalPhrase targetPhrase = getTargetPhrase("aus", 16, 17); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); Assert.assertEquals(targetGivenSource, 1.0f); } { HierarchicalPhrases phrases = getSourcePhrase(".", 17, 18); HierarchicalPhrase targetPhrase = getTargetPhrase(".", 17, 18); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 1.0f); Assert.assertEquals(targetGivenSource, 1.0f); } /////////// { HierarchicalPhrases phrases = getSourcePhrase("yet it", 12, 14); HierarchicalPhrase targetPhrase = getTargetPhrase("und es", 12, 14); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 0.5f * 1.0f); // lex P(yet it | und es) Assert.assertEquals(targetGivenSource, 1.0f * 0.75f);// lex P(und es | yet it) } /////////// { HierarchicalPhrases phrases = getSourcePhrase("of the session", 19, 22); HierarchicalPhrase targetPhrase = getTargetPhrase("der sitzungsperiode", 19, 21); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 0.5f * 0.5f * 1.0f); // lex P(of the session | der sitzungsperiode) Assert.assertEquals(targetGivenSource, 0.5f*((1.0f/3.0f) + (1.0f/3.0f)) * (1.0f/3.0f));// lex P(der sitzungsperiode | of the session) } { HierarchicalPhrases phrases = getSourcePhrase("thunder ; lightning", 29, 32); HierarchicalPhrase targetPhrase = getTargetPhrase("blitzen", 28, 29); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); Assert.assertEquals(sourceGivenTarget, 0.5f * (1.0f/3.0f) * 0.5f); // lex P(thunder ; lightning | blitzen) Assert.assertEquals(targetGivenSource, ((1.0f/2.0f) * (1.0f + 1.0f)));// lex P(blitzen | thunder ; lightning) } } /** * Unit test to verify correct calculation of * lexical translation probabilities for phrases with gaps. */ @Test(dependsOnMethods={"setup"}) public void calculateHieroLexProbs() { Pattern pattern = new Pattern(sourceVocab, sourceVocab.getID("it"), SymbolTable.X, sourceVocab.getID("and"), sourceVocab.getID("it")); int[] terminalSequenceStartIndices = {0,3}; // int[] terminalSequenceEndIndices = {1,5}; int phraseIndex = 0; int[] sentenceNumbers = {0}; // HierarchicalPhrase phrase = new HierarchicalPhrase( // pattern, // terminalSequenceStartIndices, // terminalSequenceEndIndices, // sourceCorpusArray, // terminalSequenceEndIndices[terminalSequenceEndIndices.length-1] - terminalSequenceStartIndices[0]); HierarchicalPhrases phrases = new HierarchicalPhrases(pattern, terminalSequenceStartIndices, sentenceNumbers); // Pair<Float,Float> results; // "it makes him and it mars him , it sets him on yet it takes him off ."; // "das macht ihn und es besch\u00E4digt ihn , es setzt ihn auf und es f\u00FChrt ihn aus ." int[] targetWords = { targetVocab.getID("das"), SymbolTable.X, targetVocab.getID("und"), targetVocab.getID("es") }; HierarchicalPhrase targetPhrase = new HierarchicalPhrase( targetWords, new Span(0,5), Collections.<LabeledSpan>emptyList(), targetCorpusArray); // results = lexProbs.calculateLexProbs(phrases, phraseIndex, targetPhrase); float sourceGivenTarget = lexProbs.lexProbSourceGivenTarget(phrases, phraseIndex, targetPhrase); float targetGivenSource = lexProbs.lexProbTargetGivenSource(phrases, phraseIndex, targetPhrase); // Assert.assertNotNull(results); Assert.assertEquals(sourceGivenTarget, 1.0f * 0.5f * 1.0f); // lex P(it X and it | das X und es) Assert.assertEquals(targetGivenSource, 0.25f * 1.0f * 0.75f);// lex P(das X und es | it X and it) } private HierarchicalPhrases getSourcePhrase(String sourcePhrase, int startIndex, int endIndex) { Pattern pattern = new Pattern(sourceVocab, sourceVocab.getIDs(sourcePhrase)); int[] terminalSequenceStartIndices = {startIndex}; int[] sentenceNumbers = {0}; HierarchicalPhrases phrases = new HierarchicalPhrases(pattern, terminalSequenceStartIndices, sentenceNumbers); return phrases; } private HierarchicalPhrase getTargetPhrase(String targetPhrase, int startIndex, int endIndex) { return new HierarchicalPhrase( targetVocab.getIDs(targetPhrase), new Span(startIndex,endIndex), Collections.<LabeledSpan>emptyList(), targetCorpusArray); } @Test(dependsOnMethods={"setup"}) public void testSourceGivenTargetString() { // In this example, English is the source & German is the target Assert.assertEquals(lexProbs.sourceGivenTarget(",", ","), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(".", "."), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget("on", "auf"), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget("off", "aus"), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget("mars", "besch\u00E4digt"), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget("it", "das"), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget("it", "es"), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget("takes", "f\u00FChrt"), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget("him", "ihn"), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget("makes", "macht"), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget("sets", "setzt"), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget("and", "und"), 0.5f); Assert.assertEquals(lexProbs.sourceGivenTarget("yet", "und"), 0.5f); Assert.assertEquals(lexProbs.sourceGivenTarget(";", null), (1.0f/3.0f)); } @Test(dependsOnMethods={"setup"}) public void testSourceGivenTargetStringUnaligned() { // In this example, English is the source & German is the target float floorProbability = lexProbs.getFloorProbability(); Assert.assertEquals(lexProbs.sourceGivenTarget(",", "."), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget(".", ","), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget("on", "aoeuaeoa"), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget("off", "das"), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget("mars", "das"), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget("it", "besch\u00E4digt"), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget("it", "f\u00FChrt"), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget("takes", "."), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget("him", "es"), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget("makes", ","), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget("sets", "."), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget("and", "es"), floorProbability); Assert.assertEquals(lexProbs.sourceGivenTarget("yet", "das"), floorProbability); } @Test(dependsOnMethods={"setup"}) public void testSourceGivenTarget() { // In this example, English is the source & German is the target Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID(","), targetVocab.getID(",")), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("."), targetVocab.getID(".")), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("on"), targetVocab.getID("auf")), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("off"), targetVocab.getID("aus")), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("mars"), targetVocab.getID("besch\u00E4digt")), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("it"), targetVocab.getID("das")), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("it"), targetVocab.getID("es")), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("takes"), targetVocab.getID("f\u00FChrt")), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("him"), targetVocab.getID("ihn")), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("makes"), targetVocab.getID("macht")), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("sets"), targetVocab.getID("setzt")), 1.0f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("and"), targetVocab.getID("und")), 0.5f); Assert.assertEquals(lexProbs.sourceGivenTarget(sourceVocab.getID("yet"), targetVocab.getID("und")), 0.5f); } @Test(dependsOnMethods={"setup"}) public void testTargetGivenSourceString() { Assert.assertEquals(lexProbs.targetGivenSource(",", ","), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource(".", "."), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource("und", "and"), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource("ihn", "him"), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource("das", "it"), 0.25f); Assert.assertEquals(lexProbs.targetGivenSource("es", "it"), 0.75f); Assert.assertEquals(lexProbs.targetGivenSource("macht", "makes"), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource("besch\u00E4digt", "mars"), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource("aus", "off"), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource("auf", "on"), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource("setzt", "sets"), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource("f\u00FChrt", "takes"), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource("und", "yet"), 1.0f); } @Test(dependsOnMethods={"setup"}) public void testTargetGivenSource() { Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID(","), sourceVocab.getID(",")), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("."), sourceVocab.getID(".")), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("und"), sourceVocab.getID("and")), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("ihn"), sourceVocab.getID("him")), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("das"), sourceVocab.getID("it")), 0.25f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("es"), sourceVocab.getID("it")), 0.75f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("macht"), sourceVocab.getID("makes")), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("besch\u00E4digt"), sourceVocab.getID("mars")), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("aus"), sourceVocab.getID("off")), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("auf"), sourceVocab.getID("on")), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("setzt"), sourceVocab.getID("sets")), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("f\u00FChrt"), sourceVocab.getID("takes")), 1.0f); Assert.assertEquals(lexProbs.targetGivenSource(targetVocab.getID("und"), sourceVocab.getID("yet")), 1.0f); } }