/* This file is part of the Joshua Machine Translation System. * * Joshua is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 * of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free * Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, * MA 02111-1307 USA */ package joshua.decoder; import java.io.File; import java.io.IOException; import java.io.PrintStream; import java.util.Date; import java.util.Scanner; import joshua.corpus.Corpus; import joshua.corpus.alignment.AlignmentGrids; import joshua.corpus.suffix_array.Compile; import joshua.corpus.suffix_array.SuffixArrayFactory; import joshua.corpus.vocab.Vocabulary; import joshua.prefix_tree.ExtractRules; import org.testng.Assert; import org.testng.annotations.Test; /** * Unit tests for decoder thread. * * @author Lane Schwartz * @version $LastChangedDate: 2009-08-28 11:02:40 -0500 (Fri, 28 Aug 2009) $ */ public class DecoderThreadTest { @Test public void setup() { String[] sourceSentences = { "a b c d", "a b c d", "a b c d" }; String[] targetSentences = { "w x y z", "w t u v", "s x y z" }; String[] alignmentLines = { "0-0 1-1 2-2 3-3", "0-0 1-1 2-2 3-3", "0-0 1-1 2-2 3-3" }; String[] testSentences = { "a b c" }; try { // Set up source corpus File sourceFile = File.createTempFile("source", new Date().toString()); PrintStream sourcePrintStream = new PrintStream(sourceFile, "UTF-8"); for (String sentence : sourceSentences) { sourcePrintStream.println(sentence); } sourcePrintStream.close(); String sourceCorpusFileName = sourceFile.getAbsolutePath(); Vocabulary symbolTable = new Vocabulary(); int[] sourceLengths = Vocabulary.initializeVocabulary(sourceCorpusFileName, symbolTable, true); Assert.assertEquals(sourceLengths.length, 2); int numberOfSentences = sourceLengths[1]; Corpus sourceCorpus = SuffixArrayFactory.createCorpusArray(sourceCorpusFileName, symbolTable, sourceLengths[0], sourceLengths[1]); // Set up target corpus File targetFile = File.createTempFile("target", new Date().toString()); PrintStream targetPrintStream = new PrintStream(targetFile, "UTF-8"); for (String sentence : targetSentences) { targetPrintStream.println(sentence); } targetPrintStream.close(); String targetCorpusFileName = targetFile.getAbsolutePath(); int[] targetLengths = Vocabulary.initializeVocabulary(targetCorpusFileName, symbolTable, true); Assert.assertEquals(targetLengths.length, sourceLengths.length); for (int i=0, n=targetLengths.length; i<n; i++) { Assert.assertEquals(targetLengths[i], sourceLengths[i]); } Corpus targetCorpus = SuffixArrayFactory.createCorpusArray(targetCorpusFileName, symbolTable, targetLengths[0], targetLengths[1]); // Construct alignments data structure File alignmentsFile = File.createTempFile("alignments", new Date().toString()); PrintStream alignmentsPrintStream = new PrintStream(alignmentsFile, "UTF-8"); for (String sentence : alignmentLines) { alignmentsPrintStream.println(sentence); } alignmentsPrintStream.close(); String alignmentFileName = alignmentsFile.getAbsolutePath(); AlignmentGrids grids = new AlignmentGrids( new Scanner(alignmentsFile), sourceCorpus, targetCorpus, numberOfSentences); // Set up test corpus File testFile = File.createTempFile("test", new Date().toString()); PrintStream testPrintStream = new PrintStream(testFile, "UTF-8"); for (String sentence : testSentences) { testPrintStream.println(sentence); } testPrintStream.close(); String testFileName = testFile.getAbsolutePath(); // Filename of the extracted rules file. String rulesFileName; { File rulesFile = File.createTempFile("rules", new Date().toString()); rulesFileName = rulesFile.getAbsolutePath(); } String joshDirName; { File joshDir = File.createTempFile(new Date().toString(), "josh"); joshDirName = joshDir.getAbsolutePath(); joshDir.delete(); } Compile compileJoshDir = new Compile(); compileJoshDir.setSourceCorpus(sourceCorpusFileName); compileJoshDir.setTargetCorpus(targetCorpusFileName); compileJoshDir.setAlignments(alignmentFileName); compileJoshDir.setOutputDir(joshDirName); compileJoshDir.execute(); ExtractRules extractRules = new ExtractRules(); extractRules.setJoshDir(joshDirName); extractRules.setTestFile(testFileName); extractRules.setOutputFile(rulesFileName); extractRules.execute(); } catch (IOException e) { Assert.fail("Unable to write temporary file. " + e.toString()); } catch (ClassNotFoundException e) { Assert.fail("Unable to extract rules. " + e.toString()); } } @Test public void basicSuffixArrayGrammar() { // Write configuration to temp file on disk // String configFile; // JoshuaDecoder decoder = // JoshuaDecoder.getUninitalizedDecoder(configFile); } }