package ivory.core.util; import java.io.File; import java.io.IOException; import junit.framework.Assert; import junit.framework.TestCase; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.junit.Test; import edu.umd.hooka.Vocab; import edu.umd.hooka.alignment.HadoopAlign; import edu.umd.hooka.ttables.TTable_monolithic_IFAs; public class CLIRUtilsTest extends TestCase { @Test public void testGIZA(){ String srcVocabFile = "etc/toy_vocab.de-en.de"; String trgVocabFile = "etc/toy_vocab.de-en.en"; String ttableFile = "etc/toy_ttable.de-en"; Configuration conf = new Configuration(); try { CLIRUtils.createTTableFromGIZA( "etc/toy_lex.0-0.f2n", srcVocabFile, trgVocabFile, ttableFile, 0.9f, 15, FileSystem.getLocal(conf)); } catch (IOException e) { e.printStackTrace(); } try { Vocab srcVocab = HadoopAlign.loadVocab(new Path(srcVocabFile), FileSystem.getLocal(conf)); Vocab trgVocab = HadoopAlign.loadVocab(new Path(trgVocabFile), FileSystem.getLocal(conf)); TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(ttableFile), true); assertTrue(ttable.getMaxE()==srcVocab.size()-1); int src = srcVocab.get("buch"); int trg = trgVocab.get("book"); assertTrue("Giza_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); src = srcVocab.get("krankenhaus"); trg = trgVocab.get("hospit"); assertTrue("Giza_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); } catch (IOException e) { e.printStackTrace(); Assert.fail(); } } @Test public void testGIZA2(){ String srcVocabFile = "etc/vocab.en-de.en"; String trgVocabFile = "etc/vocab.en-de.de"; String ttableFile = "etc/ttable.en-de"; Configuration conf = new Configuration(); try { CLIRUtils.createTTableFromGIZA( "etc/toy_lex.0-0.n2f", srcVocabFile, trgVocabFile, ttableFile, 0.9f, 15, FileSystem.getLocal(conf)); } catch (IOException e) { e.printStackTrace(); } try { Vocab srcVocab = HadoopAlign.loadVocab(new Path(srcVocabFile), FileSystem.getLocal(conf)); Vocab trgVocab = HadoopAlign.loadVocab(new Path(trgVocabFile), FileSystem.getLocal(conf)); TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(ttableFile), true); assertTrue(ttable.getMaxE()==srcVocab.size()-1); int src = srcVocab.get("book"); int trg = trgVocab.get("buch"); assertTrue("Giza_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); src = srcVocab.get("hospit"); trg = trgVocab.get("krankenhaus"); assertTrue("Giza_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); } catch (IOException e) { e.printStackTrace(); Assert.fail(); } new File(srcVocabFile).delete(); new File(trgVocabFile).delete(); new File(ttableFile).delete(); } @Test public void testBerkeleyAligner(){ String srcVocabFile = "etc/vocab.de-en.de"; String trgVocabFile = "etc/vocab.de-en.en"; String ttableFile = "etc/ttable.de-en"; Configuration conf = new Configuration(); try { CLIRUtils.createTTableFromBerkeleyAligner( "etc/toy_stage2.2.params.txt", srcVocabFile, trgVocabFile, ttableFile, 0.9f, 15, FileSystem.getLocal(conf)); } catch (IOException e) { e.printStackTrace(); Assert.fail(); } try { Vocab srcVocab = HadoopAlign.loadVocab(new Path(srcVocabFile), FileSystem.getLocal(conf)); Vocab trgVocab = HadoopAlign.loadVocab(new Path(trgVocabFile), FileSystem.getLocal(conf)); TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(ttableFile), true); assertTrue(ttable.getMaxE()==srcVocab.size()-1); int src = srcVocab.get("buch"); int trg = trgVocab.get("book"); assertTrue("Berk_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); src = srcVocab.get("krankenhaus"); trg = trgVocab.get("hospit"); assertTrue("Berk_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); } catch (IOException e) { e.printStackTrace(); Assert.fail(); } new File(srcVocabFile).delete(); new File(trgVocabFile).delete(); new File(ttableFile).delete(); } @Test public void testBerkeleyAligner2(){ String srcVocabFile = "etc/vocab.en-de.en"; String trgVocabFile = "etc/vocab.en-de.de"; String ttableFile = "etc/ttable.en-de"; Configuration conf = new Configuration(); try { CLIRUtils.createTTableFromBerkeleyAligner( "etc/toy_stage2.1.params.txt", srcVocabFile, trgVocabFile, ttableFile, 0.9f, 15, FileSystem.getLocal(conf)); } catch (IOException e) { e.printStackTrace(); } try { Vocab srcVocab = HadoopAlign.loadVocab(new Path(srcVocabFile), FileSystem.getLocal(conf)); Vocab trgVocab = HadoopAlign.loadVocab(new Path(trgVocabFile), FileSystem.getLocal(conf)); TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(ttableFile), true); assertTrue(ttable.getMaxE()==srcVocab.size()-1); int src = srcVocab.get("book"); int trg = trgVocab.get("buch"); assertTrue("Berk_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); src = srcVocab.get("hospit"); trg = trgVocab.get("krankenhaus"); assertTrue("Berk_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); } catch (IOException e) { e.printStackTrace(); Assert.fail(); } new File(srcVocabFile).delete(); new File(trgVocabFile).delete(); new File(ttableFile).delete(); } @Test public void testHooka(){ String finalSrcVocabFile = "etc/toy_vocab.de-en.de"; String finalTrgVocabFile = "etc/toy_vocab.de-en.en"; String finalTTableFile = "etc/toy_ttable.de-en"; Configuration conf = new Configuration(); try { CLIRUtils.createTTableFromHooka( "etc/toy_vocab.de-en.de.raw", "etc/toy_vocab.de-en.en.raw", "etc/toy_ttable.de-en.raw", finalSrcVocabFile, finalTrgVocabFile, finalTTableFile, 0.9f, 15, FileSystem.getLocal(conf)); } catch (IOException e) { e.printStackTrace(); Assert.fail(); } try { Vocab srcVocab = HadoopAlign.loadVocab(new Path(finalSrcVocabFile), FileSystem.getLocal(conf)); Vocab trgVocab = HadoopAlign.loadVocab(new Path(finalTrgVocabFile), FileSystem.getLocal(conf)); TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(finalTTableFile), true); assertTrue(ttable.getMaxE()==srcVocab.size()-1); int src = srcVocab.get("buch"); int trg = trgVocab.get("book"); assertTrue("Hooka_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); src = srcVocab.get("krankenhaus"); trg = trgVocab.get("hospit"); assertTrue("Hooka_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); } catch (IOException e) { e.printStackTrace(); Assert.fail(); } } @Test public void testHooka2(){ String finalSrcVocabFile = "etc/toy_vocab.en-de.en"; String finalTrgVocabFile = "etc/toy_vocab.en-de.de"; String finalTTableFile = "etc/toy_ttable.en-de"; Configuration conf = new Configuration(); try { CLIRUtils.createTTableFromHooka( "etc/toy_vocab.en-de.en.raw", "etc/toy_vocab.en-de.de.raw", "etc/toy_ttable.en-de.raw", finalSrcVocabFile, finalTrgVocabFile, finalTTableFile, 0.9f, 15, FileSystem.getLocal(conf)); } catch (IOException e) { e.printStackTrace(); Assert.fail(); } try { Vocab srcVocab = HadoopAlign.loadVocab(new Path(finalSrcVocabFile), FileSystem.getLocal(conf)); Vocab trgVocab = HadoopAlign.loadVocab(new Path(finalTrgVocabFile), FileSystem.getLocal(conf)); TTable_monolithic_IFAs ttable = new TTable_monolithic_IFAs(FileSystem.getLocal(conf), new Path(finalTTableFile), true); assertTrue(ttable.getMaxE()==srcVocab.size()-1); int src = srcVocab.get("book"); int trg = trgVocab.get("buch"); assertTrue("Hooka_case1 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); src = srcVocab.get("hospit"); trg = trgVocab.get("krankenhaus"); assertTrue("Hooka_case2 --> "+ttable.get(src, trg), ttable.get(src, trg) > 0.3); } catch (IOException e) { e.printStackTrace(); Assert.fail(); } } }