/************************************************************************** OmegaT - Computer Assisted Translation (CAT) tool with fuzzy matching, translation memory, keyword search, glossaries, and translation leveraging into updated projects. Copyright (C) 2010 Alex Buloichik Home page: http://www.omegat.org/ Support center: http://groups.yahoo.com/group/OmegaT/ This file is part of OmegaT. OmegaT is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OmegaT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. **************************************************************************/ package org.omegat.util; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertNull; import java.io.File; import java.io.FileOutputStream; import java.util.Map; import java.util.TreeMap; import org.apache.commons.io.ByteOrderMark; import org.apache.commons.io.IOUtils; import org.junit.Test; import org.omegat.core.TestCore; /** * @author Alex Buloichik */ public class TMXReaderTest extends TestCore { protected File outFile = new File(System.getProperty("java.io.tmpdir"), "OmegaT test - " + getClass().getSimpleName()); @Test public void testLeveL1() throws Exception { final Map<String, String> tr = new TreeMap<String, String>(); new TMXReader2().readTMX(new File("test/data/tmx/test-level1.tmx"), new Language("en-US"), new Language("be"), false, false, false, false, new TMXReader2.LoadCallback() { public boolean onEntry(TMXReader2.ParsedTu tu, TMXReader2.ParsedTuv tuvSource, TMXReader2.ParsedTuv tuvTarget, boolean isParagraphSegtype) { tr.put(tuvSource.text, tuvTarget.text); return true; } }); assertEquals("betuv", tr.get("entuv")); assertEquals("tr1", tr.get("lang1")); assertEquals("tr2", tr.get("lang2")); assertEquals("tr3", tr.get("lang3")); } @Test public void testLeveL2() throws Exception { final Map<String, String> tr = new TreeMap<String, String>(); new TMXReader2().readTMX(new File("test/data/tmx/test-level2.tmx"), new Language("en-US"), new Language("be"), false, false, true, false, new TMXReader2.LoadCallback() { public boolean onEntry(TMXReader2.ParsedTu tu, TMXReader2.ParsedTuv tuvSource, TMXReader2.ParsedTuv tuvTarget, boolean isParagraphSegtype) { tr.put(tuvSource.text, tuvTarget.text); return true; } }); assertEquals("betuv", tr.get("entuv")); assertEquals("tr", tr.get("2 <a0> zz <t1>xx</t1>")); assertEquals("tr", tr.get("3 <n0>xx</n0>")); } @Test public void testInvalidTMX() throws Exception { final Map<String, String> tr = new TreeMap<String, String>(); new TMXReader2().readTMX(new File("test/data/tmx/invalid.tmx"), new Language("en"), new Language("be"), false, false, true, false, new TMXReader2.LoadCallback() { public boolean onEntry(TMXReader2.ParsedTu tu, TMXReader2.ParsedTuv tuvSource, TMXReader2.ParsedTuv tuvTarget, boolean isParagraphSegtype) { tr.put(tuvSource.text, tuvTarget.text); return true; } }); } @Test public void testSMP() throws Exception { final Map<String, String> tr = new TreeMap<String, String>(); new TMXReader2().readTMX(new File("test/data/tmx/test-SMP.tmx"), new Language("en"), new Language("be"), false, false, true, false, new TMXReader2.LoadCallback() { public boolean onEntry(TMXReader2.ParsedTu tu, TMXReader2.ParsedTuv tuvSource, TMXReader2.ParsedTuv tuvTarget, boolean isParagraphSegtype) { tr.put(tuvSource.text, tuvTarget.text); return true; } }); assertFalse(tr.isEmpty()); // Assert contents are {"ABC": "DEF"} where letters are MATHEMATICAL BOLD CAPITALs (U+1D400-) assertEquals("\uD835\uDC03\uD835\uDC04\uD835\uDC05", tr.get("\uD835\uDC00\uD835\uDC01\uD835\uDC02")); } @Test public void testGetTuvByLang() { TMXReader2.ParsedTuv tuvBE = new TMXReader2.ParsedTuv(); tuvBE.lang = "be"; TMXReader2.ParsedTuv tuvFR = new TMXReader2.ParsedTuv(); tuvFR.lang = "FR"; TMXReader2.ParsedTuv tuvFRCA = new TMXReader2.ParsedTuv(); tuvFRCA.lang = "FR-CA"; TMXReader2.ParsedTuv tuvFRFR = new TMXReader2.ParsedTuv(); tuvFRFR.lang = "FR-FR"; TMXReader2.ParsedTuv tuvENGB = new TMXReader2.ParsedTuv(); tuvENGB.lang = "EN-GB"; TMXReader2 tmx = new TMXReader2(); tmx.currentTu.tuvs.add(tuvBE); tmx.currentTu.tuvs.add(tuvFR); tmx.currentTu.tuvs.add(tuvFRCA); tmx.currentTu.tuvs.add(tuvFRFR); tmx.currentTu.tuvs.add(tuvENGB); assertEquals(tmx.getTuvByLang(new Language("BE")), tuvBE); assertEquals(tmx.getTuvByLang(new Language("BE-NN")), tuvBE); assertNotNull(tmx.getTuvByLang(new Language("FR"))); assertEquals(tmx.getTuvByLang(new Language("FR-CA")), tuvFRCA); assertEquals(tmx.getTuvByLang(new Language("FR-NN")), tuvFR); assertEquals(tmx.getTuvByLang(new Language("EN")), tuvENGB); assertEquals(tmx.getTuvByLang(new Language("EN-CA")), tuvENGB); assertNull(tmx.getTuvByLang(new Language("ZZ"))); } @Test public void testCharset() throws Exception { File xml = new File("build/testdata/test.xml"); xml.getParentFile().mkdirs(); testXml(xml, ByteOrderMark.UTF_8, "<?xml version=\"1.0\"?>", "UTF-8"); testXml(xml, ByteOrderMark.UTF_16LE, "<?xml version=\"1.0\"?>", "UTF-16LE"); testXml(xml, ByteOrderMark.UTF_16BE, "<?xml version=\"1.0\"?>", "UTF-16BE"); testXml(xml, ByteOrderMark.UTF_32LE, "<?xml version=\"1.0\"?>", "UTF-32LE"); testXml(xml, ByteOrderMark.UTF_32BE, "<?xml version=\"1.0\"?>", "UTF-32BE"); testXml(xml, null, "<?xml version=\"1.0\" encoding=\"UTF-8\"?>", "UTF-8"); testXml(xml, null, "<?xml version=\"1.0\" encoding=\"ISO-8859-1\"?>", "ISO-8859-1"); } void testXml(File xml, ByteOrderMark bom, String text, String charset) throws Exception { FileOutputStream out = new FileOutputStream(xml); try { if (bom != null) { out.write(bom.getBytes()); } out.write(text.getBytes(charset)); } finally { IOUtils.closeQuietly(out); } assertEquals(charset, TMXReader2.detectCharset(xml)); } }