/************************************************************************** OmegaT - Computer Assisted Translation (CAT) tool with fuzzy matching, translation memory, keyword search, glossaries, and translation leveraging into updated projects. Copyright (C) 2011 Alex Buloichik Home page: http://www.omegat.org/ Support center: http://groups.yahoo.com/group/OmegaT/ This file is part of OmegaT. OmegaT is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OmegaT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. **************************************************************************/ package org.omegat.core.data; import static org.junit.Assert.assertEquals; import java.io.File; import java.util.List; import java.util.Map; import java.util.TreeMap; import org.junit.Ignore; import org.junit.Test; import org.omegat.filters2.FilterContext; import org.omegat.filters2.html2.HTMLFilter2; import org.omegat.filters2.html2.HTMLOptions; import org.omegat.filters2.rc.RcFilter; import org.omegat.filters2.text.bundles.ResourceBundleFilter; /** * TMX Compliance tests as described on http://www.localization.org/fileadmin/standards/tmx1.4/comp.htm * * The Level 1 Compliance verifies mostly TMX structure, white spaces handling * and how the application deals with non-ASCII characters and special characters * in XML such as '<', or '&', XML syntax, encodings, and so forth. * * The Level 2 compliance verifies mostly how the application deals with content * markup. To qualify for Level 2 compliance, the application must also qualify * for Level 1 compliance. * * @author Alex Buloichik (alex73mail@gmail.com) */ public class TmxComplianceTests extends TmxComplianceBase { /** * Test Import1A - Internal Classic White Spaces. */ @Test public void testImport1A() throws Exception { translateAndCheckTextUsingTmx("ImportTest1A.txt", "UTF-8", "ImportTest1A.tmx", "ImportTest1A_fr-ca.txt", "UTF-8", "EN-US", "FR-CA", null); } /** * Test Import1B - XML Syntax. */ @Test public void testImport1B() throws Exception { translateAndCheckTextUsingTmx("ImportTest1B.txt", "UTF-8", "ImportTest1B.tmx", "ImportTest1B_fr-ca.txt", "UTF-8", "EN-US", "FR-CA", null); } /** * Test Import1C - Multiple Languages */ @Test public void testImport1C() throws Exception { translateAndCheckTextUsingTmx("ImportTest1C.txt", "UTF-8", "ImportTest1C.tmx", "ImportTest1C_fr-ca.txt", "UTF-8", "EN-US", "FR-CA", null); } /** * Test Import1D - UTF-8 with BOM */ @Test public void testImport1D() throws Exception { translateAndCheckTextUsingTmx("ImportTest1D.txt", "UTF-8", "ImportTest1D.tmx", "ImportTest1D_en-gb.txt", "UTF-8", "EN-US", "EN-GB", null); } /** * Test Import1E - UTF-8 without BOM */ @Test public void testImport1E() throws Exception { translateAndCheckTextUsingTmx("ImportTest1E.txt", "UTF-8", "ImportTest1E.tmx", "ImportTest1E_en-gb.txt", "UTF-8", "EN-US", "EN-GB", null); } /** * Test Import1F - UTF-16 LE */ @Test public void testImport1F() throws Exception { translateAndCheckTextUsingTmx("ImportTest1F.txt", "UTF-8", "ImportTest1F.tmx", "ImportTest1F_en-gb.txt", "UTF-8", "EN-US", "EN-GB", null); } /** * Test Import1F - UTF-16 BE */ @Test public void testImport1G() throws Exception { translateAndCheckTextUsingTmx("ImportTest1G.txt", "UTF-8", "ImportTest1G.tmx", "ImportTest1G_en-gb.txt", "UTF-8", "EN-US", "EN-GB", null); } /** * Test Import1H - ASCII */ @Test public void testImport1H() throws Exception { translateAndCheckTextUsingTmx("ImportTest1H.txt", "UTF-8", "ImportTest1H.tmx", "ImportTest1H_en-gb.txt", "UTF-8", "EN-US", "EN-GB", null); } /** * Test Import1I - Internal Asian White Spaces */ @Test public void testImport1I() throws Exception { translateAndCheckTextUsingTmx("ImportTest1I.txt", "UTF-8", "ImportTest1I.tmx", "ImportTest1I_ja-jp.txt", "UTF-16LE", "EN-US", "JA-JP", null); } /** * Test Import1J - Language Selection */ @Test public void testImport1J() throws Exception { ProjectProperties props = new TestProjectProperties("EN-US", "EN-GB"); final ProjectTMX tmx = new ProjectTMX(props.getSourceLanguage(), props.getTargetLanguage(), props.isSentenceSegmentingEnabled(), new File("test/data/tmx/TMXComplianceKit/ImportTest1J_many.tmx"), orphanedCallback); tmx.exportTMX(props, outFile, false, false, false); // TODO validate via TMXCheck } /** * Test Import1K - No Import */ @Ignore @Test public void testImport1K() throws Exception { ProjectProperties props = new TestProjectProperties("EN-US", "EN-GB"); final ProjectTMX tmx = new ProjectTMX(props.getSourceLanguage(), props.getTargetLanguage(), props.isSentenceSegmentingEnabled(), new File("test/data/tmx/TMXComplianceKit/ImportTest1K.tmx"), orphanedCallback); assertEquals(0, tmx.defaults.size()); } /** * Test Import1L - All Elements and Attributes */ @Test public void testImport1L() throws Exception { // TODO } /** * Test Export1A - RC */ @Test public void testExport1A() throws Exception { File tmxFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1A.tmx"); File sourceFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1A.rc"); File translatedFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1A_fr.rc"); ProjectProperties props = new TestProjectProperties("EN-US", "FR-CA"); RcFilter filter = new RcFilter(); align(filter, sourceFile, "windows-1252", translatedFile, "windows-1252", props); compareTMX(tmxFile, outFile, 6); } /** * Test Export1B - HTML */ @Test public void testExport1B() throws Exception { File tmxFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1B.tmx"); File sourceFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1B.htm"); File translatedFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1B_fr.htm"); ProjectProperties props = new TestProjectProperties("EN-US", "FR-CA"); FilterContext fc = new FilterContext(props); fc.setInEncoding("windows-1252"); Map<String, String> config = new TreeMap<String, String>(); new HTMLOptions(config).setSkipMeta("content=en-us,content=fr-ca"); List<String> sources = loadTexts(new HTMLFilter2(), sourceFile, null, fc, config); List<String> translations = loadTexts(new HTMLFilter2(), translatedFile, null, fc, config); assertEquals(sources.size(), translations.size()); ProjectTMX tmx = new ProjectTMX(props.getSourceLanguage(), props.getTargetLanguage(), props.isSentenceSegmentingEnabled(), outFile, orphanedCallback); for (int i = 0; i < sources.size(); i++) { tmx.defaults.put(sources.get(i), createTMXEntry(sources.get(i), translations.get(i), true)); } tmx.exportTMX(props, outFile, false, false, true); compareTMX(tmxFile, outFile, 2); } /** * Test Export1C - Java Properties */ @Test public void testExport1C() throws Exception { File tmxFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1C.tmx"); File sourceFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1C.properties"); File translatedFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1C_fr.properties"); ProjectProperties props = new TestProjectProperties("EN-US", "FR-CA"); align(new ResourceBundleFilter(), sourceFile, null, translatedFile, null, props); compareTMX(tmxFile, outFile, 6); } /** * Test Export1D - Portable Object */ @Test public void testExport1D() throws Exception { /* * Test data contains .po files, which doesn't compliance with PO * specification * (https://www.gnu.org/savannah-checkouts/gnu/gettext/ * manual/html_node/PO-Files.html). By the specification, msgid * should contain "untranslated-string", but in the ExportTest1D.po * file it contains ID. */ // File tmxFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1D.tmx"); // File sourceFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1D.po"); // File translatedFile = new File("test/data/tmx/TMXComplianceKit/ExportTest1D_fr.po"); // // ProjectProperties props = new TestProjectProperties("EN-US", "FR-CA"); // // align(new PoFilter(), sourceFile, "iso-8859-1", translatedFile, "iso-8859-1", props); // // compareTMX(tmxFile, outFile, 8); } /** * Test Export1E - XLIFF */ @Test public void testExport1E() throws Exception { // TODO } /** * Test Import2A - Content Markup Syntax in HTML. * * TEST CHANGED FROM TMX COMPLIANCE PACK BECAUSE WE HAVE OTHER SEGMENTATION SETTINGS, i.e. "Picture: <img * src="img.png"/>" should be processed as one segment by TMX compliance tests, but it's not a one segment * by OmegaT segmentation. Since it out of scope of testing, we patch tmx for runtime-only. */ @Test public void testImport2A() throws Exception { ProjectProperties props = new TestProjectProperties("EN-US", "FR-CA"); props.setSentenceSegmentingEnabled(true); Map<String, String> config = new TreeMap<String, String>(); config.put(HTMLOptions.OPTION_TRANSLATE_SRC, "false"); config.put(HTMLOptions.OPTION_SKIP_META, "true"); Map<String, TMXEntry> fix = new TreeMap<String, TMXEntry>(); fix.put("Picture:", createTMXEntry("Picture:", "Image:", true)); translateUsingTmx(new HTMLFilter2(), config, "ImportTest2A.htm", "UTF-8", "ImportTest2A.tmx", "windows-1252", props, fix); List<String> lines1 = readTextFile(new File("test/data/tmx/TMXComplianceKit/ImportTest2A_fr-ca.htm"), "windows-1252"); List<String> lines2 = readTextFile(outFile, "windows-1252"); // fix meta line, since OmegaT writes own meta line for encoding lines2.set(2, "<meta content=\"text/html; charset=windows-1252\" http-equiv=\"Content-Type\">"); assertEquals(lines1.size(), lines2.size()); for (int i = 0; i < lines1.size(); i++) { // HTML spec allows unescaped U+0022 QUOTE MARK (outside of attribute values); // we produce unescaped but the test assumes escaped, so we normalize for comparison purposes. String line1 = normalize(lines1.get(i)); String line2 = normalize(lines2.get(i)); assertEquals(line1, line2); } } private String normalize(String str) { return str.replace(""", "\""); } /** * Test Import2B - Content Markup Syntax in RTF */ @Test public void testImport2B() throws Exception { // RTF not supported } /** * Test Import2C - All Elements and Attributes */ @Test public void testImport2C() throws Exception { // TODO } /** * Test Export2A - HTML */ @Test public void testExport2A() throws Exception { File tmxFile = new File("test/data/tmx/TMXComplianceKit/ExportTest2A.tmx"); File sourceFile = new File("test/data/tmx/TMXComplianceKit/ExportTest2A.htm"); File translatedFile = new File("test/data/tmx/TMXComplianceKit/ExportTest2A_fr.htm"); ProjectProperties props = new TestProjectProperties("EN-US", "FR-CA"); props.setSentenceSegmentingEnabled(true); FilterContext fc = new FilterContext(props); fc.setInEncoding("windows-1252"); Map<String, String> config = new TreeMap<String, String>(); new HTMLOptions(config).setSkipMeta("content=en-us,content=fr-ca"); List<String> sources = loadTexts(new HTMLFilter2(), sourceFile, null, fc, config); List<String> translations = loadTexts(new HTMLFilter2(), translatedFile, null, fc, config); assertEquals(sources.size(), translations.size()); ProjectTMX tmx = new ProjectTMX(props.getSourceLanguage(), props.getTargetLanguage(), props.isSentenceSegmentingEnabled(), outFile, orphanedCallback); for (int i = 0; i < sources.size(); i++) { tmx.defaults.put(sources.get(i), createTMXEntry(sources.get(i), translations.get(i), true)); } tmx.exportTMX(props, outFile, false, true, true); compareTMX(tmxFile, outFile, 12); } TMXEntry createTMXEntry(String source, String translation, boolean def) { PrepareTMXEntry tr = new PrepareTMXEntry(); tr.source = source; tr.translation = translation; return new TMXEntry(tr, def, null); } }