/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2008-2013 Alex Buloichik
2015 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.filters;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;
import java.io.File;
import java.net.URISyntaxException;
import java.nio.file.Files;
import java.util.List;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.apache.commons.io.FileUtils;
import org.junit.Before;
import org.junit.Test;
import org.omegat.core.Core;
import org.omegat.core.data.IProject;
import org.omegat.core.data.SourceTextEntry;
import org.omegat.core.statistics.StatCount;
import org.omegat.core.statistics.StatisticsSettings;
import org.omegat.filters2.ITranslateCallback;
import org.omegat.filters2.TranslationException;
import org.omegat.filters3.xml.xliff.XLIFFDialect;
import org.omegat.filters3.xml.xliff.XLIFFFilter;
import org.omegat.filters3.xml.xliff.XLIFFOptions;
import org.omegat.util.PatternConsts;
import org.omegat.util.Preferences;
import org.omegat.util.StaticUtils;
import org.xml.sax.SAXException;
public class XLIFFFilterTest extends TestFilterBase {
XLIFFFilter filter;
/**
 * Creates a fresh filter before every test and defines its dialect from an
 * empty option map.
 */
@Before
public final void setUp() {
    filter = new XLIFFFilter();
    TreeMap<String, String> noOptions = new TreeMap<String, String>();
    ((XLIFFDialect) filter.getDialect()).defineDialect(new XLIFFOptions(noOptions));
}
/**
 * Runs the {@code parse} helper (defined outside this class) on the basic
 * XLIFF sample; any exception fails the test.
 */
@Test
public void testParse() throws Exception {
    final String xliffPath = "test/data/filters/xliff/file-XLIFFFilter.xlf";
    parse(filter, xliffPath);
}
/**
 * Runs the {@code translateXML} helper (defined outside this class) on each
 * sample file in turn.
 */
@Test
public void testTranslate() throws Exception {
    // NOTE(review): "SMP" presumably stands for Supplementary Multilingual Plane characters - confirm.
    final String[] samples = {
        "test/data/filters/xliff/file-XLIFFFilter.xlf",
        "test/data/filters/xliff/file-XLIFFFilter-SMP.xlf",
    };
    for (String sample : samples) {
        translateXML(filter, sample);
    }
}
/**
 * Loads the basic XLIFF sample and checks the two entries it yields.
 */
@Test
public void testLoad() throws Exception {
    final String path = "test/data/filters/xliff/file-XLIFFFilter.xlf";
    final IProject.FileInfo loaded = loadSourceFiles(filter, path);

    checkMultiStart(loaded, path);
    // NOTE(review): the 4th/5th arguments look like the neighbouring (prev/next) segments,
    // with "" marking "no neighbour" - confirm against the checkMulti helper.
    checkMulti("tr1=This is test", null, null, "", "tr2=test2", null);
    checkMulti("tr2=test2", null, null, "tr1=This is test", "", null);
    checkMultiEnd();
}
/**
 * Checks how inline markup in the tags sample is exposed as shortcut tags and
 * protected parts, then runs the file through {@code translateFile} and
 * compares the output with an expected file derived from the input.
 */
@Test
public void testTags() throws Exception {
    String f = "test/data/filters/xliff/file-XLIFFFilter-tags.xlf";
    IProject.FileInfo fi = loadSourceFiles(filter, f);

    SourceTextEntry ste;
    checkMultiStart(fi, f);
    checkMultiNoPrevNext("Link to <m0>http://localhost</m0>.", null, null, null); // #1988732
    checkMultiNoPrevNext("About <b0>Gandalf</b0>", null, null, "7"); // #1988732
    checkMultiNoPrevNext("<i0>Tags</i0> translation zz<i1>2</i1>z <b2>-NONTRANSLATED", null, null, null);
    checkMultiNoPrevNext("one <a0> two </b1> three <c2> four </d3> five", null, null, null);

    // This entry carries one fully protected <mrk> plus an opening/closing pair
    // of a non-protected <mrk>, giving three protected parts in total.
    ste = checkMultiNoPrevNext("About <m0>Gandalf</m0> and <m1>other</m1>.", null, null, null);
    assertEquals(3, ste.getProtectedParts().length);
    assertEquals("<m0>Gandalf</m0>", ste.getProtectedParts()[0].getTextInSourceSegment());
    assertEquals("<mrk mtype=\"protected\">Gandalf</mrk>",
            ste.getProtectedParts()[0].getDetailsFromSourceFile());
    assertEquals("Gandalf", ste.getProtectedParts()[0].getReplacementMatchCalculation());
    assertEquals("<m1>", ste.getProtectedParts()[1].getTextInSourceSegment());
    assertEquals("<mrk mtype=\"other\">", ste.getProtectedParts()[1].getDetailsFromSourceFile());
    assertEquals(StaticUtils.TAG_REPLACEMENT, ste.getProtectedParts()[1].getReplacementMatchCalculation());
    assertEquals("</m1>", ste.getProtectedParts()[2].getTextInSourceSegment());
    assertEquals("</mrk>", ste.getProtectedParts()[2].getDetailsFromSourceFile());
    assertEquals(StaticUtils.TAG_REPLACEMENT, ste.getProtectedParts()[2].getReplacementMatchCalculation());

    checkMultiNoPrevNext("one <o0>two</o0> three", null, null, null);
    checkMultiNoPrevNext("one <t0/> three", null, null, null);
    checkMultiNoPrevNext("one <w0/> three", null, null, null);
    checkMultiNoPrevNext("Nested tags: before <g0><g1><x2/></g1></g0> after", null, null, null);
    checkMultiNoPrevNext("<m0>Check protected-only tag reading</m0>", null, null, null);
    checkMultiEnd();

    // Translate the file with a callback that rewrites the NONTRANSLATED marker.
    File inFile = new File(f);
    filter.translateFile(inFile, outFile, new TreeMap<String, String>(), context,
            new ITranslateCallback() {
                public String getTranslation(String id, String source, String path) {
                    return getTranslation(id, source);
                }

                public String getTranslation(String id, String source) {
                    return source.replace("NONTRANSLATED", "TRANSLATED");
                }

                public void linkPrevNextSegments() {
                }

                public void setPass(int pass) {
                }
            });

    // Build the expected output by applying the same substitution to the raw
    // input text. Files.readAllLines closes the file itself, whereas the
    // stream returned by Files.lines must be closed explicitly by the caller.
    File trFile = new File(outFile.getPath() + "-translated");
    List<String> lines = Files.readAllLines(inFile.toPath()).stream()
            .map(line -> line.replace("NONTRANSLATED", "TRANSLATED"))
            .collect(Collectors.toList());
    Files.write(trFile.toPath(), lines);
    compareXML(trFile, outFile);
}
/**
 * Loads and translates the tag-optimization sample twice: first with the
 * filter configuration's "remove tags" flag off, then with it on, checking
 * the segment text seen in each mode.
 */
@Test
public void testTagOptimization() throws Exception {
    final String path = "test/data/filters/xliff/file-XLIFFFilter-tags-optimization.xlf";

    // Pass 1: flag off - leading and trailing tags stay in the segments.
    Core.getFilterMaster().getConfig().setRemoveTags(false);
    final IProject.FileInfo withAllTags = loadSourceFiles(filter, path);
    checkMultiStart(withAllTags, path);
    checkMultiNoPrevNext("<b0>The text of a segment<b1>.<b2>", null, null, null);
    checkMultiNoPrevNext("<b0>The text of a segment<b1>.<b2><b3><b4>", null, null, null);
    checkMultiNoPrevNext("<b0>Link to a <a1>reference</a1></b0>", null, null, null);
    checkMultiEnd();
    translateXML(filter, path);

    // Pass 2: flag on - only the inner tags remain, renumbered from zero.
    Core.getFilterMaster().getConfig().setRemoveTags(true);
    final IProject.FileInfo withTrimmedTags = loadSourceFiles(filter, path);
    checkMultiStart(withTrimmedTags, path);
    checkMultiNoPrevNext("The text of a segment<b0>.", null, null, null);
    checkMultiNoPrevNext("The text of a segment<b0>.", null, null, null);
    checkMultiNoPrevNext("Link to a <a0>reference</a0>", null, null, null);
    checkMultiEnd();
    translateXML(filter, path);
}
/**
 * With both protected text and custom tags counted, the first entry of the
 * statcount sample is expected to contain four words.
 */
@Test
public void testStatCounting() throws Exception {
    StatisticsSettings.setCountingProtectedText(true);
    StatisticsSettings.setCountingCustomTags(true);

    final IProject.FileInfo loaded =
            loadSourceFiles(filter, "test/data/filters/xliff/file-XLIFFFilter-statcount.xlf");
    assertEquals(4, new StatCount(loaded.entries.get(0)).words);
}
/**
 * With protected text excluded from counting (custom tags still counted),
 * the first entry of the statcount sample is expected to contain two words.
 */
@Test
public void testStatCountingNoProtectedText() throws Exception {
    StatisticsSettings.setCountingProtectedText(false);
    StatisticsSettings.setCountingCustomTags(true);

    final IProject.FileInfo loaded =
            loadSourceFiles(filter, "test/data/filters/xliff/file-XLIFFFilter-statcount.xlf");
    assertEquals(2, new StatCount(loaded.entries.get(0)).words);
}
/**
 * With custom tags excluded from counting (protected text still counted) and
 * the custom-tag pattern set to {@code CUSTOM}, the first entry of the
 * statcount sample is expected to contain three words.
 */
@Test
public void testStatCountingNoCustomTags() throws Exception {
    StatisticsSettings.setCountingProtectedText(true);
    StatisticsSettings.setCountingCustomTags(false);

    // Register the custom pattern, then refresh the placeholder pattern
    // before the file is loaded.
    Preferences.setPreference(Preferences.CHECK_CUSTOM_PATTERN, "CUSTOM");
    PatternConsts.updatePlaceholderPattern();

    final IProject.FileInfo loaded =
            loadSourceFiles(filter, "test/data/filters/xliff/file-XLIFFFilter-statcount.xlf");
    assertEquals(3, new StatCount(loaded.entries.get(0)).words);
}
/*
* Test that an XLIFF file containing an invalid character (in this case
* U+0008) will cause the parser to die with a SAXParseException. This isn't
* actually important in and of itself; we wouldn't mind if the parser was
* lenient because we filter bad XML characters out on our own later. This
* is just necessary to set a baseline for testInvalidXMLOnWeirdPath().
*/
@Test
public void testInvalidXML() throws Exception {
    final String path = "test/data/filters/xliff/file-XLIFFFilter-invalid-content.xlf";

    // Capture the expected failure first, then assert on it outside the try.
    TranslationException thrown = null;
    try {
        loadSourceFiles(filter, path);
    } catch (TranslationException ex) {
        thrown = ex;
    }

    if (thrown == null) {
        fail("Should have died due to invalid XML character");
    }
    assertTrue(wasCausedBy(thrown, SAXException.class));
}
/*
* Issue reported by Jean-Christophe Helary: When a file with invalid
* content is on a path that contains both spaces and "non-path" characters,
* a URISyntaxException was reported about the path instead of the
* SAXParseException about the file content.
*
* This may only fail with a particular underlying parser implementation, as
* it depends on a particular codepath in
* com.sun.org.apache.xerces.internal.impl.XMLEntityManager and
* com.sun.org.apache.xerces.internal.util.URI where it tries to be lenient
* in its acceptance of not-quite-valid URIs as system IDs.
*/
@Test
public void testInvalidXMLOnWeirdPath() throws Exception {
    String f = "test/data/filters/xliff/file-XLIFFFilter-invalid-content.xlf";
    File tmpDir = Files.createTempDirectory("omegat").toFile();
    // Everything that touches the temp directory runs inside try/finally so the
    // directory is removed even when an assertion fails or an unexpected
    // exception escapes; previously it was only deleted on the success path.
    try {
        assertTrue(tmpDir.isDirectory());
        File weirdDir = new File(tmpDir, "a b\u2603"); // U+2603 SNOWMAN
        File testFile = new File(weirdDir, "file-XLIFFFilter-invalid-content.xlf");
        FileUtils.copyFile(new File(f), testFile);
        assertTrue(testFile.isFile());
        try {
            loadSourceFiles(filter, testFile.getAbsolutePath());
            fail("Should have died due to invalid XML character");
        } catch (TranslationException ex) {
            // The failure must be about the file content, not about the path.
            assertTrue(wasCausedBy(ex, SAXException.class));
            assertFalse(wasCausedBy(ex, URISyntaxException.class));
        }
    } finally {
        FileUtils.deleteDirectory(tmpDir);
    }
}
/**
 * Reports whether any throwable in the cause chain of {@code ex} is an
 * instance of {@code cls}. Subclasses count as a match, so e.g. a chain
 * holding a {@code SAXParseException} is "caused by" {@code SAXException}.
 * Only the causes are inspected; {@code ex} itself is never tested.
 *
 * @param ex
 *            the throwable whose causes are inspected; must not be null
 * @param cls
 *            the type to look for
 * @return true if some direct or transitive cause is an instance of
 *         {@code cls}, false otherwise
 */
private static boolean wasCausedBy(Throwable ex, Class<?> cls) {
    for (Throwable cause = ex.getCause(); cause != null; cause = cause.getCause()) {
        if (cls.isInstance(cause)) {
            return true;
        }
    }
    return false;
}
/**
 * Loads the properties sample and checks its two entries twice: once through
 * the key/value property check and once through the older single-string
 * comment check.
 */
@Test
public void testProperties() throws Exception {
    final String path = "test/data/filters/xliff/file-XLIFFFilter-properties.xlf";
    final IProject.FileInfo loaded = loadSourceFiles(filter, path);

    // Check reading as properties. The order of the content in the parsed properties array
    // does not matter (as long as the key=value pairs are consistent), so the checking is loose.
    checkMultiStart(loaded, path);
    checkMultiProps("tr1=This is test", null, null, "", "tr2=test2", "note", "foo", "group", "bazinga");
    checkMultiProps("tr2=test2", null, null, "tr1=This is test", "", "note", "bar", "resname", "baz",
            "group", "bazinga");
    checkMultiEnd();

    // Check reading as old comment string blobs. The order of the content in the parsed
    // properties array is not important in itself, but the way the test currently works,
    // it will break if the order changes.
    checkMultiStart(loaded, path);
    checkMulti("tr1=This is test", null, null, "", "tr2=test2", "foo\nbazinga");
    checkMulti("tr2=test2", null, null, "tr1=This is test", "", "bar\nbazinga\nbaz");
    checkMultiEnd();
}
}