/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2011 Alex Buloichik
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.core.data;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.custommonkey.xmlunit.XMLUnit;
import org.junit.Before;
import org.junit.Rule;
import org.junit.rules.TestName;
import org.omegat.core.Core;
import org.omegat.core.segmentation.SRX;
import org.omegat.core.segmentation.Segmenter;
import org.omegat.filters2.FilterContext;
import org.omegat.filters2.IFilter;
import org.omegat.filters2.IParseCallback;
import org.omegat.filters2.ITranslateCallback;
import org.omegat.filters2.master.FilterMaster;
import org.omegat.filters2.text.TextFilter;
import org.omegat.util.TestPreferencesInitializer;
/**
* Base methods for TMX compliance tests.
*
* @author Alex Buloichik (alex73mail@gmail.com)
*/
public abstract class TmxComplianceBase {
static Pattern RE_SEG = Pattern.compile("(<seg>.+</seg>)");
protected File outFile;
@Rule
public TestName name = new TestName();
@Before
public final void setUp() throws Exception {
Core.setFilterMaster(new FilterMaster(FilterMaster.createDefaultFiltersConfig()));
Core.setSegmenter(new Segmenter(SRX.getDefault()));
TestPreferencesInitializer.init();
outFile = new File("build/testdata/" + getClass().getSimpleName() + "-" + name.getMethodName() + ".out");
outFile.getParentFile().mkdirs();
if (outFile.exists()) {
if (!outFile.delete()) {
throw new IOException("Can't remove " + outFile.getAbsolutePath());
}
}
}
protected void compareTexts(File f1, String charset1, File f2, String charset2) throws Exception {
List<String> lines1 = readTextFile(f1, charset1);
List<String> lines2 = readTextFile(f2, charset2);
assertEquals(lines1.size(), lines2.size());
for (int i = 0; i < lines1.size(); i++) {
assertEquals(lines1.get(i), lines2.get(i));
}
}
protected List<String> readTextFile(File f, String charset) throws Exception {
BufferedReader rd = new BufferedReader(new InputStreamReader(new FileInputStream(f), charset));
int ch;
// BOM (byte order mark) bugfix
rd.mark(1);
ch = rd.read();
if (ch != 0xFEFF) {
rd.reset();
}
List<String> result = new ArrayList<String>();
String s;
while ((s = rd.readLine()) != null) {
result.add(s);
}
rd.close();
return result;
}
protected void translateAndCheckTextUsingTmx(String fileTextIn, String inCharset, String fileTMX,
String fileTextOut, String outCharset, String sourceLang, String targetLang,
Map<String, TMXEntry> tmxPatch) throws Exception {
TextFilter f = new TextFilter();
Map<String, String> c = new TreeMap<String, String>();
c.put(TextFilter.OPTION_SEGMENT_ON, TextFilter.SEGMENT_BREAKS);
ProjectProperties props = new TestProjectProperties(sourceLang, targetLang);
translateUsingTmx(f, c, fileTextIn, inCharset, fileTMX, outCharset, props, tmxPatch);
compareTexts(new File("test/data/tmx/TMXComplianceKit/" + fileTextOut), outCharset, outFile,
outCharset);
}
protected void translateUsingTmx(IFilter filter, Map<String, String> config, final String fileTextIn,
String inCharset, String fileTMX, String outCharset, ProjectProperties props,
Map<String, TMXEntry> tmxPatch) throws Exception {
final ProjectTMX tmx = new ProjectTMX(props.getSourceLanguage(), props.getTargetLanguage(), props.isSentenceSegmentingEnabled(), new File("test/data/tmx/TMXComplianceKit/" + fileTMX),
orphanedCallback);
if (tmxPatch != null) {
tmx.defaults.putAll(tmxPatch);
}
FilterContext fc = new FilterContext(props);
fc.setInEncoding(inCharset);
fc.setOutEncoding(outCharset);
ITranslateCallback cb = new TranslateEntry(props) {
@Override
protected String getSegmentTranslation(String id, int segmentIndex, String segmentSource,
String prevSegment, String nextSegment, String path) {
TMXEntry e = tmx.getDefaultTranslation(segmentSource);
assertNotNull(e);
return e.translation;
}
@Override
String getCurrentFile() {
return fileTextIn;
}
};
filter.translateFile(new File("test/data/tmx/TMXComplianceKit/" + fileTextIn), outFile, config, fc,
cb);
}
protected List<String> loadTexts(final IFilter filter, final File sourceFile, final String inCharset,
final FilterContext context, final Map<String, String> config) throws Exception {
final List<String> result = new ArrayList<String>();
IParseCallback callback = new IParseCallback() {
public void addEntry(String id, String source, String translation, boolean isFuzzy,
String comment, IFilter filter) {
}
public void addEntry(String id, String source, String translation, boolean isFuzzy, String comment,
String path, IFilter filter, List<ProtectedPart> protectedParts) {
String[] props = comment == null ? null : new String[] { SegmentProperties.COMMENT, comment };
addEntryWithProperties(id, source, translation, isFuzzy, props, path, filter, protectedParts);
}
@Override
public void addEntryWithProperties(String id, String source, String translation,
boolean isFuzzy, String[] props, String path,
IFilter filter, List<ProtectedPart> protectedParts) {
result.addAll(Core.getSegmenter().segment(context.getSourceLang(), source, null, null));
}
public void linkPrevNextSegments() {
}
};
filter.parseFile(sourceFile, config, context, callback);
return result;
}
protected void align(IFilter filter, File sourceFile, String inCharset, File translatedFile,
String outCharset, ProjectProperties props) throws Exception {
FilterContext fc = new FilterContext(props);
fc.setInEncoding(inCharset);
fc.setOutEncoding(outCharset);
RealProject.AlignFilesCallback callback = new RealProject.AlignFilesCallback(props);
filter.alignFile(sourceFile, translatedFile, null, fc, callback);
ProjectTMX tmx = new ProjectTMX(props.getSourceLanguage(), props.getTargetLanguage(), props.isSentenceSegmentingEnabled(), outFile, orphanedCallback);
for (Map.Entry<String, TMXEntry> en : callback.data.entrySet()) {
tmx.defaults.put(en.getKey(), en.getValue());
}
tmx.exportTMX(props, outFile, false, false, true);
}
protected Set<String> readTmxSegments(File tmx) throws Exception {
BufferedReader rd = new BufferedReader(new InputStreamReader(new FileInputStream(tmx), "UTF-8"));
String s;
Set<String> entries = new TreeSet<String>();
while ((s = rd.readLine()) != null) {
Matcher m = RE_SEG.matcher(s);
if (m.find()) {
entries.add(m.group(1));
}
}
rd.close();
return entries;
}
protected void compareTMX(File orig, File created, int segmentsCount) throws Exception {
Set<String> tmxOrig = readTmxSegments(orig);
Set<String> tmxCreated = readTmxSegments(created);
assertEquals(segmentsCount, tmxCreated.size());
assertEquals(tmxOrig.size(), tmxCreated.size());
List<String> listOrig = new ArrayList<String>(tmxOrig);
List<String> listCreated = new ArrayList<String>(tmxCreated);
for (int i = 0; i < listOrig.size(); i++) {
XMLUnit.compareXML(listOrig.get(i), listCreated.get(i));
}
}
protected ProjectTMX.CheckOrphanedCallback orphanedCallback = new ProjectTMX.CheckOrphanedCallback() {
public boolean existSourceInProject(String src) {
return true;
}
public boolean existEntryInProject(EntryKey key) {
return true;
}
};
protected static class TestProjectProperties extends ProjectProperties {
public TestProjectProperties(String sourceLang, String targetLang) {
setSupportDefaultTranslations(true);
setSourceLanguage(sourceLang);
setTargetLanguage(targetLang);
}
}
}