/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2017 Aaron Madlon-Kay
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.core.data;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.omegat.core.Core;
import org.omegat.core.data.ParseEntry.ParseEntryResult;
import org.omegat.filters2.FilterContext;
import org.omegat.filters2.IFilter;
import org.omegat.filters2.IParseCallback;
import org.omegat.filters2.master.FilterMaster;
import org.omegat.util.Language;
import org.omegat.util.OConsts;
import org.omegat.util.Preferences;
import org.omegat.util.StringUtil;
import org.omegat.util.TMXProp;
import org.omegat.util.TMXReader2;
/**
* Common utility class for external TMs.
*
* @author Aaron Madlon-Kay
*
*/
public final class ExternalTMFactory {
private ExternalTMFactory() {
}
public static boolean isSupported(File file) {
return TMXLoader.isSupported(file) || BifileLoader.isSupported(file);
}
public static ExternalTMX load(File file) throws Exception {
ProjectProperties props = Core.getProject().getProjectProperties();
if (TMXLoader.isSupported(file)) {
return new TMXLoader(file)
.setExtTmxLevel2(Preferences.isPreference(Preferences.EXT_TMX_SHOW_LEVEL2))
.setUseSlash(Preferences.isPreference(Preferences.EXT_TMX_USE_SLASH))
.setDoSegmenting(props.isSentenceSegmentingEnabled())
.load(props.getSourceLanguage(), props.getTargetLanguage());
} else if (BifileLoader.isSupported(file)) {
return new BifileLoader(file).setRemoveTags(props.isRemoveTags())
.setRemoveSpaces(Core.getFilterMaster().getConfig().isRemoveSpacesNonseg())
.setDoSegmenting(props.isSentenceSegmentingEnabled())
.load(props.getSourceLanguage(), props.getTargetLanguage());
} else {
throw new IllegalArgumentException("Unsupported external TM type: " + file.getName());
}
}
public static final class TMXLoader {
public static boolean isSupported(File file) {
String name = file.getName().toLowerCase();
return name.endsWith(OConsts.TMX_EXTENSION) || name.endsWith(OConsts.TMX_GZ_EXTENSION);
}
private final File file;
private boolean extTmxLevel2;
private boolean useSlash;
private boolean doSegmenting;
public TMXLoader(File file) {
this.file = file;
}
public TMXLoader setExtTmxLevel2(boolean extTmxLevel2) {
this.extTmxLevel2 = extTmxLevel2;
return this;
}
public TMXLoader setUseSlash(boolean useSlash) {
this.useSlash = useSlash;
return this;
}
public TMXLoader setDoSegmenting(boolean doSegmenting) {
this.doSegmenting = doSegmenting;
return this;
}
public ExternalTMX load(Language sourceLang, Language targetLang) throws Exception {
return new ExternalTMX(file.getName(), loadImpl(sourceLang, targetLang));
}
private List<PrepareTMXEntry> loadImpl(Language sourceLang, Language targetLang) throws Exception {
List<PrepareTMXEntry> entries = new ArrayList<>();
TMXReader2.LoadCallback loader = new TMXReader2.LoadCallback() {
public boolean onEntry(TMXReader2.ParsedTu tu, TMXReader2.ParsedTuv tuvSource,
TMXReader2.ParsedTuv tuvTarget, boolean isParagraphSegtype) {
if (tuvSource == null) {
return false;
}
if (tuvTarget != null) {
// add only target Tuv
addTuv(tu, tuvSource, tuvTarget, isParagraphSegtype);
} else {
// add all non-source Tuv
for (int i = 0; i < tu.tuvs.size(); i++) {
if (tu.tuvs.get(i) != tuvSource) {
addTuv(tu, tuvSource, tu.tuvs.get(i), isParagraphSegtype);
}
}
}
return true;
}
private void addTuv(TMXReader2.ParsedTu tu, TMXReader2.ParsedTuv tuvSource,
TMXReader2.ParsedTuv tuvTarget, boolean isParagraphSegtype) {
String changer = StringUtil.nvl(tuvTarget.changeid, tuvTarget.creationid, tu.changeid,
tu.creationid);
String creator = StringUtil.nvl(tuvTarget.creationid, tu.creationid);
long changed = StringUtil.nvlLong(tuvTarget.changedate, tuvTarget.creationdate,
tu.changedate, tu.creationdate);
long created = StringUtil.nvlLong(tuvTarget.creationdate, tu.creationdate);
List<String> sources = new ArrayList<String>();
List<String> targets = new ArrayList<String>();
Core.getSegmenter().segmentEntries(doSegmenting && isParagraphSegtype, sourceLang,
tuvSource.text, targetLang, tuvTarget.text, sources, targets);
for (int i = 0; i < sources.size(); i++) {
PrepareTMXEntry te = new PrepareTMXEntry();
te.source = sources.get(i);
te.translation = targets.get(i);
te.changer = changer;
te.changeDate = changed;
te.creator = creator;
te.creationDate = created;
te.note = tu.note;
te.otherProperties = tu.props;
entries.add(te);
}
}
};
TMXReader2 reader = new TMXReader2();
reader.readTMX(file, sourceLang, targetLang, doSegmenting, false, extTmxLevel2, useSlash, loader);
return entries;
}
}
public static final class BifileLoader {
public static boolean isSupported(File file) {
FilterMaster fm = Core.getFilterMaster();
try {
return fm.isFileSupported(file, true) && fm.isBilingualFile(file);
} catch (Exception e) {
return false;
}
}
private final File file;
private boolean removeTags;
private boolean removeSpaces;
private boolean doSegmenting;
public BifileLoader(File file) {
this.file = file;
}
public BifileLoader setRemoveTags(boolean removeTags) {
this.removeTags = removeTags;
return this;
}
public BifileLoader setRemoveSpaces(boolean removeSpaces) {
this.removeSpaces = removeSpaces;
return this;
}
public BifileLoader setDoSegmenting(boolean doSegmenting) {
this.doSegmenting = doSegmenting;
return this;
}
public ExternalTMX load(Language sourceLang, Language targetLang) throws Exception {
return new ExternalTMX(file.getName(), loadImpl(sourceLang, targetLang));
}
private List<PrepareTMXEntry> loadImpl(Language sourceLang, Language targetLang) throws Exception {
List<PrepareTMXEntry> entries = new ArrayList<>();
ParseEntryResult throwaway = new ParseEntryResult();
Core.getFilterMaster().loadFile(file.getPath(),
new FilterContext(sourceLang, targetLang, true).setRemoveAllTags(removeTags),
new IParseCallback() {
@Override
public void linkPrevNextSegments() {
}
@Override
public void addEntry(String id, String source, String translation, boolean isFuzzy,
String comment, IFilter filter) {
process(source, translation, id, comment, null, null);
}
@Override
public void addEntry(String id, String source, String translation, boolean isFuzzy,
String comment, String path, IFilter filter,
List<ProtectedPart> protectedParts) {
process(source, translation, id, comment, path, null);
}
@Override
public void addEntryWithProperties(String id, String source, String translation,
boolean isFuzzy, String[] props, String path, IFilter filter,
List<ProtectedPart> protectedParts) {
process(source, translation, id, null, null, props);
}
private void process(String source, String target, String id, String comment,
String path, String[] props) {
if (source == null || target == null) {
return;
}
source = StringUtil.normalizeUnicode(
ParseEntry.stripSomeChars(source, throwaway, removeTags, removeSpaces));
target = StringUtil.normalizeUnicode(
ParseEntry.stripSomeChars(target, throwaway, removeTags, removeSpaces));
List<String> sources = new ArrayList<>();
List<String> targets = new ArrayList<>();
Core.getSegmenter().segmentEntries(doSegmenting, sourceLang, source, targetLang,
target, sources, targets);
if (sources.size() == targets.size()) {
for (int i = 0; i < sources.size(); i++) {
addImpl(sources.get(i), targets.get(i), id, comment, path, props);
}
} else {
addImpl(source, target, id, comment, path, props);
}
}
private void addImpl(String source, String target, String id, String comment,
String path, String[] props) {
if (!source.trim().isEmpty()) {
entries.add(makeEntry(source, target, id, comment, path, props));
}
}
});
return entries;
}
}
public static final class Builder {
private final String name;
private final List<PrepareTMXEntry> entries = new ArrayList<>();
public Builder(String name) {
this.name = name;
}
public void addEntry(String source, String target, String id, String path, String[] props) {
if (!source.trim().isEmpty()) {
entries.add(makeEntry(source, target, id, null, path, props));
}
}
public ExternalTMX done() {
return new ExternalTMX(name, entries);
}
}
private static PrepareTMXEntry makeEntry(String source, String target, String id, String comment, String path,
String[] props) {
PrepareTMXEntry entry = new PrepareTMXEntry();
entry.source = source;
entry.translation = target;
entry.note = comment;
if (props != null) {
List<TMXProp> tmxProps = propsToList(props);
if (id != null) {
tmxProps.add(new TMXProp("id", id));
}
if (path != null) {
tmxProps.add(new TMXProp("path", path));
}
entry.otherProperties = tmxProps;
if (entry.note == null) {
entry.note = SegmentProperties.getProperty(props, SegmentProperties.COMMENT);
}
}
return entry;
}
private static List<TMXProp> propsToList(String[] props) {
List<TMXProp> result = new ArrayList<>(props.length / 2);
for (int i = 0; i < props.length; i++) {
result.add(new TMXProp(props[i], props[++i]));
}
return result;
}
}