/************************************************************************** OmegaT - Computer Assisted Translation (CAT) tool with fuzzy matching, translation memory, keyword search, glossaries, and translation leveraging into updated projects. Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk 2007-2010 Didier Briel 2013 Alex Buloichik, Didier Briel, Piotr Kulik 2014 Didier Briel, Aaron Madlon-Kay, Piotr Kulik Home page: http://www.omegat.org/ Support center: http://groups.yahoo.com/group/OmegaT/ This file is part of OmegaT. OmegaT is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OmegaT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. **************************************************************************/ package org.omegat.filters3.xml.xliff; import java.util.List; import org.omegat.core.data.ProtectedPart; import org.omegat.core.statistics.StatisticsSettings; import org.omegat.filters3.Attributes; import org.omegat.filters3.Element; import org.omegat.filters3.Tag; import org.omegat.filters3.xml.DefaultXMLDialect; import org.omegat.filters3.xml.XMLContentBasedTag; import org.omegat.filters3.xml.XMLText; import org.omegat.filters3.xml.xliff.XLIFFOptions.ID_TYPE; import org.omegat.util.InlineTagHandler; import org.omegat.util.StaticUtils; import org.omegat.util.StringUtil; /** * This class specifies XLIFF XML Dialect. * * XLIFF 1.2 specification: * http://docs.oasis-open.org/xliff/xliff-core/xliff-core.html * * @author Didier Briel * @author Alex Buloichik (alex73mail@gmail.com) * @author Piotr Kulik * @author Aaron Madlon-Kay */ public class XLIFFDialect extends DefaultXMLDialect { private boolean forceShortCutToF; private boolean ignoreTypeForPhTags; private boolean ignoreTypeForBptTags; /** * Sets whether alternative translations are identified by previous and next paragraphs or by <trans-unit> ID */ protected ID_TYPE altTransIDType; public XLIFFDialect() { } /** * Actually defines the dialect. It cannot be done during creation, because * options are not known at that step. */ public void defineDialect(XLIFFOptions options) { defineParagraphTags(new String[] { "source", "target", }); defineOutOfTurnTags(new String[] { "sub", }); if (options.get26Compatibility()) { // Old tag handling compatible with 2.6 defineIntactTags(new String[] { "source", "header", "bin-unit", "prop-group", "count-group", "alt-trans", "note", "ph", "bpt", "ept", "it", "context", "seg-source", "sdl:seg-defs"}); } else { // New tag handling defineIntactTags(new String[] { "source", "header", "bin-unit", "prop-group", "count-group", "alt-trans", "note", "context", "seg-source", "sdl:seg-defs"}); defineContentBasedTag("bpt", Tag.Type.BEGIN); defineContentBasedTag("ept", Tag.Type.END); defineContentBasedTag("it", Tag.Type.ALONE); defineContentBasedTag("ph", Tag.Type.ALONE); // "mrk", only <mrk mtype="protected"> is content-based tag. see validateContentBasedTag forceShortCutToF = options.getForceShortcutToF(); ignoreTypeForPhTags = options.getIgnoreTypeForPhTags(); ignoreTypeForBptTags = options.getIgnoreTypeForBptTags(); altTransIDType = options.getAltTransIDType(); } } /** * In the XLIFF filter, the tag <mrk> is a preformat tag when the * attribute "mtype" contains "seg". * * @param tag * An XML tag * @param atts * The attributes associated with the tag * @return <code>true</code> if this tag should be a preformat tag, * <code>false</code> otherwise */ @Override public Boolean validatePreformatTag(String tag, Attributes atts) { if (!tag.equalsIgnoreCase("mrk")) { return false; } if (atts != null) { if ("seg".equalsIgnoreCase(atts.getValueByName("mtype"))) { return true; } } return false; } /** * In the XLKIFF filter, content shouldn't be translated if translate="no" * http://docs.oasis-open.org/xliff/v1.2/os/xliff-core.html#translate * @param tag * An XML tag * @param atts * The attributes associated with the tag * @return <code>false</code> if the content of this tag should be * translated, <code>true</code> otherwise */ @Override public Boolean validateIntactTag(String tag, Attributes atts) { if (!tag.equalsIgnoreCase("group") // Translate can only appear in these tags && !tag.equalsIgnoreCase("trans-unit") && !tag.equalsIgnoreCase("bin-unit")) { return false; } if (atts != null) { if ("no".equalsIgnoreCase(atts.getValueByName("translate"))) { return true; } } return false; } @Override public Boolean validateContentBasedTag(String tag, Attributes atts) { return "mrk".equals(tag) && atts != null && "protected".equals(atts.getValueByName("mtype")); } @Override public String constructShortcuts(List<Element> elements, List<ProtectedPart> protectedParts) { protectedParts.clear(); // create shortcuts InlineTagHandler tagHandler = new InlineTagHandler(); StringBuilder r = new StringBuilder(); for (Element el : elements) { if (el instanceof XMLContentBasedTag) { XMLContentBasedTag tag = (XMLContentBasedTag) el; String shortcut = null; int shortcutLetter; int tagIndex; boolean tagProtected; if ("bpt".equals(tag.getTag())) { // XLIFF specification requires 'rid' and 'id' attributes, // but some tools uses 'i' attribute like for TMX tagHandler.startBPT(tag.getAttribute("rid"), tag.getAttribute("id"), tag.getAttribute("i")); shortcutLetter = calcTagShortcutLetter(tag, ignoreTypeForBptTags); tagHandler.setTagShortcutLetter(shortcutLetter); tagIndex = tagHandler.endBPT(); shortcut = "<" + (shortcutLetter != 0 ? String.valueOf(Character.toChars(shortcutLetter)) : 'f') + tagIndex + '>'; tagProtected = false; } else if ("ept".equals(tag.getTag())) { tagHandler.startEPT(tag.getAttribute("rid"), tag.getAttribute("id"), tag.getAttribute("i")); tagIndex = tagHandler.endEPT(); shortcutLetter = tagHandler.getTagShortcutLetter(); shortcut = "</" + (shortcutLetter != 0 ? String.valueOf(Character.toChars(shortcutLetter)) : 'f') + tagIndex + '>'; tagProtected = false; } else if ("it".equals(tag.getTag())) { tagHandler.startOTHER(); tagHandler.setCurrentPos(tag.getAttribute("pos")); tagIndex = tagHandler.endOTHER(); // XLIFF specification requires 'open/close' values, // but some tools may use 'begin/end' values like for TMX shortcutLetter = calcTagShortcutLetter(tag); if ("close".equals(tagHandler.getCurrentPos()) || "end".equals(tagHandler.getCurrentPos())) { // In some cases, even if we're able to compute a shortcut, it's better to force to "f" // for better compatibility with corresponding TMX files if (forceShortCutToF) { shortcutLetter = 'f'; } shortcut = "</" + (shortcutLetter != 0 ? String.valueOf(Character.toChars(shortcutLetter)) : 'f') + tagIndex + '>'; } else { shortcut = "<" + (shortcutLetter != 0 ? String.valueOf(Character.toChars(shortcutLetter)) : 'f') + tagIndex + '>'; } tagProtected = false; } else if ("ph".equals(tag.getTag())) { tagHandler.startOTHER(); tagIndex = tagHandler.endOTHER(); shortcutLetter = calcTagShortcutLetter(tag, ignoreTypeForPhTags); shortcut = "<" + (shortcutLetter != 0 ? String.valueOf(Character.toChars(shortcutLetter)) : 'f') + tagIndex + "/>"; tagProtected = false; } else if ("mrk".equals(tag.getTag())) { tagHandler.startOTHER(); tagIndex = tagHandler.endOTHER(); shortcutLetter = 'm'; shortcut = "<m" + tagIndex + ">" + tag.getIntactContents().sourceToOriginal() + "</m" + tagIndex + ">"; tagProtected = true; } else { shortcutLetter = 'f'; tagIndex = -1; tagProtected = false; } tag.setShortcutLetter(shortcutLetter); tag.setShortcutIndex(tagIndex); tag.setShortcut(shortcut); r.append(shortcut); ProtectedPart pp = new ProtectedPart(); pp.setTextInSourceSegment(shortcut); pp.setDetailsFromSourceFile(tag.toOriginal()); if (tagProtected) { // protected text with related tags, like <m0>Acme</m0> if (StatisticsSettings.isCountingProtectedText()) { // Protected texts are counted, but related tags are not counted in the word count pp.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT + tag.getIntactContents().sourceToOriginal() + StaticUtils.TAG_REPLACEMENT); } else { // All protected parts are not counted in the word count(default) pp.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT); } pp.setReplacementUniquenessCalculation(StaticUtils.TAG_REPLACEMENT); pp.setReplacementMatchCalculation(tag.getIntactContents().sourceToOriginal()); } else { // simple tag, like <i0> if (StatisticsSettings.isCountingStandardTags()) { pp.setReplacementWordsCountCalculation(tag.toSafeCalcShortcut()); } else { pp.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT); } pp.setReplacementUniquenessCalculation(StaticUtils.TAG_REPLACEMENT); pp.setReplacementMatchCalculation(StaticUtils.TAG_REPLACEMENT); } protectedParts.add(pp); } else if (el instanceof Tag) { Tag tag = (Tag) el; int tagIndex = tagHandler.paired(tag.getTag(), tag.getType()); tag.setIndex(tagIndex); String shortcut = tag.toShortcut(); r.append(shortcut); ProtectedPart pp = new ProtectedPart(); pp.setTextInSourceSegment(shortcut); pp.setDetailsFromSourceFile(tag.toOriginal()); if (StatisticsSettings.isCountingStandardTags()) { pp.setReplacementWordsCountCalculation(tag.toSafeCalcShortcut()); } else { pp.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT); } pp.setReplacementUniquenessCalculation(StaticUtils.TAG_REPLACEMENT); pp.setReplacementMatchCalculation(StaticUtils.TAG_REPLACEMENT); protectedParts.add(pp); } else { r.append(el.toShortcut()); } } return r.toString(); } private int calcTagShortcutLetter(XMLContentBasedTag tag) { return calcTagShortcutLetter(tag, false); } private int calcTagShortcutLetter(XMLContentBasedTag tag, boolean ignoreTypeForPhtags) { int s; if (!tag.getIntactContents().isEmpty() && (tag.getIntactContents().get(0) instanceof XMLText)) { XMLText xmlText = (XMLText) tag.getIntactContents().get(0); s = StringUtil.getFirstLetterLowercase(xmlText.getText()); } else { String type = StringUtil.nvl(tag.getAttribute("ctype"), tag.getAttribute("type")); if (type != null && !ignoreTypeForPhtags) { s = StringUtil.getFirstLetterLowercase(type); } else { s = 0; } } return s; } }