// DefaultXMLDialect.java example
//
// Explorer
// OmegaT-master
/**************************************************************************
 OmegaT - Computer Assisted Translation (CAT) tool
          with fuzzy matching, translation memory, keyword search,
          glossaries, and translation leveraging into updated projects.

 Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
               2008 Martin Fleurke
               2009 Didier Briel
               2010 Antonio Vilei
               2011 Didier Briel
               2013 Alex Buloichik
               Home page: http://www.omegat.org/
               Support center: http://groups.yahoo.com/group/OmegaT/

 This file is part of OmegaT.

 OmegaT is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation, either version 3 of the License, or
 (at your option) any later version.

 OmegaT is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program.  If not, see <http://www.gnu.org/licenses/>.
 **************************************************************************/

package org.omegat.filters3.xml;

import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import org.omegat.core.data.ProtectedPart;
import org.omegat.core.statistics.StatisticsSettings;
import org.omegat.filters3.Attributes;
import org.omegat.filters3.Element;
import org.omegat.filters3.Tag;
import org.omegat.filters3.Text;
import org.omegat.util.MultiMap;
import org.omegat.util.StaticUtils;
import org.xml.sax.InputSource;

/**
 * Helper class for describing a certain XML dialect.
 *
 * @author Maxym Mykhalchuk
 * @author Martin Fleurke
 * @author Didier Briel
 * @author Alex Buloichik (alex73mail@gmail.com)
 */
public class DefaultXMLDialect implements XMLDialect {

    // /////////////////////////////////////////////////////////////////////////
    // Dialect definition: state plus the define...() methods that fill it
    // /////////////////////////////////////////////////////////////////////////

    /** The set of defined paragraph tags. */
    private final Set<String> paragraphTags = new HashSet<String>();

    /**
     * Defines a paragraph tag. Defining the same tag more than once is
     * harmless: the backing set keeps a single entry.
     *
     * @param tag
     *            Tag name.
     */
    public void defineParagraphTag(String tag) {
        paragraphTags.add(tag);
    }

    /**
     * Defines a set of paragraph tags from an array. Repeated entries are
     * harmless.
     *
     * @param tags
     *            Tag names; each one is passed to
     *            {@link #defineParagraphTag(String)}.
     */
    public void defineParagraphTags(String[] tags) {
        for (String tag : tags) {
            defineParagraphTag(tag);
        }
    }

    /** The map of defined content based tags to their {@link Tag.Type}. */
    private final Map<String, Tag.Type> contentBasedTags = new HashMap<String, Tag.Type>();

    /**
     * Defines a content based tag. Defining a tag that is already present
     * replaces its previously stored type.
     *
     * @param tag
     *            Tag name.
     * @param type
     *            The type stored for this tag.
     */
    public void defineContentBasedTag(String tag, Tag.Type type) {
        contentBasedTags.put(tag, type);
    }

    /** The set of defined tags that surround preformatted text. */
    private final Set<String> preformatTags = new HashSet<String>();

    /**
     * Defines a preformat tag. Defining the same tag more than once is
     * harmless: the backing set keeps a single entry.
     *
     * @param tag
     *            Tag name.
     */
    public void definePreformatTag(String tag) {
        preformatTags.add(tag);
    }

    /**
     * Defines a set of preformat tags from an array. Repeated entries are
     * harmless.
     *
     * @param tags
     *            Tag names; each one is passed to
     *            {@link #definePreformatTag(String)}.
     */
    public void definePreformatTags(String[] tags) {
        for (String tag : tags) {
            definePreformatTag(tag);
        }
    }

    /** The set of defined tags that surround intact text. */
    private final Set<String> intactTags = new HashSet<String>();

    /**
     * Defines an intact tag. Defining the same tag more than once is
     * harmless: the backing set keeps a single entry.
     *
     * @param tag
     *            Tag name.
     */
    public void defineIntactTag(String tag) {
        intactTags.add(tag);
    }

    /**
     * Defines a set of intact tags from an array. Repeated entries are
     * harmless.
     *
     * @param tags
     *            Tag names; each one is passed to
     *            {@link #defineIntactTag(String)}.
     */
    public void defineIntactTags(String[] tags) {
        for (String tag : tags) {
            defineIntactTag(tag);
        }
    }

    /** The multimap of tags to their translatable attributes. */
    private final MultiMap<String, String> translatableTagAttributes = new MultiMap<String, String>();

    /**
     * Defines translatable attribute of a tag.
     *
     * @param tag
     *            Tag name.
     * @param attribute
     *            Name of the attribute that is translatable for this tag.
     */
    public void defineTranslatableTagAttribute(String tag, String attribute) {
        translatableTagAttributes.put(tag, attribute);
    }

    /**
     * Defines translatable attributes of a tag.
     *
     * @param tag
     *            Tag name.
     * @param attributes
     *            Names of the attributes that are translatable for this tag.
     */
    public void defineTranslatableTagAttributes(String tag, String[] attributes) {
        for (String attr : attributes) {
            defineTranslatableTagAttribute(tag, attr);
        }
    }

    /**
     * Defines translatable attribute of several tags.
     *
     * @param tags
     *            Tag names.
     * @param attribute
     *            Name of the attribute that is translatable for each of the
     *            given tags.
     */
    public void defineTranslatableTagsAttribute(String[] tags, String attribute) {
        for (String tag : tags) {
            defineTranslatableTagAttribute(tag, attribute);
        }
    }

    /**
     * The set of attributes that are always translatable (no matter what tag
     * they belong to).
     */
    private final Set<String> translatableAttributes = new HashSet<String>();

    /**
     * Defines always translatable attribute (no matter what tag it belongs to).
     *
     * @param attribute
     *            Attribute name.
     */
    public void defineTranslatableAttribute(String attribute) {
        translatableAttributes.add(attribute);
    }

    /**
     * Defines always translatable attributes (no matter what tag they belong
     * to).
     *
     * @param attributes
     *            Attribute names; each one is passed to
     *            {@link #defineTranslatableAttribute(String)}.
     */
    public void defineTranslatableAttributes(String[] attributes) {
        for (String attr : attributes) {
            defineTranslatableAttribute(attr);
        }
    }

    /**
     * The set of defined out of turn tags that surround chunks of text that
     * should be translated separately, not breaking currently collected text.
     */
    private final Set<String> outOfTurnTags = new HashSet<String>();

    /**
     * Defines out of turn tag. Such tag surrounds chunk of text that should be
     * translated separately, not breaking currently collected text.
     *
     * @param tag
     *            Tag name.
     */
    public void defineOutOfTurnTag(String tag) {
        outOfTurnTags.add(tag);
    }

    /**
     * Defines out of turn tags. Such tags surround chunks of text that should
     * be translated separately, not breaking currently collected text.
     *
     * @param tags
     *            Tag names; each one is passed to
     *            {@link #defineOutOfTurnTag(String)}.
     */
    public void defineOutOfTurnTags(String[] tags) {
        for (String tag : tags) {
            defineOutOfTurnTag(tag);
        }
    }

    /**
     * Constraints keyed by constraint type. Visibility (package-private) and
     * mutability are deliberately left as they were, in case same-package code
     * accesses this field directly.
     */
    Map<Integer, Pattern> constraints = new HashMap<Integer, Pattern>();

    /**
     * Defines a constraint to restrict supported subset of XML files. There can
     * be only one constraint of each type: defining a second constraint of the
     * same type replaces the first.
     *
     * @param constraintType
     *            Type of constraint, see CONSTRAINT_... constants.
     * @param template
     *            Regular expression for a specified constrained string.
     */
    public void defineConstraint(Integer constraintType, Pattern template) {
        constraints.put(constraintType, template);
    }

    /**
     * Tag shortcuts keyed by tag name. Visibility (package-private) and
     * mutability are deliberately left as they were, in case same-package code
     * accesses this field directly.
     */
    Map<String, String> shortcuts = new HashMap<String, String>();

    /**
     * Defines a shortcut for a tag, useful for formatting tags. Shortcut is a
     * short form of a tag visible to translator, and stored in OmegaT's flavor
     * of TMX files.
     *
     * @param tag
     *            Tag name.
     * @param shortcut
     *            The shortcut for a tag.
     */
    public void defineShortcut(String tag, String shortcut) {
        shortcuts.put(tag, shortcut);
    }

    /**
     * Defines shortcuts for formatting tags. An alternative to calling
     * {@link #defineShortcut(String,String)} multiple times.
     * <p>
     * If the array has an odd number of elements, the last (unpaired) element
     * is ignored.
     *
     * @param mappings
     *            Array of strings, where even elements (0th, 2nd, etc) are
     *            tags, and odd elements are their corresponding shortcuts.
     */
    public void defineShortcuts(String[] mappings) {
        // Walk the array pair by pair; stop before a trailing element that has
        // no partner.
        for (int i = 0; i + 1 < mappings.length; i += 2) {
            defineShortcut(mappings[i], mappings[i + 1]);
        }
    }

    // /////////////////////////////////////////////////////////////////////////
    // XMLDialect Interface Implementation
    // /////////////////////////////////////////////////////////////////////////

    /**
     * Returns the set of defined paragraph tags.
     * <p>
     * The returned set is the live backing set of this dialect, not a copy.
     */
    @Override
    public Set<String> getParagraphTags() {
        return paragraphTags;
    }

    /**
     * Returns the map of content based tags to their types.
     * <p>
     * The returned map is the live backing map of this dialect, not a copy.
     */
    @Override
    public Map<String, Tag.Type> getContentBasedTags() {
        return contentBasedTags;
    }

    /**
     * Returns the set of tags that surround preformatted text.
     * <p>
     * The returned set is the live backing set of this dialect, not a copy.
     */
    @Override
    public Set<String> getPreformatTags() {
        return preformatTags;
    }

    /**
     * Returns the set of tags that surround intact portions of document, that
     * should not be translated at all.
     * <p>
     * The returned set is the live backing set of this dialect, not a copy.
     */
    @Override
    public Set<String> getIntactTags() {
        return intactTags;
    }

    /**
     * Returns the multimap of translatable attributes of each tag.
     * <p>
     * Each entry maps a tag name to the names of its translatable attributes.
     */
    @Override
    public MultiMap<String, String> getTranslatableTagAttributes() {
        return translatableTagAttributes;
    }

    /**
     * Returns for a given attribute of a given tag if the attribute should be
     * translated with the given other attributes present. If the tagAttribute
     * is returned by getTranslatable(Tag)Attributes(), this function is called
     * to further test the attribute within its context. This allows for example
     * the XHTML filter to not translate the value attribute of an
     * input-element, except when it is a button or submit or reset.
     * <p>
     * This default implementation accepts every attribute.
     *
     * @param tag
     *            The tag the attribute belongs to
     * @param attribute
     *            The attribute that could be translated
     * @param atts
     *            The list of the tag attributes
     * @return always <code>true</code> in this implementation
     */
    @Override
    public Boolean validateTranslatableTagAttribute(String tag, String attribute, Attributes atts) {
        return true;
    }

    /**
     * For a given tag, return whether the content of this tag should be
     * translated, depending on the content of one attribute and the presence or
     * absence of other attributes. For instance, in the ResX filter, tags
     * should not be translated when they contain the attribute "type", or when
     * the attribute "name" starts with "&gt;".
     * <p>
     * This default implementation ignores its arguments.
     *
     * @param tag
     *            The tag that could be translated
     * @param atts
     *            The list of the tag attributes
     * @return always <code>false</code> in this implementation
     */
    @Override
    public Boolean validateIntactTag(String tag, Attributes atts) {
        return false;
    }

    /**
     * Attribute-dependent check related to content based tags. This default
     * implementation ignores its arguments.
     *
     * @param tag
     *            The tag to check
     * @param atts
     *            The list of the tag attributes
     * @return always <code>false</code> in this implementation
     */
    @Override
    public Boolean validateContentBasedTag(String tag, Attributes atts) {
        return false;
    }

    /**
     * For a given tag, return whether the content of this tag should be
     * translated, depending on the content of one attribute and the presence or
     * absence of other attributes. For instance, in the Typo3 filter, tags
     * should be translated when the attribute localizable="1". Contrary to
     * validateIntactTag, this applies only to the current tag, and the tags
     * contained in it are not affected.
     * <p>
     * This default implementation ignores its arguments.
     *
     * @param tag
     *            The tag that could be translated
     * @param atts
     *            The list of the tag attributes
     * @return always <code>true</code> in this implementation
     */
    @Override
    public Boolean validateTranslatableTag(String tag, Attributes atts) {
        return true;
    }

    /**
     * For a given tag, return whether this tag is a paragraph tag, depending on
     * the content of one attribute (and/or the presence or absence of other
     * attributes). For instance, in the XLIFF filter, the &lt;mark&gt; tag
     * should start a new paragraph when the attribute "mtype" contains "seg".
     * <p>
     * This default implementation ignores its arguments.
     *
     * @param tag
     *            The tag that could be a paragraph tag
     * @param atts
     *            The list of the tag attributes
     * @return always <code>false</code> in this implementation
     */
    @Override
    public Boolean validateParagraphTag(String tag, Attributes atts) {
        return false;
    }

    /**
     * For a given tag, return whether this tag is a preformat tag, depending on
     * the content of one attribute (and/or the presence or absence of other
     * attributes). For instance, in the XLIFF filter, the &lt;mark&gt; tag
     * should be a preformat tag when the attribute "mtype" contains "seg".
     * <p>
     * This default implementation ignores its arguments.
     *
     * @param tag
     *            The tag that could be a preformat tag
     * @param atts
     *            The list of the tag attributes
     * @return always <code>false</code> in this implementation
     */
    @Override
    public Boolean validatePreformatTag(String tag, Attributes atts) {
        return false;
    }

    /**
     * Returns the set of translatable attributes (no matter what tag they
     * belong to).
     * <p>
     * The returned set is the live backing set of this dialect, not a copy.
     */
    @Override
    public Set<String> getTranslatableAttributes() {
        return translatableAttributes;
    }

    /**
     * Returns the set of "out-of-turn" tags. Such tags specify chunks of text
     * that should be translated separately, not breaking currently collected
     * text entry. For example, footnotes in OpenDocument.
     * <p>
     * The returned set is the live backing set of this dialect, not a copy.
     */
    @Override
    public Set<String> getOutOfTurnTags() {
        return outOfTurnTags;
    }

    /**
     * Returns defined constraints to restrict supported subset of XML files.
     * There can be only one constraint of each type, see CONSTRAINT_...
     * constants.
     * <p>
     * Each entry maps an {@link Integer} to a {@link Pattern} -- regular
     * expression for a specified constrained string.
     */
    @Override
    public Map<Integer, Pattern> getConstraints() {
        return constraints;
    }

    /**
     * Resolves external entities if child filter needs it. Default
     * implementation returns <code>null</code>.
     *
     * @param publicId
     *            Public identifier of the entity (unused here)
     * @param systemId
     *            System identifier of the entity (unused here)
     * @return always <code>null</code> in this implementation
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId) {
        return null;
    }

    /**
     * Returns the map of tags to their shortcuts.
     * <p>
     * Each entry maps a {@link String} to a {@link String} -- a tag to its
     * shortcut.
     */
    @Override
    public Map<String, String> getShortcuts() {
        return shortcuts;
    }

    /**
     * The parameter setting whether closing tags should be used. Off by
     * default.
     */
    private boolean closingTagRequired = false;

    /**
     * Sets closingTagRequired to <code>true</code> or <code>false</code>
     *
     * @param onOff
     *            The parameter setting whether closing tags should be used or
     *            not for empty tags.
     */
    @Override
    public void setClosingTagRequired(boolean onOff) {
        closingTagRequired = onOff;
    }

    /**
     * Gives the value of closingTagRequired
     */
    @Override
    public Boolean getClosingTagRequired() {
        return closingTagRequired;
    }

    /**
     * The parameter setting whether tags aggregation can be enabled. Off by
     * default.
     */
    private boolean tagsAggregationEnabled = false;

    /**
     * {@inheritDoc}
     */
    @Override
    public void setTagsAggregationEnabled(boolean onOff) {
        tagsAggregationEnabled = onOff;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Boolean getTagsAggregationEnabled() {
        return tagsAggregationEnabled;
    }

    /**
     * The value reported by {@link #getForceSpacePreserving()}. Off by
     * default.
     */
    private boolean forceSpacePreserving = false;

    /**
     * {@inheritDoc}
     */
    @Override
    public Boolean getForceSpacePreserving() {
        return forceSpacePreserving;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void setForceSpacePreserving(boolean onOff) {
        forceSpacePreserving = onOff;
    }

    /**
     * {@inheritDoc}
     * <p>
     * This implementation concatenates the shortcut form of every element, in
     * order. The given <code>protectedParts</code> list is emptied first and
     * then receives one {@link ProtectedPart} for each element that is not a
     * {@link Text}.
     *
     * @param elements
     *            The elements to render as shortcuts
     * @param protectedParts
     *            Output list; its previous contents are discarded
     * @return the concatenated shortcut text of all elements
     */
    @Override
    public String constructShortcuts(List<Element> elements, List<ProtectedPart> protectedParts) {
        // The list is an output parameter: drop whatever the caller left in it.
        protectedParts.clear();
        StringBuilder r = new StringBuilder();
        for (Element el : elements) {
            String shortcut = el.toShortcut();
            r.append(shortcut);
            if (el instanceof Text) {
                // Plain text contributes to the result only.
                continue;
            }
            ProtectedPart pp = new ProtectedPart();
            pp.setTextInSourceSegment(shortcut);
            pp.setDetailsFromSourceFile(el.toOriginal());
            // Word counting uses the element's own "safe" shortcut only when
            // the statistics settings ask for standard tags to be counted.
            if (StatisticsSettings.isCountingStandardTags()) {
                pp.setReplacementWordsCountCalculation(el.toSafeCalcShortcut());
            } else {
                pp.setReplacementWordsCountCalculation(StaticUtils.TAG_REPLACEMENT);
            }
            pp.setReplacementUniquenessCalculation(StaticUtils.TAG_REPLACEMENT);
            pp.setReplacementMatchCalculation(StaticUtils.TAG_REPLACEMENT);
            protectedParts.add(pp);
        }
        return r.toString();
    }
}