/**************************************************************************
OmegaT - Computer Assisted Translation (CAT) tool
with fuzzy matching, translation memory, keyword search,
glossaries, and translation leveraging into updated projects.
Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk
2008 Martin Fleurke
2009 Didier Briel
2010 Antonio Vilei
2011 Didier Briel
2013 Alex Buloichik
Home page: http://www.omegat.org/
Support center: http://groups.yahoo.com/group/OmegaT/
This file is part of OmegaT.
OmegaT is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
OmegaT is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
**************************************************************************/
package org.omegat.filters3.xml;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import org.omegat.core.data.ProtectedPart;
import org.omegat.core.statistics.StatisticsSettings;
import org.omegat.filters3.Attributes;
import org.omegat.filters3.Element;
import org.omegat.filters3.Tag;
import org.omegat.filters3.Text;
import org.omegat.util.MultiMap;
import org.omegat.util.StaticUtils;
import org.xml.sax.InputSource;
/**
* Helper class for describing a certain XML dialect.
*
* @author Maxym Mykhalchuk
* @author Martin Fleurke
* @author Didier Briel
* @author Alex Buloichik (alex73mail@gmail.com)
*/
public class DefaultXMLDialect implements XMLDialect {

    // /////////////////////////////////////////////////////////////////////////
    // Dialect definition: tags
    // /////////////////////////////////////////////////////////////////////////

    /** The set of defined paragraph tags. */
    private final Set<String> paragraphTags = new HashSet<String>();

    /**
     * Defines a paragraph tag. Defining the same tag again has no further
     * effect.
     *
     * @param tag
     *            Tag name.
     */
    public void defineParagraphTag(String tag) {
        paragraphTags.add(tag);
    }

    /**
     * Defines a set of paragraph tags from an array. Duplicates are allowed.
     *
     * @param tags
     *            Tag names.
     */
    public void defineParagraphTags(String[] tags) {
        for (String tag : tags) {
            defineParagraphTag(tag);
        }
    }

    /** The defined content based tags, mapped to their type. */
    private final Map<String, Tag.Type> contentBasedTags = new HashMap<String, Tag.Type>();

    /**
     * Defines a content based tag. Defining the same tag again replaces the
     * type previously stored for it.
     *
     * @param tag
     *            Tag name.
     * @param type
     *            Type to associate with the tag.
     */
    public void defineContentBasedTag(String tag, Tag.Type type) {
        contentBasedTags.put(tag, type);
    }

    /** The set of defined tags that surround preformatted text. */
    private final Set<String> preformatTags = new HashSet<String>();

    /**
     * Defines a preformat tag. Defining the same tag again has no further
     * effect.
     *
     * @param tag
     *            Tag name.
     */
    public void definePreformatTag(String tag) {
        preformatTags.add(tag);
    }

    /**
     * Defines a set of preformat tags from an array. Duplicates are allowed.
     *
     * @param tags
     *            Tag names.
     */
    public void definePreformatTags(String[] tags) {
        for (String tag : tags) {
            definePreformatTag(tag);
        }
    }

    /** The set of defined tags that surround intact text. */
    private final Set<String> intactTags = new HashSet<String>();

    /**
     * Defines an intact tag. Defining the same tag again has no further
     * effect.
     *
     * @param tag
     *            Tag name.
     */
    public void defineIntactTag(String tag) {
        intactTags.add(tag);
    }

    /**
     * Defines a set of intact tags from an array. Duplicates are allowed.
     *
     * @param tags
     *            Tag names.
     */
    public void defineIntactTags(String[] tags) {
        for (String tag : tags) {
            defineIntactTag(tag);
        }
    }

    // /////////////////////////////////////////////////////////////////////////
    // Dialect definition: attributes
    // /////////////////////////////////////////////////////////////////////////

    /** The translatable attributes defined per tag (tag name to attribute names). */
    private final MultiMap<String, String> translatableTagAttributes = new MultiMap<String, String>();

    /**
     * Defines a translatable attribute of a tag.
     *
     * @param tag
     *            Tag name.
     * @param attribute
     *            Name of the attribute that is translatable for this tag.
     */
    public void defineTranslatableTagAttribute(String tag, String attribute) {
        translatableTagAttributes.put(tag, attribute);
    }

    /**
     * Defines several translatable attributes of a single tag.
     *
     * @param tag
     *            Tag name.
     * @param attributes
     *            Names of the attributes that are translatable for this tag.
     */
    public void defineTranslatableTagAttributes(String tag, String[] attributes) {
        for (String attribute : attributes) {
            defineTranslatableTagAttribute(tag, attribute);
        }
    }

    /**
     * Defines a single translatable attribute for several tags.
     *
     * @param tags
     *            Tag names.
     * @param attribute
     *            Name of the attribute that is translatable for each tag.
     */
    public void defineTranslatableTagsAttribute(String[] tags, String attribute) {
        for (String tag : tags) {
            defineTranslatableTagAttribute(tag, attribute);
        }
    }

    /** The set of attributes that are always translatable, whatever their tag. */
    private final Set<String> translatableAttributes = new HashSet<String>();

    /**
     * Defines an always translatable attribute (no matter what tag it belongs
     * to).
     *
     * @param attribute
     *            Attribute name.
     */
    public void defineTranslatableAttribute(String attribute) {
        translatableAttributes.add(attribute);
    }

    /**
     * Defines always translatable attributes (no matter what tag they belong
     * to).
     *
     * @param attributes
     *            Attribute names.
     */
    public void defineTranslatableAttributes(String[] attributes) {
        for (String attribute : attributes) {
            defineTranslatableAttribute(attribute);
        }
    }

    // /////////////////////////////////////////////////////////////////////////
    // Dialect definition: out of turn tags, constraints, shortcuts
    // /////////////////////////////////////////////////////////////////////////

    /**
     * The set of defined out of turn tags that surround chunks of text that
     * should be translated separately, not breaking currently collected text.
     */
    private final Set<String> outOfTurnTags = new HashSet<String>();

    /**
     * Defines an out of turn tag. Such a tag surrounds a chunk of text that
     * should be translated separately, not breaking currently collected text.
     *
     * @param tag
     *            Tag name.
     */
    public void defineOutOfTurnTag(String tag) {
        outOfTurnTags.add(tag);
    }

    /**
     * Defines out of turn tags. Such tags surround chunks of text that should
     * be translated separately, not breaking currently collected text.
     *
     * @param tags
     *            Tag names.
     */
    public void defineOutOfTurnTags(String[] tags) {
        for (String tag : tags) {
            defineOutOfTurnTag(tag);
        }
    }

    /**
     * The defined constraints: constraint type mapped to the regular
     * expression for the constrained string.
     */
    Map<Integer, Pattern> constraints = new HashMap<Integer, Pattern>();

    /**
     * Defines a constraint to restrict supported subset of XML files. There can
     * be only one constraint of each type: defining a constraint for a type
     * that already has one replaces it.
     *
     * @param constraintType
     *            Type of constraint, see CONSTRAINT_... constants.
     * @param template
     *            Regular expression for a specified constrained string.
     */
    public void defineConstraint(Integer constraintType, Pattern template) {
        constraints.put(constraintType, template);
    }

    /** The defined shortcuts: tag name mapped to its shortcut. */
    Map<String, String> shortcuts = new HashMap<String, String>();

    /**
     * Defines a shortcut for a tag, useful for formatting tags. Shortcut is a
     * short form of a tag visible to translator, and stored in OmegaT's flavor
     * of TMX files.
     *
     * @param tag
     *            Tag name.
     * @param shortcut
     *            The shortcut for a tag.
     */
    public void defineShortcut(String tag, String shortcut) {
        shortcuts.put(tag, shortcut);
    }

    /**
     * Defines shortcuts for formatting tags. An alternative to calling
     * {@link #defineShortcut(String,String)} multiple times.
     * <p>
     * If the array has an odd number of elements, the trailing element has no
     * matching shortcut and is ignored.
     *
     * @param mappings
     *            Array of strings, where even elements (0th, 2nd, etc) are
     *            tags, and odd elements are their corresponding shortcuts.
     */
    public void defineShortcuts(String[] mappings) {
        // Walk the array pair by pair; stop as soon as a full pair no longer fits.
        for (int i = 0; i + 1 < mappings.length; i += 2) {
            defineShortcut(mappings[i], mappings[i + 1]);
        }
    }

    // /////////////////////////////////////////////////////////////////////////
    // XMLDialect Interface Implementation
    // /////////////////////////////////////////////////////////////////////////

    /**
     * Returns the set of defined paragraph tags.
     * <p>
     * The returned set is the internal one, not a copy.
     */
    @Override
    public Set<String> getParagraphTags() {
        return paragraphTags;
    }

    /**
     * Returns the map of content based tags to their type.
     * <p>
     * The returned map is the internal one, not a copy.
     */
    @Override
    public Map<String, Tag.Type> getContentBasedTags() {
        return contentBasedTags;
    }

    /**
     * Returns the set of tags that surround preformatted text.
     * <p>
     * The returned set is the internal one, not a copy.
     */
    @Override
    public Set<String> getPreformatTags() {
        return preformatTags;
    }

    /**
     * Returns the set of tags that surround intact portions of document, that
     * should not be translated at all.
     * <p>
     * The returned set is the internal one, not a copy.
     */
    @Override
    public Set<String> getIntactTags() {
        return intactTags;
    }

    /**
     * Returns the multimap of translatable attributes of each tag (tag name to
     * attribute names).
     * <p>
     * The returned multimap is the internal one, not a copy.
     */
    @Override
    public MultiMap<String, String> getTranslatableTagAttributes() {
        return translatableTagAttributes;
    }

    /**
     * Returns for a given attribute of a given tag if the attribute should be
     * translated with the given other attributes present. If the tagAttribute
     * is returned by getTranslatable(Tag)Attributes(), this function is called
     * to further test the attribute within its context. This allows for example
     * the XHTML filter to not translate the value attribute of an
     * input-element, except when it is a button or submit or reset.
     * <p>
     * This default implementation accepts every attribute.
     *
     * @param tag
     *            The tag the attribute belongs to
     * @param attribute
     *            The attribute that could be translated
     * @param atts
     *            The list of the tag attributes
     * @return always <code>true</code>
     */
    @Override
    public Boolean validateTranslatableTagAttribute(String tag, String attribute, Attributes atts) {
        return true;
    }

    /**
     * For a given tag, return whether the content of this tag should be kept
     * intact (not translated), depending on the content of one attribute and
     * the presence or absence of other attributes. For instance, in the ResX
     * filter, tags should not be translated when they contain the attribute
     * {@code type}, or when the attribute {@code name} starts with
     * {@code ">"}.
     * <p>
     * This default implementation never marks a tag as intact.
     *
     * @param tag
     *            The tag that could be translated
     * @param atts
     *            The list of the tag attributes
     * @return always <code>false</code>
     */
    @Override
    public Boolean validateIntactTag(String tag, Attributes atts) {
        return false;
    }

    /**
     * For a given tag, return whether it should be treated as a content based
     * tag, depending on its attributes.
     * <p>
     * This default implementation never marks a tag as content based.
     *
     * @param tag
     *            The tag that could be content based
     * @param atts
     *            The list of the tag attributes
     * @return always <code>false</code>
     */
    @Override
    public Boolean validateContentBasedTag(String tag, Attributes atts) {
        return false;
    }

    /**
     * For a given tag, return whether the content of this tag should be
     * translated, depending on the content of one attribute and the presence or
     * absence of other attributes. For instance, in the Typo3 filter, tags
     * should be translated when the attribute {@code localizable="1"}.
     * Contrary to validateIntactTag, this applies only to the current tag, and
     * the tags contained in it are not affected.
     * <p>
     * This default implementation accepts every tag.
     *
     * @param tag
     *            The tag that could be translated
     * @param atts
     *            The list of the tag attributes
     * @return always <code>true</code>
     */
    @Override
    public Boolean validateTranslatableTag(String tag, Attributes atts) {
        return true;
    }

    /**
     * For a given tag, return whether this tag is a paragraph tag, depending
     * on the content of one attribute (and/or the presence or absence of other
     * attributes). For instance, in the XLIFF filter, the {@code <mark>} tag
     * should start a new paragraph when the attribute {@code mtype} contains
     * "seg".
     * <p>
     * This default implementation never marks a tag as a paragraph tag.
     *
     * @param tag
     *            The tag that could be a paragraph tag
     * @param atts
     *            The list of the tag attributes
     * @return always <code>false</code>
     */
    @Override
    public Boolean validateParagraphTag(String tag, Attributes atts) {
        return false;
    }

    /**
     * For a given tag, return whether this tag is a preformat tag, depending
     * on the content of one attribute (and/or the presence or absence of other
     * attributes). For instance, in the XLIFF filter, the {@code <mark>} tag
     * should be a preformat tag when the attribute {@code mtype} contains
     * "seg".
     * <p>
     * This default implementation never marks a tag as a preformat tag.
     *
     * @param tag
     *            The tag that could be a preformat tag
     * @param atts
     *            The list of the tag attributes
     * @return always <code>false</code>
     */
    @Override
    public Boolean validatePreformatTag(String tag, Attributes atts) {
        return false;
    }

    /**
     * Returns the set of translatable attributes (no matter what tag they
     * belong to).
     * <p>
     * The returned set is the internal one, not a copy.
     */
    @Override
    public Set<String> getTranslatableAttributes() {
        return translatableAttributes;
    }

    /**
     * Returns the set of "out-of-turn" tags. Such tags specify chunks of text
     * that should be translated separately, not breaking currently collected
     * text entry. For example, footnotes in OpenDocument.
     * <p>
     * The returned set is the internal one, not a copy.
     */
    @Override
    public Set<String> getOutOfTurnTags() {
        return outOfTurnTags;
    }

    /**
     * Returns defined constraints to restrict supported subset of XML files.
     * There can be only one constraint of each type, see CONSTRAINT_...
     * constants.
     * <p>
     * Each entry maps an {@link Integer} to a {@link Pattern} -- regular
     * expression for a specified constrained string. The returned map is the
     * internal one, not a copy.
     */
    @Override
    public Map<Integer, Pattern> getConstraints() {
        return constraints;
    }

    /**
     * Resolves external entities if child filter needs it. Default
     * implementation returns <code>null</code>.
     *
     * @param publicId
     *            Public identifier of the entity (unused here)
     * @param systemId
     *            System identifier of the entity (unused here)
     * @return always <code>null</code>
     */
    @Override
    public InputSource resolveEntity(String publicId, String systemId) {
        return null;
    }

    /**
     * Returns the map of tags to their shortcuts.
     * <p>
     * Each entry maps a {@link String} to a {@link String} -- a tag to its
     * shortcut. The returned map is the internal one, not a copy.
     */
    @Override
    public Map<String, String> getShortcuts() {
        return shortcuts;
    }

    // /////////////////////////////////////////////////////////////////////////
    // Boolean settings
    // /////////////////////////////////////////////////////////////////////////

    /**
     * The parameter setting whether closing tags should be used. Off by
     * default.
     */
    private boolean closingTagRequired = false;

    /**
     * Sets closingTag to <code>true</code> or <code>false</code>
     *
     * @param onOff
     *            The parameter setting whether closing tags should be used or
     *            not for empty tags.
     */
    @Override
    public void setClosingTagRequired(boolean onOff) {
        closingTagRequired = onOff;
    }

    /**
     * Gives the value of closingTag
     *
     * @return the value last given to {@link #setClosingTagRequired(boolean)},
     *         or <code>false</code> if it was never called
     */
    @Override
    public Boolean getClosingTagRequired() {
        return closingTagRequired;
    }

    /**
     * The parameter setting whether tags aggregation can be enabled. Off by
     * default.
     */
    private boolean tagsAggregationEnabled = false;

    /**
     * {@inheritDoc}
     */
    @Override
    public void setTagsAggregationEnabled(boolean onOff) {
        tagsAggregationEnabled = onOff;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public Boolean getTagsAggregationEnabled() {
        return tagsAggregationEnabled;
    }

    /**
     * The parameter setting whether space preserving is forced. Off by
     * default.
     */
    private boolean forceSpacePreserving = false;

    /**
     * {@inheritDoc}
     */
    @Override
    public Boolean getForceSpacePreserving() {
        return forceSpacePreserving;
    }

    /**
     * {@inheritDoc}
     */
    @Override
    public void setForceSpacePreserving(boolean onOff) {
        forceSpacePreserving = onOff;
    }

    // /////////////////////////////////////////////////////////////////////////
    // Shortcut construction
    // /////////////////////////////////////////////////////////////////////////

    /**
     * {@inheritDoc}
     * <p>
     * This implementation concatenates the shortcut form of every element, in
     * order. The given list of protected parts is emptied first and then
     * receives one {@link ProtectedPart} for each element that is not a
     * {@link Text}.
     */
    @Override
    public String constructShortcuts(List<Element> elements, List<ProtectedPart> protectedParts) {
        // Start from a clean list: the caller's previous content is discarded.
        protectedParts.clear();
        StringBuilder result = new StringBuilder();
        for (Element element : elements) {
            String shortcut = element.toShortcut();
            result.append(shortcut);
            if (element instanceof Text) {
                // Plain text is translatable content, not a protected part.
                continue;
            }
            protectedParts.add(createProtectedPart(element, shortcut));
        }
        return result.toString();
    }

    /**
     * Builds the protected part describing a non-text element.
     *
     * @param element
     *            The element to describe
     * @param shortcut
     *            The shortcut form of the element, as it appears in the
     *            source segment
     * @return the protected part for the element
     */
    private static ProtectedPart createProtectedPart(Element element, String shortcut) {
        ProtectedPart part = new ProtectedPart();
        part.setTextInSourceSegment(shortcut);
        part.setDetailsFromSourceFile(element.toOriginal());
        // The replacement used for word counting depends on the statistics
        // setting for standard tags.
        String wordsCountReplacement;
        if (StatisticsSettings.isCountingStandardTags()) {
            wordsCountReplacement = element.toSafeCalcShortcut();
        } else {
            wordsCountReplacement = StaticUtils.TAG_REPLACEMENT;
        }
        part.setReplacementWordsCountCalculation(wordsCountReplacement);
        part.setReplacementUniquenessCalculation(StaticUtils.TAG_REPLACEMENT);
        part.setReplacementMatchCalculation(StaticUtils.TAG_REPLACEMENT);
        return part;
    }
}