/************************************************************************** OmegaT - Computer Assisted Translation (CAT) tool with fuzzy matching, translation memory, keyword search, glossaries, and translation leveraging into updated projects. Copyright (C) 2000-2006 Keith Godfrey and Maxym Mykhalchuk 2007-2008 Didier Briel, Alex Buloichik, Martin Fleurke 2012 Didier Briel Home page: http://www.omegat.org/ Support center: http://groups.yahoo.com/group/OmegaT/ This file is part of OmegaT. OmegaT is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. OmegaT is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see <http://www.gnu.org/licenses/>. **************************************************************************/ package org.omegat.filters3.xml.xhtml; import java.net.URL; import java.util.HashMap; import java.util.regex.Pattern; import java.util.regex.PatternSyntaxException; import org.omegat.filters3.Attribute; import org.omegat.filters3.Attributes; import org.omegat.filters3.xml.DefaultXMLDialect; import org.omegat.util.Log; import org.omegat.util.StringUtil; import org.xml.sax.InputSource; /** * This class specifies XHTML dialect of XML. * * @author Maxym Mykhalchuk * @author Didier Briel * @author Alex Buloichik * @author Martin Fleurke */ public class XHTMLDialect extends DefaultXMLDialect { private static final Pattern XHTML_PUBLIC_DTD = Pattern.compile("-//W3C//DTD XHTML.*"); public XHTMLDialect() { defineConstraint(CONSTRAINT_PUBLIC_DOCTYPE, XHTML_PUBLIC_DTD); } private static final Pattern PUBLIC_XHTML = Pattern.compile("-//W3C//DTD\\s+XHTML.+"); private static final String DTD = "/org/omegat/filters3/xml/xhtml/res/xhtml2-flat.dtd"; private Boolean translateValue = false; private Boolean translateButtonValue = false; /** * A regular Expression Pattern to be matched to the strings to be * translated. If there is a match, the string should not be translated */ private Pattern skipRegExpPattern; /** * A map of attribute-name and attribute value pairs that, if exist in a * meta-tag, indicate that the meta-tag should not be translated */ private HashMap<String, String> skipMetaAttributes; /** * A map of attribute-name and attribute value pairs that, if exist in a * tag, indicate that this tag should not be translated */ private HashMap<String, String> ignoreTagsAttributes; /** * Resolves external entites if child filter needs it. Default * implementation returns <code>null</code>. */ @Override public InputSource resolveEntity(String publicId, String systemId) { if (publicId != null && PUBLIC_XHTML.matcher(publicId).matches() && systemId.endsWith(".dtd")) { URL dtdresource = XHTMLDialect.class.getResource(DTD); return new InputSource(dtdresource.toExternalForm()); } else return null; } /** * Actually defines the dialect. It cannot be done during creation, because * options are not known at that step. */ public void defineDialect(XHTMLOptions options) { defineParagraphTags(new String[] { "html", "head", "title", "body", "address", "blockquote", "center", "div", "h1", "h2", "h3", "h4", "h5", "table", "th", "tr", "td", "p", "ol", "ul", "li", "dl", "dt", "dd", "form", "textarea", "fieldset", "legend", "label", "select", "option", "hr" }); // Optional paragraph on BR if (options.getParagraphOnBr()) defineParagraphTag("br"); defineShortcut("br", "br"); definePreformatTags(new String[] { "textarea", "pre", }); defineIntactTags(new String[] { "style", "script", "object", "embed", }); defineTranslatableAttributes(new String[] { "abbr", "alt", "content", "summary", "title", "placeholder"}); if (options.getTranslateHref()) defineTranslatableAttribute("href"); if (options.getTranslateSrc()) defineTranslatableTagAttribute("img", "src"); if (options.getTranslateLang()) defineTranslatableAttributes(new String[] { "lang", "xml:lang", }); if (options.getTranslateHreflang()) defineTranslatableAttribute("hreflang"); if ((this.translateValue = options.getTranslateValue()) || (this.translateButtonValue = options.getTranslateButtonValue())) defineTranslatableTagAttribute("input", "value"); // Prepare matcher String skipRegExp = options.getSkipRegExp(); if (!StringUtil.isEmpty(skipRegExp)) { try { this.skipRegExpPattern = Pattern.compile(skipRegExp, Pattern.CASE_INSENSITIVE); } catch (PatternSyntaxException e) { Log.log(e); } } // Prepare set of attributes that indicate not to translate a meta-tag String skipMetaString = options.getSkipMeta(); skipMetaAttributes = new HashMap<String, String>(); String[] skipMetaAttributesStringarray = skipMetaString.split(","); for (int i = 0; i < skipMetaAttributesStringarray.length; i++) { String keyvalue = skipMetaAttributesStringarray[i].trim().toUpperCase(); skipMetaAttributes.put(keyvalue, ""); } // Prepare set of attributes that indicate that a tag should be intact String ignoreTagsString = options.getIgnoreTags(); ignoreTagsAttributes = new HashMap<String, String>(); String[] ignoreTagsAttributesStringarray = ignoreTagsString.split(","); for (int i = 0; i < ignoreTagsAttributesStringarray.length; i++) { String keyvalue = ignoreTagsAttributesStringarray[i].trim().toUpperCase(); ignoreTagsAttributes.put(keyvalue, ""); } } /** * Returns for a given attribute of a given tag if the attribute should be * translated with the given other attributes present. If the tagAttribute * is returned by getTranslatable(Tag)Attributes(), this function is called * to further test the attribute within its context. This allows for example * the XHTML filter to not translate the value attribute of an * input-element, except when it is a button or submit or reset. */ @Override public Boolean validateTranslatableTagAttribute(String tag, String attribute, Attributes atts) { // special case: if ("INPUT".equalsIgnoreCase(tag) && attribute.equalsIgnoreCase("value")) { // special handling of input tags value attribute. if (this.translateValue) return true; else if (this.translateButtonValue) { // translate the value only for buttons for (int i = 0; i < atts.size(); i++) { Attribute otherAttribute = atts.get(i); if ("type".equalsIgnoreCase(otherAttribute.getName()) && ("button".equalsIgnoreCase(otherAttribute.getValue()) || "submit".equalsIgnoreCase(otherAttribute.getValue()) || "reset" .equalsIgnoreCase(otherAttribute.getValue()))) { return super.validateTranslatableTagAttribute(tag, attribute, atts); } } // don't translate for other input elements return false; } else // should not be possible, because // validateTranslatableTagAttribute // is only called when input.value is in // translatable(Tag)Attributes. return super.validateTranslatableTagAttribute(tag, attribute, atts); } else if ("META".equalsIgnoreCase(tag) && "content".equalsIgnoreCase(attribute)) { // Special handling of meta-tag: depending on the other attributes // the content attribute should or should not be translated. // The group of attribute-value pairs indicating non-translation // are stored in the configuration boolean doSkipMetaTag = false; for (int i = 0; i < atts.size(); i++) { Attribute otherAttribute = atts.get(i); String name = otherAttribute.getName(); String value = otherAttribute.getValue(); if (name == null || value == null) continue; doSkipMetaTag = checkDoSkipMetaTag(name, value); if (doSkipMetaTag) break; } if (doSkipMetaTag) { return false; } else { return super.validateTranslatableTagAttribute(tag, attribute, atts); } } else { // default: return super.validateTranslatableTagAttribute(tag, attribute, atts); } } public Pattern getSkipRegExpPattern() { return skipRegExpPattern; } public HashMap<String, String> getSkipMetaAttributes() { return skipMetaAttributes; } public boolean checkDoSkipMetaTag(String key, String value) { return skipMetaAttributes.containsKey(key.toUpperCase() + "=" + value.toUpperCase()); } private boolean checkIgnoreTags(String key, String value) { return ignoreTagsAttributes.containsKey(key.toUpperCase() + "=" + value.toUpperCase()); } /** * In the XHTML filter, content should be translated in the * following condition: The pair attribute-value should not have been * declared as untranslatable in the options * * @param tag * An XML tag * @param atts * The attributes associated with the tag *@return <code>false</code> if the content of this tag should be * translated, <code>true</code> otherwise */ @Override public Boolean validateIntactTag(String tag, Attributes atts) { if (atts != null) { for (int i = 0; i < atts.size(); i++) { Attribute oneAttribute = atts.get(i); if (checkIgnoreTags(oneAttribute.getName(), oneAttribute.getValue())) { return true; } } } // If no key=value pair is found, the tag can be translated return false; } }