/* * Copyright 2009 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.template.soy.internal.i18n; import com.google.common.base.Preconditions; import com.google.common.html.HtmlEscapers; import com.google.template.soy.data.Dir; import com.ibm.icu.util.ULocale; import javax.annotation.Nullable; /** * Utility class for formatting text for display in a potentially opposite-directionality context * without garbling. The directionality of the context is set at formatter creation and the * directionality of the text can be either estimated or passed in when known. Provides the * following functionality: * * <p>1. Bidi Wrapping When text in one language is mixed into a document in another, * opposite-directionality language, e.g. when an English business name is embedded in a Hebrew web * page, both the inserted string and the text surrounding it may be displayed incorrectly unless * the inserted string is explicitly separated from the surrounding text in a "wrapper" that: * * <p>- Declares its directionality so that the string is displayed correctly. This can be done in * HTML markup (e.g. a 'span dir="rtl"' element) by {@link #spanWrap} and similar methods, or - only * in contexts where markup can't be used - in Unicode bidi formatting codes by {@link #unicodeWrap} * and similar methods. Optionally, the markup can be inserted even when the directionality is the * same, in order to keep the DOM structure more stable. * * <p>- Isolates the string's directionality, so it does not unduly affect the surrounding content. * Currently, this can only be done using invisible Unicode characters of the same direction as the * context (LRM or RLM) in addition to the directionality declaration above, thus "resetting" the * directionality to that of the context. The "reset" may need to be done at both ends of the * string. Without "reset" after the string, the string will "stick" to a number or logically * separate opposite-direction text that happens to follow it in-line (even if separated by neutral * content like spaces and punctuation). Without "reset" before the string, the same can happen * there, but only with more opposite-direction text, not a number. One approach is to "reset" the * direction only after each string, on the theory that if the preceding opposite- direction text is * itself bidi-wrapped, the "reset" after it will prevent the sticking. (Doing the "reset" only * before each string definitely does not work because we do not want to require bidi-wrapping * numbers, and a bidi-wrapped opposite-direction string could be followed by a number.) Still, the * safest policy is to do the "reset" on both ends of each string, since RTL message translations * often contain untranslated Latin-script brand names and technical terms, and one of these can be * followed by a bidi-wrapped inserted value. On the other hand, when one has such a message, it is * best to do the "reset" manually in the message translation itself, since the message's * opposite-direction text could be followed by an inserted number, which we would not bidi-wrap * anyway. Thus, "reset" only after the string is the current default. In an alternative to "reset", * recent additions to the HTML, CSS, and Unicode standards allow the isolation to be part of the * directionality declaration. This form of isolation is better than "reset" because it takes less * space, does not require knowing the context directionality, has a gentler effect than "reset", * and protects both ends of the string. However, we do not yet allow using it because required * platforms do not yet support it. * * <p>Providing these wrapping services is the basic purpose of the bidi formatter. * * <p>2. Directionality estimation How does one know whether a string about to be inserted into * surrounding text has the same directionality? Well, in many cases, one knows that this must be * the case when writing the code doing the insertion, e.g. when a localized message is inserted * into a localized page. In such cases there is no need to involve the bidi formatter at all. In * some other cases, it need not be the same as the context, but is either constant (e.g. urls are * always LTR) or otherwise known. In the remaining cases, e.g. when the string is user-entered or * comes from a database, the language of the string (and thus its directionality) is not known a * priori, and must be estimated at run-time. The bidi formatter can do this automatically. * * <p>3. Escaping When wrapping plain text - i.e. text that is not already HTML or HTML-escaped - in * HTML markup, the text must first be HTML-escaped to prevent XSS attacks and other nasty business. * This of course is always true, but the escaping can not be done after the string has already been * wrapped in markup, so the bidi formatter also serves as a last chance and includes escaping * services. * * <p>Thus, in a single call, the formatter can escape the input string as specified, determine its * directionality, and wrap it as necessary. It is then up to the caller to insert the return value * in the output. */ public class BidiFormatter { /** A class for building a BidiFormatter with non-default options. */ public static final class Builder { private Dir contextDir; private int flags; /** * Constructor * * @param contextDir The context directionality. Must not be NEUTRAL. It can be (Dir) null to * indicate that the context is unknown, but this is not recommended: the wrapping methods * then wrap text of either directionality, and cannot "reset" the directionality back to * the context. */ public Builder(@Nullable Dir contextDir) { Preconditions.checkArgument(contextDir != Dir.NEUTRAL); initialize(contextDir); } /** * Constructor * * @param rtlContext Whether the context directionality is RTL */ public Builder(boolean rtlContext) { initialize(rtlContext ? Dir.RTL : Dir.LTR); } /** * Constructor * * @param locale The context locale */ public Builder(ULocale locale) { initialize(BidiUtils.languageDir(locale)); } /** * Initializes the builder with the given context directionality and default options. * * @param contextDir The context directionality. */ private void initialize(@Nullable Dir contextDir) { this.contextDir = contextDir; this.flags = DEFAULT_FLAGS; } /** * Specifies whether the {@link #spanWrap} and {@link #spanWrapWithKnownDir} methods of the * BidiFormatter to be built should produce a stable span structure, i.e. wrap the string in a * span even when its directionality does not need to be declared. The default is false. */ public Builder alwaysSpan(boolean alwaysSpan) { if (alwaysSpan) { flags |= FLAG_ALWAYS_SPAN; } else { flags &= ~FLAG_ALWAYS_SPAN; } return this; } /** * Specifies whether the BidiFormatter to be built should also "reset" directionality before a * string being bidi-wrapped, not just after it. The default is false. */ public Builder stereoReset(boolean stereoReset) { if (stereoReset) { flags |= FLAG_STEREO_RESET; } else { flags &= ~FLAG_STEREO_RESET; } return this; } /** @return A BidiFormatter with the specified options. */ public BidiFormatter build() { if (flags == DEFAULT_FLAGS) { if (contextDir == Dir.LTR) { return DEFAULT_LTR_INSTANCE; } if (contextDir == Dir.RTL) { return DEFAULT_RTL_INSTANCE; } } return new BidiFormatter(contextDir, flags); } } private static final int FLAG_ALWAYS_SPAN = 1; private static final int FLAG_STEREO_RESET = 2; // We will soon also need the following: // private static final int FLAG_UNICODE_ISOLATES_SUPPORTED // private static final int FLAG_HTML_ISOLATES_SUPPORTED private static final int DEFAULT_FLAGS = 0; private static final BidiFormatter DEFAULT_LTR_INSTANCE = new BidiFormatter(Dir.LTR, DEFAULT_FLAGS); private static final BidiFormatter DEFAULT_RTL_INSTANCE = new BidiFormatter(Dir.RTL, DEFAULT_FLAGS); private final Dir contextDir; private final int flags; /** * Factory for creating an instance of BidiFormatter given the context directionality. The default * behavior of {@link #spanWrap} and its variations is set to avoid span wrapping unless there's a * reason ('dir' attribute should be appended). * * @param contextDir The context directionality. Must not be NEUTRAL. It can be (Dir) null to * indicate that the context is unknown, but this is not recommended: the wrapping methods * then wrap text of either directionality, and cannot "reset" the directionality back to the * context. */ public static BidiFormatter getInstance(@Nullable Dir contextDir) { return new Builder(contextDir).build(); } /** * Factory for creating an instance of BidiFormatter given the context directionality. The default * behavior of {@link #spanWrap} and its variations is set to avoid span wrapping unless there's a * reason ('dir' attribute should be appended). * * @param rtlContext Whether the context directionality is RTL */ public static BidiFormatter getInstance(boolean rtlContext) { return new Builder(rtlContext).build(); } /** * Factory for creating an instance of BidiFormatter for an unknown directionality context. This * is NOT RECOMMENDED: the wrapping methods then wrap text of either directionality, and cannot * "reset" the directionality back to the context. The default behavior of {@link #spanWrap} and * its variations is set to avoid span wrapping when it can (which is only for neutral content). */ public static BidiFormatter getInstanceWithNoContext() { return new Builder((Dir) null).build(); } /** * @param contextDir The context directionality * @param flags The option flags */ private BidiFormatter(@Nullable Dir contextDir, int flags) { this.contextDir = contextDir; this.flags = flags; } /** @return The context directionality */ @Nullable public Dir getContextDir() { return contextDir; } /** @return Whether the context directionality is RTL */ public boolean isRtlContext() { return contextDir == Dir.RTL; } /** * @return Whether the {@link #spanWrap} and {@link #spanWrapWithKnownDir} methods should produce * a stable span structure, i.e. wrap the string in a span even when its directionality does * not need to be declared. */ public boolean getAlwaysSpan() { return (flags & FLAG_ALWAYS_SPAN) != 0; } /** * @return Whether directionality "reset" should also be done before a string being bidi-wrapped, * not just after it. */ public boolean getStereoReset() { return (flags & FLAG_STEREO_RESET) != 0; } /** * Returns "rtl" if {@code str}'s estimated directionality is RTL, and "ltr" if it is LTR. In case * it's NEUTRAL, returns "rtl" if the context directionality is RTL, and "ltr" otherwise. Needed * for GXP, which can't handle dirAttr. * * <p>Example use case: <td expr:dir='bidiFormatter.dirAttrValue(foo)'><gxp:eval expr='foo'></td> * * @param str String whose directionality is to be estimated * @param isHtml Whether {@code str} is HTML / HTML-escaped * @return "rtl" if {@code str}'s estimated directionality is RTL, and "ltr" otherwise. */ public String dirAttrValue(String str, boolean isHtml) { return knownDirAttrValue(estimateDirection(str, isHtml)); } /** * Returns "rtl" if the given directionality is RTL, and "ltr" if it is LTR. In case the given * directionality is NEUTRAL, returns "rtl" if the context directionality is RTL, and "ltr" * otherwise. * * @param dir Given directionality. Must not be null. * @return "rtl" if the given directionality is RTL, and "ltr" otherwise. */ public String knownDirAttrValue(Dir dir) { Preconditions.checkNotNull(dir); if (dir == Dir.NEUTRAL) { dir = contextDir; } return dir == Dir.RTL ? "rtl" : "ltr"; } /** * Returns "dir=\"ltr\"" or "dir=\"rtl\"", depending on {@code str}'s estimated directionality, if * it is not the same as the context directionality. Otherwise, returns the empty string. * * @param str String whose directionality is to be estimated * @param isHtml Whether {@code str} is HTML / HTML-escaped * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR * context; else, the empty string. */ public String dirAttr(String str, boolean isHtml) { return knownDirAttr(estimateDirection(str, isHtml)); } /** * Operates like {@link #dirAttr(String, boolean)}, but assumes {@code isHtml} is false. * * @param str String whose directionality is to be estimated * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR * context; else, the empty string. */ public String dirAttr(String str) { return dirAttr(str, false); } /** * Returns "dir=\"ltr\"" or "dir=\"rtl\"", depending on the given directionality, if it is not * NEUTRAL or the same as the context directionality. Otherwise, returns "". * * @param dir Given directionality. Must not be null. * @return "dir=\"rtl\"" for RTL text in non-RTL context; "dir=\"ltr\"" for LTR text in non-LTR * context; else, the empty string. */ public String knownDirAttr(Dir dir) { Preconditions.checkNotNull(dir); if (dir != contextDir) { return dir == Dir.LTR ? "dir=\"ltr\"" : dir == Dir.RTL ? "dir=\"rtl\"" : ""; } return ""; } /** * Formats a given string of unknown directionality for use in HTML output of the context * directionality, so an opposite-directionality string is neither garbled nor garbles its * surroundings. * * <p>The algorithm: estimates the directionality of the given string. In case its directionality * doesn't match the context directionality, wraps it with a 'span' element and adds a "dir" * attribute (either 'dir=\"rtl\"' or 'dir=\"ltr\"'). * * <p>If the formatter was built using {@link #alwaysSpan(true)}, the input is always wrapped in a * span, skipping just the dir attribute when it's not needed. * * <p>If {@code isolate}, directionally isolates the string so that it does not garble its * surroundings. Currently, this is done by "resetting" the directionality after the string by * appending a trailing Unicode bidi mark matching the context directionality (LRM or RLM) when * either the overall directionality or the exit directionality of the string is opposite to that * of the context. If the formatter was built using {@link #stereoReset(true)}, also prepends a * Unicode bidi mark matching the context directionality when either the overall directionality or * the entry directionality of the string is opposite to that of the context. * * <p>If !{@code isHtml}, HTML-escapes the string regardless of wrapping. * * @param str The input string * @param isHtml Whether {@code str} is HTML / HTML-escaped * @param isolate Whether to directionally isolate the string to prevent it from garbling the * content around it * @return Input string after applying the above processing. */ public String spanWrap(String str, boolean isHtml, boolean isolate) { return spanWrapWithKnownDir(null, str, isHtml, isolate); } /** * Operates like {@link #spanWrap(String, boolean, boolean)}, but assumes {@code isolate} is true. * * @param str The input string * @param isHtml Whether {@code str} is HTML / HTML-escaped * @return Input string after applying the above processing. */ public String spanWrap(String str, boolean isHtml) { return spanWrap(str, isHtml, true); } /** * Operates like {@link #spanWrap(String, boolean, boolean)}, but assumes {@code isHtml} is false * and {@code isolate} is true. * * @param str The input string * @return Input string after applying the above processing. */ public String spanWrap(String str) { return spanWrap(str, false, true); } /** * Formats a string of given directionality for use in HTML output of the context directionality, * so an opposite-directionality string is neither garbled nor garbles its surroundings. * * <p>The algorithm: In case the given directionality doesn't match the context directionality, * wraps the string with a 'span' element and adds a 'dir' attribute (either 'dir=\"rtl\"' or * 'dir=\"ltr\"'). * * <p>If the formatter was built using {@link #alwaysSpan(true)}, the input is always wrapped in a * span, skipping just the dir attribute when it's not needed. * * <p>If {@code isolate}, directionally isolates the string so that it does not garble its * surroundings. Currently, this is done by "resetting" the directionality after the string by * appending a trailing Unicode bidi mark matching the context directionality (LRM or RLM) when * either the overall directionality or the exit directionality of the string is opposite to that * of the context. If the formatter was built using {@link #stereoReset(true)}, also prepends a * Unicode bidi mark matching the context directionality when either the overall directionality or * the entry directionality of the string is opposite to that of the context. Note that as opposed * to the overall directionality, the entry and exit directionalities are determined from the * string itself. * * <p>If !{@code isHtml}, HTML-escapes the string regardless of wrapping. * * @param dir {@code str}'s directionality. If null, i.e. unknown, it is estimated. * @param str The input string * @param isHtml Whether {@code str} is HTML / HTML-escaped * @param isolate Whether to directionally isolate the string to prevent it from garbling the * content around it * @return Input string after applying the above processing. */ public String spanWrapWithKnownDir( @Nullable Dir dir, String str, boolean isHtml, boolean isolate) { if (dir == null) { dir = estimateDirection(str, isHtml); } String origStr = str; if (!isHtml) { str = HtmlEscapers.htmlEscaper().escape(str); } StringBuilder result = new StringBuilder(); if (getStereoReset() && isolate) { result.append(markBeforeKnownDir(dir, origStr, isHtml)); } boolean dirCondition = (dir != Dir.NEUTRAL && dir != contextDir); if (getAlwaysSpan() || dirCondition) { result.append("<span"); if (dirCondition) { result.append(' ').append(dir == Dir.RTL ? "dir=\"rtl\"" : "dir=\"ltr\""); } result.append('>').append(str).append("</span>"); } else { result.append(str); } if (isolate) { result.append(markAfterKnownDir(dir, origStr, isHtml)); } return result.toString(); } /** * Operates like {@link #spanWrapWithKnownDir(Dir, String, boolean, boolean)}, but assumes {@code * isolate} is true. * * @param dir {@code str}'s directionality * @param str The input string * @param isHtml Whether {@code str} is HTML / HTML-escaped * @return Input string after applying the above processing. */ public String spanWrapWithKnownDir(@Nullable Dir dir, String str, boolean isHtml) { return spanWrapWithKnownDir(dir, str, isHtml, true); } /** * Operates like {@link #spanWrapWithKnownDir(Dir, String, boolean, boolean)}, but assumes {@code * isHtml} is false and {@code isolate} is true. * * @param dir {@code str}'s directionality * @param str The input string * @return Input string after applying the above processing. */ public String spanWrapWithKnownDir(@Nullable Dir dir, String str) { return spanWrapWithKnownDir(dir, str, false, true); } /** * Formats a given string of unknown directionality for use in plain-text output of the context * directionality, so an opposite-directionality string is neither garbled nor garbles its * surroundings. As opposed to {@link #spanWrap}, this makes use of Unicode bidi formatting * characters. In HTML, its *only* valid use is inside elements within which markup is not * allowed, e.g. the 'option' and 'title' elements. * * <p>The algorithm: estimates the directionality of the given string. In case it doesn't match * the context directionality, wraps it with Unicode bidi formatting characters: RLE+{@code * str}+PDF for RTL text, or LRE+{@code str}+PDF for LTR text. * * <p>If {@code isolate}, directionally isolates the string so that it does not garble its * surroundings. Currently, this is done by "resetting" the directionality after the string by * appending a trailing Unicode bidi mark matching the context directionality (LRM or RLM) when * either the overall directionality or the exit directionality of the string is opposite to that * of the context. If the formatter was built using {@link #stereoReset(true)}, also prepends a * Unicode bidi mark matching the context directionality when either the overall directionality or * the entry directionality of the string is opposite to that of the context. * * <p>Does *not* do HTML-escaping regardless of the value of {@code isHtml}. * * @param str The input string * @param isHtml Whether {@code str} is HTML / HTML-escaped * @param isolate Whether to directionally isolate the string to prevent it from garbling the * content around it * @return Input string after applying the above processing. */ public String unicodeWrap(String str, boolean isHtml, boolean isolate) { return unicodeWrapWithKnownDir(null, str, isHtml, isolate); } /** * Operates like {@link #unicodeWrap(String, boolean, boolean)}, but assumes {@code isolate} is * true. * * @param str The input string * @param isHtml Whether {@code str} is HTML / HTML-escaped * @return Input string after applying the above processing. */ public String unicodeWrap(String str, boolean isHtml) { return unicodeWrap(str, isHtml, true); } /** * Operates like {@link #unicodeWrap(String, boolean, boolean)}, but assumes {@code isHtml} is * false and {@code isolate} is true. * * @param str The input string * @return Input string after applying the above processing. */ public String unicodeWrap(String str) { return unicodeWrap(str, false, true); } /** * Formats a string of given directionality for use in plain-text output of the context * directionality, so an opposite-directionality string is neither garbled nor garbles its * surroundings. As opposed to {@link #spanWrapWithKnownDir}, this makes use of Unicode bidi * formatting characters. In HTML, its *only* valid use is inside of elements that do not allow * markup, e.g. the 'option' and 'title' elements. * * <p>The algorithm: In case the given directionality doesn't match the context directionality, * wraps the string with Unicode bidi formatting characters: RLE+{@code str}+PDF for RTL text, or * LRE+{@code str}+PDF for LTR text. * * <p>If {@code isolate}, directionally isolates the string so that it does not garble its * surroundings. Currently, this is done by "resetting" the directionality after the string by * appending a trailing Unicode bidi mark matching the context directionality (LRM or RLM) when * either the overall directionality or the exit directionality of the string is opposite to that * of the context. If the formatter was built using {@link #stereoReset(true)}, also prepends a * Unicode bidi mark matching the context directionality when either the overall directionality or * the entry directionality of the string is opposite to that of the context. Note that as opposed * to the overall directionality, the entry and exit directionalities are determined from the * string itself. * * <p>Does *not* do HTML-escaping regardless of the value of {@code isHtml}. * * @param dir {@code str}'s directionality. If null, i.e. unknown, it is estimated. * @param str The input string * @param isHtml Whether {@code str} is HTML / HTML-escaped * @param isolate Whether to directionally isolate the string to prevent it from garbling the * content around it * @return Input string after applying the above processing. */ public String unicodeWrapWithKnownDir( @Nullable Dir dir, String str, boolean isHtml, boolean isolate) { if (dir == null) { dir = estimateDirection(str, isHtml); } StringBuilder result = new StringBuilder(); if (getStereoReset() && isolate) { result.append(markBeforeKnownDir(dir, str, isHtml)); } if (dir != Dir.NEUTRAL && dir != contextDir) { result.append(dir == Dir.RTL ? BidiUtils.Format.RLE : BidiUtils.Format.LRE); result.append(str); result.append(BidiUtils.Format.PDF); } else { result.append(str); } if (isolate) { result.append(markAfterKnownDir(dir, str, isHtml)); } return result.toString(); } /** * Operates like {@link #unicodeWrapWithKnownDir(Dir, String, boolean, boolean)}, but assumes * {@code isolate} is true. * * @param dir {@code str}'s directionality * @param str The input string * @param isHtml Whether {@code str} is HTML / HTML-escaped * @return Input string after applying the above processing. */ public String unicodeWrapWithKnownDir(@Nullable Dir dir, String str, boolean isHtml) { return unicodeWrapWithKnownDir(dir, str, isHtml, true); } /** * Operates like {@link #unicodeWrapWithKnownDir(Dir, String, boolean, boolean)}, but assumes * {@code isHtml} is false and {@code isolate} is true. * * @param dir {@code str}'s directionality * @param str The input string * @return Input string after applying the above processing. */ public String unicodeWrapWithKnownDir(@Nullable Dir dir, String str) { return unicodeWrapWithKnownDir(dir, str, false, true); } /** * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the * overall or the exit directionality of a given string is opposite to the context directionality. * Putting this after the string (including its directionality declaration wrapping) prevents it * from "sticking" to other opposite-directionality text or a number appearing after it inline * with only neutral content in between. Otherwise returns the empty string. * * @param str String after which the mark may need to appear * @param isHtml Whether {@code str} is HTML / HTML-escaped * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; else, the empty * string. */ public String markAfter(String str, boolean isHtml) { return markAfterKnownDir(null, str, isHtml); } /** * Operates like {@link #markAfter(String, boolean)}, but assumes {@code isHtml} is false. * * @param str String after which the mark may need to appear * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; else, the empty * string. */ public String markAfter(String str) { return markAfter(str, false); } /** * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the * overall or the exit directionality of a given string is opposite to the context directionality. * Putting this after the string (including its directionality declaration wrapping) prevents it * from "sticking" to other opposite-directionality text or a number appearing after it inline * with only neutral content in between. Otherwise returns the empty string. While the exit * directionality is determined by scanning the end of the string, the overall directionality is * given explicitly in {@code dir}. * * @param str String after which the mark may need to appear * @param dir {@code str}'s overall directionality. If null, i.e. unknown, it is estimated. * @param isHtml Whether {@code str} is HTML / HTML-escaped * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; else, the empty * string. */ public String markAfterKnownDir(@Nullable Dir dir, String str, boolean isHtml) { if (dir == null) { dir = estimateDirection(str, isHtml); } // BidiUtils.getExitDir() is called only if needed (short-circuit). if (contextDir == Dir.LTR && (dir == Dir.RTL || BidiUtils.getExitDir(str, isHtml) == Dir.RTL)) { return BidiUtils.Format.LRM_STRING; } if (contextDir == Dir.RTL && (dir == Dir.LTR || BidiUtils.getExitDir(str, isHtml) == Dir.LTR)) { return BidiUtils.Format.RLM_STRING; } return ""; } /** * Operates like {@link #markAfterKnownDir(Dir, String, boolean)}, but assumes that {@code isHtml} * is false. * * @param str The input string * @param dir {@code str}'s overall directionality * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; else, the empty * string. */ public String markAfterKnownDir(@Nullable Dir dir, String str) { return markAfterKnownDir(dir, str, false); } /** * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the * overall or the entry directionality of a given string is opposite to the context * directionality. Putting this before the string (including its directionality declaration * wrapping) prevents it from "sticking" to other opposite-directionality text appearing before it * inline with only neutral content in between. Otherwise returns the empty string. * * @param str String before which the mark may need to appear * @param isHtml Whether {@code str} is HTML / HTML-escaped * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; else, the empty * string. */ public String markBefore(String str, boolean isHtml) { return markBeforeKnownDir(null, str, isHtml); } /** * Operates like {@link #markBefore(String, boolean)}, but assumes {@code isHtml} is false. * * @param str String before which the mark may need to appear * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; else, the empty * string. */ public String markBefore(String str) { return markBefore(str, false); } /** * Returns a Unicode bidi mark matching the context directionality (LRM or RLM) if either the * overall or the entry directionality of a given string is opposite to the context * directionality. Putting this before the string (including its directionality declaration * wrapping) prevents it from "sticking" to other opposite-directionality text appearing before it * inline with only neutral content in between. Otherwise returns the empty string. While the * entry directionality is determined by scanning the beginning of the string, the overall * directionality is given explicitly in {@code dir}. * * @param str String before which the mark may need to appear * @param dir {@code str}'s overall directionality. If null, i.e. unknown, it is estimated. * @param isHtml Whether {@code str} is HTML / HTML-escaped * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; else, the empty * string. */ public String markBeforeKnownDir(@Nullable Dir dir, String str, boolean isHtml) { if (dir == null) { dir = estimateDirection(str, isHtml); } // BidiUtils.getExitDir() is called only if needed (short-circuit). if (contextDir == Dir.LTR && (dir == Dir.RTL || BidiUtils.getEntryDir(str, isHtml) == Dir.RTL)) { return BidiUtils.Format.LRM_STRING; } if (contextDir == Dir.RTL && (dir == Dir.LTR || BidiUtils.getEntryDir(str, isHtml) == Dir.LTR)) { return BidiUtils.Format.RLM_STRING; } return ""; } /** * Operates like {@link #markBeforeKnownDir(Dir, String, boolean)}, but assumes that {@code * isHtml} is false. * * @param str String before which the mark may need to appear * @param dir {@code str}'s overall directionality * @return LRM for RTL text in LTR context; RLM for LTR text in RTL context; else, the empty * string. */ public String markBeforeKnownDir(@Nullable Dir dir, String str) { return markBeforeKnownDir(dir, str, false); } /** * Returns the Unicode bidi mark matching the context directionality (LRM for LTR context * directionality, RLM for RTL context directionality), or the empty string for unknown context * directionality. */ public String mark() { return contextDir == Dir.LTR ? BidiUtils.Format.LRM_STRING : contextDir == Dir.RTL ? BidiUtils.Format.RLM_STRING : ""; } /** * Returns "right" for RTL context directionality. Otherwise (LTR or unknown context * directionality) returns "left". */ public String startEdge() { return contextDir == Dir.RTL ? BidiUtils.RIGHT : BidiUtils.LEFT; } /** * Returns "left" for RTL context directionality. Otherwise (LTR or unknown context * directionality) returns "right". */ public String endEdge() { return contextDir == Dir.RTL ? BidiUtils.LEFT : BidiUtils.RIGHT; } /** * Estimates the directionality of a string using the best known general-purpose method, i.e. * using relative word counts. Dir.NEUTRAL return value indicates completely neutral input. * * @param str String whose directionality is to be estimated * @param isHtml Whether {@code str} is HTML / HTML-escaped * @return {@code str}'s estimated overall directionality */ public static Dir estimateDirection(String str, boolean isHtml) { return BidiUtils.estimateDirection(str, isHtml); } }