/* * Copyright 2010 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.template.soy.msgs.internal; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableList.Builder; import com.google.common.collect.ImmutableMap; import com.google.template.soy.base.internal.LegacyInternalSyntaxException; import com.google.template.soy.msgs.restricted.MsgPartUtils; import com.google.template.soy.msgs.restricted.SoyMsgPart; import com.google.template.soy.msgs.restricted.SoyMsgPart.Case; import com.google.template.soy.msgs.restricted.SoyMsgPlaceholderPart; import com.google.template.soy.msgs.restricted.SoyMsgPluralCaseSpec; import com.google.template.soy.msgs.restricted.SoyMsgPluralPart; import com.google.template.soy.msgs.restricted.SoyMsgPluralRemainderPart; import com.google.template.soy.msgs.restricted.SoyMsgRawTextPart; import com.google.template.soy.msgs.restricted.SoyMsgSelectPart; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Utilities for building msg parts with ICU syntax. * * <p>Important: Do not use outside of Soy code (treat as superpackage-private). * */ public class IcuSyntaxUtils { private IcuSyntaxUtils() {} /** * Given a list of msg parts: (a) if it contains any plural/select parts, then builds a new list * of msg parts where plural/select parts in the original msg parts are all embedded as raw text * in ICU format, (b) if it doesn't contain any plural/select parts, then simply returns the * original msg parts instead of creating a new list of identical msg parts. * * @param origMsgParts The msg parts to convert. * @param allowIcuEscapingInRawText If true, then ICU syntax chars needing escaping in will be * escaped. If false, then a SoySyntaxException will be thrown if an ICU syntax char needing * escaping is encountered in raw text. * @return A new list of msg parts with embedded ICU syntax if the original msg parts contain * plural/select parts, otherwise the original msg parts. */ public static ImmutableList<SoyMsgPart> convertMsgPartsToEmbeddedIcuSyntax( List<SoyMsgPart> origMsgParts, boolean allowIcuEscapingInRawText) { // If origMsgParts doesn't have plural/select parts, simply return it. if (!MsgPartUtils.hasPlrselPart(origMsgParts)) { // TODO: Once ImmutableList<SoyMsgPart> is everywhere, remove ImmutableList.copyOf() here. return ImmutableList.copyOf(origMsgParts); } // Build the new msg parts. ImmutableList.Builder<SoyMsgPart> newMsgPartsBuilder = ImmutableList.builder(); StringBuilder currRawTextSb = new StringBuilder(); convertMsgPartsHelper( newMsgPartsBuilder, currRawTextSb, origMsgParts, false, allowIcuEscapingInRawText); if (currRawTextSb.length() > 0) { newMsgPartsBuilder.add(SoyMsgRawTextPart.of(currRawTextSb.toString())); } return newMsgPartsBuilder.build(); } /** * Private helper for {@code convertMsgPartsToEmbeddedIcuSyntax()} to convert msg parts. * * @param newMsgPartsBuilder The new msg parts being built. * @param currRawTextSb The collector for the current raw text, which hasn't yet been turned into * a SoyMsgRawTextPart and added to newMsgPartsBuilder because it might not be complete. * @param origMsgParts The msg parts to convert. * @param isInPlrselPart Whether we're currently within a plural/select part's subtree. * @param allowIcuEscapingInRawText If true, then ICU syntax chars needing escaping in will be * escaped. If false, then a SoySyntaxException will be thrown if an ICU syntax char needing * escaping is encountered in raw text. */ private static void convertMsgPartsHelper( Builder<SoyMsgPart> newMsgPartsBuilder, StringBuilder currRawTextSb, List<SoyMsgPart> origMsgParts, boolean isInPlrselPart, boolean allowIcuEscapingInRawText) { for (SoyMsgPart origMsgPart : origMsgParts) { if (origMsgPart instanceof SoyMsgRawTextPart) { String rawText = ((SoyMsgRawTextPart) origMsgPart).getRawText(); if (isInPlrselPart) { if (allowIcuEscapingInRawText) { rawText = icuEscape(rawText); } else { checkIcuEscapingIsNotNeeded(rawText); } } currRawTextSb.append(rawText); } else if (origMsgPart instanceof SoyMsgPlaceholderPart) { // A placeholder ends the curr raw text, so if the collected curr raw text is nonempty, add // a msg part for it and clear the collector. if (currRawTextSb.length() > 0) { newMsgPartsBuilder.add(SoyMsgRawTextPart.of(currRawTextSb.toString())); currRawTextSb.setLength(0); } // Reuse the msg part for the placeholder since it's immutable. newMsgPartsBuilder.add(origMsgPart); } else if (origMsgPart instanceof SoyMsgPluralRemainderPart) { currRawTextSb.append(getPluralRemainderString()); } else if (origMsgPart instanceof SoyMsgPluralPart) { convertPluralPartHelper( newMsgPartsBuilder, currRawTextSb, (SoyMsgPluralPart) origMsgPart, allowIcuEscapingInRawText); } else if (origMsgPart instanceof SoyMsgSelectPart) { convertSelectPartHelper( newMsgPartsBuilder, currRawTextSb, (SoyMsgSelectPart) origMsgPart, allowIcuEscapingInRawText); } } } /** * Private helper for {@code convertMsgPartsToEmbeddedIcuSyntax()} to convert a plural part. * * @param newMsgPartsBuilder The new msg parts being built. * @param currRawTextSb The collector for the current raw text, which hasn't yet been turned into * a SoyMsgRawTextPart and added to newMsgPartsBuilder because it might not be complete. * @param origPluralPart The plural part to convert. * @param allowIcuEscapingInRawText If true, then ICU syntax chars needing escaping in will be * escaped. If false, then a SoySyntaxException will be thrown if an ICU syntax char needing * escaping is encountered in raw text. */ private static void convertPluralPartHelper( Builder<SoyMsgPart> newMsgPartsBuilder, StringBuilder currRawTextSb, SoyMsgPluralPart origPluralPart, boolean allowIcuEscapingInRawText) { currRawTextSb.append( getPluralOpenString(origPluralPart.getPluralVarName(), origPluralPart.getOffset())); for (Case<SoyMsgPluralCaseSpec> pluralCase : origPluralPart.getCases()) { currRawTextSb.append(getPluralCaseOpenString(pluralCase.spec())); convertMsgPartsHelper( newMsgPartsBuilder, currRawTextSb, pluralCase.parts(), true, allowIcuEscapingInRawText); currRawTextSb.append(getPluralCaseCloseString()); } currRawTextSb.append(getPluralCloseString()); } /** * Private helper for {@code convertMsgPartsToEmbeddedIcuSyntax()} to convert a select part. * * @param newMsgPartsBuilder The new msg parts being built. * @param currRawTextSb The collector for the current raw text, which hasn't yet been turned into * a SoyMsgRawTextPart and added to newMsgPartsBuilder because it might not be complete. * @param origSelectPart The select part to convert. * @param allowIcuEscapingInRawText If true, then ICU syntax chars needing escaping in will be * escaped. If false, then a SoySyntaxException will be thrown if an ICU syntax char needing * escaping is encountered in raw text. */ private static void convertSelectPartHelper( Builder<SoyMsgPart> newMsgPartsBuilder, StringBuilder currRawTextSb, SoyMsgSelectPart origSelectPart, boolean allowIcuEscapingInRawText) { currRawTextSb.append(getSelectOpenString(origSelectPart.getSelectVarName())); for (Case<String> selectCase : origSelectPart.getCases()) { currRawTextSb.append(getSelectCaseOpenString(selectCase.spec())); convertMsgPartsHelper( newMsgPartsBuilder, currRawTextSb, selectCase.parts(), true /* isInPlrselPart */, allowIcuEscapingInRawText); currRawTextSb.append(getSelectCaseCloseString()); } currRawTextSb.append(getSelectCloseString()); } // ----------------------------------------------------------------------------------------------- // Private low-level helpers. // A typical Plural command is as follows: // {plural $num_people offset="1"} // {case 0}Case 0 statement. // {case 1}Case 1 statement. // {default}Default statement for {remainder{$num_people}} out of {$num_people}. // {/plural} // // The corresponding ICU syntax string is: // {numPeople,plural,offset=1 // =0{Case 0 statement.} // =1{Case 1 statement} // other{Default statement for # out of {$numPeople}.} // } // // (The variable name "numPeople" may be different depending on what purpose the // string is generated.) // // Similarly, a typical select case: // // {select $gender} // {case 'female'}{$person} added you to her circle. // {default}{$person} added you to his circle. // {/select} // // The corresponding ICU syntax string is: // {gender,select, // female{{$person} added you to her circle.} // other{{$person} added you to his circle.} // } // // (The variable names "gender" and "person" may be different depending on what purpose the // string is generated.) /** * Regex pattern for ICU syntax chars needing escaping. Reference: * http://userguide.icu-project.org/formatparse/messages * * <p>Syntax chars are single quote, braces, and hash. Single quotes not followed by another * syntax char do not need escaping. We match for: (a) a single quote that precedes another syntax * char, (b) a single quote at the end of the raw text part (presumably the raw text is followed * by some ICU syntax, such as a placeholder or the end of a plural/select case), or (c) any brace * char but not the hash char (see important note below). * * <p>Important: In case (c), we do not match for the hash char '#' because we specifically turn * off ICU special handling of '#' in both (1) generating JS code for goog.getMsg * (GenJsCodeVisitorAssistantForMsgs.genI18nMessageFormatExprHelper) and (2) reading translated * msgs files (XtbIcuMsgParser.processIcuMessage), */ // Note: Need to escape hash char in regex due to Pattern.COMMENTS. private static final Pattern ICU_SYNTAX_CHAR_NEEDING_ESCAPE_PATTERN = Pattern.compile(" ' (?= ['{}\\#] ) | ' $ | [{}] ", Pattern.COMMENTS); /** Map from ICU syntax char to its escape sequence. */ private static final ImmutableMap<String, String> ICU_SYNTAX_CHAR_ESCAPE_MAP = ImmutableMap.of("'", "''", "{", "'{'", "}", "'}'"); /** * Regex pattern for ICU syntax chars other than single quote. Used in * checkIcuEscapingIsNotNeeded() to provide better error messages in some cases. */ private static final Pattern ICU_SYNTAX_CHAR_NOT_SINGLE_QUOTE_PATTERN = Pattern.compile("[{}]"); /** * Escapes ICU syntax characters in raw text. * * @param rawText The raw text to escaped. * @return The escaped raw text. If the given raw text doesn't need escaping, then the same string * object is returned. */ @VisibleForTesting static String icuEscape(String rawText) { Matcher matcher = ICU_SYNTAX_CHAR_NEEDING_ESCAPE_PATTERN.matcher(rawText); if (!matcher.find()) { return rawText; } StringBuffer escapedTextSb = new StringBuffer(); do { String repl = ICU_SYNTAX_CHAR_ESCAPE_MAP.get(matcher.group()); matcher.appendReplacement(escapedTextSb, repl); } while (matcher.find()); matcher.appendTail(escapedTextSb); return escapedTextSb.toString(); } /** * Checks that there are no ICU syntax characters needing escaping in the given raw text. Throws a * SoySyntaxException if the check fails. * * @param rawText The raw text to check. */ @VisibleForTesting static void checkIcuEscapingIsNotNeeded(String rawText) { Matcher matcher = ICU_SYNTAX_CHAR_NEEDING_ESCAPE_PATTERN.matcher(rawText); if (!matcher.find()) { return; } if (ICU_SYNTAX_CHAR_NOT_SINGLE_QUOTE_PATTERN.matcher(rawText).find()) { throw LegacyInternalSyntaxException.createWithoutMetaInfo( "Apologies, Soy currently does not support open/close brace characters in plural/gender" + " source msgs."); } else { if (!matcher.group().equals("'")) { throw new AssertionError(); } String errorMsgSuffix = " One possible workaround is to use the Unicode RIGHT SINGLE QUOTATION MARK character" + " (\\u2019) instead of a basic apostrophe."; if (matcher.end() == rawText.length()) { throw LegacyInternalSyntaxException.createWithoutMetaInfo( "Apologies, Soy currently does not support a single quote character at the end of a" + " text part in plural/gender source msgs (including immediately preceding an HTML" + " tag or Soy tag)." + errorMsgSuffix); } else if (rawText.charAt(matcher.end()) == '#') { throw LegacyInternalSyntaxException.createWithoutMetaInfo( "Apologies, Soy currently does not support a single quote character preceding a hash" + " character in plural/gender source msgs." + errorMsgSuffix); } else if (rawText.charAt(matcher.end()) == '\'') { throw LegacyInternalSyntaxException.createWithoutMetaInfo( "Apologies, Soy currently does not support consecutive single quote characters in" + " plural/gender source msgs." + errorMsgSuffix); } else { throw new AssertionError(); } } } // ------ Plural related strings. ------ /** * Gets the opening (left) string for a plural statement. * * @param varName The plural var name. * @param offset The offset. * @return the ICU syntax string for the plural opening string. */ private static String getPluralOpenString(String varName, int offset) { StringBuilder openingPartSb = new StringBuilder(); openingPartSb.append('{').append(varName).append(",plural,"); if (offset != 0) { openingPartSb.append("offset:").append(offset).append(' '); } return openingPartSb.toString(); } /** * Gets the closing (right) string for a plural statement. * * @return the ICU syntax string for the plural closing string. */ private static String getPluralCloseString() { return "}"; } /** * Gets the opening (left) string for a plural case statement. * * @param pluralCaseSpec The plural case spec object. * @return the ICU syntax string for the plural case opening string. */ private static String getPluralCaseOpenString(SoyMsgPluralCaseSpec pluralCaseSpec) { String icuCaseName = (pluralCaseSpec.getType() == SoyMsgPluralCaseSpec.Type.EXPLICIT) ? "=" + pluralCaseSpec.getExplicitValue() : pluralCaseSpec.getType().name().toLowerCase(); return icuCaseName + "{"; } /** * Gets the closing (right) string for a plural case statement. * * @return the ICU syntax string for the plural case closing string. */ private static String getPluralCaseCloseString() { return "}"; } /** * Gets the closing string for a plural remainder statement. * * @return the ICU syntax string for the plural remainder string. */ private static String getPluralRemainderString() { return "#"; } // ------ Select related strings. ------ /** * Gets the opening (left) string for a select statement. * * @param varName The select var name. * @return the ICU syntax string for the select opening string. */ private static String getSelectOpenString(String varName) { return "{" + varName + ",select,"; } /** * Gets the closing (right) string for a select statement. * * @return the ICU syntax string for the select closing string. */ private static String getSelectCloseString() { return "}"; } /** * Gets the opening (left) string for a select case statement. * * @param caseValue The case value, or {@code null} is it is the default statement. * @return the ICU syntax string for the select case opening string. */ private static String getSelectCaseOpenString(String caseValue) { return ((caseValue != null) ? caseValue : "other") + "{"; } /** * Gets the closing string for a plural remainder statement. * * @return the ICU syntax string for the plural remainder string. */ private static String getSelectCaseCloseString() { return "}"; } }