/* * Copyright 2009 Google Inc. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.google.template.soy.msgs.internal; import static java.nio.charset.StandardCharsets.UTF_8; import com.google.common.annotations.VisibleForTesting; import com.google.common.collect.ImmutableList; import com.google.template.soy.msgs.restricted.SoyMsgPart; import com.google.template.soy.msgs.restricted.SoyMsgPlaceholderPart; import com.google.template.soy.msgs.restricted.SoyMsgRawTextPart; import javax.annotation.Nullable; /** * Static methods to compute the unique message id for a message. * */ public class SoyMsgIdComputer { private SoyMsgIdComputer() {} /** * Computes the unique message id for a message, given the message parts, the meaning string (if * any), and the content type (if any). These are the only elements incorporated into the message * id. * * <p>In particular, note that the id of a message does not change when its desc changes. * * @param msgParts The parts of the message. * @param meaning The meaning string, or null if none (usually null). * @param contentType Content type of the document that this message will appear in (e.g. "{@code * text/html}", or null if not used. * @return The computed message id. */ public static long computeMsgId( ImmutableList<SoyMsgPart> msgParts, @Nullable String meaning, @Nullable String contentType) { return computeMsgIdHelper(msgParts, false, meaning, contentType); } /** * Computes an alternate unique message id for a message, given the message parts, the meaning * string (if any), and the content type (if any). These are the only elements incorporated into * the message id. * * <p>In particular, note that the id of a message does not change when its desc changes. * * <p>Important: This is an alternate message id computation using braced placeholders. Only use * this function instead of {@link #computeMsgId} if you know that you need this alternate format. * * @param msgParts The parts of the message. * @param meaning The meaning string, or null if none (usually null). * @param contentType Content type of the document that this message will appear in (e.g. "{@code * text/html}", or null if not used.. * @return The computed message id. */ public static long computeMsgIdUsingBracedPhs( ImmutableList<SoyMsgPart> msgParts, @Nullable String meaning, @Nullable String contentType) { return computeMsgIdHelper(msgParts, true, meaning, contentType); } /** * Computes the unique message id for a message, given the message parts, the meaning string (if * any), and the content type (if any). These are the only elements incorporated into the message * id. * * <p>In particular, note that the id of a message does not change when its desc changes. * * @param msgParts The parts of the message. * @param doUseBracedPhs Whether to use braced placeholders. (Even though braced placeholders * originated from ICU syntax, the choice of whether to use braced placeholders when computing * msg id is still a separate decision, even if your message is plural/select and you do use * ICU syntax to represent plural/select parts.) * @param meaning The meaning string, or null if none (usually null). * @param contentType Content type of the document that this message will appear in (e.g. "{@code * text/html}", or null if not used.. */ private static long computeMsgIdHelper( ImmutableList<SoyMsgPart> msgParts, boolean doUseBracedPhs, @Nullable String meaning, @Nullable String contentType) { // Important: Do not change this algorithm. Doing so will break backwards compatibility. String msgContentStrForMsgIdComputation = buildMsgContentStrForMsgIdComputation(msgParts, doUseBracedPhs); long fp = fingerprint(msgContentStrForMsgIdComputation); // If there is a meaning, incorporate its fingerprint. if (meaning != null) { fp = (fp << 1) + (fp < 0 ? 1 : 0) + fingerprint(meaning); } // If there is a content type other than "text/html", incorporate its fingerprint. if (contentType != null && !contentType.equals("text/html")) { fp = (fp << 1) + (fp < 0 ? 1 : 0) + fingerprint(contentType); } // To avoid negative ids we strip the high-order bit. return fp & 0x7fffffffffffffffL; } /** * Private helper to build the canonical message content string that should be used for msg id * computation. * * <p>Note: For people who know what "presentation" means in this context, the result string * should be exactly the presentation string. * * @param msgParts The parts of the message. * @param doUseBracedPhs Whether to use braced placeholders. * @return The canonical message content string that should be used for msg id computation. */ @VisibleForTesting static String buildMsgContentStrForMsgIdComputation( ImmutableList<SoyMsgPart> msgParts, boolean doUseBracedPhs) { // Note: For source messages, disallow ICU syntax chars that need escaping in raw text. msgParts = IcuSyntaxUtils.convertMsgPartsToEmbeddedIcuSyntax(msgParts, false); StringBuilder msgStrSb = new StringBuilder(); for (SoyMsgPart msgPart : msgParts) { if (msgPart instanceof SoyMsgRawTextPart) { msgStrSb.append(((SoyMsgRawTextPart) msgPart).getRawText()); } else if (msgPart instanceof SoyMsgPlaceholderPart) { if (doUseBracedPhs) { msgStrSb.append('{'); } msgStrSb.append(((SoyMsgPlaceholderPart) msgPart).getPlaceholderName()); if (doUseBracedPhs) { msgStrSb.append('}'); } } else { throw new AssertionError(); } } return msgStrSb.toString(); } @VisibleForTesting static long fingerprint(String str) { byte[] strBytes = str.getBytes(UTF_8); int hi = hash32(strBytes, 0, strBytes.length, 0); int lo = hash32(strBytes, 0, strBytes.length, 102072); if ((hi == 0) && (lo == 0 || lo == 1)) { // Turn 0/1 into another fingerprint hi ^= 0x130f9bef; lo ^= 0x94a0a928; } return (((long) hi) << 32) | (lo & 0xffffffffL); } @SuppressWarnings({ "PointlessBitwiseExpression", "PointlessArithmeticExpression", "FallThrough" }) // IntelliJ private static int hash32(byte[] str, int start, int limit, int c) { int a = 0x9e3779b9; int b = 0x9e3779b9; int i; for (i = start; i + 12 <= limit; i += 12) { a += (((str[i + 0] & 0xff) << 0) | ((str[i + 1] & 0xff) << 8) | ((str[i + 2] & 0xff) << 16) | ((str[i + 3] & 0xff) << 24)); b += (((str[i + 4] & 0xff) << 0) | ((str[i + 5] & 0xff) << 8) | ((str[i + 6] & 0xff) << 16) | ((str[i + 7] & 0xff) << 24)); c += (((str[i + 8] & 0xff) << 0) | ((str[i + 9] & 0xff) << 8) | ((str[i + 10] & 0xff) << 16) | ((str[i + 11] & 0xff) << 24)); // Mix. a -= b; a -= c; a ^= (c >>> 13); b -= c; b -= a; b ^= (a << 8); c -= a; c -= b; c ^= (b >>> 13); a -= b; a -= c; a ^= (c >>> 12); b -= c; b -= a; b ^= (a << 16); c -= a; c -= b; c ^= (b >>> 5); a -= b; a -= c; a ^= (c >>> 3); b -= c; b -= a; b ^= (a << 10); c -= a; c -= b; c ^= (b >>> 15); } c += limit - start; switch (limit - i) { // Deal with rest. Cases fall through. case 11: c += (str[i + 10] & 0xff) << 24; case 10: c += (str[i + 9] & 0xff) << 16; case 9: c += (str[i + 8] & 0xff) << 8; // the first byte of c is reserved for the length case 8: b += (str[i + 7] & 0xff) << 24; case 7: b += (str[i + 6] & 0xff) << 16; case 6: b += (str[i + 5] & 0xff) << 8; case 5: b += (str[i + 4] & 0xff); case 4: a += (str[i + 3] & 0xff) << 24; case 3: a += (str[i + 2] & 0xff) << 16; case 2: a += (str[i + 1] & 0xff) << 8; case 1: a += (str[i + 0] & 0xff); // case 0 : nothing left to add } // Mix. a -= b; a -= c; a ^= (c >>> 13); b -= c; b -= a; b ^= (a << 8); c -= a; c -= b; c ^= (b >>> 13); a -= b; a -= c; a ^= (c >>> 12); b -= c; b -= a; b ^= (a << 16); c -= a; c -= b; c ^= (b >>> 5); a -= b; a -= c; a ^= (c >>> 3); b -= c; b -= a; b ^= (a << 10); c -= a; c -= b; c ^= (b >>> 15); return c; } }