/*
* Copyright 2008 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.template.soy.msgs.restricted;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.annotations.VisibleForTesting;
import java.util.Arrays;
/**
 * Represents a raw text string within a message (the stuff that translators change).
 *
 * <p>Instances are obtained from {@link #of(String)}, which stores the text in whichever of two
 * internal representations (UTF-8 bytes or a UTF-16 char array) is smaller.
 */
public abstract class SoyMsgRawTextPart extends SoyMsgPart {

  // TODO(lukes): when java9 is released, remove this.. the optimization performed by these
  // subclasses is getting added to the vm. see http://openjdk.java.net/jeps/254
  private static final int BYTES_PER_CHAR = 2;

  /**
   * Returns a SoyMsgRawTextPart representing the specified raw text string.
   *
   * @param rawText the text to store; must not be null
   * @return a part whose {@link #getRawText()} returns a string equal to {@code rawText}
   */
  public static SoyMsgRawTextPart of(String rawText) {
    byte[] utf8Bytes = rawText.getBytes(UTF_8);

    // Determine whether UTF8 or UTF16 uses less memory, and choose between one of the two internal
    // implementations. char[] is preferred if the sizes are equal because it is faster to turn
    // back into a String. In a realistic application with 1 million messages in memory, using
    // UTF-8 saves about 35M, and dynamically switching encodings saves another 10M.

    // IMPORTANT! This choice is deterministic, so that for any particular input string the choice
    // of implementation class is the same. This ensures operations like equals() and hashCode()
    // do not have to decode the contents.

    // String.getBytes(UTF_8) silently replaces unpaired surrogates with '?', so such text cannot
    // be round-tripped through UTF-8. Keep it as chars; the decision still depends only on the
    // input string, so it stays deterministic. The scan only runs when UTF-8 would be chosen.
    if (utf8Bytes.length < rawText.length() * BYTES_PER_CHAR && !hasUnpairedSurrogate(rawText)) {
      return new Utf8SoyMsgRawTextPart(utf8Bytes);
    }
    return new CharArraySoyMsgRawTextPart(rawText.toCharArray());
  }

  /**
   * Returns whether {@code text} contains a high surrogate that is not immediately followed by a
   * low surrogate, or a low surrogate that is not immediately preceded by a high surrogate.
   */
  private static boolean hasUnpairedSurrogate(String text) {
    int length = text.length();
    for (int i = 0; i < length; i++) {
      char c = text.charAt(i);
      if (Character.isHighSurrogate(c)) {
        if (i + 1 < length && Character.isLowSurrogate(text.charAt(i + 1))) {
          i++; // Well-formed pair: skip its low half.
        } else {
          return true;
        }
      } else if (Character.isLowSurrogate(c)) {
        return true;
      }
    }
    return false;
  }

  /** Returns the raw text string. */
  public abstract String getRawText();

  /** Returns the same string as {@link #getRawText()}. */
  @Override
  public final String toString() {
    return getRawText();
  }

  /** Constructor only intended to be used internally. */
  SoyMsgRawTextPart() {}

  /**
   * UTF-8 raw message text representation.
   *
   * <p>For most messages, UTF8 represents the string more compactly. For Latin strings, UTF8 will
   * always be half the size of UTF16.
   */
  @VisibleForTesting
  static final class Utf8SoyMsgRawTextPart extends SoyMsgRawTextPart {
    private final byte[] utf8Bytes;

    /** Wraps the given UTF-8 bytes; the array is stored as-is, without copying. */
    Utf8SoyMsgRawTextPart(byte[] utf8Bytes) {
      this.utf8Bytes = utf8Bytes;
    }

    /**
     * Returns true only if {@code other} is also a UTF-8 part holding identical bytes. Returns
     * false for {@code null}.
     */
    @Override
    public boolean equals(Object other) {
      // NOTE: Since message encoding is deterministic, we know the messages don't match if
      // the other one is encoded as chars.
      // instanceof is null-safe and, because this class is final, matches only this exact class.
      return other instanceof Utf8SoyMsgRawTextPart
          && Arrays.equals(utf8Bytes, ((Utf8SoyMsgRawTextPart) other).utf8Bytes);
    }

    @Override
    public int hashCode() {
      return getClass().hashCode() + Arrays.hashCode(utf8Bytes);
    }

    /** Decodes the stored UTF-8 bytes into a new String on every call. */
    @Override
    public String getRawText() {
      return new String(utf8Bytes, UTF_8);
    }
  }

  /**
   * Character array representation.
   *
   * <p>Using a character array over String saves another 7M on a realistic application with 1
   * million messages in memory, by avoiding the overhead of the String object.
   */
  @VisibleForTesting
  static final class CharArraySoyMsgRawTextPart extends SoyMsgRawTextPart {
    private final char[] charArray;

    /** Wraps the given chars; the array is stored as-is, without copying. */
    CharArraySoyMsgRawTextPart(char[] charArray) {
      this.charArray = charArray;
    }

    /**
     * Returns true only if {@code other} is also a char-array part holding identical chars.
     * Returns false for {@code null}.
     */
    @Override
    public boolean equals(Object other) {
      // NOTE: Since message encoding is deterministic, we know the messages don't match if
      // the other one is encoded as UTF8.
      // instanceof is null-safe and, because this class is final, matches only this exact class.
      return other instanceof CharArraySoyMsgRawTextPart
          && Arrays.equals(charArray, ((CharArraySoyMsgRawTextPart) other).charArray);
    }

    @Override
    public int hashCode() {
      return getClass().hashCode() + Arrays.hashCode(charArray);
    }

    /** Builds a new String from the stored chars on every call. */
    @Override
    public String getRawText() {
      return new String(charArray);
    }
  }
}