/*
* Copyright 2011 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.devtools.j2objc.util;
import java.io.UnsupportedEncodingException;
import java.util.Locale;
/**
 * Utility methods for translating Unicode strings to Objective-C.
 *
 * <p>All methods are static; the class cannot be instantiated.
 *
 * @author Tom Ball
 */
public final class UnicodeUtils {

  private UnicodeUtils() {
    // Don't instantiate.
  }

  /**
   * Returns a valid C/C++ character literal (including quotes) for a printable
   * ASCII character, with the single quote and backslash escaped. Any other
   * character is returned as a four-digit hexadecimal integer constant, such
   * as {@code 0x00e9}, without quotes.
   *
   * @param c the character to translate
   * @return the C/C++ source text for the character
   */
  public static String escapeCharLiteral(char c) {
    if (c >= 0x20 && c <= 0x7E) { // Printable ASCII.
      switch (c) {
        case '\'': return "'\\''";
        case '\\': return "'\\\\'";
        default: return "'" + c + "'";
      }
    }
    return format("0x%04x", (int) c);
  }

  /**
   * Returns a valid ObjC string literal (excluding quotes).
   *
   * <p>If no character needs escaping, the argument itself is returned and no
   * new string is allocated.
   *
   * @param s the string to escape; must not be null
   * @return the escaped text to place between the quotes of a string literal
   */
  public static String escapeStringLiteral(String s) {
    StringBuilder sb = null; // Created lazily, on the first character that needs escaping.
    int len = s.length();
    int lastIndex = 0; // Start of the run of characters not yet copied to sb.
    for (int i = 0; i < len; i++) {
      String replacement = escapeCharacterForStringLiteral(s.charAt(i), s, i);
      if (replacement == null) {
        continue;
      }
      if (sb == null) {
        sb = new StringBuilder(len + 16);
      }
      // Copy the pending unescaped run directly, without a temporary substring.
      sb.append(s, lastIndex, i).append(replacement);
      lastIndex = i + 1;
    }
    if (sb == null) {
      return s;
    }
    return sb.append(s, lastIndex, len).toString();
  }

  /**
   * Returns the escaped form of a single character of a string literal, or
   * null if the character can be emitted unchanged.
   *
   * <p>Reports an error through {@code ErrorUtil} when the character is not a
   * valid C/C++ character; the universal character name is still returned in
   * that case.
   *
   * @param c the character at {@code idx}
   * @param s the string being escaped; used to look at the following character
   *     and for the error message
   * @param idx the index of {@code c} within {@code s}
   */
  private static String escapeCharacterForStringLiteral(char c, String s, int idx) {
    switch (c) {
      case '\\': return "\\\\";
      case '"': return "\\\"";
      case '\n': return "\\n";
      case '\t': return "\\t";
      default: break;
    }
    if (c >= 0x20 && c <= 0x7E) {
      // Printable ASCII character.
      return null;
    }
    if (c < 0xA0) {
      // Control characters (below 0x20, or 0x7F through 0x9F) are invalid as C++
      // Unicode numbers, so convert them to a UTF-8 hex sequence.
      String escaped = escapeUtf8(c);
      boolean followedByHex = idx + 1 < s.length() && isHexChar(s.charAt(idx + 1));
      // If followed by another hex character, we must terminate the hex sequence;
      // an empty pair of quotes splits the literal at that point.
      return followedByHex ? escaped + "\"\"" : escaped;
    }
    if (!isValidCppCharacter(c)) {
      ErrorUtil.error(String.format(
          "Illegal C/C++ Unicode character \\u%04x in \"%s\"", (int) c, s));
    }
    return "\\u" + format("%04x", (int) c);
  }

  /** Returns true if the character is an ASCII hexadecimal digit. */
  private static boolean isHexChar(char c) {
    return (c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
  }

  /**
   * Returns true if all characters in a string can be expressed as either
   * C++ universal characters or valid hexadecimal escape sequences.
   *
   * @param s the string to check; must not be null
   */
  public static boolean hasValidCppCharacters(String s) {
    // Index loop rather than toCharArray(), which would copy the whole string.
    for (int i = 0, len = s.length(); i < len; i++) {
      if (!isValidCppCharacter(s.charAt(i))) {
        return false;
      }
    }
    return true;
  }

  /**
   * Returns the UTF-8 encoding of a character as a sequence of two-digit,
   * lower-case C hex escapes, one per encoded byte.
   */
  private static String escapeUtf8(char value) {
    byte[] utf8;
    try {
      utf8 = Character.toString(value).getBytes("UTF-8");
    } catch (UnsupportedEncodingException e) {
      // Keep the original exception as the cause instead of discarding it.
      AssertionError error = new AssertionError("UTF-8 is an unsupported encoding");
      error.initCause(e);
      throw error;
    }
    StringBuilder buffer = new StringBuilder(utf8.length * 4);
    for (byte b : utf8) {
      int unsignedByte = b & 0xFF;
      buffer.append("\\x");
      if (unsignedByte < 16) {
        // Integer.toHexString() does not pad, so add the leading zero here.
        buffer.append('0');
      }
      buffer.append(Integer.toHexString(unsignedByte));
    }
    return buffer.toString();
  }

  /**
   * Returns true if the specified character can be represented in a C string
   * or character literal declaration. Only the range 0xD800 through 0xDFFF is
   * rejected; this invalid character range is from section 6.4.3 of the <a
   * href="http://www.open-std.org/jtc1/sc22/wg14/www/docs/n1124.pdf">C99
   * specification</a>.
   */
  public static boolean isValidCppCharacter(char c) {
    return c < 0xd800 || c > 0xdfff;
  }

  /**
   * For a given String, returns a legal identifier for Objective-C.
   *
   * <p>Letters, digits and {@code $} are kept; every other code point is
   * replaced with an underscore. If the word starts with a digit, an
   * underscore is prepended.
   *
   * @param word the text to convert; must not be null
   */
  // TODO(mthvedt): Consider using this for all identifiers.
  public static String asValidObjcIdentifier(String word) {
    int length = word.length();
    // StringBuilder: the buffer is local, so StringBuffer's locking is not needed.
    StringBuilder objcWord = new StringBuilder(length + 1);
    if (length > 0 && Character.isDigit(word.codePointAt(0))) {
      // Identifiers must not start with a digit
      objcWord.append('_');
    }
    int offset = 0;
    while (offset < length) {
      int codepoint = word.codePointAt(offset);
      offset += Character.charCount(codepoint);
      if (Character.isLetterOrDigit(codepoint)) {
        objcWord.appendCodePoint(codepoint);
      } else if (codepoint == '$') {
        // Allowed by Clang in non-strict mode (and used in J2ObjC)
        objcWord.append('$');
      } else {
        objcWord.append('_');
      }
    }
    return objcWord.toString();
  }

  /**
   * Invokes String.format() using Locale.ROOT, so that local locale
   * settings don't cause generated code to have characters the C compiler
   * can't manage. This method shouldn't be called for error messages or
   * other text displayed to the developer invoking j2objc, however.
   *
   * @param format the format string, as for {@link String#format(String, Object...)}
   * @param args the arguments referenced by the format specifiers
   * @return the formatted string
   * @see <a href="https://github.com/google/j2objc/issues/698">j2objc issue 698</a>
   */
  public static String format(String format, Object... args) {
    return String.format(Locale.ROOT, format, args);
  }
}