/** * Copyright (c) 2012-2016 André Bargull * Alle Rechte vorbehalten / All Rights Reserved. Use is subject to license terms. * * <https://github.com/anba/es6draft> */ package com.github.anba.es6draft.parser; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.lang.UCharacterCategory; /** * */ public final class Characters { private Characters() { } /** * <strong>[11.2] White Space</strong> * * <pre> * WhiteSpace :: * {@literal <TAB>} (U+0009) * {@literal <VT>} (U+000B) * {@literal <FF>} (U+000C) * {@literal <SP>} (U+0020) * {@literal <NBSP>} (U+00A0) * {@literal <ZWNBSP>} (U+FEFF) * {@literal <USP>} ("Zs") * </pre> * * @param c * the character * @return {@code true} if the character is a whitespace */ public static boolean isWhitespace(int c) { if (c <= 127) { return c == 0x09 || c == 0x0B || c == 0x0C || c == 0x20; } return c == 0xA0 || c == 0xFEFF || isSpaceSeparator(c); } /** * Unicode category "Zs" (space separator) * * @param c * the character * @return {@code true} if the character is space separator */ public static boolean isSpaceSeparator(int c) { // Unicode 8.0 return c == 0x20 || c == 0xA0 || c == 0x1680 || (0x2000 <= c && c <= 0x200A) || c == 0x202F || c == 0x205F || c == 0x3000; } /** * <strong>[11.3] Line Terminators</strong> * * <pre> * LineTerminator :: * {@literal <LF>} (U+000A) * {@literal <CR>} (U+000D) * {@literal <LS>} (U+2028) * {@literal <PS>} (U+2029) * </pre> * * @param c * the character * @return {@code true} if the character is a line terminator */ public static boolean isLineTerminator(int c) { if ((c & ~0b0010_0000_0010_1111) != 0) { return false; } return c == 0x0A || c == 0x0D || c == 0x2028 || c == 0x2029; } /** * <strong>[11.2] White Space</strong><br> * <strong>[11.3] Line Terminators</strong> * * @param c * the character * @return {@code true} if the character is whitespace or a line terminator */ public static boolean isWhitespaceOrLineTerminator(int c) { if (c <= 127) { return (0x09 <= c && c <= 0x0D) || c == 0x20; } return isWhitespace(c) || isLineTerminator(c); } /** * <strong>[11.6] Names and Keywords</strong> * * <pre> * IdentifierStart :: * UnicodeIDStart * $ * _ * \ UnicodeEscapeSequence * UnicodeIDStart :: * any Unicode character with the Unicode property "ID_Start". * </pre> * * @param c * the character * @return {@code true} if the character is an identifier start character */ public static boolean isIdentifierStart(int c) { if (c <= 127) { return ('a' <= (c | 0x20) && (c | 0x20) <= 'z') || c == '$' || c == '_'; } return isIdentifierStartUnlikely(c); } // Cf. definition of "ID_Start" from http://www.unicode.org/reports/tr31/. private static final int ID_START_MASK = 1 << UCharacterCategory.UPPERCASE_LETTER | 1 << UCharacterCategory.LOWERCASE_LETTER | 1 << UCharacterCategory.TITLECASE_LETTER | 1 << UCharacterCategory.MODIFIER_LETTER | 1 << UCharacterCategory.OTHER_LETTER | 1 << UCharacterCategory.LETTER_NUMBER; private static boolean isIdentifierStartUnlikely(int c) { if (c == '\u2E2F') { // VERTICAL TILDE is in 'Lm' and [:Pattern_Syntax:] return false; } if ((1 << UCharacter.getType(c) & ID_START_MASK) != 0) { return true; } // Grandfathered characters (Other_ID_Start) [Unicode 8.0]. switch (c) { case '\u2118': case '\u212E': case '\u309B': case '\u309C': return true; default: return false; } } /** * <strong>[11.6] Names and Keywords</strong> * * <pre> * IdentifierPart :: * UnicodeIDContinue * $ * _ * \ UnicodeEscapeSequence * <ZWNJ> * <ZWJ> * UnicodeIDContinue :: * any Unicode character with the Unicode property "ID_Continue" * </pre> * * @param c * the character * @return {@code true} if the character is an identifier part character */ public static boolean isIdentifierPart(int c) { if (c <= 127) { return ('a' <= (c | 0x20) && (c | 0x20) <= 'z') || ('0' <= c && c <= '9') || c == '$' || c == '_'; } return isIdentifierPartUnlikely(c); } // Cf. definition of "ID_Continue" from http://www.unicode.org/reports/tr31/. private static final int ID_CONTINUE_MASK = 1 << UCharacterCategory.UPPERCASE_LETTER | 1 << UCharacterCategory.LOWERCASE_LETTER | 1 << UCharacterCategory.TITLECASE_LETTER | 1 << UCharacterCategory.MODIFIER_LETTER | 1 << UCharacterCategory.OTHER_LETTER | 1 << UCharacterCategory.LETTER_NUMBER | 1 << UCharacterCategory.NON_SPACING_MARK | 1 << UCharacterCategory.COMBINING_SPACING_MARK | 1 << UCharacterCategory.DECIMAL_DIGIT_NUMBER | 1 << UCharacterCategory.CONNECTOR_PUNCTUATION; private static boolean isIdentifierPartUnlikely(int c) { if (c == '\u200C' || c == '\u200D') return true; if (c == '\u2E2F') { // VERTICAL TILDE is in 'Lm' and [:Pattern_Syntax:] return false; } if ((1 << UCharacter.getType(c) & ID_CONTINUE_MASK) != 0) { return true; } // Grandfathered characters (Other_ID_Start + Other_ID_Continue) [Unicode 8.0]. switch (c) { case '\u00B7': case '\u0387': case '\u1369': case '\u136A': case '\u136B': case '\u136C': case '\u136D': case '\u136E': case '\u136F': case '\u1370': case '\u1371': case '\u19DA': case '\u2118': case '\u212E': case '\u309B': case '\u309C': return true; default: return false; } } /** * <strong>[11.6] Names and Keywords</strong> * * <pre> * UnicodeIDStart :: * any Unicode character with the Unicode property "ID_Start". * </pre> * * @param c * the character * @return {@code true} if the character is an identifier start character */ public static boolean isUnicodeIDStart(int c) { if (c <= 127) { return ('a' <= (c | 0x20) && (c | 0x20) <= 'z'); } return isUnicodeIDStartUnlikely(c); } private static boolean isUnicodeIDStartUnlikely(int c) { if (c == '\u2E2F') { // VERTICAL TILDE is in 'Lm' and [:Pattern_Syntax:] return false; } if ((1 << UCharacter.getType(c) & ID_START_MASK) != 0) { return true; } // Grandfathered characters (Other_ID_Start) [Unicode 8.0]. switch (c) { case '\u2118': case '\u212E': case '\u309B': case '\u309C': return true; default: return false; } } /** * <strong>[11.6] Names and Keywords</strong> * * <pre> * UnicodeIDContinue :: * any Unicode character with the Unicode property "ID_Continue" * </pre> * * @param c * the character * @return {@code true} if the character is an identifier part character */ public static boolean isUnicodeIDContinue(int c) { if (c <= 127) { return ('a' <= (c | 0x20) && (c | 0x20) <= 'z') || ('0' <= c && c <= '9') || c == '_'; } return isUnicodeIDContinueUnlikely(c); } private static boolean isUnicodeIDContinueUnlikely(int c) { if (c == '\u2E2F') { // VERTICAL TILDE is in 'Lm' and [:Pattern_Syntax:] return false; } if ((1 << UCharacter.getType(c) & ID_CONTINUE_MASK) != 0) { return true; } // Grandfathered characters (Other_ID_Start + Other_ID_Continue) [Unicode 8.0]. switch (c) { case '\u00B7': case '\u0387': case '\u1369': case '\u136A': case '\u136B': case '\u136C': case '\u136D': case '\u136E': case '\u136F': case '\u1370': case '\u1371': case '\u19DA': case '\u2118': case '\u212E': case '\u309B': case '\u309C': return true; default: return false; } } /** * <strong>[11.8.3] Numeric Literals</strong> * * <pre> * BinaryDigit :: one of * 0 1 * </pre> * * @param c * the character to test * @return {@code true} if the character is a binary digit */ public static boolean isBinaryDigit(int c) { return (c == '0' || c == '1'); } /** * <strong>[11.8.3] Numeric Literals</strong> * * <pre> * OctalDigit :: one of * 0 1 2 3 4 5 6 7 * </pre> * * @param c * the character to test * @return {@code true} if the character is an octal digit */ public static boolean isOctalDigit(int c) { return ('0' <= c && c <= '7'); } /** * <strong>[11.8.3] Numeric Literals</strong> * * <pre> * DecimalDigit :: one of * 0 1 2 3 4 5 6 7 8 9 * </pre> * * @param c * the character to test * @return {@code true} if the character is decimal digit */ public static boolean isDecimalDigit(int c) { return ('0' <= c && c <= '9'); } /** * <strong>[11.8.3] Numeric Literals</strong> * * <pre> * HexDigit :: one of * 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F * </pre> * * @param c * the character to test * @return {@code true} if the character is a hexadecimal digit */ public static boolean isHexDigit(int c) { return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f'); } /** * <strong>[11.8.3] Numeric Literals</strong> * * <pre> * DecimalDigit :: one of * 0 1 2 3 4 5 6 7 8 9 * </pre> * * @param c * the character to convert * @return the converted integer or {@code -1} if not a valid decimal-digit */ public static int digit(int c) { if ('0' <= c && c <= '9') { return (c - '0'); } return -1; } /** * <strong>[11.8.3] Numeric Literals</strong> * * <pre> * HexDigit :: one of * 0 1 2 3 4 5 6 7 8 9 a b c d e f A B C D E F * </pre> * * @param c * the character to convert * @return the converted integer or {@code -1} if not a valid hex-digit */ public static int hexDigit(int c) { if ('0' <= c && c <= '9') { return (c - '0'); } else if ('A' <= c && c <= 'F') { return (c - ('A' - 10)); } else if ('a' <= c && c <= 'f') { return (c - ('a' - 10)); } return -1; } /** * Tests for ASCII alphabetical letter, i.e. a character in the ranges {@code A-Z} and {@code a-z} . * * @param c * the character to test * @return {@code true} if the character is an ASCII alphabetical letter */ public static boolean isASCIIAlpha(int c) { return 'a' <= (c | 0x20) && (c | 0x20) <= 'z'; } /** * Tests for ASCII alphanumeric letter including underscore, i.e. a character in the ranges {@code 0-9}, {@code A-Z} * and {@code a-z} plus the single letter {@code _}. * * @param c * the character to test * @return {@code true} if the character is an ASCII alphanumeric letter or underscore */ public static boolean isASCIIAlphaNumericUnderscore(int c) { return ('0' <= c && c <= '9') || ('a' <= (c | 0x20) && (c | 0x20) <= 'z') || c == '_'; } }