/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package java.lang; import java.io.Serializable; import java.util.Arrays; /** * The wrapper for the primitive type {@code char}. This class also provides a * number of utility methods for working with characters. * * <p>Character data is kept up to date as Unicode evolves. * See the <a href="../util/Locale.html#locale_data">Locale data</a> section of * the {@code Locale} documentation for details of the Unicode versions implemented by current * and historical Android releases. * * <p>The Unicode specification, character tables, and other information are available at * <a href="http://www.unicode.org/">http://www.unicode.org/</a>. * * <p>Unicode characters are referred to as <i>code points</i>. The range of valid * code points is U+0000 to U+10FFFF. The <i>Basic Multilingual Plane (BMP)</i> * is the code point range U+0000 to U+FFFF. Characters above the BMP are * referred to as <i>Supplementary Characters</i>. On the Java platform, UTF-16 * encoding and {@code char} pairs are used to represent code points in the * supplementary range. 
A pair of {@code char} values that represent a * supplementary character are made up of a <i>high surrogate</i> with a value * range of 0xD800 to 0xDBFF and a <i>low surrogate</i> with a value range of * 0xDC00 to 0xDFFF. * <p> * On the Java platform a {@code char} value represents either a single BMP code * point or a UTF-16 unit that's part of a surrogate pair. The {@code int} type * is used to represent all Unicode code points. * * <a name="unicode_categories"></a><h3>Unicode categories</h3> * <p>Here's a list of the Unicode character categories and the corresponding Java constant, * grouped semantically to provide a convenient overview. This table is also useful in * conjunction with {@code \p} and {@code \P} in {@link java.util.regex.Pattern regular expressions}. * <span class="datatable"> * <style type="text/css"> * .datatable td { padding-right: 20px; } * </style> * <p><table> * <tr> <td> Cn </td> <td> Unassigned </td> <td>{@link #UNASSIGNED}</td> </tr> * <tr> <td> Cc </td> <td> Control </td> <td>{@link #CONTROL}</td> </tr> * <tr> <td> Cf </td> <td> Format </td> <td>{@link #FORMAT}</td> </tr> * <tr> <td> Co </td> <td> Private use </td> <td>{@link #PRIVATE_USE}</td> </tr> * <tr> <td> Cs </td> <td> Surrogate </td> <td>{@link #SURROGATE}</td> </tr> * <tr> <td><br></td> </tr> * <tr> <td> Lu </td> <td> Uppercase letter </td> <td>{@link #UPPERCASE_LETTER}</td> </tr> * <tr> <td> Ll </td> <td> Lowercase letter </td> <td>{@link #LOWERCASE_LETTER}</td> </tr> * <tr> <td> Lt </td> <td> Titlecase letter </td> <td>{@link #TITLECASE_LETTER}</td> </tr> * <tr> <td> Lm </td> <td> Modifier letter </td> <td>{@link #MODIFIER_LETTER}</td> </tr> * <tr> <td> Lo </td> <td> Other letter </td> <td>{@link #OTHER_LETTER}</td> </tr> * <tr> <td><br></td> </tr> * <tr> <td> Mn </td> <td> Non-spacing mark </td> <td>{@link #NON_SPACING_MARK}</td> </tr> * <tr> <td> Me </td> <td> Enclosing mark </td> <td>{@link #ENCLOSING_MARK}</td> </tr> * <tr> <td> Mc </td> <td> Combining spacing mark </td> 
<td>{@link #COMBINING_SPACING_MARK}</td> </tr> * <tr> <td><br></td> </tr> * <tr> <td> Nd </td> <td> Decimal digit number </td> <td>{@link #DECIMAL_DIGIT_NUMBER}</td> </tr> * <tr> <td> Nl </td> <td> Letter number </td> <td>{@link #LETTER_NUMBER}</td> </tr> * <tr> <td> No </td> <td> Other number </td> <td>{@link #OTHER_NUMBER}</td> </tr> * <tr> <td><br></td> </tr> * <tr> <td> Pd </td> <td> Dash punctuation </td> <td>{@link #DASH_PUNCTUATION}</td> </tr> * <tr> <td> Ps </td> <td> Start punctuation </td> <td>{@link #START_PUNCTUATION}</td> </tr> * <tr> <td> Pe </td> <td> End punctuation </td> <td>{@link #END_PUNCTUATION}</td> </tr> * <tr> <td> Pc </td> <td> Connector punctuation </td> <td>{@link #CONNECTOR_PUNCTUATION}</td> </tr> * <tr> <td> Pi </td> <td> Initial quote punctuation </td> <td>{@link #INITIAL_QUOTE_PUNCTUATION}</td> </tr> * <tr> <td> Pf </td> <td> Final quote punctuation </td> <td>{@link #FINAL_QUOTE_PUNCTUATION}</td> </tr> * <tr> <td> Po </td> <td> Other punctuation </td> <td>{@link #OTHER_PUNCTUATION}</td> </tr> * <tr> <td><br></td> </tr> * <tr> <td> Sm </td> <td> Math symbol </td> <td>{@link #MATH_SYMBOL}</td> </tr> * <tr> <td> Sc </td> <td> Currency symbol </td> <td>{@link #CURRENCY_SYMBOL}</td> </tr> * <tr> <td> Sk </td> <td> Modifier symbol </td> <td>{@link #MODIFIER_SYMBOL}</td> </tr> * <tr> <td> So </td> <td> Other symbol </td> <td>{@link #OTHER_SYMBOL}</td> </tr> * <tr> <td><br></td> </tr> * <tr> <td> Zs </td> <td> Space separator </td> <td>{@link #SPACE_SEPARATOR}</td> </tr> * <tr> <td> Zl </td> <td> Line separator </td> <td>{@link #LINE_SEPARATOR}</td> </tr> * <tr> <td> Zp </td> <td> Paragraph separator </td> <td>{@link #PARAGRAPH_SEPARATOR}</td> </tr> * </table> * </span> * * @since 1.0 */ @FindBugsSuppressWarnings("DM_NUMBER_CTOR") public final class Character implements Serializable, Comparable<Character> { private static final long serialVersionUID = 3786198910865385080L; private final char value; /** * The minimum {@code Character} value. 
*/ public static final char MIN_VALUE = '\u0000'; /** * The maximum {@code Character} value. */ public static final char MAX_VALUE = '\uffff'; /** * The minimum radix used for conversions between characters and integers. */ public static final int MIN_RADIX = 2; /** * The maximum radix used for conversions between characters and integers. */ public static final int MAX_RADIX = 36; /** * The {@link Class} object that represents the primitive type {@code char}. */ @SuppressWarnings("unchecked") public static final Class<Character> TYPE = (Class<Character>) char[].class.getComponentType(); // Note: Character.TYPE can't be set to "char.class", since *that* is // defined to be "java.lang.Character.TYPE"; /** * Unicode category constant Cn. */ public static final byte UNASSIGNED = 0; /** * Unicode category constant Lu. */ public static final byte UPPERCASE_LETTER = 1; /** * Unicode category constant Ll. */ public static final byte LOWERCASE_LETTER = 2; /** * Unicode category constant Lt. */ public static final byte TITLECASE_LETTER = 3; /** * Unicode category constant Lm. */ public static final byte MODIFIER_LETTER = 4; /** * Unicode category constant Lo. */ public static final byte OTHER_LETTER = 5; /** * Unicode category constant Mn. */ public static final byte NON_SPACING_MARK = 6; /** * Unicode category constant Me. */ public static final byte ENCLOSING_MARK = 7; /** * Unicode category constant Mc. */ public static final byte COMBINING_SPACING_MARK = 8; /** * Unicode category constant Nd. */ public static final byte DECIMAL_DIGIT_NUMBER = 9; /** * Unicode category constant Nl. */ public static final byte LETTER_NUMBER = 10; /** * Unicode category constant No. */ public static final byte OTHER_NUMBER = 11; /** * Unicode category constant Zs. */ public static final byte SPACE_SEPARATOR = 12; /** * Unicode category constant Zl. */ public static final byte LINE_SEPARATOR = 13; /** * Unicode category constant Zp. 
*/ public static final byte PARAGRAPH_SEPARATOR = 14; /** * Unicode category constant Cc. */ public static final byte CONTROL = 15; /** * Unicode category constant Cf. */ public static final byte FORMAT = 16; /** * Unicode category constant Co. */ public static final byte PRIVATE_USE = 18; /** * Unicode category constant Cs. */ public static final byte SURROGATE = 19; /** * Unicode category constant Pd. */ public static final byte DASH_PUNCTUATION = 20; /** * Unicode category constant Ps. */ public static final byte START_PUNCTUATION = 21; /** * Unicode category constant Pe. */ public static final byte END_PUNCTUATION = 22; /** * Unicode category constant Pc. */ public static final byte CONNECTOR_PUNCTUATION = 23; /** * Unicode category constant Po. */ public static final byte OTHER_PUNCTUATION = 24; /** * Unicode category constant Sm. */ public static final byte MATH_SYMBOL = 25; /** * Unicode category constant Sc. */ public static final byte CURRENCY_SYMBOL = 26; /** * Unicode category constant Sk. */ public static final byte MODIFIER_SYMBOL = 27; /** * Unicode category constant So. */ public static final byte OTHER_SYMBOL = 28; /** * Unicode category constant Pi. * * @since 1.4 */ public static final byte INITIAL_QUOTE_PUNCTUATION = 29; /** * Unicode category constant Pf. * * @since 1.4 */ public static final byte FINAL_QUOTE_PUNCTUATION = 30; /** * Unicode bidirectional constant. * * @since 1.4 */ public static final byte DIRECTIONALITY_UNDEFINED = -1; /** * Unicode bidirectional constant L. * * @since 1.4 */ public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0; /** * Unicode bidirectional constant R. * * @since 1.4 */ public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1; /** * Unicode bidirectional constant AL. * * @since 1.4 */ public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2; /** * Unicode bidirectional constant EN. 
* * @since 1.4 */ public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3; /** * Unicode bidirectional constant ES. * * @since 1.4 */ public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4; /** * Unicode bidirectional constant ET. * * @since 1.4 */ public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5; /** * Unicode bidirectional constant AN. * * @since 1.4 */ public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6; /** * Unicode bidirectional constant CS. * * @since 1.4 */ public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7; /** * Unicode bidirectional constant NSM. * * @since 1.4 */ public static final byte DIRECTIONALITY_NONSPACING_MARK = 8; /** * Unicode bidirectional constant BN. * * @since 1.4 */ public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; /** * Unicode bidirectional constant B. * * @since 1.4 */ public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; /** * Unicode bidirectional constant S. * * @since 1.4 */ public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; /** * Unicode bidirectional constant WS. * * @since 1.4 */ public static final byte DIRECTIONALITY_WHITESPACE = 12; /** * Unicode bidirectional constant ON. * * @since 1.4 */ public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; /** * Unicode bidirectional constant LRE. * * @since 1.4 */ public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; /** * Unicode bidirectional constant LRO. * * @since 1.4 */ public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; /** * Unicode bidirectional constant RLE. * * @since 1.4 */ public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; /** * Unicode bidirectional constant RLO. * * @since 1.4 */ public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; /** * Unicode bidirectional constant PDF. 
* * @since 1.4 */ public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; /** * The minimum value of a high surrogate or leading surrogate unit in UTF-16 * encoding, {@code '\uD800'}. * * @since 1.5 */ public static final char MIN_HIGH_SURROGATE = '\uD800'; /** * The maximum value of a high surrogate or leading surrogate unit in UTF-16 * encoding, {@code '\uDBFF'}. * * @since 1.5 */ public static final char MAX_HIGH_SURROGATE = '\uDBFF'; /** * The minimum value of a low surrogate or trailing surrogate unit in UTF-16 * encoding, {@code '\uDC00'}. * * @since 1.5 */ public static final char MIN_LOW_SURROGATE = '\uDC00'; /** * The maximum value of a low surrogate or trailing surrogate unit in UTF-16 * encoding, {@code '\uDFFF'}. * * @since 1.5 */ public static final char MAX_LOW_SURROGATE = '\uDFFF'; /** * The minimum value of a surrogate unit in UTF-16 encoding, {@code '\uD800'}. * * @since 1.5 */ public static final char MIN_SURROGATE = '\uD800'; /** * The maximum value of a surrogate unit in UTF-16 encoding, {@code '\uDFFF'}. * * @since 1.5 */ public static final char MAX_SURROGATE = '\uDFFF'; /** * The minimum value of a supplementary code point, {@code U+010000}. * * @since 1.5 */ public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000; /** * The minimum code point value, {@code U+0000}. * * @since 1.5 */ public static final int MIN_CODE_POINT = 0x000000; /** * The maximum code point value, {@code U+10FFFF}. * * @since 1.5 */ public static final int MAX_CODE_POINT = 0x10FFFF; /** * The number of bits required to represent a {@code Character} value * in unsigned form. 
* * @since 1.5 */ public static final int SIZE = 16; private static final byte[] DIRECTIONALITY = new byte[] { DIRECTIONALITY_LEFT_TO_RIGHT, DIRECTIONALITY_RIGHT_TO_LEFT, DIRECTIONALITY_EUROPEAN_NUMBER, DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR, DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR, DIRECTIONALITY_ARABIC_NUMBER, DIRECTIONALITY_COMMON_NUMBER_SEPARATOR, DIRECTIONALITY_PARAGRAPH_SEPARATOR, DIRECTIONALITY_SEGMENT_SEPARATOR, DIRECTIONALITY_WHITESPACE, DIRECTIONALITY_OTHER_NEUTRALS, DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING, DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE, DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC, DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING, DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE, DIRECTIONALITY_POP_DIRECTIONAL_FORMAT, DIRECTIONALITY_NONSPACING_MARK, DIRECTIONALITY_BOUNDARY_NEUTRAL }; /* * Represents a subset of the Unicode character set. */ public static class Subset { private final String name; /** * Constructs a new {@code Subset}. */ protected Subset(String name) { if (name == null) { throw new NullPointerException("name == null"); } this.name = name; } /** * Compares this character subset for identity with the specified object. */ @Override public final boolean equals(Object object) { return object == this; } /** * Returns this subset's hash code, which is the hash code computed by * {@link java.lang.Object#hashCode()}. */ @Override public final int hashCode() { return super.hashCode(); } /** * Returns this subset's name. */ @Override public final String toString() { return name; } } /** * Represents a block of Unicode characters. This class provides constants for various * well-known blocks (but not all blocks) and methods for looking up a block * by name {@link #forName} or by code point {@link #of}. * * @since 1.2 */ public static final class UnicodeBlock extends Subset { /** * The Surrogates Area Unicode block. 
* * @deprecated As of Java 5, this block has been replaced by * {@link #HIGH_SURROGATES}, * {@link #HIGH_PRIVATE_USE_SURROGATES} and * {@link #LOW_SURROGATES}. */ @Deprecated public static final UnicodeBlock SURROGATES_AREA = new UnicodeBlock("SURROGATES_AREA"); /** The Basic Latin Unicode block. */ public static final UnicodeBlock BASIC_LATIN = new UnicodeBlock("BASIC_LATIN"); /** The Latin-1 Supplement Unicode block. */ public static final UnicodeBlock LATIN_1_SUPPLEMENT = new UnicodeBlock("LATIN_1_SUPPLEMENT"); /** The Latin Extended-A Unicode block. */ public static final UnicodeBlock LATIN_EXTENDED_A = new UnicodeBlock("LATIN_EXTENDED_A"); /** The Latin Extended-B Unicode block. */ public static final UnicodeBlock LATIN_EXTENDED_B = new UnicodeBlock("LATIN_EXTENDED_B"); /** The IPA Extensions Unicode block. */ public static final UnicodeBlock IPA_EXTENSIONS = new UnicodeBlock("IPA_EXTENSIONS"); /** The Spacing Modifier Letters Unicode block. */ public static final UnicodeBlock SPACING_MODIFIER_LETTERS = new UnicodeBlock("SPACING_MODIFIER_LETTERS"); /** The Combining Diacritical Marks Unicode block. */ public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS"); /** * The Greek and Coptic Unicode block. Previously referred to as Greek. */ public static final UnicodeBlock GREEK = new UnicodeBlock("GREEK"); /** The Cyrillic Unicode block. */ public static final UnicodeBlock CYRILLIC = new UnicodeBlock("CYRILLIC"); /** * The Cyrillic Supplement Unicode block. Previously referred to as Cyrillic Supplementary. */ public static final UnicodeBlock CYRILLIC_SUPPLEMENTARY = new UnicodeBlock("CYRILLIC_SUPPLEMENTARY"); /** The Armenian Unicode block. */ public static final UnicodeBlock ARMENIAN = new UnicodeBlock("ARMENIAN"); /** The Hebrew Unicode block. */ public static final UnicodeBlock HEBREW = new UnicodeBlock("HEBREW"); /** The Arabic Unicode block. 
*/ public static final UnicodeBlock ARABIC = new UnicodeBlock("ARABIC"); /** The Syriac Unicode block. */ public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC"); /** The Thaana Unicode block. */ public static final UnicodeBlock THAANA = new UnicodeBlock("THAANA"); /** The Devanagari Unicode block. */ public static final UnicodeBlock DEVANAGARI = new UnicodeBlock("DEVANAGARI"); /** The Bengali Unicode block. */ public static final UnicodeBlock BENGALI = new UnicodeBlock("BENGALI"); /** The Gurmukhi Unicode block. */ public static final UnicodeBlock GURMUKHI = new UnicodeBlock("GURMUKHI"); /** The Gujarati Unicode block. */ public static final UnicodeBlock GUJARATI = new UnicodeBlock("GUJARATI"); /** The Oriya Unicode block. */ public static final UnicodeBlock ORIYA = new UnicodeBlock("ORIYA"); /** The Tamil Unicode block. */ public static final UnicodeBlock TAMIL = new UnicodeBlock("TAMIL"); /** The Telugu Unicode block. */ public static final UnicodeBlock TELUGU = new UnicodeBlock("TELUGU"); /** The Kannada Unicode block. */ public static final UnicodeBlock KANNADA = new UnicodeBlock("KANNADA"); /** The Malayalam Unicode block. */ public static final UnicodeBlock MALAYALAM = new UnicodeBlock("MALAYALAM"); /** The Sinhala Unicode block. */ public static final UnicodeBlock SINHALA = new UnicodeBlock("SINHALA"); /** The Thai Unicode block. */ public static final UnicodeBlock THAI = new UnicodeBlock("THAI"); /** The Lao Unicode block. */ public static final UnicodeBlock LAO = new UnicodeBlock("LAO"); /** The Tibetan Unicode block. */ public static final UnicodeBlock TIBETAN = new UnicodeBlock("TIBETAN"); /** The Myanmar Unicode block. */ public static final UnicodeBlock MYANMAR = new UnicodeBlock("MYANMAR"); /** The Georgian Unicode block. */ public static final UnicodeBlock GEORGIAN = new UnicodeBlock("GEORGIAN"); /** The Hangul Jamo Unicode block. 
*/ public static final UnicodeBlock HANGUL_JAMO = new UnicodeBlock("HANGUL_JAMO"); /** The Ethiopic Unicode block. */ public static final UnicodeBlock ETHIOPIC = new UnicodeBlock("ETHIOPIC"); /** The Cherokee Unicode block. */ public static final UnicodeBlock CHEROKEE = new UnicodeBlock("CHEROKEE"); /** The Unified Canadian Aboriginal Syllabics Unicode block. */ public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"); /** The Ogham Unicode block. */ public static final UnicodeBlock OGHAM = new UnicodeBlock("OGHAM"); /** The Runic Unicode block. */ public static final UnicodeBlock RUNIC = new UnicodeBlock("RUNIC"); /** The Tagalog Unicode block. */ public static final UnicodeBlock TAGALOG = new UnicodeBlock("TAGALOG"); /** The Hanunoo Unicode block. */ public static final UnicodeBlock HANUNOO = new UnicodeBlock("HANUNOO"); /** The Buhid Unicode block. */ public static final UnicodeBlock BUHID = new UnicodeBlock("BUHID"); /** The Tagbanwa Unicode block. */ public static final UnicodeBlock TAGBANWA = new UnicodeBlock("TAGBANWA"); /** The Khmer Unicode block. */ public static final UnicodeBlock KHMER = new UnicodeBlock("KHMER"); /** The Mongolian Unicode block. */ public static final UnicodeBlock MONGOLIAN = new UnicodeBlock("MONGOLIAN"); /** The Limbu Unicode block. */ public static final UnicodeBlock LIMBU = new UnicodeBlock("LIMBU"); /** The Tai Le Unicode block. */ public static final UnicodeBlock TAI_LE = new UnicodeBlock("TAI_LE"); /** The Khmer Symbols Unicode block. */ public static final UnicodeBlock KHMER_SYMBOLS = new UnicodeBlock("KHMER_SYMBOLS"); /** The Phonetic Extensions Unicode block. */ public static final UnicodeBlock PHONETIC_EXTENSIONS = new UnicodeBlock("PHONETIC_EXTENSIONS"); /** The Latin Extended Additional Unicode block. */ public static final UnicodeBlock LATIN_EXTENDED_ADDITIONAL = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL"); /** The Greek Extended Unicode block. 
*/ public static final UnicodeBlock GREEK_EXTENDED = new UnicodeBlock("GREEK_EXTENDED"); /** The General Punctuation Unicode block. */ public static final UnicodeBlock GENERAL_PUNCTUATION = new UnicodeBlock("GENERAL_PUNCTUATION"); /** The Superscripts and Subscripts Unicode block. */ public static final UnicodeBlock SUPERSCRIPTS_AND_SUBSCRIPTS = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS"); /** The Currency Symbols Unicode block. */ public static final UnicodeBlock CURRENCY_SYMBOLS = new UnicodeBlock("CURRENCY_SYMBOLS"); /** * The Combining Diacritical Marks for Symbols Unicode * Block. Previously referred to as Combining Marks for * Symbols. */ public static final UnicodeBlock COMBINING_MARKS_FOR_SYMBOLS = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS"); /** The Letterlike Symbols Unicode block. */ public static final UnicodeBlock LETTERLIKE_SYMBOLS = new UnicodeBlock("LETTERLIKE_SYMBOLS"); /** The Number Forms Unicode block. */ public static final UnicodeBlock NUMBER_FORMS = new UnicodeBlock("NUMBER_FORMS"); /** The Arrows Unicode block. */ public static final UnicodeBlock ARROWS = new UnicodeBlock("ARROWS"); /** The Mathematical Operators Unicode block. */ public static final UnicodeBlock MATHEMATICAL_OPERATORS = new UnicodeBlock("MATHEMATICAL_OPERATORS"); /** The Miscellaneous Technical Unicode block. */ public static final UnicodeBlock MISCELLANEOUS_TECHNICAL = new UnicodeBlock("MISCELLANEOUS_TECHNICAL"); /** The Control Pictures Unicode block. */ public static final UnicodeBlock CONTROL_PICTURES = new UnicodeBlock("CONTROL_PICTURES"); /** The Optical Character Recognition Unicode block. */ public static final UnicodeBlock OPTICAL_CHARACTER_RECOGNITION = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION"); /** The Enclosed Alphanumerics Unicode block. */ public static final UnicodeBlock ENCLOSED_ALPHANUMERICS = new UnicodeBlock("ENCLOSED_ALPHANUMERICS"); /** The Box Drawing Unicode block. 
*/ public static final UnicodeBlock BOX_DRAWING = new UnicodeBlock("BOX_DRAWING"); /** The Block Elements Unicode block. */ public static final UnicodeBlock BLOCK_ELEMENTS = new UnicodeBlock("BLOCK_ELEMENTS"); /** The Geometric Shapes Unicode block. */ public static final UnicodeBlock GEOMETRIC_SHAPES = new UnicodeBlock("GEOMETRIC_SHAPES"); /** The Miscellaneous Symbols Unicode block. */ public static final UnicodeBlock MISCELLANEOUS_SYMBOLS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS"); /** The Dingbats Unicode block. */ public static final UnicodeBlock DINGBATS = new UnicodeBlock("DINGBATS"); /** The Miscellaneous Mathematical Symbols-A Unicode block. */ public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A"); /** The Supplemental Arrows-A Unicode block. */ public static final UnicodeBlock SUPPLEMENTAL_ARROWS_A = new UnicodeBlock("SUPPLEMENTAL_ARROWS_A"); /** The Braille Patterns Unicode block. */ public static final UnicodeBlock BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS"); /** The Supplemental Arrows-B Unicode block. */ public static final UnicodeBlock SUPPLEMENTAL_ARROWS_B = new UnicodeBlock("SUPPLEMENTAL_ARROWS_B"); /** The Miscellaneous Mathematical Symbols-B Unicode block. */ public static final UnicodeBlock MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = new UnicodeBlock("MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B"); /** The Supplemental Mathematical Operators Unicode block. */ public static final UnicodeBlock SUPPLEMENTAL_MATHEMATICAL_OPERATORS = new UnicodeBlock("SUPPLEMENTAL_MATHEMATICAL_OPERATORS"); /** The Miscellaneous Symbols and Arrows Unicode block. */ public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_ARROWS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_ARROWS"); /** The CJK Radicals Supplement Unicode block. */ public static final UnicodeBlock CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT"); /** The Kangxi Radicals Unicode block. 
*/ public static final UnicodeBlock KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS"); /** The Ideographic Description Characters Unicode block. */ public static final UnicodeBlock IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS"); /** The CJK Symbols and Punctuation Unicode block. */ public static final UnicodeBlock CJK_SYMBOLS_AND_PUNCTUATION = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION"); /** The Hiragana Unicode block. */ public static final UnicodeBlock HIRAGANA = new UnicodeBlock("HIRAGANA"); /** The Katakana Unicode block. */ public static final UnicodeBlock KATAKANA = new UnicodeBlock("KATAKANA"); /** The Bopomofo Unicode block. */ public static final UnicodeBlock BOPOMOFO = new UnicodeBlock("BOPOMOFO"); /** The Hangul Compatibility Jamo Unicode block. */ public static final UnicodeBlock HANGUL_COMPATIBILITY_JAMO = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO"); /** The Kanbun Unicode block. */ public static final UnicodeBlock KANBUN = new UnicodeBlock("KANBUN"); /** The Bopomofo Extended Unicode block. */ public static final UnicodeBlock BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED"); /** The Katakana Phonetic Extensions Unicode block. */ public static final UnicodeBlock KATAKANA_PHONETIC_EXTENSIONS = new UnicodeBlock("KATAKANA_PHONETIC_EXTENSIONS"); /** The Enclosed CJK Letters and Months Unicode block. */ public static final UnicodeBlock ENCLOSED_CJK_LETTERS_AND_MONTHS = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS"); /** The CJK Compatibility Unicode block. */ public static final UnicodeBlock CJK_COMPATIBILITY = new UnicodeBlock("CJK_COMPATIBILITY"); /** The CJK Unified Ideographs Extension A Unicode block. */ public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"); /** The Yijing Hexagram Symbols Unicode block. 
*/ public static final UnicodeBlock YIJING_HEXAGRAM_SYMBOLS = new UnicodeBlock("YIJING_HEXAGRAM_SYMBOLS"); /** The CJK Unified Ideographs Unicode block. */ public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS"); /** The Yi Syllables Unicode block. */ public static final UnicodeBlock YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES"); /** The Yi Radicals Unicode block. */ public static final UnicodeBlock YI_RADICALS = new UnicodeBlock("YI_RADICALS"); /** The Hangul Syllables Unicode block. */ public static final UnicodeBlock HANGUL_SYLLABLES = new UnicodeBlock("HANGUL_SYLLABLES"); /** * The High Surrogates Unicode block. This block represents * code point values in the high surrogate range 0xD800 to 0xDB7F */ public static final UnicodeBlock HIGH_SURROGATES = new UnicodeBlock("HIGH_SURROGATES"); /** * The High Private Use Surrogates Unicode block. This block * represents code point values in the high surrogate range 0xDB80 to * 0xDBFF */ public static final UnicodeBlock HIGH_PRIVATE_USE_SURROGATES = new UnicodeBlock("HIGH_PRIVATE_USE_SURROGATES"); /** * The Low Surrogates Unicode block. This block represents * code point values in the low surrogate range 0xDC00 to 0xDFFF */ public static final UnicodeBlock LOW_SURROGATES = new UnicodeBlock("LOW_SURROGATES"); /** The Private Use Area Unicode block. */ public static final UnicodeBlock PRIVATE_USE_AREA = new UnicodeBlock("PRIVATE_USE_AREA"); /** The CJK Compatibility Ideographs Unicode block. */ public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS"); /** The Alphabetic Presentation Forms Unicode block. */ public static final UnicodeBlock ALPHABETIC_PRESENTATION_FORMS = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS"); /** The Arabic Presentation Forms-A Unicode block. 
*/ public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_A = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A"); /** The Variation Selectors Unicode block. */ public static final UnicodeBlock VARIATION_SELECTORS = new UnicodeBlock("VARIATION_SELECTORS"); /** The Combining Half Marks Unicode block. */ public static final UnicodeBlock COMBINING_HALF_MARKS = new UnicodeBlock("COMBINING_HALF_MARKS"); /** The CJK Compatibility Forms Unicode block. */ public static final UnicodeBlock CJK_COMPATIBILITY_FORMS = new UnicodeBlock("CJK_COMPATIBILITY_FORMS"); /** The Small Form Variants Unicode block. */ public static final UnicodeBlock SMALL_FORM_VARIANTS = new UnicodeBlock("SMALL_FORM_VARIANTS"); /** The Arabic Presentation Forms-B Unicode block. */ public static final UnicodeBlock ARABIC_PRESENTATION_FORMS_B = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B"); /** The Halfwidth and Fullwidth Forms Unicode block. */ public static final UnicodeBlock HALFWIDTH_AND_FULLWIDTH_FORMS = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS"); /** The Specials Unicode block. */ public static final UnicodeBlock SPECIALS = new UnicodeBlock("SPECIALS"); /** The Linear B Syllabary Unicode block. */ public static final UnicodeBlock LINEAR_B_SYLLABARY = new UnicodeBlock("LINEAR_B_SYLLABARY"); /** The Linear B Ideograms Unicode block. */ public static final UnicodeBlock LINEAR_B_IDEOGRAMS = new UnicodeBlock("LINEAR_B_IDEOGRAMS"); /** The Aegean Numbers Unicode block. */ public static final UnicodeBlock AEGEAN_NUMBERS = new UnicodeBlock("AEGEAN_NUMBERS"); /** The Old Italic Unicode block. */ public static final UnicodeBlock OLD_ITALIC = new UnicodeBlock("OLD_ITALIC"); /** The Gothic Unicode block. */ public static final UnicodeBlock GOTHIC = new UnicodeBlock("GOTHIC"); /** The Ugaritic Unicode block. */ public static final UnicodeBlock UGARITIC = new UnicodeBlock("UGARITIC"); /** The Deseret Unicode block. 
*/ public static final UnicodeBlock DESERET = new UnicodeBlock("DESERET"); /** The Shavian Unicode block. */ public static final UnicodeBlock SHAVIAN = new UnicodeBlock("SHAVIAN"); /** The Osmanya Unicode block. */ public static final UnicodeBlock OSMANYA = new UnicodeBlock("OSMANYA"); /** The Cypriot Syllabary Unicode block. */ public static final UnicodeBlock CYPRIOT_SYLLABARY = new UnicodeBlock("CYPRIOT_SYLLABARY"); /** The Byzantine Musical Symbols Unicode block. */ public static final UnicodeBlock BYZANTINE_MUSICAL_SYMBOLS = new UnicodeBlock("BYZANTINE_MUSICAL_SYMBOLS"); /** The Musical Symbols Unicode block. */ public static final UnicodeBlock MUSICAL_SYMBOLS = new UnicodeBlock("MUSICAL_SYMBOLS"); /** The Tai Xuan Jing Symbols Unicode block. */ public static final UnicodeBlock TAI_XUAN_JING_SYMBOLS = new UnicodeBlock("TAI_XUAN_JING_SYMBOLS"); /** The Mathematical Alphanumeric Symbols Unicode block. */ public static final UnicodeBlock MATHEMATICAL_ALPHANUMERIC_SYMBOLS = new UnicodeBlock("MATHEMATICAL_ALPHANUMERIC_SYMBOLS"); /** The CJK Unified Ideographs Extension B Unicode block. */ public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B"); /** The CJK Compatibility Ideographs Supplement Unicode block. */ public static final UnicodeBlock CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT"); /** The Tags Unicode block. */ public static final UnicodeBlock TAGS = new UnicodeBlock("TAGS"); /** The Variation Selectors Supplement Unicode block. */ public static final UnicodeBlock VARIATION_SELECTORS_SUPPLEMENT = new UnicodeBlock("VARIATION_SELECTORS_SUPPLEMENT"); /** The Supplementary Private Use Area-A Unicode block. */ public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_A = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_A"); /** The Supplementary Private Use Area-B Unicode block. 
*/ public static final UnicodeBlock SUPPLEMENTARY_PRIVATE_USE_AREA_B = new UnicodeBlock("SUPPLEMENTARY_PRIVATE_USE_AREA_B"); // Unicode 4.1. /** The Ancient Greek Musical Notation Unicode 4.1 block. */ public static final UnicodeBlock ANCIENT_GREEK_MUSICAL_NOTATION = new UnicodeBlock("ANCIENT_GREEK_MUSICAL_NOTATION"); /** The Ancient Greek Numbers Unicode 4.1 block. */ public static final UnicodeBlock ANCIENT_GREEK_NUMBERS = new UnicodeBlock("ANCIENT_GREEK_NUMBERS"); /** The Arabic Supplement Unicode 4.1 block. */ public static final UnicodeBlock ARABIC_SUPPLEMENT = new UnicodeBlock("ARABIC_SUPPLEMENT"); /** The Buginese Unicode 4.1 block. */ public static final UnicodeBlock BUGINESE = new UnicodeBlock("BUGINESE"); /** The CJK Strokes Unicode 4.1 block. */ public static final UnicodeBlock CJK_STROKES = new UnicodeBlock("CJK_STROKES"); /** The Combining Diacritical Marks Supplement Unicode 4.1 block. */ public static final UnicodeBlock COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS_SUPPLEMENT"); /** The Coptic Unicode 4.1 block. */ public static final UnicodeBlock COPTIC = new UnicodeBlock("COPTIC"); /** The Ethiopic Extended Unicode 4.1 block. */ public static final UnicodeBlock ETHIOPIC_EXTENDED = new UnicodeBlock("ETHIOPIC_EXTENDED"); /** The Ethiopic Supplement Unicode 4.1 block. */ public static final UnicodeBlock ETHIOPIC_SUPPLEMENT = new UnicodeBlock("ETHIOPIC_SUPPLEMENT"); /** The Georgian Supplement Unicode 4.1 block. */ public static final UnicodeBlock GEORGIAN_SUPPLEMENT = new UnicodeBlock("GEORGIAN_SUPPLEMENT"); /** The Glagolitic Unicode 4.1 block. */ public static final UnicodeBlock GLAGOLITIC = new UnicodeBlock("GLAGOLITIC"); /** The Kharoshthi Unicode 4.1 block. */ public static final UnicodeBlock KHAROSHTHI = new UnicodeBlock("KHAROSHTHI"); /** The Modifier Tone Letters Unicode 4.1 block. 
*/ public static final UnicodeBlock MODIFIER_TONE_LETTERS = new UnicodeBlock("MODIFIER_TONE_LETTERS"); /** The New Tai Lue Unicode 4.1 block. */ public static final UnicodeBlock NEW_TAI_LUE = new UnicodeBlock("NEW_TAI_LUE"); /** The Old Persian Unicode 4.1 block. */ public static final UnicodeBlock OLD_PERSIAN = new UnicodeBlock("OLD_PERSIAN"); /** The Phonetic Extensions Supplement Unicode 4.1 block. */ public static final UnicodeBlock PHONETIC_EXTENSIONS_SUPPLEMENT = new UnicodeBlock("PHONETIC_EXTENSIONS_SUPPLEMENT"); /** The Supplemental Punctuation Unicode 4.1 block. */ public static final UnicodeBlock SUPPLEMENTAL_PUNCTUATION = new UnicodeBlock("SUPPLEMENTAL_PUNCTUATION"); /** The Syloti Nagri Unicode 4.1 block. */ public static final UnicodeBlock SYLOTI_NAGRI = new UnicodeBlock("SYLOTI_NAGRI"); /** The Tifinagh Unicode 4.1 block. */ public static final UnicodeBlock TIFINAGH = new UnicodeBlock("TIFINAGH"); /** The Vertical Forms Unicode 4.1 block. */ public static final UnicodeBlock VERTICAL_FORMS = new UnicodeBlock("VERTICAL_FORMS"); // Unicode 5.0. /** The NKo Unicode 5.0 block. */ public static final UnicodeBlock NKO = new UnicodeBlock("NKO"); /** The Balinese Unicode 5.0 block. */ public static final UnicodeBlock BALINESE = new UnicodeBlock("BALINESE"); /** The Latin Extended C Unicode 5.0 block. */ public static final UnicodeBlock LATIN_EXTENDED_C = new UnicodeBlock("LATIN_EXTENDED_C"); /** The Latin Extended D Unicode 5.0 block. */ public static final UnicodeBlock LATIN_EXTENDED_D = new UnicodeBlock("LATIN_EXTENDED_D"); /** The Phags-pa Unicode 5.0 block. */ public static final UnicodeBlock PHAGS_PA = new UnicodeBlock("PHAGS_PA"); /** The Phoenician Unicode 5.0 block. */ public static final UnicodeBlock PHOENICIAN = new UnicodeBlock("PHOENICIAN"); /** The Cuneiform Unicode 5.0 block. */ public static final UnicodeBlock CUNEIFORM = new UnicodeBlock("CUNEIFORM"); /** The Cuneiform Numbers And Punctuation Unicode 5.0 block. 
*/ public static final UnicodeBlock CUNEIFORM_NUMBERS_AND_PUNCTUATION = new UnicodeBlock("CUNEIFORM_NUMBERS_AND_PUNCTUATION"); /** The Counting Rod Numerals Unicode 5.0 block. */ public static final UnicodeBlock COUNTING_ROD_NUMERALS = new UnicodeBlock("COUNTING_ROD_NUMERALS"); // Unicode 5.1. /** The Sudanese Unicode 5.1 block. */ public static final UnicodeBlock SUNDANESE = new UnicodeBlock("SUNDANESE"); /** The Lepcha Unicode 5.1 block. */ public static final UnicodeBlock LEPCHA = new UnicodeBlock("LEPCHA"); /** The Ol Chiki Unicode 5.1 block. */ public static final UnicodeBlock OL_CHIKI = new UnicodeBlock("OL_CHIKI"); /** The Cyrillic Extended-A Unicode 5.1 block. */ public static final UnicodeBlock CYRILLIC_EXTENDED_A = new UnicodeBlock("CYRILLIC_EXTENDED_A"); /** The Vai Unicode 5.1 block. */ public static final UnicodeBlock VAI = new UnicodeBlock("VAI"); /** The Cyrillic Extended-B Unicode 5.1 block. */ public static final UnicodeBlock CYRILLIC_EXTENDED_B = new UnicodeBlock("CYRILLIC_EXTENDED_B"); /** The Saurashtra Unicode 5.1 block. */ public static final UnicodeBlock SAURASHTRA = new UnicodeBlock("SAURASHTRA"); /** The Kayah Li Unicode 5.1 block. */ public static final UnicodeBlock KAYAH_LI = new UnicodeBlock("KAYAH_LI"); /** The Rejang Unicode 5.1 block. */ public static final UnicodeBlock REJANG = new UnicodeBlock("REJANG"); /** The Cham Unicode 5.1 block. */ public static final UnicodeBlock CHAM = new UnicodeBlock("CHAM"); /** The Ancient Symbols Unicode 5.1 block. */ public static final UnicodeBlock ANCIENT_SYMBOLS = new UnicodeBlock("ANCIENT_SYMBOLS"); /** The Phaistos Disc Unicode 5.1 block. */ public static final UnicodeBlock PHAISTOS_DISC = new UnicodeBlock("PHAISTOS_DISC"); /** The Lycian Unicode 5.1 block. */ public static final UnicodeBlock LYCIAN = new UnicodeBlock("LYCIAN"); /** The Carian Unicode 5.1 block. */ public static final UnicodeBlock CARIAN = new UnicodeBlock("CARIAN"); /** The Lydian Unicode 5.1 block. 
*/ public static final UnicodeBlock LYDIAN = new UnicodeBlock("LYDIAN"); /** The Mahjong Tiles Unicode 5.1 block. */ public static final UnicodeBlock MAHJONG_TILES = new UnicodeBlock("MAHJONG_TILES"); /** The Domino Tiles Unicode 5.1 block. */ public static final UnicodeBlock DOMINO_TILES = new UnicodeBlock("DOMINO_TILES"); // Unicode 5.2. /** The Samaritan Unicode 5.2 block. */ public static final UnicodeBlock SAMARITAN = new UnicodeBlock("SAMARITAN"); /** The Unified Canadian Aboriginal Syllabics Expanded Unicode 5.2 block. */ public static final UnicodeBlock UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED"); /** The Tai Tham Unicode 5.2 block. */ public static final UnicodeBlock TAI_THAM = new UnicodeBlock("TAI_THAM"); /** The Vedic Extensions Unicode 5.2 block. */ public static final UnicodeBlock VEDIC_EXTENSIONS = new UnicodeBlock("VEDIC_EXTENSIONS"); /** The Lisu Extensions Unicode 5.2 block. */ public static final UnicodeBlock LISU = new UnicodeBlock("LISU"); /** The Bamum Extensions Unicode 5.2 block. */ public static final UnicodeBlock BAMUM = new UnicodeBlock("BAMUM"); /** The Common Indic Number Forms Unicode 5.2 block. */ public static final UnicodeBlock COMMON_INDIC_NUMBER_FORMS = new UnicodeBlock("COMMON_INDIC_NUMBER_FORMS"); /** The Devanagari Extended Unicode 5.2 block. */ public static final UnicodeBlock DEVANAGARI_EXTENDED = new UnicodeBlock("DEVANAGARI_EXTENDED"); /** The Hangul Jamo Extended-A Unicode 5.2 block. */ public static final UnicodeBlock HANGUL_JAMO_EXTENDED_A = new UnicodeBlock("HANGUL_JAMO_EXTENDED_A"); /** The Javanese Unicode 5.2 block. */ public static final UnicodeBlock JAVANESE = new UnicodeBlock("JAVANESE"); /** The Myanmar Extended-A Unicode 5.2 block. */ public static final UnicodeBlock MYANMAR_EXTENDED_A = new UnicodeBlock("MYANMAR_EXTENDED_A"); /** The Tai Viet Unicode 5.2 block. 
*/ public static final UnicodeBlock TAI_VIET = new UnicodeBlock("TAI_VIET"); /** The Meetei Mayek Unicode 5.2 block. */ public static final UnicodeBlock MEETEI_MAYEK = new UnicodeBlock("MEETEI_MAYEK"); /** The Hangul Jamo Extended-B Unicode 5.2 block. */ public static final UnicodeBlock HANGUL_JAMO_EXTENDED_B = new UnicodeBlock("HANGUL_JAMO_EXTENDED_B"); /** The Imperial Aramaic Unicode 5.2 block. */ public static final UnicodeBlock IMPERIAL_ARAMAIC = new UnicodeBlock("IMPERIAL_ARAMAIC"); /** The Old South Arabian Unicode 5.2 block. */ public static final UnicodeBlock OLD_SOUTH_ARABIAN = new UnicodeBlock("OLD_SOUTH_ARABIAN"); /** The Avestan Unicode 5.2 block. */ public static final UnicodeBlock AVESTAN = new UnicodeBlock("AVESTAN"); /** The Inscriptional Pathian Unicode 5.2 block. */ public static final UnicodeBlock INSCRIPTIONAL_PARTHIAN = new UnicodeBlock("INSCRIPTIONAL_PARTHIAN"); /** The Inscriptional Pahlavi Unicode 5.2 block. */ public static final UnicodeBlock INSCRIPTIONAL_PAHLAVI = new UnicodeBlock("INSCRIPTIONAL_PAHLAVI"); /** The Old Turkic Unicode 5.2 block. */ public static final UnicodeBlock OLD_TURKIC = new UnicodeBlock("OLD_TURKIC"); /** The Rumi Numeral Symbols Unicode 5.2 block. */ public static final UnicodeBlock RUMI_NUMERAL_SYMBOLS = new UnicodeBlock("RUMI_NUMERAL_SYMBOLS"); /** The Kaithi Unicode 5.2 block. */ public static final UnicodeBlock KAITHI = new UnicodeBlock("KAITHI"); /** The Egyptian Hieroglyphs Unicode 5.2 block. */ public static final UnicodeBlock EGYPTIAN_HIEROGLYPHS = new UnicodeBlock("EGYPTIAN_HIEROGLYPHS"); /** The Enclosed Alphanumeric Supplement Unicode 5.2 block. */ public static final UnicodeBlock ENCLOSED_ALPHANUMERIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_ALPHANUMERIC_SUPPLEMENT"); /** The Enclosed Ideographic Supplement Unicode 5.2 block. */ public static final UnicodeBlock ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = new UnicodeBlock("ENCLOSED_IDEOGRAPHIC_SUPPLEMENT"); /** The CJK Unified Ideographs Unicode 5.2 block. 
*/ public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C"); // Unicode 6.0. /** The Mandaic Unicode 6.0 block. */ public static final UnicodeBlock MANDAIC = new UnicodeBlock("MANDAIC"); /** The Batak Unicode 6.0 block. */ public static final UnicodeBlock BATAK = new UnicodeBlock("BATAK"); /** The Ethiopic Extended-A Unicode 6.0 block. */ public static final UnicodeBlock ETHIOPIC_EXTENDED_A = new UnicodeBlock("ETHIOPIC_EXTENDED_A"); /** The Brahmi Unicode 6.0 block. */ public static final UnicodeBlock BRAHMI = new UnicodeBlock("BRAHMI"); /** The Bamum Supplement Unicode 6.0 block. */ public static final UnicodeBlock BAMUM_SUPPLEMENT = new UnicodeBlock("BAMUM_SUPPLEMENT"); /** The Kana Supplement Unicode 6.0 block. */ public static final UnicodeBlock KANA_SUPPLEMENT = new UnicodeBlock("KANA_SUPPLEMENT"); /** The Playing Cards Supplement Unicode 6.0 block. */ public static final UnicodeBlock PLAYING_CARDS = new UnicodeBlock("PLAYING_CARDS"); /** The Miscellaneous Symbols And Pictographs Supplement Unicode 6.0 block. */ public static final UnicodeBlock MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = new UnicodeBlock("MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS"); /** The Emoticons Unicode 6.0 block. */ public static final UnicodeBlock EMOTICONS = new UnicodeBlock("EMOTICONS"); /** The Transport And Map Symbols Unicode 6.0 block. */ public static final UnicodeBlock TRANSPORT_AND_MAP_SYMBOLS = new UnicodeBlock("TRANSPORT_AND_MAP_SYMBOLS"); /** The Alchemical Symbols Unicode 6.0 block. */ public static final UnicodeBlock ALCHEMICAL_SYMBOLS = new UnicodeBlock("ALCHEMICAL_SYMBOLS"); /** The CJK Unified Ideographs Extension-D Unicode 6.0 block. */ public static final UnicodeBlock CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D"); /* * All of the UnicodeBlocks above, in the icu4c UBlock enum order. 
*/ private static UnicodeBlock[] BLOCKS = new UnicodeBlock[] { null, // icu4c numbers blocks starting at 1, so index 0 should be null. UnicodeBlock.BASIC_LATIN, UnicodeBlock.LATIN_1_SUPPLEMENT, UnicodeBlock.LATIN_EXTENDED_A, UnicodeBlock.LATIN_EXTENDED_B, UnicodeBlock.IPA_EXTENSIONS, UnicodeBlock.SPACING_MODIFIER_LETTERS, UnicodeBlock.COMBINING_DIACRITICAL_MARKS, UnicodeBlock.GREEK, UnicodeBlock.CYRILLIC, UnicodeBlock.ARMENIAN, UnicodeBlock.HEBREW, UnicodeBlock.ARABIC, UnicodeBlock.SYRIAC, UnicodeBlock.THAANA, UnicodeBlock.DEVANAGARI, UnicodeBlock.BENGALI, UnicodeBlock.GURMUKHI, UnicodeBlock.GUJARATI, UnicodeBlock.ORIYA, UnicodeBlock.TAMIL, UnicodeBlock.TELUGU, UnicodeBlock.KANNADA, UnicodeBlock.MALAYALAM, UnicodeBlock.SINHALA, UnicodeBlock.THAI, UnicodeBlock.LAO, UnicodeBlock.TIBETAN, UnicodeBlock.MYANMAR, UnicodeBlock.GEORGIAN, UnicodeBlock.HANGUL_JAMO, UnicodeBlock.ETHIOPIC, UnicodeBlock.CHEROKEE, UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, UnicodeBlock.OGHAM, UnicodeBlock.RUNIC, UnicodeBlock.KHMER, UnicodeBlock.MONGOLIAN, UnicodeBlock.LATIN_EXTENDED_ADDITIONAL, UnicodeBlock.GREEK_EXTENDED, UnicodeBlock.GENERAL_PUNCTUATION, UnicodeBlock.SUPERSCRIPTS_AND_SUBSCRIPTS, UnicodeBlock.CURRENCY_SYMBOLS, UnicodeBlock.COMBINING_MARKS_FOR_SYMBOLS, UnicodeBlock.LETTERLIKE_SYMBOLS, UnicodeBlock.NUMBER_FORMS, UnicodeBlock.ARROWS, UnicodeBlock.MATHEMATICAL_OPERATORS, UnicodeBlock.MISCELLANEOUS_TECHNICAL, UnicodeBlock.CONTROL_PICTURES, UnicodeBlock.OPTICAL_CHARACTER_RECOGNITION, UnicodeBlock.ENCLOSED_ALPHANUMERICS, UnicodeBlock.BOX_DRAWING, UnicodeBlock.BLOCK_ELEMENTS, UnicodeBlock.GEOMETRIC_SHAPES, UnicodeBlock.MISCELLANEOUS_SYMBOLS, UnicodeBlock.DINGBATS, UnicodeBlock.BRAILLE_PATTERNS, UnicodeBlock.CJK_RADICALS_SUPPLEMENT, UnicodeBlock.KANGXI_RADICALS, UnicodeBlock.IDEOGRAPHIC_DESCRIPTION_CHARACTERS, UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION, UnicodeBlock.HIRAGANA, UnicodeBlock.KATAKANA, UnicodeBlock.BOPOMOFO, UnicodeBlock.HANGUL_COMPATIBILITY_JAMO, 
UnicodeBlock.KANBUN, UnicodeBlock.BOPOMOFO_EXTENDED, UnicodeBlock.ENCLOSED_CJK_LETTERS_AND_MONTHS, UnicodeBlock.CJK_COMPATIBILITY, UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS, UnicodeBlock.YI_SYLLABLES, UnicodeBlock.YI_RADICALS, UnicodeBlock.HANGUL_SYLLABLES, UnicodeBlock.HIGH_SURROGATES, UnicodeBlock.HIGH_PRIVATE_USE_SURROGATES, UnicodeBlock.LOW_SURROGATES, UnicodeBlock.PRIVATE_USE_AREA, UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS, UnicodeBlock.ALPHABETIC_PRESENTATION_FORMS, UnicodeBlock.ARABIC_PRESENTATION_FORMS_A, UnicodeBlock.COMBINING_HALF_MARKS, UnicodeBlock.CJK_COMPATIBILITY_FORMS, UnicodeBlock.SMALL_FORM_VARIANTS, UnicodeBlock.ARABIC_PRESENTATION_FORMS_B, UnicodeBlock.SPECIALS, UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS, // Unicode 3.1. UnicodeBlock.OLD_ITALIC, UnicodeBlock.GOTHIC, UnicodeBlock.DESERET, UnicodeBlock.BYZANTINE_MUSICAL_SYMBOLS, UnicodeBlock.MUSICAL_SYMBOLS, UnicodeBlock.MATHEMATICAL_ALPHANUMERIC_SYMBOLS, UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, UnicodeBlock.TAGS, // Unicode 3.2. UnicodeBlock.CYRILLIC_SUPPLEMENTARY, UnicodeBlock.TAGALOG, UnicodeBlock.HANUNOO, UnicodeBlock.BUHID, UnicodeBlock.TAGBANWA, UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A, UnicodeBlock.SUPPLEMENTAL_ARROWS_A, UnicodeBlock.SUPPLEMENTAL_ARROWS_B, UnicodeBlock.MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B, UnicodeBlock.SUPPLEMENTAL_MATHEMATICAL_OPERATORS, UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS, UnicodeBlock.VARIATION_SELECTORS, UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_A, UnicodeBlock.SUPPLEMENTARY_PRIVATE_USE_AREA_B, // Unicode 4.0. 
UnicodeBlock.LIMBU, UnicodeBlock.TAI_LE, UnicodeBlock.KHMER_SYMBOLS, UnicodeBlock.PHONETIC_EXTENSIONS, UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_ARROWS, UnicodeBlock.YIJING_HEXAGRAM_SYMBOLS, UnicodeBlock.LINEAR_B_SYLLABARY, UnicodeBlock.LINEAR_B_IDEOGRAMS, UnicodeBlock.AEGEAN_NUMBERS, UnicodeBlock.UGARITIC, UnicodeBlock.SHAVIAN, UnicodeBlock.OSMANYA, UnicodeBlock.CYPRIOT_SYLLABARY, UnicodeBlock.TAI_XUAN_JING_SYMBOLS, UnicodeBlock.VARIATION_SELECTORS_SUPPLEMENT, // Unicode 4.1. UnicodeBlock.ANCIENT_GREEK_MUSICAL_NOTATION, UnicodeBlock.ANCIENT_GREEK_NUMBERS, UnicodeBlock.ARABIC_SUPPLEMENT, UnicodeBlock.BUGINESE, UnicodeBlock.CJK_STROKES, UnicodeBlock.COMBINING_DIACRITICAL_MARKS_SUPPLEMENT, UnicodeBlock.COPTIC, UnicodeBlock.ETHIOPIC_EXTENDED, UnicodeBlock.ETHIOPIC_SUPPLEMENT, UnicodeBlock.GEORGIAN_SUPPLEMENT, UnicodeBlock.GLAGOLITIC, UnicodeBlock.KHAROSHTHI, UnicodeBlock.MODIFIER_TONE_LETTERS, UnicodeBlock.NEW_TAI_LUE, UnicodeBlock.OLD_PERSIAN, UnicodeBlock.PHONETIC_EXTENSIONS_SUPPLEMENT, UnicodeBlock.SUPPLEMENTAL_PUNCTUATION, UnicodeBlock.SYLOTI_NAGRI, UnicodeBlock.TIFINAGH, UnicodeBlock.VERTICAL_FORMS, // Unicode 5.0. UnicodeBlock.NKO, UnicodeBlock.BALINESE, UnicodeBlock.LATIN_EXTENDED_C, UnicodeBlock.LATIN_EXTENDED_D, UnicodeBlock.PHAGS_PA, UnicodeBlock.PHOENICIAN, UnicodeBlock.CUNEIFORM, UnicodeBlock.CUNEIFORM_NUMBERS_AND_PUNCTUATION, UnicodeBlock.COUNTING_ROD_NUMERALS, // Unicode 5.1. UnicodeBlock.SUNDANESE, UnicodeBlock.LEPCHA, UnicodeBlock.OL_CHIKI, UnicodeBlock.CYRILLIC_EXTENDED_A, UnicodeBlock.VAI, UnicodeBlock.CYRILLIC_EXTENDED_B, UnicodeBlock.SAURASHTRA, UnicodeBlock.KAYAH_LI, UnicodeBlock.REJANG, UnicodeBlock.CHAM, UnicodeBlock.ANCIENT_SYMBOLS, UnicodeBlock.PHAISTOS_DISC, UnicodeBlock.LYCIAN, UnicodeBlock.CARIAN, UnicodeBlock.LYDIAN, UnicodeBlock.MAHJONG_TILES, UnicodeBlock.DOMINO_TILES, // Unicode 5.2. 
UnicodeBlock.SAMARITAN, UnicodeBlock.UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED, UnicodeBlock.TAI_THAM, UnicodeBlock.VEDIC_EXTENSIONS, UnicodeBlock.LISU, UnicodeBlock.BAMUM, UnicodeBlock.COMMON_INDIC_NUMBER_FORMS, UnicodeBlock.DEVANAGARI_EXTENDED, UnicodeBlock.HANGUL_JAMO_EXTENDED_A, UnicodeBlock.JAVANESE, UnicodeBlock.MYANMAR_EXTENDED_A, UnicodeBlock.TAI_VIET, UnicodeBlock.MEETEI_MAYEK, UnicodeBlock.HANGUL_JAMO_EXTENDED_B, UnicodeBlock.IMPERIAL_ARAMAIC, UnicodeBlock.OLD_SOUTH_ARABIAN, UnicodeBlock.AVESTAN, UnicodeBlock.INSCRIPTIONAL_PARTHIAN, UnicodeBlock.INSCRIPTIONAL_PAHLAVI, UnicodeBlock.OLD_TURKIC, UnicodeBlock.RUMI_NUMERAL_SYMBOLS, UnicodeBlock.KAITHI, UnicodeBlock.EGYPTIAN_HIEROGLYPHS, UnicodeBlock.ENCLOSED_ALPHANUMERIC_SUPPLEMENT, UnicodeBlock.ENCLOSED_IDEOGRAPHIC_SUPPLEMENT, UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C, // Unicode 6.0. UnicodeBlock.MANDAIC, UnicodeBlock.BATAK, UnicodeBlock.ETHIOPIC_EXTENDED_A, UnicodeBlock.BRAHMI, UnicodeBlock.BAMUM_SUPPLEMENT, UnicodeBlock.KANA_SUPPLEMENT, UnicodeBlock.PLAYING_CARDS, UnicodeBlock.MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS, UnicodeBlock.EMOTICONS, UnicodeBlock.TRANSPORT_AND_MAP_SYMBOLS, UnicodeBlock.ALCHEMICAL_SYMBOLS, UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D, }; /** * Returns the Unicode block for the given block name, or null if there is no * such block. * * <p>Block names may be one of the following: * <ul> * <li>Canonical block name, as defined by the Unicode specification; * case-insensitive.</li> * <li>Canonical block name without any spaces, as defined by the * Unicode specification; case-insensitive.</li> * <li>A {@code UnicodeBlock} constant identifier. This is determined by * converting the canonical name to uppercase and replacing all spaces and hyphens * with underscores.</li> * </ul> * * @throws NullPointerException * if {@code blockName == null}. * @throws IllegalArgumentException * if {@code blockName} is not the name of any known block. 
* @since 1.5 */ public static UnicodeBlock forName(String blockName) { if (blockName == null) { throw new NullPointerException("blockName == null"); } int block = unicodeBlockForName(blockName); if (block == -1) { throw new IllegalArgumentException("Unknown block: " + blockName); } return BLOCKS[block]; } /** * Returns the Unicode block containing the given code point, or null if the * code point does not belong to any known block. */ public static UnicodeBlock of(char c) { return of((int) c); } /** * Returns the Unicode block containing the given code point, or null if the * code point does not belong to any known block. */ public static UnicodeBlock of(int codePoint) { checkValidCodePoint(codePoint); int block = unicodeBlockForCodePoint(codePoint); if (block == -1 || block >= BLOCKS.length) { return null; } return BLOCKS[block]; } private UnicodeBlock(String blockName) { super(blockName); } } private static native int unicodeBlockForName(String blockName); private static native int unicodeBlockForCodePoint(int codePoint); private static native int unicodeScriptForName(String blockName); private static native int unicodeScriptForCodePoint(int codePoint); /** * Constructs a new {@code Character} with the specified primitive char * value. * * @param value * the primitive char value to store in the new instance. */ public Character(char value) { this.value = value; } /** * Gets the primitive value of this character. * * @return this object's primitive value. */ public char charValue() { return value; } private static void checkValidCodePoint(int codePoint) { if (!isValidCodePoint(codePoint)) { throw new IllegalArgumentException("Invalid code point: " + codePoint); } } /** * Compares this object to the specified character object to determine their * relative order. * * @param c * the character object to compare this object to. 
* @return {@code 0} if the value of this character and the value of * {@code c} are equal; a positive value if the value of this * character is greater than the value of {@code c}; a negative * value if the value of this character is less than the value of * {@code c}. * @see java.lang.Comparable * @since 1.2 */ public int compareTo(Character c) { return compare(value, c.value); } /** * Compares two {@code char} values. * @return 0 if lhs = rhs, less than 0 if lhs < rhs, and greater than 0 if lhs > rhs. * @since 1.7 */ public static int compare(char lhs, char rhs) { return lhs - rhs; } /** * Returns a {@code Character} instance for the {@code char} value passed. * <p> * If it is not necessary to get a new {@code Character} instance, it is * recommended to use this method instead of the constructor, since it * maintains a cache of instances which may result in better performance. * * @param c * the char value for which to get a {@code Character} instance. * @return the {@code Character} instance for {@code c}. * @since 1.5 */ public static Character valueOf(char c) { return c < 128 ? SMALL_VALUES[c] : new Character(c); } /** * A cache of instances used by {@link #valueOf(char)} and auto-boxing */ private static final Character[] SMALL_VALUES = new Character[128]; static { for (int i = 0; i < 128; i++) { SMALL_VALUES[i] = new Character((char) i); } } /** * Indicates whether {@code codePoint} is a valid Unicode code point. * * @param codePoint * the code point to test. * @return {@code true} if {@code codePoint} is a valid Unicode code point; * {@code false} otherwise. * @since 1.5 */ public static boolean isValidCodePoint(int codePoint) { return (MIN_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); } /** * Indicates whether {@code codePoint} is within the supplementary code * point range. * * @param codePoint * the code point to test. * @return {@code true} if {@code codePoint} is within the supplementary * code point range; {@code false} otherwise. 
* @since 1.5 */ public static boolean isSupplementaryCodePoint(int codePoint) { return (MIN_SUPPLEMENTARY_CODE_POINT <= codePoint && MAX_CODE_POINT >= codePoint); } /** * Indicates whether {@code ch} is a high- (or leading-) surrogate code unit * that is used for representing supplementary characters in UTF-16 * encoding. * * @param ch * the character to test. * @return {@code true} if {@code ch} is a high-surrogate code unit; * {@code false} otherwise. * @see #isLowSurrogate(char) * @since 1.5 */ public static boolean isHighSurrogate(char ch) { return (MIN_HIGH_SURROGATE <= ch && MAX_HIGH_SURROGATE >= ch); } /** * Indicates whether {@code ch} is a low- (or trailing-) surrogate code unit * that is used for representing supplementary characters in UTF-16 * encoding. * * @param ch * the character to test. * @return {@code true} if {@code ch} is a low-surrogate code unit; * {@code false} otherwise. * @see #isHighSurrogate(char) * @since 1.5 */ public static boolean isLowSurrogate(char ch) { return (MIN_LOW_SURROGATE <= ch && MAX_LOW_SURROGATE >= ch); } /** * Returns true if the given character is a high or low surrogate. * @since 1.7 */ public static boolean isSurrogate(char ch) { return ch >= MIN_SURROGATE && ch <= MAX_SURROGATE; } /** * Indicates whether the specified character pair is a valid surrogate pair. * * @param high * the high surrogate unit to test. * @param low * the low surrogate unit to test. * @return {@code true} if {@code high} is a high-surrogate code unit and * {@code low} is a low-surrogate code unit; {@code false} * otherwise. * @see #isHighSurrogate(char) * @see #isLowSurrogate(char) * @since 1.5 */ public static boolean isSurrogatePair(char high, char low) { return (isHighSurrogate(high) && isLowSurrogate(low)); } /** * Calculates the number of {@code char} values required to represent the * specified Unicode code point. 
This method checks if the {@code codePoint} * is greater than or equal to {@code 0x10000}, in which case {@code 2} is * returned, otherwise {@code 1}. To test if the code point is valid, use * the {@link #isValidCodePoint(int)} method. * * @param codePoint * the code point for which to calculate the number of required * chars. * @return {@code 2} if {@code codePoint >= 0x10000}; {@code 1} otherwise. * @see #isValidCodePoint(int) * @see #isSupplementaryCodePoint(int) * @since 1.5 */ public static int charCount(int codePoint) { return (codePoint >= 0x10000 ? 2 : 1); } /** * Converts a surrogate pair into a Unicode code point. This method assumes * that the pair are valid surrogates. If the pair are <i>not</i> valid * surrogates, then the result is indeterminate. The * {@link #isSurrogatePair(char, char)} method should be used prior to this * method to validate the pair. * * @param high * the high surrogate unit. * @param low * the low surrogate unit. * @return the Unicode code point corresponding to the surrogate unit pair. * @see #isSurrogatePair(char, char) * @since 1.5 */ public static int toCodePoint(char high, char low) { // See RFC 2781, Section 2.2 // http://www.ietf.org/rfc/rfc2781.txt int h = (high & 0x3FF) << 10; int l = low & 0x3FF; return (h | l) + 0x10000; } /** * Returns the code point at {@code index} in the specified sequence of * character units. If the unit at {@code index} is a high-surrogate unit, * {@code index + 1} is less than the length of the sequence and the unit at * {@code index + 1} is a low-surrogate unit, then the supplementary code * point represented by the pair is returned; otherwise the {@code char} * value at {@code index} is returned. * * @param seq * the source sequence of {@code char} units. * @param index * the position in {@code seq} from which to retrieve the code * point. * @return the Unicode code point or {@code char} value at {@code index} in * {@code seq}. * @throws NullPointerException * if {@code seq} is {@code null}. 
* @throws IndexOutOfBoundsException * if the {@code index} is negative or greater than or equal to * the length of {@code seq}. * @since 1.5 */ public static int codePointAt(CharSequence seq, int index) { if (seq == null) { throw new NullPointerException("seq == null"); } int len = seq.length(); if (index < 0 || index >= len) { throw new IndexOutOfBoundsException(); } char high = seq.charAt(index++); if (index >= len) { return high; } char low = seq.charAt(index); if (isSurrogatePair(high, low)) { return toCodePoint(high, low); } return high; } /** * Returns the code point at {@code index} in the specified array of * character units. If the unit at {@code index} is a high-surrogate unit, * {@code index + 1} is less than the length of the array and the unit at * {@code index + 1} is a low-surrogate unit, then the supplementary code * point represented by the pair is returned; otherwise the {@code char} * value at {@code index} is returned. * * @param seq * the source array of {@code char} units. * @param index * the position in {@code seq} from which to retrieve the code * point. * @return the Unicode code point or {@code char} value at {@code index} in * {@code seq}. * @throws NullPointerException * if {@code seq} is {@code null}. * @throws IndexOutOfBoundsException * if the {@code index} is negative or greater than or equal to * the length of {@code seq}. * @since 1.5 */ public static int codePointAt(char[] seq, int index) { if (seq == null) { throw new NullPointerException("seq == null"); } int len = seq.length; if (index < 0 || index >= len) { throw new IndexOutOfBoundsException(); } char high = seq[index++]; if (index >= len) { return high; } char low = seq[index]; if (isSurrogatePair(high, low)) { return toCodePoint(high, low); } return high; } /** * Returns the code point at {@code index} in the specified array of * character units, where {@code index} has to be less than {@code limit}. 
* If the unit at {@code index} is a high-surrogate unit, {@code index + 1} * is less than {@code limit} and the unit at {@code index + 1} is a * low-surrogate unit, then the supplementary code point represented by the * pair is returned; otherwise the {@code char} value at {@code index} is * returned. * * @param seq * the source array of {@code char} units. * @param index * the position in {@code seq} from which to get the code point. * @param limit * the index after the last unit in {@code seq} that can be used. * @return the Unicode code point or {@code char} value at {@code index} in * {@code seq}. * @throws NullPointerException * if {@code seq} is {@code null}. * @throws IndexOutOfBoundsException * if {@code index < 0}, {@code index >= limit}, * {@code limit < 0} or if {@code limit} is greater than the * length of {@code seq}. * @since 1.5 */ public static int codePointAt(char[] seq, int index, int limit) { if (index < 0 || index >= limit || limit < 0 || limit > seq.length) { throw new IndexOutOfBoundsException(); } char high = seq[index++]; if (index >= limit) { return high; } char low = seq[index]; if (isSurrogatePair(high, low)) { return toCodePoint(high, low); } return high; } /** * Returns the code point that precedes {@code index} in the specified * sequence of character units. If the unit at {@code index - 1} is a * low-surrogate unit, {@code index - 2} is not negative and the unit at * {@code index - 2} is a high-surrogate unit, then the supplementary code * point represented by the pair is returned; otherwise the {@code char} * value at {@code index - 1} is returned. * * @param seq * the source sequence of {@code char} units. * @param index * the position in {@code seq} following the code * point that should be returned. * @return the Unicode code point or {@code char} value before {@code index} * in {@code seq}. * @throws NullPointerException * if {@code seq} is {@code null}. 
* @throws IndexOutOfBoundsException * if the {@code index} is less than 1 or greater than the * length of {@code seq}. * @since 1.5 */ public static int codePointBefore(CharSequence seq, int index) { if (seq == null) { throw new NullPointerException("seq == null"); } int len = seq.length(); if (index < 1 || index > len) { throw new IndexOutOfBoundsException(); } char low = seq.charAt(--index); if (--index < 0) { return low; } char high = seq.charAt(index); if (isSurrogatePair(high, low)) { return toCodePoint(high, low); } return low; } /** * Returns the code point that precedes {@code index} in the specified * array of character units. If the unit at {@code index - 1} is a * low-surrogate unit, {@code index - 2} is not negative and the unit at * {@code index - 2} is a high-surrogate unit, then the supplementary code * point represented by the pair is returned; otherwise the {@code char} * value at {@code index - 1} is returned. * * @param seq * the source array of {@code char} units. * @param index * the position in {@code seq} following the code * point that should be returned. * @return the Unicode code point or {@code char} value before {@code index} * in {@code seq}. * @throws NullPointerException * if {@code seq} is {@code null}. * @throws IndexOutOfBoundsException * if the {@code index} is less than 1 or greater than the * length of {@code seq}. * @since 1.5 */ public static int codePointBefore(char[] seq, int index) { if (seq == null) { throw new NullPointerException("seq == null"); } int len = seq.length; if (index < 1 || index > len) { throw new IndexOutOfBoundsException(); } char low = seq[--index]; if (--index < 0) { return low; } char high = seq[index]; if (isSurrogatePair(high, low)) { return toCodePoint(high, low); } return low; } /** * Returns the code point that precedes the {@code index} in the specified * array of character units and is not less than {@code start}. 
If the unit * at {@code index - 1} is a low-surrogate unit, {@code index - 2} is not * less than {@code start} and the unit at {@code index - 2} is a * high-surrogate unit, then the supplementary code point represented by the * pair is returned; otherwise the {@code char} value at {@code index - 1} * is returned. * * @param seq * the source array of {@code char} units. * @param index * the position in {@code seq} following the code point that * should be returned. * @param start * the index of the first element in {@code seq}. * @return the Unicode code point or {@code char} value before {@code index} * in {@code seq}. * @throws NullPointerException * if {@code seq} is {@code null}. * @throws IndexOutOfBoundsException * if the {@code index <= start}, {@code start < 0}, * {@code index} is greater than the length of {@code seq}, or * if {@code start} is equal or greater than the length of * {@code seq}. * @since 1.5 */ public static int codePointBefore(char[] seq, int index, int start) { if (seq == null) { throw new NullPointerException("seq == null"); } int len = seq.length; if (index <= start || index > len || start < 0 || start >= len) { throw new IndexOutOfBoundsException(); } char low = seq[--index]; if (--index < start) { return low; } char high = seq[index]; if (isSurrogatePair(high, low)) { return toCodePoint(high, low); } return low; } /** * Converts the specified Unicode code point into a UTF-16 encoded sequence * and copies the value(s) into the char array {@code dst}, starting at * index {@code dstIndex}. * * @param codePoint * the Unicode code point to encode. * @param dst * the destination array to copy the encoded value into. * @param dstIndex * the index in {@code dst} from where to start copying. * @return the number of {@code char} value units copied into {@code dst}. * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. * @throws NullPointerException * if {@code dst} is {@code null}. 
* @throws IndexOutOfBoundsException * if {@code dstIndex} is negative, greater than or equal to * {@code dst.length} or equals {@code dst.length - 1} when * {@code codePoint} is a * {@link #isSupplementaryCodePoint(int) supplementary code point}. * @since 1.5 */ public static int toChars(int codePoint, char[] dst, int dstIndex) { checkValidCodePoint(codePoint); if (dst == null) { throw new NullPointerException("dst == null"); } if (dstIndex < 0 || dstIndex >= dst.length) { throw new IndexOutOfBoundsException(); } if (isSupplementaryCodePoint(codePoint)) { if (dstIndex == dst.length - 1) { throw new IndexOutOfBoundsException(); } // See RFC 2781, Section 2.1 // http://www.ietf.org/rfc/rfc2781.txt int cpPrime = codePoint - 0x10000; int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); int low = 0xDC00 | (cpPrime & 0x3FF); dst[dstIndex] = (char) high; dst[dstIndex + 1] = (char) low; return 2; } dst[dstIndex] = (char) codePoint; return 1; } /** * Converts the specified Unicode code point into a UTF-16 encoded sequence * and returns it as a char array. * * @param codePoint * the Unicode code point to encode. * @return the UTF-16 encoded char sequence. If {@code codePoint} is a * {@link #isSupplementaryCodePoint(int) supplementary code point}, * then the returned array contains two characters, otherwise it * contains just one character. * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. * @since 1.5 */ public static char[] toChars(int codePoint) { checkValidCodePoint(codePoint); if (isSupplementaryCodePoint(codePoint)) { int cpPrime = codePoint - 0x10000; int high = 0xD800 | ((cpPrime >> 10) & 0x3FF); int low = 0xDC00 | (cpPrime & 0x3FF); return new char[] { (char) high, (char) low }; } return new char[] { (char) codePoint }; } /** * Counts the number of Unicode code points in the subsequence of the * specified character sequence, as delineated by {@code beginIndex} and * {@code endIndex}. 
Any surrogate values with missing pair values will be * counted as one code point. * * @param seq * the {@code CharSequence} to look through. * @param beginIndex * the inclusive index to begin counting at. * @param endIndex * the exclusive index to stop counting at. * @return the number of Unicode code points. * @throws NullPointerException * if {@code seq} is {@code null}. * @throws IndexOutOfBoundsException * if {@code beginIndex < 0}, {@code beginIndex > endIndex} or * if {@code endIndex} is greater than the length of {@code seq}. * @since 1.5 */ public static int codePointCount(CharSequence seq, int beginIndex, int endIndex) { if (seq == null) { throw new NullPointerException("seq == null"); } int len = seq.length(); if (beginIndex < 0 || endIndex > len || beginIndex > endIndex) { throw new IndexOutOfBoundsException(); } int result = 0; for (int i = beginIndex; i < endIndex; i++) { char c = seq.charAt(i); if (isHighSurrogate(c)) { if (++i < endIndex) { c = seq.charAt(i); if (!isLowSurrogate(c)) { result++; } } } result++; } return result; } /** * Counts the number of Unicode code points in the subsequence of the * specified char array, as delineated by {@code offset} and {@code count}. * Any surrogate values with missing pair values will be counted as one code * point. * * @param seq * the char array to look through * @param offset * the inclusive index to begin counting at. * @param count * the number of {@code char} values to look through in * {@code seq}. * @return the number of Unicode code points. * @throws NullPointerException * if {@code seq} is {@code null}. * @throws IndexOutOfBoundsException * if {@code offset < 0}, {@code count < 0} or if * {@code offset + count} is greater than the length of * {@code seq}. 
* @since 1.5
     */
    public static int codePointCount(char[] seq, int offset, int count) {
        // Validates offset/count against the array length; seq.length also surfaces a
        // null array as NullPointerException.
        Arrays.checkOffsetAndCount(seq.length, offset, count);
        int endIndex = offset + count;

        int result = 0;
        int i = offset;
        while (i < endIndex) {
            // Consume the following unit only when it really completes a surrogate pair.
            // Any other unit is left for the next iteration, because it may itself be a
            // high surrogate that starts a pair (e.g. "high, high, low" is two code
            // points: one unpaired surrogate plus one pair).
            if (isHighSurrogate(seq[i++])
                    && i < endIndex
                    && isLowSurrogate(seq[i])) {
                i++;
            }
            result++;
        }
        return result;
    }

    /**
     * Determines the index in the specified character sequence that is offset
     * {@code codePointOffset} code points from {@code index}.
     *
     * @param seq
     *            the character sequence to find the index in.
     * @param index
     *            the start index in {@code seq}.
     * @param codePointOffset
     *            the number of code points to look backwards or forwards; may
     *            be a negative or positive value.
     * @return the index in {@code seq} that is {@code codePointOffset} code
     *         points away from {@code index}.
     * @throws NullPointerException
     *             if {@code seq} is {@code null}.
     * @throws IndexOutOfBoundsException
     *             if {@code index < 0}, {@code index} is greater than the
     *             length of {@code seq}, or if there are not enough values in
     *             {@code seq} to skip {@code codePointOffset} code points
     *             forwards or backwards (if {@code codePointOffset} is
     *             negative) from {@code index}.
* @since 1.5 */ public static int offsetByCodePoints(CharSequence seq, int index, int codePointOffset) { if (seq == null) { throw new NullPointerException("seq == null"); } int len = seq.length(); if (index < 0 || index > len) { throw new IndexOutOfBoundsException(); } if (codePointOffset == 0) { return index; } if (codePointOffset > 0) { int codePoints = codePointOffset; int i = index; while (codePoints > 0) { codePoints--; if (i >= len) { throw new IndexOutOfBoundsException(); } if (isHighSurrogate(seq.charAt(i))) { int next = i + 1; if (next < len && isLowSurrogate(seq.charAt(next))) { i++; } } i++; } return i; } int codePoints = -codePointOffset; int i = index; while (codePoints > 0) { codePoints--; i--; if (i < 0) { throw new IndexOutOfBoundsException(); } if (isLowSurrogate(seq.charAt(i))) { int prev = i - 1; if (prev >= 0 && isHighSurrogate(seq.charAt(prev))) { i--; } } } return i; } /** * Determines the index in a subsequence of the specified character array * that is offset {@code codePointOffset} code points from {@code index}. * The subsequence is delineated by {@code start} and {@code count}. * * @param seq * the character array to find the index in. * @param start * the inclusive index that marks the beginning of the * subsequence. * @param count * the number of {@code char} values to include within the * subsequence. * @param index * the start index in the subsequence of the char array. * @param codePointOffset * the number of code points to look backwards or forwards; may * be a negative or positive value. * @return the index in {@code seq} that is {@code codePointOffset} code * points away from {@code index}. * @throws NullPointerException * if {@code seq} is {@code null}. 
* @throws IndexOutOfBoundsException * if {@code start < 0}, {@code count < 0}, * {@code index < start}, {@code index > start + count}, * {@code start + count} is greater than the length of * {@code seq}, or if there are not enough values in * {@code seq} to skip {@code codePointOffset} code points * forward or backward (if {@code codePointOffset} is * negative) from {@code index}. * @since 1.5 */ public static int offsetByCodePoints(char[] seq, int start, int count, int index, int codePointOffset) { Arrays.checkOffsetAndCount(seq.length, start, count); int end = start + count; if (index < start || index > end) { throw new IndexOutOfBoundsException(); } if (codePointOffset == 0) { return index; } if (codePointOffset > 0) { int codePoints = codePointOffset; int i = index; while (codePoints > 0) { codePoints--; if (i >= end) { throw new IndexOutOfBoundsException(); } if (isHighSurrogate(seq[i])) { int next = i + 1; if (next < end && isLowSurrogate(seq[next])) { i++; } } i++; } return i; } int codePoints = -codePointOffset; int i = index; while (codePoints > 0) { codePoints--; i--; if (i < start) { throw new IndexOutOfBoundsException(); } if (isLowSurrogate(seq[i])) { int prev = i - 1; if (prev >= start && isHighSurrogate(seq[prev])) { i--; } } } return i; } /** * Convenience method to determine the value of the specified character * {@code c} in the supplied radix. The value of {@code radix} must be * between MIN_RADIX and MAX_RADIX. * * @param c * the character to determine the value of. * @param radix * the radix. * @return the value of {@code c} in {@code radix} if {@code radix} lies * between {@link #MIN_RADIX} and {@link #MAX_RADIX}; -1 otherwise. */ public static int digit(char c, int radix) { return digit((int) c, radix); } /** * Convenience method to determine the value of the character * {@code codePoint} in the supplied radix. The value of {@code radix} must * be between MIN_RADIX and MAX_RADIX. 
* * @param codePoint * the character, including supplementary characters. * @param radix * the radix. * @return if {@code radix} lies between {@link #MIN_RADIX} and * {@link #MAX_RADIX} then the value of the character in the radix; * -1 otherwise. */ public static int digit(int codePoint, int radix) { if (radix < MIN_RADIX || radix > MAX_RADIX) { return -1; } if (codePoint < 128) { // Optimized for ASCII int result = -1; if ('0' <= codePoint && codePoint <= '9') { result = codePoint - '0'; } else if ('a' <= codePoint && codePoint <= 'z') { result = 10 + (codePoint - 'a'); } else if ('A' <= codePoint && codePoint <= 'Z') { result = 10 + (codePoint - 'A'); } return result < radix ? result : -1; } return digitImpl(codePoint, radix); } private static native int digitImpl(int codePoint, int radix); /** * Compares this object with the specified object and indicates if they are * equal. In order to be equal, {@code object} must be an instance of * {@code Character} and have the same char value as this object. * * @param object * the object to compare this double with. * @return {@code true} if the specified object is equal to this * {@code Character}; {@code false} otherwise. */ @Override public boolean equals(Object object) { return (object instanceof Character) && (((Character) object).value == value); } /** * Returns the character which represents the specified digit in the * specified radix. The {@code radix} must be between {@code MIN_RADIX} and * {@code MAX_RADIX} inclusive; {@code digit} must not be negative and * smaller than {@code radix}. If any of these conditions does not hold, 0 * is returned. * * @param digit * the integer value. * @param radix * the radix. * @return the character which represents the {@code digit} in the * {@code radix}. */ public static char forDigit(int digit, int radix) { if (MIN_RADIX <= radix && radix <= MAX_RADIX) { if (digit >= 0 && digit < radix) { return (char) (digit < 10 ? 
digit + '0' : digit + 'a' - 10); } } return 0; } /** * Returns a human-readable name for the given code point, * or null if the code point is unassigned. * * <p>As a fallback mechanism this method returns strings consisting of the Unicode * block name (with underscores replaced by spaces), a single space, and the uppercase * hex value of the code point, using as few digits as necessary. * * <p>Examples: * <ul> * <li>{@code Character.getName(0)} returns "NULL". * <li>{@code Character.getName('e')} returns "LATIN SMALL LETTER E". * <li>{@code Character.getName('\u0666')} returns "ARABIC-INDIC DIGIT SIX". * <li>{@code Character.getName(0xe000)} returns "PRIVATE USE AREA E000". * </ul> * * <p>Note that the exact strings returned will vary from release to release. * * @throws IllegalArgumentException if {@code codePoint} is not a valid code point. * @since 1.7 */ public static String getName(int codePoint) { checkValidCodePoint(codePoint); if (getType(codePoint) == Character.UNASSIGNED) { return null; } String result = getNameImpl(codePoint); if (result == null) { String blockName = Character.UnicodeBlock.of(codePoint).toString().replace('_', ' '); result = blockName + " " + IntegralToString.intToHexString(codePoint, true, 0); } return result; } private static native String getNameImpl(int codePoint); /** * Returns the numeric value of the specified Unicode character. * See {@link #getNumericValue(int)}. * * @param c the character * @return a non-negative numeric integer value if a numeric value for * {@code c} exists, -1 if there is no numeric value for {@code c}, * -2 if the numeric value can not be represented as an integer. */ public static int getNumericValue(char c) { return getNumericValue((int) c); } /** * Gets the numeric value of the specified Unicode code point. For example, * the code point '\u216B' stands for the Roman number XII, which has the * numeric value 12. * * <p>There are two points of divergence between this method and the Unicode * specification. 
This method treats the letters a-z (in both upper and lower * cases, and their full-width variants) as numbers from 10 to 35. The * Unicode specification also supports the idea of code points with non-integer * numeric values; this method does not (except to the extent of returning -2 * for such code points). * * @param codePoint the code point * @return a non-negative numeric integer value if a numeric value for * {@code codePoint} exists, -1 if there is no numeric value for * {@code codePoint}, -2 if the numeric value can not be * represented with an integer. */ public static int getNumericValue(int codePoint) { // This is both an optimization and papers over differences between Java and ICU. if (codePoint < 128) { if (codePoint >= '0' && codePoint <= '9') { return codePoint - '0'; } if (codePoint >= 'a' && codePoint <= 'z') { return codePoint - ('a' - 10); } if (codePoint >= 'A' && codePoint <= 'Z') { return codePoint - ('A' - 10); } return -1; } // Full-width uppercase A-Z. if (codePoint >= 0xff21 && codePoint <= 0xff3a) { return codePoint - 0xff17; } // Full-width lowercase a-z. if (codePoint >= 0xff41 && codePoint <= 0xff5a) { return codePoint - 0xff37; } return getNumericValueImpl(codePoint); } private static native int getNumericValueImpl(int codePoint); /** * Gets the general Unicode category of the specified character. * * @param c * the character to get the category of. * @return the Unicode category of {@code c}. */ public static int getType(char c) { return getType((int) c); } /** * Gets the general Unicode category of the specified code point. * * @param codePoint * the Unicode code point to get the category of. * @return the Unicode category of {@code codePoint}. */ public static int getType(int codePoint) { int type = getTypeImpl(codePoint); // The type values returned by ICU are not RI-compatible. The RI skips the value 17. 
if (type <= Character.FORMAT) { return type; } return (type + 1); } private static native int getTypeImpl(int codePoint); /** * Gets the Unicode directionality of the specified character. * * @param c * the character to get the directionality of. * @return the Unicode directionality of {@code c}. */ public static byte getDirectionality(char c) { return getDirectionality((int)c); } /** * Returns the Unicode directionality of the given code point. * This will be one of the {@code DIRECTIONALITY_} constants. * For characters whose directionality is undefined, or whose * directionality has no appropriate constant in this class, * {@code DIRECTIONALITY_UNDEFINED} is returned. */ public static byte getDirectionality(int codePoint) { if (getType(codePoint) == Character.UNASSIGNED) { return Character.DIRECTIONALITY_UNDEFINED; } byte directionality = getIcuDirectionality(codePoint); if (directionality >= 0 && directionality < DIRECTIONALITY.length) { return DIRECTIONALITY[directionality]; } return Character.DIRECTIONALITY_UNDEFINED; } /** * @hide - internal use only. */ public static native byte getIcuDirectionality(int codePoint); /** * Indicates whether the specified character is mirrored. * * @param c * the character to check. * @return {@code true} if {@code c} is mirrored; {@code false} * otherwise. */ public static boolean isMirrored(char c) { return isMirrored((int) c); } /** * Indicates whether the specified code point is mirrored. * * @param codePoint * the code point to check. * @return {@code true} if {@code codePoint} is mirrored, {@code false} * otherwise. */ public static boolean isMirrored(int codePoint) { return isMirroredImpl(codePoint); } private static native boolean isMirroredImpl(int codePoint); @Override public int hashCode() { return value; } /** * Returns the high surrogate for the given code point. The result is meaningless if * the given code point is not a supplementary character. 
* @since 1.7 */ public static char highSurrogate(int codePoint) { return (char) ((codePoint >> 10) + 0xd7c0); } /** * Returns the low surrogate for the given code point. The result is meaningless if * the given code point is not a supplementary character. * @since 1.7 */ public static char lowSurrogate(int codePoint) { return (char) ((codePoint & 0x3ff) | 0xdc00); } /** * Returns true if the given code point is alphabetic. That is, * if it is in any of the Lu, Ll, Lt, Lm, Lo, Nl, or Other_Alphabetic categories. * @since 1.7 */ public static native boolean isAlphabetic(int codePoint); /** * Returns true if the given code point is in the Basic Multilingual Plane (BMP). * Such code points can be represented by a single {@code char}. * @since 1.7 */ public static boolean isBmpCodePoint(int codePoint) { return codePoint >= Character.MIN_VALUE && codePoint <= Character.MAX_VALUE; } /** * Indicates whether the specified character is defined in the Unicode * specification. * * @param c * the character to check. * @return {@code true} if the general Unicode category of the character is * not {@code UNASSIGNED}; {@code false} otherwise. */ public static boolean isDefined(char c) { return isDefinedImpl(c); } /** * Indicates whether the specified code point is defined in the Unicode * specification. * * @param codePoint * the code point to check. * @return {@code true} if the general Unicode category of the code point is * not {@code UNASSIGNED}; {@code false} otherwise. */ public static boolean isDefined(int codePoint) { return isDefinedImpl(codePoint); } private static native boolean isDefinedImpl(int codePoint); /** * Indicates whether the specified character is a digit. * * @param c * the character to check. * @return {@code true} if {@code c} is a digit; {@code false} * otherwise. */ public static boolean isDigit(char c) { return isDigit((int) c); } /** * Indicates whether the specified code point is a digit. * * @param codePoint * the code point to check. 
* @return {@code true} if {@code codePoint} is a digit; {@code false} * otherwise. */ public static boolean isDigit(int codePoint) { // Optimized case for ASCII if ('0' <= codePoint && codePoint <= '9') { return true; } if (codePoint < 1632) { return false; } return isDigitImpl(codePoint); } private static native boolean isDigitImpl(int codePoint); /** * Indicates whether the specified character is ignorable in a Java or * Unicode identifier. * * @param c * the character to check. * @return {@code true} if {@code c} is ignorable; {@code false} otherwise. */ public static boolean isIdentifierIgnorable(char c) { return isIdentifierIgnorable((int) c); } /** * Returns true if the given code point is a CJKV ideographic character. * @since 1.7 */ public static native boolean isIdeographic(int codePoint); /** * Indicates whether the specified code point is ignorable in a Java or * Unicode identifier. * * @param codePoint * the code point to check. * @return {@code true} if {@code codePoint} is ignorable; {@code false} * otherwise. */ public static boolean isIdentifierIgnorable(int codePoint) { // This is both an optimization and papers over differences between Java and ICU. if (codePoint < 0x600) { return (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) || (codePoint >= 0x7f && codePoint <= 0x9f) || (codePoint == 0xad); } return isIdentifierIgnorableImpl(codePoint); } private static native boolean isIdentifierIgnorableImpl(int codePoint); /** * Indicates whether the specified character is an ISO control character. * * @param c * the character to check. * @return {@code true} if {@code c} is an ISO control character; * {@code false} otherwise. */ public static boolean isISOControl(char c) { return isISOControl((int) c); } /** * Indicates whether the specified code point is an ISO control character. * * @param c * the code point to check. * @return {@code true} if {@code c} is an ISO control character; * {@code false} otherwise. 
*/ public static boolean isISOControl(int c) { return (c >= 0 && c <= 0x1f) || (c >= 0x7f && c <= 0x9f); } /** * Indicates whether the specified character is a valid part of a Java * identifier other than the first character. * * @param c * the character to check. * @return {@code true} if {@code c} is valid as part of a Java identifier; * {@code false} otherwise. */ public static boolean isJavaIdentifierPart(char c) { return isJavaIdentifierPart((int) c); } /** * Indicates whether the specified code point is a valid part of a Java * identifier other than the first character. * * @param codePoint * the code point to check. * @return {@code true} if {@code c} is valid as part of a Java identifier; * {@code false} otherwise. */ public static boolean isJavaIdentifierPart(int codePoint) { // Use precomputed bitmasks to optimize the ASCII range. if (codePoint < 64) { return (0x3ff00100fffc1ffL & (1L << codePoint)) != 0; } else if (codePoint < 128) { return (0x87fffffe87fffffeL & (1L << (codePoint - 64))) != 0; } int type = getType(codePoint); return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION || (type >= DECIMAL_DIGIT_NUMBER && type <= LETTER_NUMBER) || type == COMBINING_SPACING_MARK || type == NON_SPACING_MARK || (codePoint >= 0 && codePoint <= 8) || (codePoint >= 0xe && codePoint <= 0x1b) || (codePoint >= 0x7f && codePoint <= 0x9f) || type == FORMAT; } /** * Indicates whether the specified character is a valid first character for * a Java identifier. * * @param c * the character to check. * @return {@code true} if {@code c} is a valid first character of a Java * identifier; {@code false} otherwise. */ public static boolean isJavaIdentifierStart(char c) { return isJavaIdentifierStart((int) c); } /** * Indicates whether the specified code point is a valid first character for * a Java identifier. * * @param codePoint * the code point to check. 
* @return {@code true} if {@code codePoint} is a valid start of a Java * identifier; {@code false} otherwise. */ public static boolean isJavaIdentifierStart(int codePoint) { // Use precomputed bitmasks to optimize the ASCII range. if (codePoint < 64) { return (codePoint == '$'); // There's only one character in this range. } else if (codePoint < 128) { return (0x7fffffe87fffffeL & (1L << (codePoint - 64))) != 0; } int type = getType(codePoint); return (type >= UPPERCASE_LETTER && type <= OTHER_LETTER) || type == CURRENCY_SYMBOL || type == CONNECTOR_PUNCTUATION || type == LETTER_NUMBER; } /** * Indicates whether the specified character is a Java letter. * * @param c * the character to check. * @return {@code true} if {@code c} is a Java letter; {@code false} * otherwise. * @deprecated Use {@link #isJavaIdentifierStart(char)} instead. */ @Deprecated public static boolean isJavaLetter(char c) { return isJavaIdentifierStart(c); } /** * Indicates whether the specified character is a Java letter or digit * character. * * @param c * the character to check. * @return {@code true} if {@code c} is a Java letter or digit; * {@code false} otherwise. * @deprecated Use {@link #isJavaIdentifierPart(char)} instead. */ @Deprecated public static boolean isJavaLetterOrDigit(char c) { return isJavaIdentifierPart(c); } /** * Indicates whether the specified character is a letter. * * @param c * the character to check. * @return {@code true} if {@code c} is a letter; {@code false} otherwise. */ public static boolean isLetter(char c) { return isLetter((int) c); } /** * Indicates whether the specified code point is a letter. * * @param codePoint * the code point to check. * @return {@code true} if {@code codePoint} is a letter; {@code false} * otherwise. 
*/ public static boolean isLetter(int codePoint) { if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { return true; } if (codePoint < 128) { return false; } return isLetterImpl(codePoint); } private static native boolean isLetterImpl(int codePoint); /** * Indicates whether the specified character is a letter or a digit. * * @param c * the character to check. * @return {@code true} if {@code c} is a letter or a digit; {@code false} * otherwise. */ public static boolean isLetterOrDigit(char c) { return isLetterOrDigit((int) c); } /** * Indicates whether the specified code point is a letter or a digit. * * @param codePoint * the code point to check. * @return {@code true} if {@code codePoint} is a letter or a digit; * {@code false} otherwise. */ public static boolean isLetterOrDigit(int codePoint) { // Optimized case for ASCII if (('A' <= codePoint && codePoint <= 'Z') || ('a' <= codePoint && codePoint <= 'z')) { return true; } if ('0' <= codePoint && codePoint <= '9') { return true; } if (codePoint < 128) { return false; } return isLetterOrDigitImpl(codePoint); } private static native boolean isLetterOrDigitImpl(int codePoint); /** * Indicates whether the specified character is a lower case letter. * * @param c * the character to check. * @return {@code true} if {@code c} is a lower case letter; {@code false} * otherwise. */ public static boolean isLowerCase(char c) { return isLowerCase((int) c); } /** * Indicates whether the specified code point is a lower case letter. * * @param codePoint * the code point to check. * @return {@code true} if {@code codePoint} is a lower case letter; * {@code false} otherwise. */ public static boolean isLowerCase(int codePoint) { // Optimized case for ASCII if ('a' <= codePoint && codePoint <= 'z') { return true; } if (codePoint < 128) { return false; } return isLowerCaseImpl(codePoint); } private static native boolean isLowerCaseImpl(int codePoint); /** * Use {@link #isWhitespace(char)} instead. 
* @deprecated Use {@link #isWhitespace(char)} instead. */ @Deprecated public static boolean isSpace(char c) { return c == '\n' || c == '\t' || c == '\f' || c == '\r' || c == ' '; } /** * See {@link #isSpaceChar(int)}. */ public static boolean isSpaceChar(char c) { return isSpaceChar((int) c); } /** * Returns true if the given code point is a Unicode space character. * The exact set of characters considered as whitespace varies with Unicode version. * Note that non-breaking spaces are considered whitespace. * Note also that line separators are not considered whitespace; see {@link #isWhitespace} * for an alternative. */ public static boolean isSpaceChar(int codePoint) { // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. // SPACE or NO-BREAK SPACE? if (codePoint == 0x20 || codePoint == 0xa0) { return true; } if (codePoint < 0x1000) { return false; } // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? if (codePoint == 0x1680 || codePoint == 0x180e) { return true; } if (codePoint < 0x2000) { return false; } if (codePoint <= 0xffff) { // Other whitespace from General Punctuation... return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x202f || codePoint == 0x205f || codePoint == 0x3000; // ...or CJK Symbols and Punctuation? } // Let icu4c worry about non-BMP code points. return isSpaceCharImpl(codePoint); } private static native boolean isSpaceCharImpl(int codePoint); /** * Indicates whether the specified character is a titlecase character. * * @param c * the character to check. * @return {@code true} if {@code c} is a titlecase character, {@code false} * otherwise. */ public static boolean isTitleCase(char c) { return isTitleCaseImpl(c); } /** * Indicates whether the specified code point is a titlecase character. * * @param codePoint * the code point to check. * @return {@code true} if {@code codePoint} is a titlecase character, * {@code false} otherwise. 
*/
    public static boolean isTitleCase(int codePoint) {
        // Pure delegation: the answer comes from the native implementation.
        return isTitleCaseImpl(codePoint);
    }

    private static native boolean isTitleCaseImpl(int codePoint);

    /**
     * Indicates whether the specified character is valid as part of a Unicode
     * identifier other than the first character.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is valid as part of a Unicode
     *         identifier; {@code false} otherwise.
     */
    public static boolean isUnicodeIdentifierPart(char c) {
        // The char is widened to int for the shared native implementation.
        return isUnicodeIdentifierPartImpl(c);
    }

    /**
     * Indicates whether the specified code point is valid as part of a Unicode
     * identifier other than the first character.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is valid as part of a Unicode
     *         identifier; {@code false} otherwise.
     */
    public static boolean isUnicodeIdentifierPart(int codePoint) {
        return isUnicodeIdentifierPartImpl(codePoint);
    }

    private static native boolean isUnicodeIdentifierPartImpl(int codePoint);

    /**
     * Indicates whether the specified character is a valid initial character
     * for a Unicode identifier.
     *
     * @param c
     *            the character to check.
     * @return {@code true} if {@code c} is a valid first character for a
     *         Unicode identifier; {@code false} otherwise.
     */
    public static boolean isUnicodeIdentifierStart(char c) {
        // The char is widened to int for the shared native implementation.
        return isUnicodeIdentifierStartImpl(c);
    }

    /**
     * Indicates whether the specified code point is a valid initial character
     * for a Unicode identifier.
     *
     * @param codePoint
     *            the code point to check.
     * @return {@code true} if {@code codePoint} is a valid first character for
     *         a Unicode identifier; {@code false} otherwise.
     */
    public static boolean isUnicodeIdentifierStart(int codePoint) {
        return isUnicodeIdentifierStartImpl(codePoint);
    }

    private static native boolean isUnicodeIdentifierStartImpl(int codePoint);

    /**
     * Indicates whether the specified character is an upper case letter.
     *
     * @param c
     *            the character to check.
* @return {@code true} if {@code c} is a upper case letter; {@code false} * otherwise. */ public static boolean isUpperCase(char c) { return isUpperCase((int) c); } /** * Indicates whether the specified code point is an upper case letter. * * @param codePoint * the code point to check. * @return {@code true} if {@code codePoint} is a upper case letter; * {@code false} otherwise. */ public static boolean isUpperCase(int codePoint) { // Optimized case for ASCII if ('A' <= codePoint && codePoint <= 'Z') { return true; } if (codePoint < 128) { return false; } return isUpperCaseImpl(codePoint); } private static native boolean isUpperCaseImpl(int codePoint); /** * See {@link #isWhitespace(int)}. */ public static boolean isWhitespace(char c) { return isWhitespace((int) c); } /** * Returns true if the given code point is a Unicode whitespace character. * The exact set of characters considered as whitespace varies with Unicode version. * Note that non-breaking spaces are not considered whitespace. * Note also that line separators are considered whitespace; see {@link #isSpaceChar} * for an alternative. */ public static boolean isWhitespace(int codePoint) { // We don't just call into icu4c because of the JNI overhead. Ideally we'd fix that. // Any ASCII whitespace character? if ((codePoint >= 0x1c && codePoint <= 0x20) || (codePoint >= 0x09 && codePoint <= 0x0d)) { return true; } if (codePoint < 0x1000) { return false; } // OGHAM SPACE MARK or MONGOLIAN VOWEL SEPARATOR? if (codePoint == 0x1680 || codePoint == 0x180e) { return true; } if (codePoint < 0x2000) { return false; } // Exclude General Punctuation's non-breaking spaces (which includes FIGURE SPACE). if (codePoint == 0x2007 || codePoint == 0x202f) { return false; } if (codePoint <= 0xffff) { // Other whitespace from General Punctuation... return codePoint <= 0x200a || codePoint == 0x2028 || codePoint == 0x2029 || codePoint == 0x205f || codePoint == 0x3000; // ...or CJK Symbols and Punctuation? 
} // Let icu4c worry about non-BMP code points. return isWhitespaceImpl(codePoint); } private static native boolean isWhitespaceImpl(int codePoint); /** * Reverses the order of the first and second byte in the specified * character. * * @param c * the character to reverse. * @return the character with reordered bytes. */ public static char reverseBytes(char c) { return (char)((c<<8) | (c>>8)); } /** * Returns the lower case equivalent for the specified character if the * character is an upper case letter. Otherwise, the specified character is * returned unchanged. * * @param c * the character * @return if {@code c} is an upper case character then its lower case * counterpart, otherwise just {@code c}. */ public static char toLowerCase(char c) { return (char) toLowerCase((int) c); } /** * Returns the lower case equivalent for the specified code point if it is * an upper case letter. Otherwise, the specified code point is returned * unchanged. * * @param codePoint * the code point to check. * @return if {@code codePoint} is an upper case character then its lower * case counterpart, otherwise just {@code codePoint}. */ public static int toLowerCase(int codePoint) { // Optimized case for ASCII if ('A' <= codePoint && codePoint <= 'Z') { return (char) (codePoint + ('a' - 'A')); } if (codePoint < 192) { return codePoint; } return toLowerCaseImpl(codePoint); } private static native int toLowerCaseImpl(int codePoint); @Override public String toString() { return String.valueOf(value); } /** * Converts the specified character to its string representation. * * @param value * the character to convert. * @return the character converted to a string. */ public static String toString(char value) { return String.valueOf(value); } /** * Returns the title case equivalent for the specified character if it * exists. Otherwise, the specified character is returned unchanged. * * @param c * the character to convert. 
* @return the title case equivalent of {@code c} if it exists, otherwise
     *         {@code c}.
     */
    public static char toTitleCase(char c) {
        // The native routine works on int code points; narrow its result back to char.
        final int titleCased = toTitleCaseImpl(c);
        return (char) titleCased;
    }

    /**
     * Returns the title case equivalent of the specified code point when one
     * exists; otherwise the code point is returned unchanged.
     *
     * @param codePoint
     *            the code point to convert.
     * @return the title case equivalent of {@code codePoint} if it exists,
     *         otherwise {@code codePoint}.
     */
    public static int toTitleCase(int codePoint) {
        final int titleCased = toTitleCaseImpl(codePoint);
        return titleCased;
    }

    // Native backing for both toTitleCase overloads.
    private static native int toTitleCaseImpl(int codePoint);

    /**
     * Returns the upper case equivalent of the specified character when it is
     * a lower case letter; any other character is returned unchanged.
     *
     * @param c
     *            the character to convert.
     * @return if {@code c} is a lower case character then its upper case
     *         counterpart, otherwise just {@code c}.
     */
    public static char toUpperCase(char c) {
        // Widening to int selects the code point overload rather than recursing.
        final int codePoint = c;
        return (char) toUpperCase(codePoint);
    }

    /**
     * Returns the upper case equivalent of the specified code point when it
     * is a lower case letter; any other code point is returned unchanged.
     *
     * @param codePoint
     *            the code point to convert.
     * @return if {@code codePoint} is a lower case character then its upper
     *         case counterpart, otherwise just {@code codePoint}.
     */
    public static int toUpperCase(int codePoint) {
        // Everything below 181 is decided here without the native call:
        // 'a'..'z' are shifted to 'A'..'Z', the rest is returned as is.
        if (codePoint < 181) {
            final boolean asciiLower = 'a' <= codePoint && codePoint <= 'z';
            return asciiLower ? codePoint - ('a' - 'A') : codePoint;
        }
        return toUpperCaseImpl(codePoint);
    }

    // Native backing for toUpperCase(int), used for code points of 181 and above.
    private static native int toUpperCaseImpl(int codePoint);
}