// This file was generated AUTOMATICALLY from a template file Fri Jun 20 12:51:09 PDT 2003 /* @(#)Character.java.template 1.7 03/01/13 * * Copyright 1990-2008 Sun Microsystems, Inc. All Rights Reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version * 2 only, as published by the Free Software Foundation. * * This program is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License version 2 for more details (a copy is * included at /legal/license.txt). * * You should have received a copy of the GNU General Public License * version 2 along with this work; if not, write to the Free Software * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA * 02110-1301 USA * * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa * Clara, CA 95054 or visit www.sun.com if you need additional * information or have any questions. * */ package java.lang; /** * The <code>Character</code> class wraps a value of the primitive * type <code>char</code> in an object. An object of type * <code>Character</code> contains a single field whose type is * <code>char</code>. * <p> * In addition, this class provides several methods for determining * a character's category (lowercase letter, digit, etc.) and for converting * characters from uppercase to lowercase and vice versa. * <p> * Character information is based on the Unicode Standard, version 3.0. * <p> * The methods and data of class <code>Character</code> are defined by * the information in the <i>UnicodeData</i> file that is part of the * Unicode Character Database maintained by the Unicode * Consortium. 
This file specifies various properties including name * and general category for every defined Unicode code point or * character range. * <p> * The file and its description are available from the Unicode Consortium at: * <ul> * <li><a href="http://www.unicode.org">http://www.unicode.org</a> * </ul> * * @author Lee Boynton * @author Guy Steele * @author Akira Tanaka * @since 1.0 */ public final class Character extends Object implements java.io.Serializable, Comparable { /** * The minimum radix available for conversion to and from strings. * The constant value of this field is the smallest value permitted * for the radix argument in radix-conversion methods such as the * <code>digit</code> method, the <code>forDigit</code> * method, and the <code>toString</code> method of class * <code>Integer</code>. * * @see java.lang.Character#digit(char, int) * @see java.lang.Character#forDigit(int, int) * @see java.lang.Integer#toString(int, int) * @see java.lang.Integer#valueOf(java.lang.String) */ public static final int MIN_RADIX = 2; /** * The maximum radix available for conversion to and from strings. * The constant value of this field is the largest value permitted * for the radix argument in radix-conversion methods such as the * <code>digit</code> method, the <code>forDigit</code> * method, and the <code>toString</code> method of class * <code>Integer</code>. * * @see java.lang.Character#digit(char, int) * @see java.lang.Character#forDigit(int, int) * @see java.lang.Integer#toString(int, int) * @see java.lang.Integer#valueOf(java.lang.String) */ public static final int MAX_RADIX = 36; /** * The constant value of this field is the smallest value of type * <code>char</code>, <code>'\u0000'</code>. * * @since 1.0.2 */ public static final char MIN_VALUE = '\u0000'; /** * The constant value of this field is the largest value of type * <code>char</code>, <code>'\uFFFF'</code>. 
*
     * @since   1.0.2
     */
    public static final char MAX_VALUE = '\uffff';

    /**
     * The <code>Class</code> instance representing the primitive type
     * <code>char</code>.
     *
     * @since   1.1
     */
    public static final Class TYPE = Class.getPrimitiveClass("char");

    /*
     * Normative general types
     */

    /*
     * General character types
     */

    /**
     * General category "Cn" in the Unicode specification.
     * @since   1.1
     */
    public static final byte UNASSIGNED = 0;

    /**
     * General category "Lu" in the Unicode specification.
     * @since   1.1
     */
    public static final byte UPPERCASE_LETTER = 1;

    /**
     * General category "Ll" in the Unicode specification.
     * @since   1.1
     */
    public static final byte LOWERCASE_LETTER = 2;

    /**
     * General category "Lt" in the Unicode specification.
     * @since   1.1
     */
    public static final byte TITLECASE_LETTER = 3;

    /**
     * General category "Lm" in the Unicode specification.
     * @since   1.1
     */
    public static final byte MODIFIER_LETTER = 4;

    /**
     * General category "Lo" in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_LETTER = 5;

    /**
     * General category "Mn" in the Unicode specification.
     * @since   1.1
     */
    public static final byte NON_SPACING_MARK = 6;

    /**
     * General category "Me" in the Unicode specification.
     * @since   1.1
     */
    public static final byte ENCLOSING_MARK = 7;

    /**
     * General category "Mc" in the Unicode specification.
     * @since   1.1
     */
    public static final byte COMBINING_SPACING_MARK = 8;

    /**
     * General category "Nd" in the Unicode specification.
     * @since   1.1
     */
    public static final byte DECIMAL_DIGIT_NUMBER = 9;

    /**
     * General category "Nl" in the Unicode specification.
     * @since   1.1
     */
    public static final byte LETTER_NUMBER = 10;

    /**
     * General category "No" in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_NUMBER = 11;

    /**
     * General category "Zs" in the Unicode specification.
     * @since   1.1
     */
    public static final byte SPACE_SEPARATOR = 12;

    /**
     * General category "Zl" in the Unicode specification.
     * @since   1.1
     */
    public static final byte LINE_SEPARATOR = 13;

    /**
     * General category "Zp" in the Unicode specification.
     * @since   1.1
     */
    public static final byte PARAGRAPH_SEPARATOR = 14;

    /**
     * General category "Cc" in the Unicode specification.
     * @since   1.1
     */
    public static final byte CONTROL = 15;

    /**
     * General category "Cf" in the Unicode specification.
     * @since   1.1
     */
    public static final byte FORMAT = 16;

    // Note: none of the category constants declared here uses the value 17;
    // the numbering jumps from FORMAT (16) straight to PRIVATE_USE (18).

    /**
     * General category "Co" in the Unicode specification.
     * @since   1.1
     */
    public static final byte PRIVATE_USE = 18;

    /**
     * General category "Cs" in the Unicode specification.
     * @since   1.1
     */
    public static final byte SURROGATE = 19;

    /**
     * General category "Pd" in the Unicode specification.
     * @since   1.1
     */
    public static final byte DASH_PUNCTUATION = 20;

    /**
     * General category "Ps" in the Unicode specification.
     * @since   1.1
     */
    public static final byte START_PUNCTUATION = 21;

    /**
     * General category "Pe" in the Unicode specification.
     * @since   1.1
     */
    public static final byte END_PUNCTUATION = 22;

    /**
     * General category "Pc" in the Unicode specification.
     * @since   1.1
     */
    public static final byte CONNECTOR_PUNCTUATION = 23;

    /**
     * General category "Po" in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_PUNCTUATION = 24;

    /**
     * General category "Sm" in the Unicode specification.
     * @since   1.1
     */
    public static final byte MATH_SYMBOL = 25;

    /**
     * General category "Sc" in the Unicode specification.
     * @since   1.1
     */
    public static final byte CURRENCY_SYMBOL = 26;

    /**
     * General category "Sk" in the Unicode specification.
     * @since   1.1
     */
    public static final byte MODIFIER_SYMBOL = 27;

    /**
     * General category "So" in the Unicode specification.
     * @since   1.1
     */
    public static final byte OTHER_SYMBOL = 28;

    /**
     * General category "Pi" in the Unicode specification.
     * @since   1.4
     */
    public static final byte INITIAL_QUOTE_PUNCTUATION = 29;

    /**
     * General category "Pf" in the Unicode specification.
     * @since   1.4
     */
    public static final byte FINAL_QUOTE_PUNCTUATION = 30;

    /**
     * Error or non-char flag
     * @since 1.4
     */
    static final char CHAR_ERROR = '\uFFFF';

    /**
     * Undefined bidirectional character type. Undefined <code>char</code>
     * values have undefined directionality in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_UNDEFINED = -1;

    /**
     * Strong bidirectional character type "L" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_LEFT_TO_RIGHT = 0;

    /**
     * Strong bidirectional character type "R" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT = 1;

    /**
     * Strong bidirectional character type "AL" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC = 2;

    /**
     * Weak bidirectional character type "EN" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER = 3;

    /**
     * Weak bidirectional character type "ES" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR = 4;

    /**
     * Weak bidirectional character type "ET" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR = 5;

    /**
     * Weak bidirectional character type "AN" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_ARABIC_NUMBER = 6;

    /**
     * Weak bidirectional character type "CS" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_COMMON_NUMBER_SEPARATOR = 7;

    /**
     * Weak bidirectional character type "NSM" in the Unicode specification.
     * @since 1.4
     */
    public static final byte DIRECTIONALITY_NONSPACING_MARK = 8;

    /**
     * Weak bidirectional character type "BN" in the Unicode specification.
* @since 1.4 */ public static final byte DIRECTIONALITY_BOUNDARY_NEUTRAL = 9; /** * Neutral bidirectional character type "B" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_PARAGRAPH_SEPARATOR = 10; /** * Neutral bidirectional character type "S" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_SEGMENT_SEPARATOR = 11; /** * Neutral bidirectional character type "WS" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_WHITESPACE = 12; /** * Neutral bidirectional character type "ON" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_OTHER_NEUTRALS = 13; /** * Strong bidirectional character type "LRE" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING = 14; /** * Strong bidirectional character type "LRO" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE = 15; /** * Strong bidirectional character type "RLE" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING = 16; /** * Strong bidirectional character type "RLO" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE = 17; /** * Weak bidirectional character type "PDF" in the Unicode specification. * @since 1.4 */ public static final byte DIRECTIONALITY_POP_DIRECTIONAL_FORMAT = 18; // Maximum character handled by internal fast-path code which // avoids initializing large tables. // Note: performance of this "fast-path" code may be sub-optimal // in negative cases for some accessors due to complicated ranges. // Should revisit after optimization of table initialization. private static final int FAST_PATH_MAX = 255; /** * Instances of this class represent particular subsets of the Unicode * character set. 
The only family of subsets defined in the * <code>Character</code> class is <code>{@link Character.UnicodeBlock * UnicodeBlock}</code>. Other portions of the Java API may define other * subsets for their own purposes. * * @since 1.2 */ public static class Subset { private String name; /** * Constructs a new <code>Subset</code> instance. * * @exception NullPointerException if name is <code>null</code> * @param name The name of this subset */ protected Subset(String name) { if (name == null) { throw new NullPointerException("name"); } this.name = name; } /** * Compares two <code>Subset</code> objects for equality. * This method returns <code>true</code> if and only if * <code>this</code> and the argument refer to the same * object; since this method is <code>final</code>, this * guarantee holds for all subclasses. */ public final boolean equals(Object obj) { return (this == obj); } /** * Returns the standard hash code as defined by the * <code>{@link Object#hashCode}</code> method. This method * is <code>final</code> in order to ensure that the * <code>equals</code> and <code>hashCode</code> methods will * be consistent in all subclasses. */ public final int hashCode() { return super.hashCode(); } /** * Returns the name of this subset. */ public final String toString() { return name; } } /** * A family of character subsets representing the character blocks in the * Unicode specification. Character blocks generally define characters * used for a specific script or purpose. A character is contained by * at most one Unicode block. * * @since 1.2 */ public static final class UnicodeBlock extends Subset { private UnicodeBlock(String name) { super(name); } /** * Constant for the Unicode character block of the same name. 
*/
        // One comma-separated declaration: every constant below is constructed
        // with a name string identical to its own field name.
        public static final UnicodeBlock
            BASIC_LATIN
                = new UnicodeBlock("BASIC_LATIN"),
            LATIN_1_SUPPLEMENT
                = new UnicodeBlock("LATIN_1_SUPPLEMENT"),
            LATIN_EXTENDED_A
                = new UnicodeBlock("LATIN_EXTENDED_A"),
            LATIN_EXTENDED_B
                = new UnicodeBlock("LATIN_EXTENDED_B"),
            IPA_EXTENSIONS
                = new UnicodeBlock("IPA_EXTENSIONS"),
            SPACING_MODIFIER_LETTERS
                = new UnicodeBlock("SPACING_MODIFIER_LETTERS"),
            COMBINING_DIACRITICAL_MARKS
                = new UnicodeBlock("COMBINING_DIACRITICAL_MARKS"),
            GREEK
                = new UnicodeBlock("GREEK"),
            CYRILLIC
                = new UnicodeBlock("CYRILLIC"),
            ARMENIAN
                = new UnicodeBlock("ARMENIAN"),
            HEBREW
                = new UnicodeBlock("HEBREW"),
            ARABIC
                = new UnicodeBlock("ARABIC"),
            DEVANAGARI
                = new UnicodeBlock("DEVANAGARI"),
            BENGALI
                = new UnicodeBlock("BENGALI"),
            GURMUKHI
                = new UnicodeBlock("GURMUKHI"),
            GUJARATI
                = new UnicodeBlock("GUJARATI"),
            ORIYA
                = new UnicodeBlock("ORIYA"),
            TAMIL
                = new UnicodeBlock("TAMIL"),
            TELUGU
                = new UnicodeBlock("TELUGU"),
            KANNADA
                = new UnicodeBlock("KANNADA"),
            MALAYALAM
                = new UnicodeBlock("MALAYALAM"),
            THAI
                = new UnicodeBlock("THAI"),
            LAO
                = new UnicodeBlock("LAO"),
            TIBETAN
                = new UnicodeBlock("TIBETAN"),
            GEORGIAN
                = new UnicodeBlock("GEORGIAN"),
            HANGUL_JAMO
                = new UnicodeBlock("HANGUL_JAMO"),
            LATIN_EXTENDED_ADDITIONAL
                = new UnicodeBlock("LATIN_EXTENDED_ADDITIONAL"),
            GREEK_EXTENDED
                = new UnicodeBlock("GREEK_EXTENDED"),
            GENERAL_PUNCTUATION
                = new UnicodeBlock("GENERAL_PUNCTUATION"),
            SUPERSCRIPTS_AND_SUBSCRIPTS
                = new UnicodeBlock("SUPERSCRIPTS_AND_SUBSCRIPTS"),
            CURRENCY_SYMBOLS
                = new UnicodeBlock("CURRENCY_SYMBOLS"),
            COMBINING_MARKS_FOR_SYMBOLS
                = new UnicodeBlock("COMBINING_MARKS_FOR_SYMBOLS"),
            LETTERLIKE_SYMBOLS
                = new UnicodeBlock("LETTERLIKE_SYMBOLS"),
            NUMBER_FORMS
                = new UnicodeBlock("NUMBER_FORMS"),
            ARROWS
                = new UnicodeBlock("ARROWS"),
            MATHEMATICAL_OPERATORS
                = new UnicodeBlock("MATHEMATICAL_OPERATORS"),
            MISCELLANEOUS_TECHNICAL
                = new UnicodeBlock("MISCELLANEOUS_TECHNICAL"),
            CONTROL_PICTURES
                = new UnicodeBlock("CONTROL_PICTURES"),
            OPTICAL_CHARACTER_RECOGNITION
                = new UnicodeBlock("OPTICAL_CHARACTER_RECOGNITION"),
            ENCLOSED_ALPHANUMERICS
                = new UnicodeBlock("ENCLOSED_ALPHANUMERICS"),
            BOX_DRAWING
                = new UnicodeBlock("BOX_DRAWING"),
            BLOCK_ELEMENTS
                = new UnicodeBlock("BLOCK_ELEMENTS"),
            GEOMETRIC_SHAPES
                = new UnicodeBlock("GEOMETRIC_SHAPES"),
            MISCELLANEOUS_SYMBOLS
                = new UnicodeBlock("MISCELLANEOUS_SYMBOLS"),
            DINGBATS
                = new UnicodeBlock("DINGBATS"),
            CJK_SYMBOLS_AND_PUNCTUATION
                = new UnicodeBlock("CJK_SYMBOLS_AND_PUNCTUATION"),
            HIRAGANA
                = new UnicodeBlock("HIRAGANA"),
            KATAKANA
                = new UnicodeBlock("KATAKANA"),
            BOPOMOFO
                = new UnicodeBlock("BOPOMOFO"),
            HANGUL_COMPATIBILITY_JAMO
                = new UnicodeBlock("HANGUL_COMPATIBILITY_JAMO"),
            KANBUN
                = new UnicodeBlock("KANBUN"),
            ENCLOSED_CJK_LETTERS_AND_MONTHS
                = new UnicodeBlock("ENCLOSED_CJK_LETTERS_AND_MONTHS"),
            CJK_COMPATIBILITY
                = new UnicodeBlock("CJK_COMPATIBILITY"),
            CJK_UNIFIED_IDEOGRAPHS
                = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS"),
            HANGUL_SYLLABLES
                = new UnicodeBlock("HANGUL_SYLLABLES"),
            SURROGATES_AREA
                = new UnicodeBlock("SURROGATES_AREA"),
            PRIVATE_USE_AREA
                = new UnicodeBlock("PRIVATE_USE_AREA"),
            CJK_COMPATIBILITY_IDEOGRAPHS
                = new UnicodeBlock("CJK_COMPATIBILITY_IDEOGRAPHS"),
            ALPHABETIC_PRESENTATION_FORMS
                = new UnicodeBlock("ALPHABETIC_PRESENTATION_FORMS"),
            ARABIC_PRESENTATION_FORMS_A
                = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_A"),
            COMBINING_HALF_MARKS
                = new UnicodeBlock("COMBINING_HALF_MARKS"),
            CJK_COMPATIBILITY_FORMS
                = new UnicodeBlock("CJK_COMPATIBILITY_FORMS"),
            SMALL_FORM_VARIANTS
                = new UnicodeBlock("SMALL_FORM_VARIANTS"),
            ARABIC_PRESENTATION_FORMS_B
                = new UnicodeBlock("ARABIC_PRESENTATION_FORMS_B"),
            HALFWIDTH_AND_FULLWIDTH_FORMS
                = new UnicodeBlock("HALFWIDTH_AND_FULLWIDTH_FORMS"),
            SPECIALS
                = new UnicodeBlock("SPECIALS");

        /**
         * Constant for the Unicode character block of the same name.
* * @since 1.4 */ public static final UnicodeBlock SYRIAC = new UnicodeBlock("SYRIAC"), THAANA = new UnicodeBlock("THAANA"), SINHALA = new UnicodeBlock("SINHALA"), MYANMAR = new UnicodeBlock("MYANMAR"), ETHIOPIC = new UnicodeBlock("ETHIOPIC"), CHEROKEE = new UnicodeBlock("CHEROKEE"), UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS = new UnicodeBlock("UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS"), OGHAM = new UnicodeBlock("OGHAM"), RUNIC = new UnicodeBlock("RUNIC"), KHMER = new UnicodeBlock("KHMER"), MONGOLIAN = new UnicodeBlock("MONGOLIAN"), BRAILLE_PATTERNS = new UnicodeBlock("BRAILLE_PATTERNS"), CJK_RADICALS_SUPPLEMENT = new UnicodeBlock("CJK_RADICALS_SUPPLEMENT"), KANGXI_RADICALS = new UnicodeBlock("KANGXI_RADICALS"), IDEOGRAPHIC_DESCRIPTION_CHARACTERS = new UnicodeBlock("IDEOGRAPHIC_DESCRIPTION_CHARACTERS"), BOPOMOFO_EXTENDED = new UnicodeBlock("BOPOMOFO_EXTENDED"), CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A = new UnicodeBlock("CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A"), YI_SYLLABLES = new UnicodeBlock("YI_SYLLABLES"), YI_RADICALS = new UnicodeBlock("YI_RADICALS"); private static final char blockStarts[] = { '\u0000', // Basic Latin '\u0080', // Latin-1 Supplement '\u0100', // Latin Extended-A '\u0180', // Latin Extended-B '\u0250', // IPA Extensions '\u02B0', // Spacing Modifier Letters '\u0300', // Combining Diacritical Marks '\u0370', // Greek '\u0400', // Cyrillic '\u0500', // unassigned '\u0530', // Armenian '\u0590', // Hebrew '\u0600', // Arabic '\u0700', // Syriac '\u0750', // unassigned '\u0780', // Thaana '\u07C0', // unassigned '\u0900', // Devanagari '\u0980', // Bengali '\u0A00', // Gurmukhi '\u0A80', // Gujarati '\u0B00', // Oriya '\u0B80', // Tamil '\u0C00', // Telugu '\u0C80', // Kannada '\u0D00', // Malayalam '\u0D80', // Sinhala '\u0E00', // Thai '\u0E80', // Lao '\u0F00', // Tibetan '\u1000', // Myanmar '\u10A0', // Georgian '\u1100', // Hangul Jamo '\u1200', // Ethiopic '\u1380', // unassigned '\u13A0', // Cherokee '\u1400', // Unified Canadian Aboriginal Syllabics 
'\u1680', // Ogham '\u16A0', // Runic '\u1700', // unassigned '\u1780', // Khmer '\u1800', // Mongolian '\u18B0', // unassigned '\u1E00', // Latin Extended Additional '\u1F00', // Greek Extended '\u2000', // General Punctuation '\u2070', // Superscripts and Subscripts '\u20A0', // Currency Symbols '\u20D0', // Combining Marks for Symbols '\u2100', // Letterlike Symbols '\u2150', // Number Forms '\u2190', // Arrows '\u2200', // Mathematical Operators '\u2300', // Miscellaneous Technical '\u2400', // Control Pictures '\u2440', // Optical Character Recognition '\u2460', // Enclosed Alphanumerics '\u2500', // Box Drawing '\u2580', // Block Elements '\u25A0', // Geometric Shapes '\u2600', // Miscellaneous Symbols '\u2700', // Dingbats '\u27C0', // unassigned '\u2800', // Braille Patterns '\u2900', // unassigned '\u2E80', // CJK Radicals Supplement '\u2F00', // Kangxi Radicals '\u2FE0', // unassigned '\u2FF0', // Ideographic Description Characters '\u3000', // CJK Symbols and Punctuation '\u3040', // Hiragana '\u30A0', // Katakana '\u3100', // Bopomofo '\u3130', // Hangul Compatibility Jamo '\u3190', // Kanbun '\u31A0', // Bopomofo Extended '\u31C0', // unassigned '\u3200', // Enclosed CJK Letters and Months '\u3300', // CJK Compatibility '\u3400', // CJK Unified Ideographs Extension A '\u4DB6', // unassigned '\u4E00', // CJK Unified Ideographs '\uA000', // Yi Syllables '\uA490', // Yi Radicals '\uA4D0', // unassigned '\uAC00', // Hangul Syllables '\uD7A4', // unassigned '\uD800', // Surrogates '\uE000', // Private Use '\uF900', // CJK Compatibility Ideographs '\uFB00', // Alphabetic Presentation Forms '\uFB50', // Arabic Presentation Forms-A '\uFE00', // unassigned '\uFE20', // Combining Half Marks '\uFE30', // CJK Compatibility Forms '\uFE50', // Small Form Variants '\uFE70', // Arabic Presentation Forms-B '\uFEFF', // Specials '\uFF00', // Halfwidth and Fullwidth Forms '\uFFF0', // Specials '\uFFFE', // non-characters }; private static final UnicodeBlock[] blocks = { 
BASIC_LATIN, LATIN_1_SUPPLEMENT, LATIN_EXTENDED_A, LATIN_EXTENDED_B, IPA_EXTENSIONS, SPACING_MODIFIER_LETTERS, COMBINING_DIACRITICAL_MARKS, GREEK, CYRILLIC, null, ARMENIAN, HEBREW, ARABIC, SYRIAC, null, THAANA, null, DEVANAGARI, BENGALI, GURMUKHI, GUJARATI, ORIYA, TAMIL, TELUGU, KANNADA, MALAYALAM, SINHALA, THAI, LAO, TIBETAN, MYANMAR, GEORGIAN, HANGUL_JAMO, ETHIOPIC, null, CHEROKEE, UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS, OGHAM, RUNIC, null, KHMER, MONGOLIAN, null, LATIN_EXTENDED_ADDITIONAL, GREEK_EXTENDED, GENERAL_PUNCTUATION, SUPERSCRIPTS_AND_SUBSCRIPTS, CURRENCY_SYMBOLS, COMBINING_MARKS_FOR_SYMBOLS, LETTERLIKE_SYMBOLS, NUMBER_FORMS, ARROWS, MATHEMATICAL_OPERATORS, MISCELLANEOUS_TECHNICAL, CONTROL_PICTURES, OPTICAL_CHARACTER_RECOGNITION, ENCLOSED_ALPHANUMERICS, BOX_DRAWING, BLOCK_ELEMENTS, GEOMETRIC_SHAPES, MISCELLANEOUS_SYMBOLS, DINGBATS, null, BRAILLE_PATTERNS, null, CJK_RADICALS_SUPPLEMENT, KANGXI_RADICALS, null, IDEOGRAPHIC_DESCRIPTION_CHARACTERS, CJK_SYMBOLS_AND_PUNCTUATION, HIRAGANA, KATAKANA, BOPOMOFO, HANGUL_COMPATIBILITY_JAMO, KANBUN, BOPOMOFO_EXTENDED, null, ENCLOSED_CJK_LETTERS_AND_MONTHS, CJK_COMPATIBILITY, CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A, null, CJK_UNIFIED_IDEOGRAPHS, YI_SYLLABLES, YI_RADICALS, null, HANGUL_SYLLABLES, null, SURROGATES_AREA, PRIVATE_USE_AREA, CJK_COMPATIBILITY_IDEOGRAPHS, ALPHABETIC_PRESENTATION_FORMS, ARABIC_PRESENTATION_FORMS_A, null, COMBINING_HALF_MARKS, CJK_COMPATIBILITY_FORMS, SMALL_FORM_VARIANTS, ARABIC_PRESENTATION_FORMS_B, SPECIALS, HALFWIDTH_AND_FULLWIDTH_FORMS, SPECIALS, null, }; /** * Returns the object representing the Unicode block containing the * given character, or <code>null</code> if the character is not a * member of a defined block. 
* * @param c The character in question * @return The <code>UnicodeBlock</code> instance representing the * Unicode block of which this character is a member, or * <code>null</code> if the character is not a member of any * Unicode block */ public static UnicodeBlock of(char c) { int top, bottom, current; bottom = 0; top = blockStarts.length; current = top/2; // invariant: top > current >= bottom && ch >= unicodeBlockStarts[bottom] while (top - bottom > 1) { if (c >= blockStarts[current]) { bottom = current; } else { top = current; } current = (top + bottom) / 2; } return blocks[current]; } } /** * The value of the <code>Character</code>. * * @serial */ private char value; /** use serialVersionUID from JDK 1.0.2 for interoperability */ private static final long serialVersionUID = 3786198910865385080L; /** * Constructs a newly allocated <code>Character</code> object that * represents the specified <code>char</code> value. * * @param value the value to be represented by the * <code>Character</code> object. */ public Character(char value) { this.value = value; } /** * Returns the value of this <code>Character</code> object. * @return the primitive <code>char</code> value represented by * this object. */ public char charValue() { return value; } /** * Returns a hash code for this <code>Character</code>. * @return a hash code value for this object. */ public int hashCode() { return (int)value; } /** * Compares this object against the specified object. * The result is <code>true</code> if and only if the argument is not * <code>null</code> and is a <code>Character</code> object that * represents the same <code>char</code> value as this object. * * @param obj the object to compare with. * @return <code>true</code> if the objects are the same; * <code>false</code> otherwise. 
*/
    public boolean equals(Object obj) {
        // instanceof is false for null, so null needs no separate check.
        if (!(obj instanceof Character)) {
            return false;
        }
        Character other = (Character) obj;
        return other.charValue() == value;
    }

    /**
     * Returns a <code>String</code> object representing this
     * <code>Character</code>'s value. The result is a string of
     * length 1 whose sole component is the primitive
     * <code>char</code> value represented by this
     * <code>Character</code> object.
     *
     * @return  a string representation of this object.
     */
    public String toString() {
        return String.valueOf(new char[] { value });
    }

    /**
     * Returns a <code>String</code> object representing the
     * specified <code>char</code>. The result is a string of length
     * 1 consisting solely of the specified <code>char</code>.
     *
     * @param c the <code>char</code> to be converted
     * @return the string representation of the specified <code>char</code>
     * @since 1.4
     */
    public static String toString(char c) {
        return String.valueOf(c);
    }

    /**
     * Determines if the specified character is a lowercase character.
     * <p>
     * A character is lowercase if its general category type, provided
     * by <code>Character.getType(ch)</code>, is
     * <code>LOWERCASE_LETTER</code>.
     * <p>
     * The following are examples of lowercase characters:
     * <p><blockquote><pre>
     * a b c d e f g h i j k l m n o p q r s t u v w x y z
     * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
     * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
     * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
     * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
     * </pre></blockquote>
     * <p> Many other Unicode characters are lowercase too.
     * <p>
     *
     * @param   ch   the character to be tested.
     * @return  <code>true</code> if the character is lowercase;
     *          <code>false</code> otherwise.
* @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isTitleCase(char) * @see java.lang.Character#toLowerCase(char) * @see java.lang.Character#getType(char) */ public static boolean isLowerCase(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isLowerCase(ch); } else { return CharacterData.isLowerCase(ch); } } /** * Determines if the specified character is an uppercase character. * <p> * A character is uppercase if its general category type, provided by * <code>Character.getType(ch)</code>, is <code>UPPERCASE_LETTER</code>. * <p> * The following are examples of uppercase characters: * <p><blockquote><pre> * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z * '\u00C0' '\u00C1' '\u00C2' '\u00C3' '\u00C4' '\u00C5' '\u00C6' '\u00C7' * '\u00C8' '\u00C9' '\u00CA' '\u00CB' '\u00CC' '\u00CD' '\u00CE' '\u00CF' * '\u00D0' '\u00D1' '\u00D2' '\u00D3' '\u00D4' '\u00D5' '\u00D6' '\u00D8' * '\u00D9' '\u00DA' '\u00DB' '\u00DC' '\u00DD' '\u00DE' * </pre></blockquote> * <p> Many other Unicode characters are uppercase too.<p> * * @param ch the character to be tested. * @return <code>true</code> if the character is uppercase; * <code>false</code> otherwise. * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isTitleCase(char) * @see java.lang.Character#toUpperCase(char) * @see java.lang.Character#getType(char) * @since 1.0 */ public static boolean isUpperCase(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isUpperCase(ch); } else { return CharacterData.isUpperCase(ch); } } /** * Determines if the specified character is a titlecase character. * <p> * A character is a titlecase character if its general * category type, provided by <code>Character.getType(ch)</code>, * is <code>TITLECASE_LETTER</code>. * <p> * Some characters look like pairs of Latin letters. For example, there * is an uppercase letter that looks like "LJ" and has a corresponding * lowercase letter that looks like "lj". 
A third form, which looks like "Lj", * is the appropriate form to use when rendering a word in lowercase * with initial capitals, as for a book title. * <p> * These are some of the Unicode characters for which this method returns * <code>true</code>: * <ul> * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code> * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code> * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code> * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code> * </ul> * <p> Many other Unicode characters are titlecase too.<p> * * @param ch the character to be tested. * @return <code>true</code> if the character is titlecase; * <code>false</code> otherwise. * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isUpperCase(char) * @see java.lang.Character#toTitleCase(char) * @see java.lang.Character#getType(char) * @since 1.0.2 */ public static boolean isTitleCase(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isTitleCase(ch); } else { return CharacterData.isTitleCase(ch); } } /** * Determines if the specified character is a digit. * <p> * A character is a digit if its general category type, provided * by <code>Character.getType(ch)</code>, is * <code>DECIMAL_DIGIT_NUMBER</code>. * <p> * Some Unicode character ranges that contain digits: * <ul> * <li><code>'\u0030'</code> through <code>'\u0039'</code>, * ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>) * <li><code>'\u0660'</code> through <code>'\u0669'</code>, * Arabic-Indic digits * <li><code>'\u06F0'</code> through <code>'\u06F9'</code>, * Extended Arabic-Indic digits * <li><code>'\u0966'</code> through <code>'\u096F'</code>, * Devanagari digits * <li><code>'\uFF10'</code> through <code>'\uFF19'</code>, * Fullwidth digits * </ul> * * Many other character ranges contain digits as well. * * @param ch the character to be tested. * @return <code>true</code> if the character is a digit; * <code>false</code> otherwise. 
* @see java.lang.Character#digit(char, int) * @see java.lang.Character#forDigit(int, int) * @see java.lang.Character#getType(char) */ public static boolean isDigit(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isDigit(ch); } else { return CharacterData.isDigit(ch); } } /** * Determines if a character is defined in Unicode. * <p> * A character is defined if at least one of the following is true: * <ul> * <li>It has an entry in the UnicodeData file. * <li>It has a value in a range defined by the UnicodeData file. * </ul> * * @param ch the character to be tested * @return <code>true</code> if the character has a defined meaning * in Unicode; <code>false</code> otherwise. * @see java.lang.Character#isDigit(char) * @see java.lang.Character#isLetter(char) * @see java.lang.Character#isLetterOrDigit(char) * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isTitleCase(char) * @see java.lang.Character#isUpperCase(char) * @since 1.0.2 */ public static boolean isDefined(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isDefined(ch); } else { return CharacterData.isDefined(ch); } } /** * Determines if the specified character is a letter. * <p> * A character is considered to be a letter if its general * category type, provided by <code>Character.getType(ch)</code>, * is any of the following: * <ul> * <li> <code>UPPERCASE_LETTER</code> * <li> <code>LOWERCASE_LETTER</code> * <li> <code>TITLECASE_LETTER</code> * <li> <code>MODIFIER_LETTER</code> * <li> <code>OTHER_LETTER</code> * </ul> * * Not all letters have case. Many characters are * letters but are neither uppercase nor lowercase nor titlecase. * * @param ch the character to be tested. * @return <code>true</code> if the character is a letter; * <code>false</code> otherwise. 
* @see java.lang.Character#isDigit(char) * @see java.lang.Character#isJavaIdentifierStart(char) * @see java.lang.Character#isJavaLetter(char) * @see java.lang.Character#isJavaLetterOrDigit(char) * @see java.lang.Character#isLetterOrDigit(char) * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isTitleCase(char) * @see java.lang.Character#isUnicodeIdentifierStart(char) * @see java.lang.Character#isUpperCase(char) */ public static boolean isLetter(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isLetter(ch); } else { return CharacterData.isLetter(ch); } } /** * Determines if the specified character is a letter or digit. * <p> * A character is considered to be a letter or digit if either * <code>Character.isLetter(char ch)</code> or * <code>Character.isDigit(char ch)</code> returns * <code>true</code> for the character. * * @param ch the character to be tested. * @return <code>true</code> if the character is a letter or digit; * <code>false</code> otherwise. * @see java.lang.Character#isDigit(char) * @see java.lang.Character#isJavaIdentifierPart(char) * @see java.lang.Character#isJavaLetter(char) * @see java.lang.Character#isJavaLetterOrDigit(char) * @see java.lang.Character#isLetter(char) * @see java.lang.Character#isUnicodeIdentifierPart(char) * @since 1.0.2 */ public static boolean isLetterOrDigit(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isLetterOrDigit(ch); } else { return CharacterData.isLetterOrDigit(ch); } } /** * Determines if the specified character is permissible as the first * character in a Java identifier. * <p> * A character may start a Java identifier if and only if * one of the following is true: * <ul> * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code> * <li> ch is a currency symbol (such as "$") * <li> ch is a connecting punctuation character (such as "_"). 
* </ul>
     *
     * @param   ch the character to be tested.
     * @return  <code>true</code> if the character may start a Java
     *          identifier; <code>false</code> otherwise.
     * @see     java.lang.Character#isJavaLetterOrDigit(char)
     * @see     java.lang.Character#isJavaIdentifierStart(char)
     * @see     java.lang.Character#isJavaIdentifierPart(char)
     * @see     java.lang.Character#isLetter(char)
     * @see     java.lang.Character#isLetterOrDigit(char)
     * @see     java.lang.Character#isUnicodeIdentifierStart(char)
     * @since   1.0.2
     * deprecated Replaced by isJavaIdentifierStart(char).
     */
    /*
     * public static boolean isJavaLetter(char ch) {
     *     return isJavaIdentifierStart(ch);
     * }
     */

    /**
     * Determines if the specified character may be part of a Java
     * identifier as other than the first character.
     * <p>
     * A character may be part of a Java identifier if and only if any
     * of the following are true:
     * <ul>
     * <li>  it is a letter
     * <li>  it is a currency symbol (such as <code>'$'</code>)
     * <li>  it is a connecting punctuation character (such as <code>'_'</code>)
     * <li>  it is a digit
     * <li>  it is a numeric letter (such as a Roman numeral character)
     * <li>  it is a combining mark
     * <li>  it is a non-spacing mark
     * <li> <code>isIdentifierIgnorable</code> returns
     * <code>true</code> for the character.
     * </ul>
     *
     * @param   ch the character to be tested.
     * @return  <code>true</code> if the character may be part of a
     *          Java identifier; <code>false</code> otherwise.
     * @see     java.lang.Character#isJavaLetter(char)
     * @see     java.lang.Character#isJavaIdentifierStart(char)
     * @see     java.lang.Character#isJavaIdentifierPart(char)
     * @see     java.lang.Character#isLetter(char)
     * @see     java.lang.Character#isLetterOrDigit(char)
     * @see     java.lang.Character#isUnicodeIdentifierPart(char)
     * @see     java.lang.Character#isIdentifierIgnorable(char)
     * @since   1.0.2
     * deprecated Replaced by isJavaIdentifierPart(char).
*/ /* * public static boolean isJavaLetterOrDigit(char ch) { * * return isJavaIdentifierPart(ch); * } */ /** * Determines if the specified character is * permissible as the first character in a Java identifier. * <p> * A character may start a Java identifier if and only if * one of the following conditions is true: * <ul> * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> * <li> {@link #getType(char) getType(ch)} returns <code>LETTER_NUMBER</code> * <li> ch is a currency symbol (such as "$") * <li> ch is a connecting punctuation character (such as "_"). * </ul> * * @param ch the character to be tested. * @return <code>true</code> if the character may start a Java identifier; * <code>false</code> otherwise. * @see java.lang.Character#isJavaIdentifierPart(char) * @see java.lang.Character#isLetter(char) * @see java.lang.Character#isUnicodeIdentifierStart(char) * @since 1.1 */ public static boolean isJavaIdentifierStart(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isJavaIdentifierStart(ch); } else { return CharacterData.isJavaIdentifierStart(ch); } } /** * Determines if the specified character may be part of a Java * identifier as other than the first character. * <p> * A character may be part of a Java identifier if any of the following * are true: * <ul> * <li> it is a letter * <li> it is a currency symbol (such as <code>'$'</code>) * <li> it is a connecting punctuation character (such as <code>'_'</code>) * <li> it is a digit * <li> it is a numeric letter (such as a Roman numeral character) * <li> it is a combining mark * <li> it is a non-spacing mark * <li> <code>isIdentifierIgnorable</code> returns * <code>true</code> for the character * </ul> * * @param ch the character to be tested. * @return <code>true</code> if the character may be part of a * Java identifier; <code>false</code> otherwise. 
* @see java.lang.Character#isIdentifierIgnorable(char) * @see java.lang.Character#isJavaIdentifierStart(char) * @see java.lang.Character#isLetterOrDigit(char) * @see java.lang.Character#isUnicodeIdentifierPart(char) * @since 1.1 */ public static boolean isJavaIdentifierPart(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isJavaIdentifierPart(ch); } else { return CharacterData.isJavaIdentifierPart(ch); } } /** * Determines if the specified character is permissible as the * first character in a Unicode identifier. * <p> * A character may start a Unicode identifier if and only if * one of the following conditions is true: * <ul> * <li> {@link #isLetter(char) isLetter(ch)} returns <code>true</code> * <li> {@link #getType(char) getType(ch)} returns * <code>LETTER_NUMBER</code>. * </ul> * @param ch the character to be tested. * @return <code>true</code> if the character may start a Unicode * identifier; <code>false</code> otherwise. * @see java.lang.Character#isJavaIdentifierStart(char) * @see java.lang.Character#isLetter(char) * @see java.lang.Character#isUnicodeIdentifierPart(char) * @since 1.1 */ public static boolean isUnicodeIdentifierStart(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isUnicodeIdentifierStart(ch); } else { return CharacterData.isUnicodeIdentifierStart(ch); } } /** * Determines if the specified character may be part of a Unicode * identifier as other than the first character. * <p> * A character may be part of a Unicode identifier if and only if * one of the following statements is true: * <ul> * <li> it is a letter * <li> it is a connecting punctuation character (such as <code>'_'</code>) * <li> it is a digit * <li> it is a numeric letter (such as a Roman numeral character) * <li> it is a combining mark * <li> it is a non-spacing mark * <li> <code>isIdentifierIgnorable</code> returns * <code>true</code> for this character. * </ul> * * @param ch the character to be tested. 
* @return <code>true</code> if the character may be part of a * Unicode identifier; <code>false</code> otherwise. * @see java.lang.Character#isIdentifierIgnorable(char) * @see java.lang.Character#isJavaIdentifierPart(char) * @see java.lang.Character#isLetterOrDigit(char) * @see java.lang.Character#isUnicodeIdentifierStart(char) * @since 1.1 */ public static boolean isUnicodeIdentifierPart(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isUnicodeIdentifierPart(ch); } else { return CharacterData.isUnicodeIdentifierPart(ch); } } /** * Determines if the specified character should be regarded as * an ignorable character in a Java identifier or a Unicode identifier. * <p> * The following Unicode characters are ignorable in a Java identifier * or a Unicode identifier: * <ul> * <li>ISO control characters that are not whitespace * <ul> * <li><code>'\u0000'</code> through <code>'\u0008'</code> * <li><code>'\u000E'</code> through <code>'\u001B'</code> * <li><code>'\u007F'</code> through <code>'\u009F'</code> * </ul> * * <li>all characters that have the <code>FORMAT</code> general * category value * </ul> * * @param ch the character to be tested. * @return <code>true</code> if the character is an ignorable control * character that may be part of a Java or Unicode identifier; * <code>false</code> otherwise. * @see java.lang.Character#isJavaIdentifierPart(char) * @see java.lang.Character#isUnicodeIdentifierPart(char) * @since 1.1 */ public static boolean isIdentifierIgnorable(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isIdentifierIgnorable(ch); } else { return CharacterData.isIdentifierIgnorable(ch); } } /** * Converts the character argument to lowercase using case * mapping information from the UnicodeData file. * <p> * Note that * <code>Character.isLowerCase(Character.toLowerCase(ch))</code> * does not always return <code>true</code> for some ranges of * characters, particularly those that are symbols or ideographs. 
* * @param ch the character to be converted. * @return the lowercase equivalent of the character, if any; * otherwise, the character itself. * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isUpperCase(char) * @see java.lang.Character#toTitleCase(char) * @see java.lang.Character#toUpperCase(char) */ public static char toLowerCase(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.toLowerCase(ch); } else { return CharacterData.toLowerCase(ch); } } /** * Converts the character argument to uppercase using case mapping * information from the UnicodeData file. * <p> * Note that * <code>Character.isUpperCase(Character.toUpperCase(ch))</code> * does not always return <code>true</code> for some ranges of * characters, particularly those that are symbols or ideographs. * * @param ch the character to be converted. * @return the uppercase equivalent of the character, if any; * otherwise, the character itself. * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isUpperCase(char) * @see java.lang.Character#toLowerCase(char) * @see java.lang.Character#toTitleCase(char) */ public static char toUpperCase(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.toUpperCase(ch); } else { return CharacterData.toUpperCase(ch); } } /** * Converts the character argument to titlecase using case mapping * information from the UnicodeData file. If a character has no * explicit titlecase mapping and is not itself a titlecase char * according to UnicodeData, then the uppercase mapping is * returned as an equivalent titlecase mapping. If the * <code>char</code> argument is already a titlecase * <code>char</code>, the same <code>char</code> value will be * returned. * <p> * Note that * <code>Character.isTitleCase(Character.toTitleCase(ch))</code> * does not always return <code>true</code> for some ranges of * characters. * * @param ch the character to be converted. 
* @return the titlecase equivalent of the character, if any; * otherwise, the character itself. * @see java.lang.Character#isTitleCase(char) * @see java.lang.Character#toLowerCase(char) * @see java.lang.Character#toUpperCase(char) * @since 1.0.2 */ public static char toTitleCase(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.toTitleCase(ch); } else { return CharacterData.toTitleCase(ch); } } /** * Returns the numeric value of the character <code>ch</code> in the * specified radix. * <p> * If the radix is not in the range <code>MIN_RADIX</code> <= * <code>radix</code> <= <code>MAX_RADIX</code> or if the * value of <code>ch</code> is not a valid digit in the specified * radix, <code>-1</code> is returned. A character is a valid digit * if at least one of the following is true: * <ul> * <li>The method <code>isDigit</code> is <code>true</code> of the character * and the Unicode decimal digit value of the character (or its * single-character decomposition) is less than the specified radix. * In this case the decimal digit value is returned. * <li>The character is one of the uppercase Latin letters * <code>'A'</code> through <code>'Z'</code> and its code is less than * <code>radix + 'A' - 10</code>. * In this case, <code>ch - 'A' + 10</code> * is returned. * <li>The character is one of the lowercase Latin letters * <code>'a'</code> through <code>'z'</code> and its code is less than * <code>radix + 'a' - 10</code>. * In this case, <code>ch - 'a' + 10</code> * is returned. * </ul> * * @param ch the character to be converted. * @param radix the radix. * @return the numeric value represented by the character in the * specified radix. 
* @see java.lang.Character#forDigit(int, int) * @see java.lang.Character#isDigit(char) */ public static int digit(char ch, int radix) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.digit(ch, radix); } else { return CharacterData.digit(ch, radix); } } /** * Returns the <code>int</code> value that the specified Unicode * character represents. For example, the character * <code>'\u216C'</code> (the roman numeral fifty) will return * an int with a value of 50. * <p> * The letters A-Z in their uppercase (<code>'\u0041'</code> through * <code>'\u005A'</code>), lowercase * (<code>'\u0061'</code> through <code>'\u007A'</code>), and * full width variant (<code>'\uFF21'</code> through * <code>'\uFF3A'</code> and <code>'\uFF41'</code> through * <code>'\uFF5A'</code>) forms have numeric values from 10 * through 35. This is independent of the Unicode specification, * which does not assign numeric values to these <code>char</code> * values. * <p> * If the character does not have a numeric value, then -1 is returned. * If the character has a numeric value that cannot be represented as a * nonnegative integer (for example, a fractional value), then -2 * is returned. * * @param ch the character to be converted. * @return the numeric value of the character, as a nonnegative <code>int</code> * value; -2 if the character has a numeric value that is not a * nonnegative integer; -1 if the character has no numeric value. * @see java.lang.Character#forDigit(int, int) * @see java.lang.Character#isDigit(char) * @since 1.1 */ public static int getNumericValue(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.getNumericValue(ch); } else { return CharacterData.getNumericValue(ch); } } /** * Determines if the specified character is ISO-LATIN-1 white space. 
* This method returns <code>true</code> for the following five * characters only: * <table> * <tr><td><code>'\t'</code></td> <td><code>'\u0009'</code></td> * <td><code>HORIZONTAL TABULATION</code></td></tr> * <tr><td><code>'\n'</code></td> <td><code>'\u000A'</code></td> * <td><code>NEW LINE</code></td></tr> * <tr><td><code>'\f'</code></td> <td><code>'\u000C'</code></td> * <td><code>FORM FEED</code></td></tr> * <tr><td><code>'\r'</code></td> <td><code>'\u000D'</code></td> * <td><code>CARRIAGE RETURN</code></td></tr> * <tr><td><code>' '</code></td> <td><code>'\u0020'</code></td> * <td><code>SPACE</code></td></tr> * </table> * * @param ch the character to be tested. * @return <code>true</code> if the character is ISO-LATIN-1 white * space; <code>false</code> otherwise. * @see java.lang.Character#isSpaceChar(char) * @see java.lang.Character#isWhitespace(char) * deprecated Replaced by isWhitespace(char). */ /** * public static boolean isSpace(char ch) { * return (ch <= 0x0020) && * (((((1L << 0x0009) | * (1L << 0x000A) | * (1L << 0x000C) | * (1L << 0x000D) | * (1L << 0x0020)) >> ch) & 1L) != 0); * } */ /** * Determines if the specified character is a Unicode space character. * A character is considered to be a space character if and only if * it is specified to be a space character by the Unicode standard. This * method returns true if the character's general category type is any of * the following: * <ul> * <li> <code>SPACE_SEPARATOR</code> * <li> <code>LINE_SEPARATOR</code> * <li> <code>PARAGRAPH_SEPARATOR</code> * </ul> * * @param ch the character to be tested. * @return <code>true</code> if the character is a space character; * <code>false</code> otherwise. * @see java.lang.Character#isWhitespace(char) * @since 1.1 */ public static boolean isSpaceChar(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isSpaceChar(ch); } else { return CharacterData.isSpaceChar(ch); } } /** * Determines if the specified character is white space according to Java. 
* A character is a Java whitespace character if and only if it satisfies * one of the following criteria: * <ul> * <li> It is a Unicode space character (<code>SPACE_SEPARATOR</code>, * <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>) * but is not also a non-breaking space (<code>'\u00A0'</code>, * <code>'\u2007'</code>, <code>'\u202F'</code>). * <li> It is <code>'\u0009'</code>, HORIZONTAL TABULATION. * <li> It is <code>'\u000A'</code>, LINE FEED. * <li> It is <code>'\u000B'</code>, VERTICAL TABULATION. * <li> It is <code>'\u000C'</code>, FORM FEED. * <li> It is <code>'\u000D'</code>, CARRIAGE RETURN. * <li> It is <code>'\u001C'</code>, FILE SEPARATOR. * <li> It is <code>'\u001D'</code>, GROUP SEPARATOR. * <li> It is <code>'\u001E'</code>, RECORD SEPARATOR. * <li> It is <code>'\u001F'</code>, UNIT SEPARATOR. * </ul> * * @param ch the character to be tested. * @return <code>true</code> if the character is a Java whitespace * character; <code>false</code> otherwise. * @see java.lang.Character#isSpaceChar(char) * @since 1.1 */ public static boolean isWhitespace(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isWhitespace(ch); } else { return CharacterData.isWhitespace(ch); } } /** * Determines if the specified character is an ISO control * character. A character is considered to be an ISO control * character if its code is in the range <code>'\u0000'</code> * through <code>'\u001F'</code> or in the range * <code>'\u007F'</code> through <code>'\u009F'</code>. * * @param ch the character to be tested. * @return <code>true</code> if the character is an ISO control character; * <code>false</code> otherwise. * * @see java.lang.Character#isSpaceChar(char) * @see java.lang.Character#isWhitespace(char) * @since 1.1 */ public static boolean isISOControl(char ch) { return (ch <= 0x009F) && ((ch <= 0x001F) || (ch >= 0x007F)); } /** * Returns a value indicating a character's general category. * * @param ch the character to be tested. 
* @return a value of type <code>int</code> representing the * character's general category. * @see java.lang.Character#COMBINING_SPACING_MARK * @see java.lang.Character#CONNECTOR_PUNCTUATION * @see java.lang.Character#CONTROL * @see java.lang.Character#CURRENCY_SYMBOL * @see java.lang.Character#DASH_PUNCTUATION * @see java.lang.Character#DECIMAL_DIGIT_NUMBER * @see java.lang.Character#ENCLOSING_MARK * @see java.lang.Character#END_PUNCTUATION * @see java.lang.Character#FINAL_QUOTE_PUNCTUATION * @see java.lang.Character#FORMAT * @see java.lang.Character#INITIAL_QUOTE_PUNCTUATION * @see java.lang.Character#LETTER_NUMBER * @see java.lang.Character#LINE_SEPARATOR * @see java.lang.Character#LOWERCASE_LETTER * @see java.lang.Character#MATH_SYMBOL * @see java.lang.Character#MODIFIER_LETTER * @see java.lang.Character#MODIFIER_SYMBOL * @see java.lang.Character#NON_SPACING_MARK * @see java.lang.Character#OTHER_LETTER * @see java.lang.Character#OTHER_NUMBER * @see java.lang.Character#OTHER_PUNCTUATION * @see java.lang.Character#OTHER_SYMBOL * @see java.lang.Character#PARAGRAPH_SEPARATOR * @see java.lang.Character#PRIVATE_USE * @see java.lang.Character#SPACE_SEPARATOR * @see java.lang.Character#START_PUNCTUATION * @see java.lang.Character#SURROGATE * @see java.lang.Character#TITLECASE_LETTER * @see java.lang.Character#UNASSIGNED * @see java.lang.Character#UPPERCASE_LETTER * @since 1.1 */ public static int getType(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.getType(ch); } else { return CharacterData.getType(ch); } } /** * Determines the character representation for a specific digit in * the specified radix. If the value of <code>radix</code> is not a * valid radix, or the value of <code>digit</code> is not a valid * digit in the specified radix, the null character * (<code>'\u0000'</code>) is returned. 
* <p> * The <code>radix</code> argument is valid if it is greater than or * equal to <code>MIN_RADIX</code> and less than or equal to * <code>MAX_RADIX</code>. The <code>digit</code> argument is valid if * <code>0 <=digit < radix</code>. * <p> * If the digit is less than 10, then * <code>'0' + digit</code> is returned. Otherwise, the value * <code>'a' + digit - 10</code> is returned. * * @param digit the number to convert to a character. * @param radix the radix. * @return the <code>char</code> representation of the specified digit * in the specified radix. * @see java.lang.Character#MIN_RADIX * @see java.lang.Character#MAX_RADIX * @see java.lang.Character#digit(char, int) */ public static char forDigit(int digit, int radix) { if ((digit >= radix) || (digit < 0)) { return '\0'; } if ((radix < MIN_RADIX) || (radix > MAX_RADIX)) { return '\0'; } if (digit < 10) { return (char)('0' + digit); } return (char)('a' - 10 + digit); } /** * Returns the Unicode directionality property for the given * character. Character directionality is used to calculate the * visual ordering of text. The directionality value of undefined * <code>char</code> values is <code>DIRECTIONALITY_UNDEFINED</code>. * * @param ch <code>char</code> for which the directionality property * is requested. * @return the directionality property of the <code>char</code> value. 
* * @see Character#DIRECTIONALITY_UNDEFINED * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_ARABIC * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_SEPARATOR * @see Character#DIRECTIONALITY_EUROPEAN_NUMBER_TERMINATOR * @see Character#DIRECTIONALITY_ARABIC_NUMBER * @see Character#DIRECTIONALITY_COMMON_NUMBER_SEPARATOR * @see Character#DIRECTIONALITY_NONSPACING_MARK * @see Character#DIRECTIONALITY_BOUNDARY_NEUTRAL * @see Character#DIRECTIONALITY_PARAGRAPH_SEPARATOR * @see Character#DIRECTIONALITY_SEGMENT_SEPARATOR * @see Character#DIRECTIONALITY_WHITESPACE * @see Character#DIRECTIONALITY_OTHER_NEUTRALS * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_EMBEDDING * @see Character#DIRECTIONALITY_LEFT_TO_RIGHT_OVERRIDE * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_EMBEDDING * @see Character#DIRECTIONALITY_RIGHT_TO_LEFT_OVERRIDE * @see Character#DIRECTIONALITY_POP_DIRECTIONAL_FORMAT * @since 1.4 */ public static byte getDirectionality(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.getDirectionality(ch); } else { return CharacterData.getDirectionality(ch); } } /** * Determines whether the character is mirrored according to the * Unicode specification. Mirrored characters should have their * glyphs horizontally mirrored when displayed in text that is * right-to-left. For example, <code>'\u0028'</code> LEFT * PARENTHESIS is semantically defined to be an <i>opening * parenthesis</i>. This will appear as a "(" in text that is * left-to-right but as a ")" in text that is right-to-left. * * @param ch <code>char</code> for which the mirrored property is requested * @return <code>true</code> if the char is mirrored, <code>false</code> * if the <code>char</code> is not mirrored or is not defined. 
* @since 1.4 */ public static boolean isMirrored(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.isMirrored(ch); } else { return CharacterData.isMirrored(ch); } } /** * Compares two <code>Character</code> objects numerically. * * @param anotherCharacter the <code>Character</code> to be compared. * @return the value <code>0</code> if the argument <code>Character</code> * is equal to this <code>Character</code>; a value less than * <code>0</code> if this <code>Character</code> is numerically less * than the <code>Character</code> argument; and a value greater than * <code>0</code> if this <code>Character</code> is numerically greater * than the <code>Character</code> argument (unsigned comparison). * Note that this is strictly a numerical comparison; it is not * locale-dependent. * @since 1.2 */ public int compareTo(Character anotherCharacter) { return this.value - anotherCharacter.value; } /** * Compares this <code>Character</code> object to another object. * If the object is a <code>Character</code>, this function * behaves like <code>compareTo(Character)</code>. Otherwise, it * throws a <code>ClassCastException</code> (as * <code>Character</code> objects are comparable only to other * <code>Character</code> objects). * * @param o the <code>Object</code> to be compared. * @return the value <code>0</code> if the argument is a <code>Character</code> * numerically equal to this <code>Character</code>; a value less than * <code>0</code> if the argument is a <code>Character</code> numerically * greater than this <code>Character</code>; and a value greater than * <code>0</code> if the argument is a <code>Character</code> numerically * less than this <code>Character</code>. * @exception <code>ClassCastException</code> if the argument is not a * <code>Character</code>. 
* @see java.lang.Comparable * @since 1.2 */ public int compareTo(Object o) { return compareTo((Character)o); } /** * Converts the character argument to uppercase using case mapping * information from the UnicodeData file. * <p> * * @param ch the <code>char</code> to be converted. * @return either the uppercase equivalent of the character, if * any, or an error flag (<code>Character.CHAR_ERROR</code>) * that indicates that a 1:M <code>char</code> mapping exists. * @see java.lang.Character#isLowerCase(char) * @see java.lang.Character#isUpperCase(char) * @see java.lang.Character#toLowerCase(char) * @see java.lang.Character#toTitleCase(char) * @since 1.4 */ static char toUpperCaseEx(char ch) { if (ch <= FAST_PATH_MAX) { return CharacterDataLatin1.toUpperCaseEx(ch); } else { return CharacterData.toUpperCaseEx(ch); } } /** * Converts the <code>char</code> argument to uppercase using case * mapping information from the SpecialCasing file in the Unicode * specification. If a character has no explicit uppercase * mapping, then the <code>char</code> itself is returned in the * <code>char[]</code>. * * @param ch the <code>char</code> to uppercase * @return a <code>char[]</code> with the uppercased character. * @since 1.4 */ static char[] sharpsMap = new char[] {'S', 'S'}; static char[] toUpperCaseCharArray(char ch) { char[] upperMap = {ch}; if (ch <= FAST_PATH_MAX) { if (ch == '\u00DF') { upperMap = sharpsMap; } // else ch -> ch } else { int location = findInCharMap(ch); if (location != -1) { upperMap = CharacterData.charMap[location][1]; } } return upperMap; } /** * Finds the character in the uppercase mapping table. 
* * @param ch the <code>char</code> to search * @return the index location ch in the table or -1 if not found * @since 1.4 */ static int findInCharMap(char ch) { int top, bottom, current; bottom = 0; top = CharacterData.charMap.length; current = top/2; // invariant: top > current >= bottom && ch >= CharacterData.charMap[bottom][0] while (top - bottom > 1) { if (ch >= CharacterData.charMap[current][0][0]) { bottom = current; } else { top = current; } current = (top + bottom) / 2; } if (ch == CharacterData.charMap[current][0][0]) return current; else return -1; } }