/* * Copyright (c) 2005, Oracle and/or its affiliates. All rights reserved. * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER. * * This code is free software; you can redistribute it and/or modify it * under the terms of the GNU General Public License version 2 only, as * published by the Free Software Foundation. Oracle designates this * particular file as subject to the "Classpath" exception as provided * by Oracle in the LICENSE file that accompanied this code. * * This code is distributed in the hope that it will be useful, but WITHOUT * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License * version 2 for more details (a copy is included in the LICENSE file that * accompanied this code). * * You should have received a copy of the GNU General Public License version * 2 along with this work; if not, write to the Free Software Foundation, * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA. * * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA * or visit www.oracle.com if you need additional information or have any * questions. */ /* ******************************************************************************* * (C) Copyright IBM Corp. 1996-2005 - All Rights Reserved * * * * The original version of this source code and documentation is copyrighted * * and owned by IBM, These materials are provided under terms of a License * * Agreement between IBM and Sun. This technology is protected by multiple * * US and International patents. This notice and attribution to IBM may not * * to removed. * ******************************************************************************* */ package sun.text.normalizer; import java.lang.ref.SoftReference; import java.util.HashMap; import java.util.Locale; import java.util.Map; /** * <p> * The UCharacter class provides extensions to the * <a href=http://java.sun.com/j2se/1.3/docs/api/java/lang/Character.html> * java.lang.Character</a> class. These extensions provide support for * Unicode 3.2 properties and together with the <a href=../text/UTF16.html>UTF16</a> * class, provide support for supplementary characters (those with code * points above U+FFFF). * </p> * <p> * Code points are represented in these API using ints. While it would be * more convenient in Java to have a separate primitive datatype for them, * ints suffice in the meantime. * </p> * <p> * To use this class please add the jar file name icu4j.jar to the * class path, since it contains data files which supply the information used * by this file.<br> * E.g. In Windows <br> * <code>set CLASSPATH=%CLASSPATH%;$JAR_FILE_PATH/ucharacter.jar</code>.<br> * Otherwise, another method would be to copy the files uprops.dat and * unames.icu from the icu4j source subdirectory * <i>$ICU4J_SRC/src/com.ibm.icu.impl.data</i> to your class directory * <i>$ICU4J_CLASS/com.ibm.icu.impl.data</i>. * </p> * <p> * Aside from the additions for UTF-16 support, and the updated Unicode 3.1 * properties, the main differences between UCharacter and Character are: * <ul> * <li> UCharacter is not designed to be a char wrapper and does not have * APIs to which involves management of that single char.<br> * These include: * <ul> * <li> char charValue(), * <li> int compareTo(java.lang.Character, java.lang.Character), etc. * </ul> * <li> UCharacter does not include Character APIs that are deprecated, not * does it include the Java-specific character information, such as * boolean isJavaIdentifierPart(char ch). * <li> Character maps characters 'A' - 'Z' and 'a' - 'z' to the numeric * values '10' - '35'. UCharacter also does this in digit and * getNumericValue, to adhere to the java semantics of these * methods. New methods unicodeDigit, and * getUnicodeNumericValue do not treat the above code points * as having numeric values. This is a semantic change from ICU4J 1.3.1. * </ul> * <p> * Further detail differences can be determined from the program * <a href = http://oss.software.ibm.com/developerworks/opensource/cvs/icu4j/~checkout~/icu4j/src/com/ibm/icu/dev/test/lang/UCharacterCompare.java> * com.ibm.icu.dev.test.lang.UCharacterCompare</a> * </p> * <p> * This class is not subclassable * </p> * @author Syn Wee Quek * @stable ICU 2.1 * @see com.ibm.icu.lang.UCharacterEnums */ public final class UCharacter { /** * Numeric Type constants. * @see UProperty#NUMERIC_TYPE * @stable ICU 2.4 */ public static interface NumericType { /** * @stable ICU 2.4 */ public static final int NONE = 0; /** * @stable ICU 2.4 */ public static final int DECIMAL = 1; /** * @stable ICU 2.4 */ public static final int DIGIT = 2; /** * @stable ICU 2.4 */ public static final int NUMERIC = 3; /** * @stable ICU 2.4 */ public static final int COUNT = 4; } /** * Hangul Syllable Type constants. * * @see UProperty#HANGUL_SYLLABLE_TYPE * @stable ICU 2.6 */ public static interface HangulSyllableType { /** * @stable ICU 2.6 */ public static final int NOT_APPLICABLE = 0; /*[NA]*/ /*See note !!*/ /** * @stable ICU 2.6 */ public static final int LEADING_JAMO = 1; /*[L]*/ /** * @stable ICU 2.6 */ public static final int VOWEL_JAMO = 2; /*[V]*/ /** * @stable ICU 2.6 */ public static final int TRAILING_JAMO = 3; /*[T]*/ /** * @stable ICU 2.6 */ public static final int LV_SYLLABLE = 4; /*[LV]*/ /** * @stable ICU 2.6 */ public static final int LVT_SYLLABLE = 5; /*[LVT]*/ /** * @stable ICU 2.6 */ public static final int COUNT = 6; } /** * [Sun] This interface moved from UCharacterEnums.java. * * 'Enum' for the CharacterCategory constants. These constants are * compatible in name <b>but not in value</b> with those defined in * <code>java.lang.Character</code>. * @see UCharacterCategory * @draft ICU 3.0 * @deprecated This is a draft API and might change in a future release of ICU. */ public static interface ECharacterCategory { /** * Character type Lu * @stable ICU 2.1 */ public static final int UPPERCASE_LETTER = 1; /** * Character type Lt * @stable ICU 2.1 */ public static final int TITLECASE_LETTER = 3; /** * Character type Lo * @stable ICU 2.1 */ public static final int OTHER_LETTER = 5; } // public data members ----------------------------------------------- /** * The lowest Unicode code point value. * @stable ICU 2.1 */ public static final int MIN_VALUE = UTF16.CODEPOINT_MIN_VALUE; /** * The highest Unicode code point value (scalar value) according to the * Unicode Standard. * This is a 21-bit value (21 bits, rounded up).<br> * Up-to-date Unicode implementation of java.lang.Character.MIN_VALUE * @stable ICU 2.1 */ public static final int MAX_VALUE = UTF16.CODEPOINT_MAX_VALUE; /** * The minimum value for Supplementary code points * @stable ICU 2.1 */ public static final int SUPPLEMENTARY_MIN_VALUE = UTF16.SUPPLEMENTARY_MIN_VALUE; /** * Special value that is returned by getUnicodeNumericValue(int) when no * numeric value is defined for a code point. * @stable ICU 2.4 * @see #getUnicodeNumericValue */ public static final double NO_NUMERIC_VALUE = -123456789; // public methods ---------------------------------------------------- /** * Retrieves the numeric value of a decimal digit code point. * <br>This method observes the semantics of * <code>java.lang.Character.digit()</code>. Note that this * will return positive values for code points for which isDigit * returns false, just like java.lang.Character. * <br><em>Semantic Change:</em> In release 1.3.1 and * prior, this did not treat the European letters as having a * digit value, and also treated numeric letters and other numbers as * digits. * This has been changed to conform to the java semantics. * <br>A code point is a valid digit if and only if: * <ul> * <li>ch is a decimal digit or one of the european letters, and * <li>the value of ch is less than the specified radix. * </ul> * @param ch the code point to query * @param radix the radix * @return the numeric value represented by the code point in the * specified radix, or -1 if the code point is not a decimal digit * or if its value is too large for the radix * @stable ICU 2.1 */ public static int digit(int ch, int radix) { // when ch is out of bounds getProperty == 0 int props = getProperty(ch); if (getNumericType(props) != NumericType.DECIMAL) { return (radix <= 10) ? -1 : getEuropeanDigit(ch); } // if props == 0, it will just fall through and return -1 if (isNotExceptionIndicator(props)) { // not contained in exception data // getSignedValue is just shifting so we can check for the sign // first // Optimization // int result = UCharacterProperty.getSignedValue(props); // if (result >= 0) { // return result; // } if (props >= 0) { return UCharacterProperty.getSignedValue(props); } } else { int index = UCharacterProperty.getExceptionIndex(props); if (PROPERTY_.hasExceptionValue(index, UCharacterProperty.EXC_NUMERIC_VALUE_)) { int result = PROPERTY_.getException(index, UCharacterProperty.EXC_NUMERIC_VALUE_); if (result >= 0) { return result; } } } if (radix > 10) { int result = getEuropeanDigit(ch); if (result >= 0 && result < radix) { return result; } } return -1; } /** * <p>Get the numeric value for a Unicode code point as defined in the * Unicode Character Database.</p> * <p>A "double" return type is necessary because some numeric values are * fractions, negative, or too large for int.</p> * <p>For characters without any numeric values in the Unicode Character * Database, this function will return NO_NUMERIC_VALUE.</p> * <p><em>API Change:</em> In release 2.2 and prior, this API has a * return type int and returns -1 when the argument ch does not have a * corresponding numeric value. This has been changed to synch with ICU4C * </p> * This corresponds to the ICU4C function u_getNumericValue. * @param ch Code point to get the numeric value for. * @return numeric value of ch, or NO_NUMERIC_VALUE if none is defined. * @stable ICU 2.4 */ public static double getUnicodeNumericValue(int ch) { // equivalent to c version double u_getNumericValue(UChar32 c) int props = PROPERTY_.getProperty(ch); int numericType = getNumericType(props); if (numericType > NumericType.NONE && numericType < NumericType.COUNT) { if (isNotExceptionIndicator(props)) { return UCharacterProperty.getSignedValue(props); } else { int index = UCharacterProperty.getExceptionIndex(props); boolean nex = false; boolean dex = false; double numerator = 0; if (PROPERTY_.hasExceptionValue(index, UCharacterProperty.EXC_NUMERIC_VALUE_)) { int num = PROPERTY_.getException(index, UCharacterProperty.EXC_NUMERIC_VALUE_); // There are special values for huge numbers that are // powers of ten. genprops/store.c documents: // if numericValue = 0x7fffff00 + x then // numericValue = 10 ^ x if (num >= NUMERATOR_POWER_LIMIT_) { num &= 0xff; // 10^x without math.h numerator = Math.pow(10, num); } else { numerator = num; } nex = true; } double denominator = 0; if (PROPERTY_.hasExceptionValue(index, UCharacterProperty.EXC_DENOMINATOR_VALUE_)) { denominator = PROPERTY_.getException(index, UCharacterProperty.EXC_DENOMINATOR_VALUE_); // faster path not in c if (numerator != 0) { return numerator / denominator; } dex = true; } if (nex) { if (dex) { return numerator / denominator; } return numerator; } if (dex) { return 1 / denominator; } } } return NO_NUMERIC_VALUE; } /** * Returns a value indicating a code point's Unicode category. * Up-to-date Unicode implementation of java.lang.Character.getType() * except for the above mentioned code points that had their category * changed.<br> * Return results are constants from the interface * <a href=UCharacterCategory.html>UCharacterCategory</a><br> * <em>NOTE:</em> the UCharacterCategory values are <em>not</em> compatible with * those returned by java.lang.Character.getType. UCharacterCategory values * match the ones used in ICU4C, while java.lang.Character type * values, though similar, skip the value 17.</p> * @param ch code point whose type is to be determined * @return category which is a value of UCharacterCategory * @stable ICU 2.1 */ public static int getType(int ch) { return getProperty(ch) & UCharacterProperty.TYPE_MASK; } //// for StringPrep /** * Returns a code point corresponding to the two UTF16 characters. * @param lead the lead char * @param trail the trail char * @return code point if surrogate characters are valid. * @exception IllegalArgumentException thrown when argument characters do * not form a valid codepoint * @stable ICU 2.1 */ public static int getCodePoint(char lead, char trail) { if (lead >= UTF16.LEAD_SURROGATE_MIN_VALUE && lead <= UTF16.LEAD_SURROGATE_MAX_VALUE && trail >= UTF16.TRAIL_SURROGATE_MIN_VALUE && trail <= UTF16.TRAIL_SURROGATE_MAX_VALUE) { return UCharacterProperty.getRawSupplementary(lead, trail); } throw new IllegalArgumentException("Illegal surrogate characters"); } //// for StringPrep /** * Returns the Bidirection property of a code point. * For example, 0x0041 (letter A) has the LEFT_TO_RIGHT directional * property.<br> * Result returned belongs to the interface * <a href=UCharacterDirection.html>UCharacterDirection</a> * @param ch the code point to be determined its direction * @return direction constant from UCharacterDirection. * @stable ICU 2.1 */ public static int getDirection(int ch) { // when ch is out of bounds getProperty == 0 return (getProperty(ch) >> BIDI_SHIFT_) & BIDI_MASK_AFTER_SHIFT_; } /** * The given string is mapped to its case folding equivalent according to * UnicodeData.txt and CaseFolding.txt; if any character has no case * folding equivalent, the character itself is returned. * "Full", multiple-code point case folding mappings are returned here. * For "simple" single-code point mappings use the API * foldCase(int ch, boolean defaultmapping). * @param str the String to be converted * @param defaultmapping Indicates if all mappings defined in * CaseFolding.txt is to be used, otherwise the * mappings for dotted I and dotless i marked with * 'I' in CaseFolding.txt will be skipped. * @return the case folding equivalent of the character, if * any; otherwise the character itself. * @see #foldCase(int, boolean) * @stable ICU 2.1 */ public static String foldCase(String str, boolean defaultmapping) { int size = str.length(); StringBuffer result = new StringBuffer(size); int offset = 0; int ch; // case mapping loop while (offset < size) { ch = UTF16.charAt(str, offset); offset += UTF16.getCharCount(ch); int props = PROPERTY_.getProperty(ch); if (isNotExceptionIndicator(props)) { int type = UCharacterProperty.TYPE_MASK & props; if (type == ECharacterCategory.UPPERCASE_LETTER || type == ECharacterCategory.TITLECASE_LETTER) { ch += UCharacterProperty.getSignedValue(props); } } else { int index = UCharacterProperty.getExceptionIndex(props); if (PROPERTY_.hasExceptionValue(index, UCharacterProperty.EXC_CASE_FOLDING_)) { int exception = PROPERTY_.getException(index, UCharacterProperty.EXC_CASE_FOLDING_); if (exception != 0) { PROPERTY_.getFoldCase(exception & LAST_CHAR_MASK_, exception >> SHIFT_24_, result); } else { // special case folding mappings, hardcoded if (ch != 0x49 && ch != 0x130) { // return ch itself because there is no special // mapping for it UTF16.append(result, ch); continue; } if (defaultmapping) { // default mappings if (ch == 0x49) { // 0049; C; 0069; # LATIN CAPITAL LETTER I result.append( UCharacterProperty.LATIN_SMALL_LETTER_I_); } else if (ch == 0x130) { // 0130; F; 0069 0307; // # LATIN CAPITAL LETTER I WITH DOT ABOVE result.append( UCharacterProperty.LATIN_SMALL_LETTER_I_); result.append((char)0x307); } } else { // Turkic mappings if (ch == 0x49) { // 0049; T; 0131; # LATIN CAPITAL LETTER I result.append((char)0x131); } else if (ch == 0x130) { // 0130; T; 0069; // # LATIN CAPITAL LETTER I WITH DOT ABOVE result.append( UCharacterProperty.LATIN_SMALL_LETTER_I_); } } } // do not fall through to the output of c continue; } else { if (PROPERTY_.hasExceptionValue(index, UCharacterProperty.EXC_LOWERCASE_)) { ch = PROPERTY_.getException(index, UCharacterProperty.EXC_LOWERCASE_); } } } // handle 1:1 code point mappings from UnicodeData.txt UTF16.append(result, ch); } return result.toString(); } /** * <p>Get the "age" of the code point.</p> * <p>The "age" is the Unicode version when the code point was first * designated (as a non-character or for Private Use) or assigned a * character. * <p>This can be useful to avoid emitting code points to receiving * processes that do not accept newer characters.</p> * <p>The data is from the UCD file DerivedAge.txt.</p> * @param ch The code point. * @return the Unicode version number * @stable ICU 2.6 */ public static VersionInfo getAge(int ch) { if (ch < MIN_VALUE || ch > MAX_VALUE) { throw new IllegalArgumentException("Codepoint out of bounds"); } return PROPERTY_.getAge(ch); } /** * <p>Gets the property value for an Unicode property type of a code point. * Also returns binary and mask property values.</p> * <p>Unicode, especially in version 3.2, defines many more properties than * the original set in UnicodeData.txt.</p> * <p>The properties APIs are intended to reflect Unicode properties as * defined in the Unicode Character Database (UCD) and Unicode Technical * Reports (UTR). For details about the properties see * http://www.unicode.org/.</p> * <p>For names of Unicode properties see the UCD file PropertyAliases.txt. * </p> * <pre> * Sample usage: * int ea = UCharacter.getIntPropertyValue(c, UProperty.EAST_ASIAN_WIDTH); * int ideo = UCharacter.getIntPropertyValue(c, UProperty.IDEOGRAPHIC); * boolean b = (ideo == 1) ? true : false; * </pre> * @param ch code point to test. * @param type UProperty selector constant, identifies which binary * property to check. Must be * UProperty.BINARY_START <= type < UProperty.BINARY_LIMIT or * UProperty.INT_START <= type < UProperty.INT_LIMIT or * UProperty.MASK_START <= type < UProperty.MASK_LIMIT. * @return numeric value that is directly the property value or, * for enumerated properties, corresponds to the numeric value of * the enumerated constant of the respective property value * enumeration type (cast to enum type if necessary). * Returns 0 or 1 (for false / true) for binary Unicode properties. * Returns a bit-mask for mask properties. * Returns 0 if 'type' is out of bounds or if the Unicode version * does not have data for the property at all, or not for this code * point. * @see UProperty * @see #hasBinaryProperty * @see #getIntPropertyMinValue * @see #getIntPropertyMaxValue * @see #getUnicodeVersion * @stable ICU 2.4 */ public static int getIntPropertyValue(int ch, int type) { /* * For Normalizer with Unicode 3.2, this method is called only for * HANGUL_SYLLABLE_TYPE in UnicodeSet.addPropertyStarts(). */ if (type == UProperty.HANGUL_SYLLABLE_TYPE) { /* purely algorithmic; hardcode known characters, check for assigned new ones */ if(ch<NormalizerImpl.JAMO_L_BASE) { /* NA */ } else if(ch<=0x11ff) { /* Jamo range */ if(ch<=0x115f) { /* Jamo L range, HANGUL CHOSEONG ... */ if(ch==0x115f || ch<=0x1159 || getType(ch)==ECharacterCategory.OTHER_LETTER) { return HangulSyllableType.LEADING_JAMO; } } else if(ch<=0x11a7) { /* Jamo V range, HANGUL JUNGSEONG ... */ if(ch<=0x11a2 || getType(ch)==ECharacterCategory.OTHER_LETTER) { return HangulSyllableType.VOWEL_JAMO; } } else { /* Jamo T range */ if(ch<=0x11f9 || getType(ch)==ECharacterCategory.OTHER_LETTER) { return HangulSyllableType.TRAILING_JAMO; } } } else if((ch-=NormalizerImpl.HANGUL_BASE)<0) { /* NA */ } else if(ch<NormalizerImpl.HANGUL_COUNT) { /* Hangul syllable */ return ch%NormalizerImpl.JAMO_T_COUNT==0 ? HangulSyllableType.LV_SYLLABLE : HangulSyllableType.LVT_SYLLABLE; } } return 0; /* NA */ } // private variables ------------------------------------------------- /** * Database storing the sets of character property */ private static final UCharacterProperty PROPERTY_; /** * For optimization */ private static final char[] PROPERTY_TRIE_INDEX_; private static final char[] PROPERTY_TRIE_DATA_; private static final int[] PROPERTY_DATA_; private static final int PROPERTY_INITIAL_VALUE_; // block to initialise character property database static { try { PROPERTY_ = UCharacterProperty.getInstance(); PROPERTY_TRIE_INDEX_ = PROPERTY_.m_trieIndex_; PROPERTY_TRIE_DATA_ = PROPERTY_.m_trieData_; PROPERTY_DATA_ = PROPERTY_.m_property_; PROPERTY_INITIAL_VALUE_ = PROPERTY_DATA_[PROPERTY_.m_trieInitialValue_]; } catch (Exception e) { throw new RuntimeException(e.getMessage()); } } /** * To get the last character out from a data type */ private static final int LAST_CHAR_MASK_ = 0xFFFF; /** * To get the last byte out from a data type */ // private static final int LAST_BYTE_MASK_ = 0xFF; /** * Shift 16 bits */ // private static final int SHIFT_16_ = 16; /** * Shift 24 bits */ private static final int SHIFT_24_ = 24; /** * Shift to get numeric type */ private static final int NUMERIC_TYPE_SHIFT_ = 12; /** * Mask to get numeric type */ private static final int NUMERIC_TYPE_MASK_ = 0x7 << NUMERIC_TYPE_SHIFT_; /** * Shift to get bidi bits */ private static final int BIDI_SHIFT_ = 6; /** * Mask to be applied after shifting to get bidi bits */ private static final int BIDI_MASK_AFTER_SHIFT_ = 0x1F; /** * <p>Numerator power limit. * There are special values for huge numbers that are powers of ten.</p> * <p>c version genprops/store.c documents: * if numericValue = 0x7fffff00 + x then numericValue = 10 ^ x</p> */ private static final int NUMERATOR_POWER_LIMIT_ = 0x7fffff00; /** * Integer properties mask and shift values for joining type. * Equivalent to icu4c UPROPS_JT_MASK. */ private static final int JOINING_TYPE_MASK_ = 0x00003800; /** * Integer properties mask and shift values for joining type. * Equivalent to icu4c UPROPS_JT_SHIFT. */ private static final int JOINING_TYPE_SHIFT_ = 11; /** * Integer properties mask and shift values for joining group. * Equivalent to icu4c UPROPS_JG_MASK. */ private static final int JOINING_GROUP_MASK_ = 0x000007e0; /** * Integer properties mask and shift values for joining group. * Equivalent to icu4c UPROPS_JG_SHIFT. */ private static final int JOINING_GROUP_SHIFT_ = 5; /** * Integer properties mask for decomposition type. * Equivalent to icu4c UPROPS_DT_MASK. */ private static final int DECOMPOSITION_TYPE_MASK_ = 0x0000001f; /** * Integer properties mask and shift values for East Asian cell width. * Equivalent to icu4c UPROPS_EA_MASK */ private static final int EAST_ASIAN_MASK_ = 0x00038000; /** * Integer properties mask and shift values for East Asian cell width. * Equivalent to icu4c UPROPS_EA_SHIFT */ private static final int EAST_ASIAN_SHIFT_ = 15; /** * Integer properties mask and shift values for line breaks. * Equivalent to icu4c UPROPS_LB_MASK */ private static final int LINE_BREAK_MASK_ = 0x007C0000; /** * Integer properties mask and shift values for line breaks. * Equivalent to icu4c UPROPS_LB_SHIFT */ private static final int LINE_BREAK_SHIFT_ = 18; /** * Integer properties mask and shift values for blocks. * Equivalent to icu4c UPROPS_BLOCK_MASK */ private static final int BLOCK_MASK_ = 0x00007f80; /** * Integer properties mask and shift values for blocks. * Equivalent to icu4c UPROPS_BLOCK_SHIFT */ private static final int BLOCK_SHIFT_ = 7; /** * Integer properties mask and shift values for scripts. * Equivalent to icu4c UPROPS_SHIFT_MASK */ private static final int SCRIPT_MASK_ = 0x0000007f; // private constructor ----------------------------------------------- ///CLOVER:OFF /** * Private constructor to prevent instantiation */ private UCharacter() { } ///CLOVER:ON // private methods --------------------------------------------------- /** * Getting the digit values of characters like 'A' - 'Z', normal, * half-width and full-width. This method assumes that the other digit * characters are checked by the calling method. * @param ch character to test * @return -1 if ch is not a character of the form 'A' - 'Z', otherwise * its corresponding digit will be returned. */ private static int getEuropeanDigit(int ch) { if ((ch > 0x7a && ch < 0xff21) || ch < 0x41 || (ch > 0x5a && ch < 0x61) || ch > 0xff5a || (ch > 0xff31 && ch < 0xff41)) { return -1; } if (ch <= 0x7a) { // ch >= 0x41 or ch < 0x61 return ch + 10 - ((ch <= 0x5a) ? 0x41 : 0x61); } // ch >= 0xff21 if (ch <= 0xff3a) { return ch + 10 - 0xff21; } // ch >= 0xff41 && ch <= 0xff5a return ch + 10 - 0xff41; } /** * Gets the numeric type of the property argument * @param props 32 bit property * @return the numeric type */ private static int getNumericType(int props) { return (props & NUMERIC_TYPE_MASK_) >> NUMERIC_TYPE_SHIFT_; } /** * Checks if the property value has a exception indicator * @param props 32 bit property value * @return true if property does not have a exception indicator, false * otherwise */ private static boolean isNotExceptionIndicator(int props) { return (props & UCharacterProperty.EXCEPTION_MASK) == 0; } /** * Gets the property value at the index. * This is optimized. * Note this is alittle different from CharTrie the index m_trieData_ * is never negative. * This is a duplicate of UCharacterProperty.getProperty. For optimization * purposes, this method calls the trie data directly instead of through * UCharacterProperty.getProperty. * @param ch code point whose property value is to be retrieved * @return property value of code point * @stable ICU 2.6 */ private static int getProperty(int ch) { if (ch < UTF16.LEAD_SURROGATE_MIN_VALUE || (ch > UTF16.LEAD_SURROGATE_MAX_VALUE && ch < UTF16.SUPPLEMENTARY_MIN_VALUE)) { // BMP codepoint try { // using try for < 0 ch is faster than using an if statement return PROPERTY_DATA_[ PROPERTY_TRIE_DATA_[ (PROPERTY_TRIE_INDEX_[ch >> 5] << 2) + (ch & 0x1f)]]; } catch (ArrayIndexOutOfBoundsException e) { return PROPERTY_INITIAL_VALUE_; } } if (ch <= UTF16.LEAD_SURROGATE_MAX_VALUE) { // surrogate return PROPERTY_DATA_[ PROPERTY_TRIE_DATA_[ (PROPERTY_TRIE_INDEX_[(0x2800 >> 5) + (ch >> 5)] << 2) + (ch & 0x1f)]]; } // for optimization if (ch <= UTF16.CODEPOINT_MAX_VALUE) { // look at the construction of supplementary characters // trail forms the ends of it. return PROPERTY_DATA_[PROPERTY_.m_trie_.getSurrogateValue( UTF16.getLeadSurrogate(ch), (char)(ch & 0x3ff))]; } // return m_dataOffset_ if there is an error, in this case we return // the default value: m_initialValue_ // we cannot assume that m_initialValue_ is at offset 0 // this is for optimization. return PROPERTY_INITIAL_VALUE_; } }