/*
* To change this template, choose Tools | Templates
* and open the template in the editor.
*/
package org.albite.lang;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
/**
*
* @author albus
*/
public class AlbiteCharacter {
public static final byte
UNASSIGNED = 0,
UPPERCASE_LETTER = 1,
LOWERCASE_LETTER = 2,
TITLECASE_LETTER = 3,
MODIFIER_LETTER = 4,
OTHER_LETTER = 5,
NON_SPACING_MARK = 6,
ENCLOSING_MARK = 7,
COMBINING_SPACING_MARK = 8,
DECIMAL_DIGIT_NUMBER = 9,
LETTER_NUMBER = 10,
OTHER_NUMBER = 11,
SPACE_SEPARATOR = 12,
LINE_SEPARATOR = 13,
PARAGRAPH_SEPARATOR = 14,
CONTROL = 15,
FORMAT = 16,
PRIVATE_USE = 18,
SURROGATE = 19,
DASH_PUNCTUATION = 20,
START_PUNCTUATION = 21,
END_PUNCTUATION = 22,
CONNECTOR_PUNCTUATION = 23,
OTHER_PUNCTUATION = 24,
MATH_SYMBOL = 25,
CURRENCY_SYMBOL = 26,
MODIFIER_SYMBOL = 27,
OTHER_SYMBOL = 28;
public static final byte[] X = new byte[1024];
public static final short[] Y = new short[4032];
public static final int[] A = new int[632];
static {
/*
* Load data from external file
*/
InputStream is = (new Object()).getClass()
.getResourceAsStream("/res/charmap.bin");
if (is != null) {
DataInputStream in = new DataInputStream(is);
try {
try {
for (int i = 0; i < 1024; i++) {
X[i] = in.readByte();
}
for (int i = 0; i < 4032; i++) {
Y[i] = in.readShort();
}
for (int i = 0; i < 632; i++) {
A[i] = in.readInt();
}
} finally {
in.close();
}
} catch (IOException e) {}
}
}
/**
* Returns a value indicating a character category.
*
* @param ch the character to be tested.
* @return a value of type int, the character category.
* @see java.lang.Character#COMBINING_SPACING_MARK
* @see java.lang.Character#CONNECTOR_PUNCTUATION
* @see java.lang.Character#CONTROL
* @see java.lang.Character#CURRENCY_SYMBOL
* @see java.lang.Character#DASH_PUNCTUATION
* @see java.lang.Character#DECIMAL_DIGIT_NUMBER
* @see java.lang.Character#ENCLOSING_MARK
* @see java.lang.Character#END_PUNCTUATION
* @see java.lang.Character#FORMAT
* @see java.lang.Character#LETTER_NUMBER
* @see java.lang.Character#LINE_SEPARATOR
* @see java.lang.Character#LOWERCASE_LETTER
* @see java.lang.Character#MATH_SYMBOL
* @see java.lang.Character#MODIFIER_LETTER
* @see java.lang.Character#MODIFIER_SYMBOL
* @see java.lang.Character#NON_SPACING_MARK
* @see java.lang.Character#OTHER_LETTER
* @see java.lang.Character#OTHER_NUMBER
* @see java.lang.Character#OTHER_PUNCTUATION
* @see java.lang.Character#OTHER_SYMBOL
* @see java.lang.Character#PARAGRAPH_SEPARATOR
* @see java.lang.Character#PRIVATE_USE
* @see java.lang.Character#SPACE_SEPARATOR
* @see java.lang.Character#START_PUNCTUATION
* @see java.lang.Character#SURROGATE
* @see java.lang.Character#TITLECASE_LETTER
* @see java.lang.Character#UNASSIGNED
* @see java.lang.Character#UPPERCASE_LETTER
* @since JDK1.1
*/
public static int getType(final char ch) {
return A[Y[(X[ch>>6]<<5)|((ch>>1)&0x1F)]|(ch&0x1)] & 0x1F;
}
/**
* Determines if the specified character is a letter or digit.
* For a more complete specification that encompasses all Unicode
* characters, see Gosling, Joy, and Steele, <i>The Java Language
* Specification</i>.
*
* <p> A character is considered to be a letter if and only if
* it is specified to be a letter or a digit by the Unicode 2.0 standard
* (category "Lu", "Ll", "Lt", "Lm", "Lo", or "Nd" in the Unicode
* specification data file). In other words, isLetterOrDigit is true
* of a character if and only if either isLetter is true of the character
* or isDigit is true of the character.
*
* @param ch the character to be tested.
* @return <code>true</code> if the character is a letter or digit;
* <code>false</code> otherwise.
* @see java.lang.Character#isDigit(char)
* @see java.lang.Character#isJavaIdentifierPart(char)
* @see java.lang.Character#isJavaLetter(char)
* @see java.lang.Character#isJavaLetterOrDigit(char)
* @see java.lang.Character#isLetter(char)
* @see java.lang.Character#isUnicodeIdentifierPart(char)
* @since JDK1.0.2
*/
public static boolean isLetterOrDigit(final char ch) {
return (((((1 << UPPERCASE_LETTER) |
(1 << LOWERCASE_LETTER) |
(1 << TITLECASE_LETTER) |
(1 << MODIFIER_LETTER) |
(1 << OTHER_LETTER) |
(1 << DECIMAL_DIGIT_NUMBER))
>> (A[Y[(X[ch>>6]<<5)|((ch>>1)&0x1F)]|(ch&0x1)] & 0x1F)) & 1) != 0);
}
/**
* Determines if the specified character is a letter. For a
* more complete specification that encompasses all Unicode
* characters, see Gosling, Joy, and Steele, <i>The Java Language
* Specification</i>.
*
* <p> A character is considered to be a letter if and only if
* it is specified to be a letter by the Unicode 2.0 standard
* (category "Lu", "Ll", "Lt", "Lm", or "Lo" in the Unicode
* specification data file).
*
* <p> Note that most ideographic characters are considered
* to be letters (category "Lo") for this purpose.
*
* <p> Note also that not all letters have case: many Unicode characters are
* letters but are neither uppercase nor lowercase nor titlecase.
*
* @param ch the character to be tested.
* @return <code>true</code> if the character is a letter;
* <code>false</code> otherwise.
* @see java.lang.Character#isDigit(char)
* @see java.lang.Character#isJavaIdentifierStart(char)
* @see java.lang.Character#isJavaLetter(char)
* @see java.lang.Character#isJavaLetterOrDigit(char)
* @see java.lang.Character#isLetterOrDigit(char)
* @see java.lang.Character#isLowerCase(char)
* @see java.lang.Character#isTitleCase(char)
* @see java.lang.Character#isUnicodeIdentifierStart(char)
* @see java.lang.Character#isUpperCase(char)
*/
public static boolean isLetter(final char ch) {
return (((((1 << UPPERCASE_LETTER) |
(1 << LOWERCASE_LETTER) |
(1 << TITLECASE_LETTER) |
(1 << MODIFIER_LETTER) |
(1 << OTHER_LETTER))
>> (A[Y[(X[ch>>6]<<5)|((ch>>1)&0x1F)]|(ch&0x1)] & 0x1F)) & 1) != 0);
}
/**
* Determines if the specified character is a digit.
* A character is considered to be a digit if it is not in the range
* <code>'\u2000' <= ch <= '\u2FFF'</code>
* and its Unicode name contains the word
* "<code>DIGIT</code>". For a more complete
* specification that encompasses all Unicode characters that are
* defined as digits, see Gosling, Joy, and Steele, <i>The Java
* Language Specification</i>.
* <p>
* These are the ranges of Unicode characters that are considered digits:
* <table>
* <tr><td>0x0030 through 0x0039</td>
* <td>ISO-LATIN-1 digits ('0' through '9')</td></tr>
* <tr><td>0x0660 through 0x0669</td> <td>Arabic-Indic digits</td></tr>
* <tr><td>0x06F0 through 0x06F9</td>
* <td>Extended Arabic-Indic digits</td></tr>
* <tr><td>0x0966 through 0x096F</td> <td>Devanagari digits</td></tr>
* <tr><td>0x09E6 through 0x09EF</td> <td>Bengali digits</td></tr>
* <tr><td>0x0A66 through 0x0A6F</td> <td>Gurmukhi digits</td></tr>
* <tr><td>0x0AE6 through 0x0AEF</td> <td>Gujarati digits</td></tr>
* <tr><td>0x0B66 through 0x0B6F</td> <td>Oriya digits</td></tr>
* <tr><td>0x0BE7 through 0x0BEF</td> <td>Tamil digits</td></tr>
* <tr><td>0x0C66 through 0x0C6F</td> <td>Telugu digits</td></tr>
* <tr><td>0x0CE6 through 0x0CEF</td> <td>Kannada digits</td></tr>
* <tr><td>0x0D66 through 0x0D6F</td> <td>Malayalam digits</td></tr>
* <tr><td>0x0E50 through 0x0E59</td> <td>Thai digits</td></tr>
* <tr><td>0x0ED0 through 0x0ED9</td> <td>Lao digits</td></tr>
* <tr><td>0x0F20 through 0x0F29</td> <td>Tibetan digits</td></tr>
* <tr><td>0xFF10 through 0xFF19</td> <td>Fullwidth digits</td></tr>
* </table>
*
* @param ch the character to be tested.
* @return <code>true</code> if the character is a digit;
* <code>false</code> otherwise.
* @see java.lang.Character#digit(char, int)
* @see java.lang.Character#forDigit(int, int)
*/
public static boolean isDigit(final char ch) {
return (A[Y[(X[ch>>6]<<5)|((ch>>1)&0x1F)]|(ch&0x1)] & 0x1F) == DECIMAL_DIGIT_NUMBER;
}
/**
* The given character is mapped to its lowercase equivalent; if the
* character has no lowercase equivalent, the character itself is
* returned.
* <p>
* A character has a lowercase equivalent if and only if a lowercase
* mapping is specified for the character in the Unicode attribute
* table.
* <p>
* Note that some Unicode characters in the range
* <code>'\u2000'</code> to <code>'\u2FFF'</code> have lowercase
* mappings; this method does map such characters to their lowercase
* equivalents even though the method <code>isUpperCase</code> does
* not return <code>true</code> for such characters.
*
* @param ch the character to be converted.
* @return the lowercase equivalent of the character, if any;
* otherwise the character itself.
* @see java.lang.Character#isLowerCase(char)
* @see java.lang.Character#isUpperCase(char)
* @see java.lang.Character#toTitleCase(char)
* @see java.lang.Character#toUpperCase(char)
*/
public static char toLowerCase(final char ch) {
final int val = A[Y[(X[ch>>6]<<5)|((ch>>1)&0x1F)]|(ch&0x1)];
if ((val & 0x00200000) != 0) {
return (char)(ch + (val >> 22));
} else {
return ch;
}
}
public static char[] toLowerCase(final char[] ch) {
final char[] res = new char[ch.length];
for (int i = 0; i < ch.length; i++) {
res[i] = toLowerCase(ch[i]);
}
return res;
}
public static String toLowerCase(final String s) {
return new String(toLowerCase(s.toCharArray()));
}
}