package edu.stanford.nlp.util;
/**
* Character-level utilities.
*
*
* @author Dan Klein
* @author Spence Green
*/
public final class Characters {

  /** Not instantiable: this class only exposes static helpers. */
  private Characters() {}

  /**
   * Boxes a primitive {@code char} via {@link Character#valueOf(char)}.
   *
   * <p>TODO(spenceg) This method used to cache the lookup in this package,
   * but {@code valueOf} performs its own internal caching, so this wrapper
   * adds nothing and should eventually be removed.
   *
   * @param c the character to box
   * @return the boxed {@code Character} for {@code c}
   */
  public static Character getCharacter(char c) {
    return Character.valueOf(c);
  }

  /**
   * Map a String to an array of type Character, one element per UTF-16
   * code unit (a supplementary code point therefore occupies two elements).
   *
   * @param s The String to map
   * @return An array of Character with length {@code s.length()}
   * @throws NullPointerException if {@code s} is null
   */
  public static Character[] asCharacterArray(String s) {
    final int length = s.length();
    final Character[] boxed = new Character[length];
    for (int i = 0; i < length; i++) {
      boxed[i] = Character.valueOf(s.charAt(i));
    }
    return boxed;
  }

  /**
   * Returns a string representation of a character's unicode block.
   *
   * @param c the UTF-16 code unit to look up
   * @return the name of the block as given by its {@code toString()}, or
   *         {@code "Undefined"} if the character belongs to no block
   */
  public static String unicodeBlockStringOf(char c) {
    return unicodeBlockStringOf((int) c);
  }

  /**
   * Returns a string representation of a code point's unicode block.
   * Unlike the {@code char} overload, this accepts supplementary code
   * points (above U+FFFF).
   *
   * @param codePoint the Unicode code point to look up
   * @return the name of the block as given by its {@code toString()}, or
   *         {@code "Undefined"} if the code point belongs to no block
   * @throws IllegalArgumentException if {@code codePoint} is not a valid
   *         Unicode code point
   */
  public static String unicodeBlockStringOf(int codePoint) {
    Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
    if (block == null) {
      return "Undefined";
    }
    return block.toString();
  }

  /**
   * Returns true if a character is punctuation, and false otherwise.
   *
   * @param c the UTF-16 code unit to test
   * @return true iff the general category of {@code c} is one of the
   *         punctuation categories (start, end, other, connector, dash,
   *         initial-quote or final-quote punctuation)
   */
  public static boolean isPunctuation(char c) {
    return isPunctuation((int) c);
  }

  /**
   * Returns true if a code point is punctuation, and false otherwise.
   * Unlike the {@code char} overload, this accepts supplementary code
   * points (above U+FFFF).
   *
   * @param codePoint the Unicode code point to test
   * @return true iff the general category of {@code codePoint} is one of
   *         the punctuation categories (start, end, other, connector, dash,
   *         initial-quote or final-quote punctuation)
   */
  public static boolean isPunctuation(int codePoint) {
    switch (Character.getType(codePoint)) {
      case Character.START_PUNCTUATION:
      case Character.END_PUNCTUATION:
      case Character.OTHER_PUNCTUATION:
      case Character.CONNECTOR_PUNCTUATION:
      case Character.DASH_PUNCTUATION:
      case Character.INITIAL_QUOTE_PUNCTUATION:
      case Character.FINAL_QUOTE_PUNCTUATION:
        return true;
      default:
        return false;
    }
  }

  /**
   * Returns true if a character is a symbol, and false otherwise.
   *
   * @param c the UTF-16 code unit to test
   * @return true iff the general category of {@code c} is math, currency,
   *         modifier or other symbol
   */
  public static boolean isSymbol(char c) {
    return isSymbol((int) c);
  }

  /**
   * Returns true if a code point is a symbol, and false otherwise.
   * Unlike the {@code char} overload, this accepts supplementary code
   * points (above U+FFFF).
   *
   * @param codePoint the Unicode code point to test
   * @return true iff the general category of {@code codePoint} is math,
   *         currency, modifier or other symbol
   */
  public static boolean isSymbol(int codePoint) {
    switch (Character.getType(codePoint)) {
      case Character.MATH_SYMBOL:
      case Character.CURRENCY_SYMBOL:
      case Character.MODIFIER_SYMBOL:
      case Character.OTHER_SYMBOL:
        return true;
      default:
        return false;
    }
  }

  /**
   * Returns true if a character is a control character, and false otherwise.
   *
   * @param c the UTF-16 code unit to test
   * @return true iff the general category of {@code c} is
   *         {@link Character#CONTROL}
   */
  public static boolean isControl(char c) {
    return isControl((int) c);
  }

  /**
   * Returns true if a code point is a control character, and false
   * otherwise.
   *
   * @param codePoint the Unicode code point to test
   * @return true iff the general category of {@code codePoint} is
   *         {@link Character#CONTROL}
   */
  public static boolean isControl(int codePoint) {
    return Character.getType(codePoint) == Character.CONTROL;
  }
}