// $Id: JapaneseCharacter.java,v 1.2 2002/04/20 18:10:24 djmay Exp $
// $Id: JapaneseCharacter.java,v 1.2 2002/04/20 18:10:24 djmay Exp $
package ee.esutoniagodesu.util.lang.lingv;
/**
* JapaneseCharacter contains static functions to do various tests
* on characters to determine if it is one of the various types of
* characters used in the japanese writing system.
* <p/>
* There are also a functions to translate between Katakana, Hiragana,
* and Romaji.
*
* @author Duane J. May <djmay@mayhoo.com>
* @version $Id: JapaneseCharacter.java,v 1.2 2002/04/20 18:10:24 djmay Exp $
*/
public final class JapaneseCharacter {
/**
* Version information
*/
private final static String VERSION =
"$Id: JapaneseCharacter.java,v 1.2 2002/04/20 18:10:24 djmay Exp $";
/**
* Determines if this character is a Japanese Kana.
*/
public static boolean isKana(char c) {
return (isHiragana(c) || isKatakana(c));
}
/**
* Determines if this character is one of the Japanese Hiragana.
*/
public static boolean isHiragana(char c) {
return (('\u3041' <= c) && (c <= '\u309e'));
}
/**
* Determines if this character is one of the Japanese Katakana.
*/
public static boolean isKatakana(char c) {
return (isHalfwidthKatakana(c) || isFullwidthKatakana(c));
}
/**
* Determines if this character is a Half width Katakana.
*/
public static boolean isHalfwidthKatakana(char c) {
return (('\uff66' <= c) && (c <= '\uff9d'));
}
/**
* Determines if this character is a Full width Katakana.
*/
public static boolean isFullwidthKatakana(char c) {
return (('\u30a1' <= c) && (c <= '\u30fe'));
}
/**
* Determines if this character is a Kanji character.
*/
public static boolean isKanji(char c) {
if (('\u4e00' <= c) && (c <= '\u9fa5')) {
return true;
}
if (('\u3005' <= c) && (c <= '\u3007')) {
return true;
}
return false;
}
/**
* Determines if this character could be used as part of
* a romaji character.
*/
public static boolean isRomaji(char c) {
if (('\u0041' <= c) && (c <= '\u0090'))
return true;
else if (('\u0061' <= c) && (c <= '\u007a'))
return true;
else if (('\u0021' <= c) && (c <= '\u003a'))
return true;
else if (('\u0041' <= c) && (c <= '\u005a'))
return true;
else
return false;
}
/**
* Translates this character into the equivalent Katakana character.
* The function only operates on Hiragana and always returns the
* Full width version of the Katakana. If the character is outside the
* Hiragana then the origianal character is returned.
*/
public static char toKatakana(char c) {
if (isHiragana(c)) {
return (char) (c + 0x60);
}
return c;
}
/**
* Translates this character into the equivalent Hiragana character.
* The function only operates on Katakana characters
* If the character is outside the Full width or Half width
* Katakana then the origianal character is returned.
*/
public static char toHiragana(char c) {
if (isFullwidthKatakana(c)) {
return (char) (c - 0x60);
} else if (isHalfwidthKatakana(c)) {
return (char) (c - 0xcf25);
}
return c;
}
/**
* Translates this character into the equivalent Romaji character.
* The function only operates on Hiragana and Katakana characters
* If the character is outside the given range then
* the origianal character is returned.
* <p/>
* The resulting string is lowercase if the input was Hiragana and
* UPPERCASE if the input was Katakana.
*/
public static String toRomaji(char c) {
if (isHiragana(c)) {
return lookupRomaji(c);
} else if (isKatakana(c)) {
c = toHiragana(c);
String str = lookupRomaji(c);
return str.toUpperCase();
}
return String.valueOf(c);
}
/**
* The array used to map hirgana to romaji.
*/
protected static String romaji[] = {
"a", "a",
"i", "i",
"u", "u",
"e", "e",
"o", "o",
"ka", "ga",
"ki", "gi",
"ku", "gu",
"ke", "ge",
"ko", "go",
"sa", "za",
"shi", "ji",
"su", "zu",
"se", "ze",
"so", "zo",
"ta", "da",
"chi", "ji",
"tsu", "tsu", "zu",
"te", "de",
"to", "do",
"na",
"ni",
"nu",
"ne",
"no",
"ha", "ba", "pa",
"hi", "bi", "pi",
"fu", "bu", "pu",
"he", "be", "pe",
"ho", "bo", "po",
"ma",
"mi",
"mu",
"me",
"mo",
"a", "ya",
"u", "yu",
"o", "yo",
"ra",
"ri",
"ru",
"re",
"ro",
"wa", "wa",
"wi", "we",
"o",
"n",
"v",
"ka",
"ke"
};
/**
* Access the array to return the correct romaji string.
*/
private static String lookupRomaji(char c) {
return romaji[c - 0x3041];
}
/**
* @return hiraganast katakanasse või vastupidi
*/
public static String convertKana(String input) {
if (input == null || input.length() == 0) return "";
StringBuilder out = new StringBuilder();
char ch = input.charAt(0);
if (JapaneseCharacter.isHiragana(ch)) { // convert to hiragana to katakana
for (int i = 0; i < input.length(); i++) {
out.append(JapaneseCharacter.toKatakana(input.charAt(i)));
}
} else if (JapaneseCharacter.isKatakana(ch)) { // convert to katakana to hiragana
for (int i = 0; i < input.length(); i++) {
out.append(JapaneseCharacter.toHiragana(input.charAt(i)));
}
} else { // do nothing if neither
return input;
}
return out.toString();
}
}