package moviescraper.doctord.controller.languagetranslation;
/**
* JapaneseCharacter contains static functions that test whether a
* character belongs to one of the various types of characters used
* in the Japanese writing system.
* <p>
* There are also functions to translate between Katakana, Hiragana,
* and Romaji.
*
* @author Duane J. May <djmay@mayhoo.com>
* @version $Id: JapaneseCharacter.java,v 1.2 2002/04/20 18:10:24 djmay Exp $
* @since 10:37 AM - 6/3/14
*
* @see <a href="http://sourceforge.net/projects/kanjixml/">http://sourceforge.net/projects/kanjixml/</a>
*/
public class JapaneseCharacter {

    /**
     * Version information
     */
    @SuppressWarnings("unused")
    private final static String VERSION =
            "$Id: JapaneseCharacter.java,v 1.2 2002/04/20 18:10:24 djmay Exp $";

    /** Code point of the first Hiragana character; index 0 of {@link #romaji}. */
    private static final char HIRAGANA_BASE = '\u3041';

    /** Distance between a Hiragana character and its full width Katakana twin. */
    private static final int KANA_BLOCK_OFFSET = 0x60;

    /** Code point of the first half width Katakana character (U+FF66). */
    private static final char HALF_WIDTH_KATAKANA_BASE = '\uff66';

    /**
     * Hiragana equivalent of every half width Katakana from U+FF66 to U+FF9D,
     * indexed by {@code c - HALF_WIDTH_KATAKANA_BASE}. The half width block is
     * laid out differently from the Hiragana block (it starts with WO and the
     * small kana and has no voiced forms), so a constant offset cannot be used.
     * The prolonged sound mark (U+FF70) has no Hiragana form and maps to itself.
     */
    private static final String HALF_WIDTH_TO_HIRAGANA =
            // U+FF66..U+FF6F: wo, small a i u e o, small ya yu yo, small tsu
            "\u3092\u3041\u3043\u3045\u3047\u3049\u3083\u3085\u3087\u3063"
            // U+FF70: prolonged sound mark, kept as is
            + "\uff70"
            // U+FF71..U+FF75: a i u e o
            + "\u3042\u3044\u3046\u3048\u304a"
            // U+FF76..U+FF7A: ka ki ku ke ko
            + "\u304b\u304d\u304f\u3051\u3053"
            // U+FF7B..U+FF7F: sa shi su se so
            + "\u3055\u3057\u3059\u305b\u305d"
            // U+FF80..U+FF84: ta chi tsu te to
            + "\u305f\u3061\u3064\u3066\u3068"
            // U+FF85..U+FF89: na ni nu ne no
            + "\u306a\u306b\u306c\u306d\u306e"
            // U+FF8A..U+FF8E: ha hi fu he ho
            + "\u306f\u3072\u3075\u3078\u307b"
            // U+FF8F..U+FF93: ma mi mu me mo
            + "\u307e\u307f\u3080\u3081\u3082"
            // U+FF94..U+FF96: ya yu yo
            + "\u3084\u3086\u3088"
            // U+FF97..U+FF9B: ra ri ru re ro
            + "\u3089\u308a\u308b\u308c\u308d"
            // U+FF9C..U+FF9D: wa n
            + "\u308f\u3093";

    /**
     * Determines if this character is a Japanese Kana.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is Hiragana or Katakana
     */
    public static boolean isKana(char c) {
        return isHiragana(c) || isKatakana(c);
    }

    /**
     * Determines if this character is one of the Japanese Hiragana.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} lies in U+3041..U+309E
     */
    public static boolean isHiragana(char c) {
        return '\u3041' <= c && c <= '\u309e';
    }

    /**
     * Determines if this character is one of the Japanese Katakana.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} is half width or full width Katakana
     */
    public static boolean isKatakana(char c) {
        return isHalfWidthKatakana(c) || isFullWidthKatakana(c);
    }

    /**
     * Determines if this character is a Half width Katakana.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} lies in U+FF66..U+FF9D
     */
    public static boolean isHalfWidthKatakana(char c) {
        return '\uff66' <= c && c <= '\uff9d';
    }

    /**
     * Determines if this character is a Full width Katakana.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} lies in U+30A1..U+30FE
     */
    public static boolean isFullWidthKatakana(char c) {
        return '\u30a1' <= c && c <= '\u30fe';
    }

    /**
     * Determines if this character is a Kanji character.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} lies in U+4E00..U+9FA5 or in
     *         U+3005..U+3007
     */
    public static boolean isKanji(char c) {
        boolean inMainRange = '\u4e00' <= c && c <= '\u9fa5';
        boolean inSymbolRange = '\u3005' <= c && c <= '\u3007';
        return inMainRange || inSymbolRange;
    }

    /**
     * Determines if this character could be used as part of
     * a romaji character: an ASCII letter, or a character in
     * U+0021..U+003A (ASCII punctuation, digits and the colon).
     * <p>
     * Earlier versions tested U+0041..U+0090 first, which also accepted
     * DEL and C1 control characters and made the letter checks unreachable.
     *
     * @param c the character to test
     * @return {@code true} if {@code c} falls in one of the ranges above
     */
    public static boolean isRomaji(char c) {
        boolean upperCaseLetter = 'A' <= c && c <= 'Z';
        boolean lowerCaseLetter = 'a' <= c && c <= 'z';
        boolean punctuationOrDigit = '\u0021' <= c && c <= '\u003a';
        return upperCaseLetter || lowerCaseLetter || punctuationOrDigit;
    }

    /**
     * Translates this character into the equivalent Katakana character.
     * The function only operates on Hiragana and always returns the
     * Full width version of the Katakana. If the character is outside the
     * Hiragana, or is a Hiragana-block code point without a Katakana twin
     * (U+3097..U+309C), then the original character is returned.
     *
     * @param c the character to translate
     * @return the full width Katakana equivalent, or {@code c} itself
     */
    public static char toKatakana(char c) {
        if (hasKanaTwin(c)) {
            return (char) (c + KANA_BLOCK_OFFSET);
        }
        return c;
    }

    /**
     * Translates this character into the equivalent Hiragana character.
     * The function only operates on Katakana characters.
     * If the character is outside the Full width or Half width
     * Katakana, or is a Katakana with no Hiragana equivalent (for example
     * the prolonged sound mark or the middle dot), then the original
     * character is returned.
     *
     * @param c the character to translate
     * @return the Hiragana equivalent, or {@code c} itself
     */
    public static char toHiragana(char c) {
        if (isFullWidthKatakana(c)) {
            char shifted = (char) (c - KANA_BLOCK_OFFSET);
            // Only shift when the target is a real Hiragana counterpart;
            // otherwise the result would be an unrelated or unassigned code point.
            return hasKanaTwin(shifted) ? shifted : c;
        }
        if (isHalfWidthKatakana(c)) {
            return HALF_WIDTH_TO_HIRAGANA.charAt(c - HALF_WIDTH_KATAKANA_BASE);
        }
        return c;
    }

    /**
     * Translates this character into the equivalent Romaji character.
     * The function only operates on Hiragana and Katakana characters.
     * If the character is outside the given range, or is a kana with no
     * entry in the romaji table, then the original character is returned.
     * <p>
     * The resulting string is lowercase if the input was Hiragana and
     * UPPERCASE if the input was Katakana.
     *
     * @param c the character to translate
     * @return the romaji string, or a one-character string holding {@code c}
     */
    public static String toRomaji(char c) {
        if (isHiragana(c)) {
            String lower = lookupRomaji(c);
            return lower != null ? lower : String.valueOf(c);
        }
        if (isKatakana(c)) {
            String lower = lookupRomaji(toHiragana(c));
            if (lower != null) {
                // Locale.ROOT keeps "i" -> "I" regardless of the default locale.
                return lower.toUpperCase(java.util.Locale.ROOT);
            }
        }
        return String.valueOf(c);
    }

    /**
     * The array used to map hiragana to romaji. Index 0 corresponds to
     * U+3041 and the last entry to U+3096; each small kana shares the
     * reading listed next to its full size form, except small ya/yu/yo
     * which map to "a"/"u"/"o".
     */
    protected static String[] romaji = {
        "a", "a",
        "i", "i",
        "u", "u",
        "e", "e",
        "o", "o",
        "ka", "ga",
        "ki", "gi",
        "ku", "gu",
        "ke", "ge",
        "ko", "go",
        "sa", "za",
        "shi", "ji",
        "su", "zu",
        "se", "ze",
        "so", "zo",
        "ta", "da",
        "chi", "ji",
        "tsu", "tsu", "zu",
        "te", "de",
        "to", "do",
        "na",
        "ni",
        "nu",
        "ne",
        "no",
        "ha", "ba", "pa",
        "hi", "bi", "pi",
        "fu", "bu", "pu",
        "he", "be", "pe",
        "ho", "bo", "po",
        "ma",
        "mi",
        "mu",
        "me",
        "mo",
        "a", "ya",
        "u", "yu",
        "o", "yo",
        "ra",
        "ri",
        "ru",
        "re",
        "ro",
        "wa", "wa",
        "wi", "we",
        "o",
        "n",
        "v",
        "ka",
        "ke"
    };

    /**
     * Tells whether a Hiragana-block character has a Katakana twin exactly
     * {@link #KANA_BLOCK_OFFSET} code points above it: the letters
     * U+3041..U+3096 and the iteration marks U+309D..U+309E.
     */
    private static boolean hasKanaTwin(char hiragana) {
        boolean letter = '\u3041' <= hiragana && hiragana <= '\u3096';
        boolean iterationMark = '\u309d' <= hiragana && hiragana <= '\u309e';
        return letter || iterationMark;
    }

    /**
     * Access the array to return the correct romaji string.
     *
     * @return the table entry for {@code c}, or {@code null} when {@code c}
     *         falls outside the table
     */
    private static String lookupRomaji(char c) {
        int index = c - HIRAGANA_BASE;
        if (index < 0 || index >= romaji.length) {
            return null;
        }
        return romaji[index];
    }

    /**
     * Converts a string made up entirely of kana into romaji, one
     * character at a time via {@link #toRomaji(char)}. Kana that have no
     * romaji entry are copied through unchanged.
     *
     * @param input the text to convert; may be {@code null}
     * @return the converted text; an empty string when {@code input} is
     *         {@code null} or empty; {@code null} when {@code input}
     *         contains any character that is not Hiragana or Katakana
     */
    public static String convertToRomaji(String input) {
        if (input == null || input.length() == 0) {
            return "";
        }
        StringBuilder out = new StringBuilder();
        for (int i = 0; i < input.length(); i++) {
            char ch = input.charAt(i);
            if (!isKana(ch)) {
                // Existing contract: any non-kana character aborts the conversion.
                return null;
            }
            out.append(toRomaji(ch));
        }
        return out.toString();
    }

    /**
     * Returns true if one or more letters in the word are katakana, kanji, or hiragana
     *
     * @param word the text to inspect; {@code null} is treated as containing
     *             no Japanese letters
     * @return {@code true} if at least one character is kana or kanji
     */
    public static boolean containsJapaneseLetter(String word) {
        if (word == null) {
            return false;
        }
        for (int i = 0; i < word.length(); i++) {
            char currentChar = word.charAt(i);
            if (isKana(currentChar) || isKanji(currentChar)) {
                return true;
            }
        }
        return false;
    }
}