/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.analysis.ja.util; import java.io.IOException; import java.util.HashMap; /** * Utility class for english translations of morphological data, * used only for debugging. */ public class ToStringUtil { // a translation map for parts of speech, only used for reflectWith private static final HashMap<String,String> posTranslations = new HashMap<>(); static { posTranslations.put("名詞", "noun"); posTranslations.put("名詞-一般", "noun-common"); posTranslations.put("名詞-固有名詞", "noun-proper"); posTranslations.put("名詞-固有名詞-一般", "noun-proper-misc"); posTranslations.put("名詞-固有名詞-人名", "noun-proper-person"); posTranslations.put("名詞-固有名詞-人名-一般", "noun-proper-person-misc"); posTranslations.put("名詞-固有名詞-人名-姓", "noun-proper-person-surname"); posTranslations.put("名詞-固有名詞-人名-名", "noun-proper-person-given_name"); posTranslations.put("名詞-固有名詞-組織", "noun-proper-organization"); posTranslations.put("名詞-固有名詞-地域", "noun-proper-place"); posTranslations.put("名詞-固有名詞-地域-一般", "noun-proper-place-misc"); posTranslations.put("名詞-固有名詞-地域-国", "noun-proper-place-country"); posTranslations.put("名詞-代名詞", "noun-pronoun"); posTranslations.put("名詞-代名詞-一般", "noun-pronoun-misc"); posTranslations.put("名詞-代名詞-縮約", "noun-pronoun-contraction"); posTranslations.put("名詞-副詞可能", "noun-adverbial"); posTranslations.put("名詞-サ変接続", "noun-verbal"); posTranslations.put("名詞-形容動詞語幹", "noun-adjective-base"); posTranslations.put("名詞-数", "noun-numeric"); posTranslations.put("名詞-非自立", "noun-affix"); posTranslations.put("名詞-非自立-一般", "noun-affix-misc"); posTranslations.put("名詞-非自立-副詞可能", "noun-affix-adverbial"); posTranslations.put("名詞-非自立-助動詞語幹", "noun-affix-aux"); posTranslations.put("名詞-非自立-形容動詞語幹", "noun-affix-adjective-base"); posTranslations.put("名詞-特殊", "noun-special"); posTranslations.put("名詞-特殊-助動詞語幹", "noun-special-aux"); posTranslations.put("名詞-接尾", "noun-suffix"); posTranslations.put("名詞-接尾-一般", "noun-suffix-misc"); posTranslations.put("名詞-接尾-人名", "noun-suffix-person"); posTranslations.put("名詞-接尾-地域", "noun-suffix-place"); posTranslations.put("名詞-接尾-サ変接続", "noun-suffix-verbal"); posTranslations.put("名詞-接尾-助動詞語幹", "noun-suffix-aux"); posTranslations.put("名詞-接尾-形容動詞語幹", "noun-suffix-adjective-base"); posTranslations.put("名詞-接尾-副詞可能", "noun-suffix-adverbial"); posTranslations.put("名詞-接尾-助数詞", "noun-suffix-classifier"); posTranslations.put("名詞-接尾-特殊", "noun-suffix-special"); posTranslations.put("名詞-接続詞的", "noun-suffix-conjunctive"); posTranslations.put("名詞-動詞非自立的", "noun-verbal_aux"); posTranslations.put("名詞-引用文字列", "noun-quotation"); posTranslations.put("名詞-ナイ形容詞語幹", "noun-nai_adjective"); posTranslations.put("接頭詞", "prefix"); posTranslations.put("接頭詞-名詞接続", "prefix-nominal"); posTranslations.put("接頭詞-動詞接続", "prefix-verbal"); posTranslations.put("接頭詞-形容詞接続", "prefix-adjectival"); posTranslations.put("接頭詞-数接続", "prefix-numerical"); posTranslations.put("動詞", "verb"); posTranslations.put("動詞-自立", "verb-main"); posTranslations.put("動詞-非自立", "verb-auxiliary"); posTranslations.put("動詞-接尾", "verb-suffix"); posTranslations.put("形容詞", "adjective"); posTranslations.put("形容詞-自立", "adjective-main"); posTranslations.put("形容詞-非自立", "adjective-auxiliary"); posTranslations.put("形容詞-接尾", "adjective-suffix"); posTranslations.put("副詞", "adverb"); posTranslations.put("副詞-一般", "adverb-misc"); posTranslations.put("副詞-助詞類接続", "adverb-particle_conjunction"); posTranslations.put("連体詞", "adnominal"); posTranslations.put("接続詞", "conjunction"); posTranslations.put("助詞", "particle"); posTranslations.put("助詞-格助詞", "particle-case"); posTranslations.put("助詞-格助詞-一般", "particle-case-misc"); posTranslations.put("助詞-格助詞-引用", "particle-case-quote"); posTranslations.put("助詞-格助詞-連語", "particle-case-compound"); posTranslations.put("助詞-接続助詞", "particle-conjunctive"); posTranslations.put("助詞-係助詞", "particle-dependency"); posTranslations.put("助詞-副助詞", "particle-adverbial"); posTranslations.put("助詞-間投助詞", "particle-interjective"); posTranslations.put("助詞-並立助詞", "particle-coordinate"); posTranslations.put("助詞-終助詞", "particle-final"); posTranslations.put("助詞-副助詞/並立助詞/終助詞", "particle-adverbial/conjunctive/final"); posTranslations.put("助詞-連体化", "particle-adnominalizer"); posTranslations.put("助詞-副詞化", "particle-adnominalizer"); posTranslations.put("助詞-特殊", "particle-special"); posTranslations.put("助動詞", "auxiliary-verb"); posTranslations.put("感動詞", "interjection"); posTranslations.put("記号", "symbol"); posTranslations.put("記号-一般", "symbol-misc"); posTranslations.put("記号-句点", "symbol-period"); posTranslations.put("記号-読点", "symbol-comma"); posTranslations.put("記号-空白", "symbol-space"); posTranslations.put("記号-括弧開", "symbol-open_bracket"); posTranslations.put("記号-括弧閉", "symbol-close_bracket"); posTranslations.put("記号-アルファベット", "symbol-alphabetic"); posTranslations.put("その他", "other"); posTranslations.put("その他-間投", "other-interjection"); posTranslations.put("フィラー", "filler"); posTranslations.put("非言語音", "non-verbal"); posTranslations.put("語断片", "fragment"); posTranslations.put("未知語", "unknown"); } /** * Get the english form of a POS tag */ public static String getPOSTranslation(String s) { return posTranslations.get(s); } // a translation map for inflection types, only used for reflectWith private static final HashMap<String,String> inflTypeTranslations = new HashMap<>(); static { inflTypeTranslations.put("*", "*"); inflTypeTranslations.put("形容詞・アウオ段", "adj-group-a-o-u"); inflTypeTranslations.put("形容詞・イ段", "adj-group-i"); inflTypeTranslations.put("形容詞・イイ", "adj-group-ii"); inflTypeTranslations.put("不変化型", "non-inflectional"); inflTypeTranslations.put("特殊・タ", "special-da"); inflTypeTranslations.put("特殊・ダ", "special-ta"); inflTypeTranslations.put("文語・ゴトシ", "classical-gotoshi"); inflTypeTranslations.put("特殊・ジャ", "special-ja"); inflTypeTranslations.put("特殊・ナイ", "special-nai"); inflTypeTranslations.put("五段・ラ行特殊", "5-row-cons-r-special"); inflTypeTranslations.put("特殊・ヌ", "special-nu"); inflTypeTranslations.put("文語・キ", "classical-ki"); inflTypeTranslations.put("特殊・タイ", "special-tai"); inflTypeTranslations.put("文語・ベシ", "classical-beshi"); inflTypeTranslations.put("特殊・ヤ", "special-ya"); inflTypeTranslations.put("文語・マジ", "classical-maji"); inflTypeTranslations.put("下二・タ行", "2-row-lower-cons-t"); inflTypeTranslations.put("特殊・デス", "special-desu"); inflTypeTranslations.put("特殊・マス", "special-masu"); inflTypeTranslations.put("五段・ラ行アル", "5-row-aru"); inflTypeTranslations.put("文語・ナリ", "classical-nari"); inflTypeTranslations.put("文語・リ", "classical-ri"); inflTypeTranslations.put("文語・ケリ", "classical-keri"); inflTypeTranslations.put("文語・ル", "classical-ru"); inflTypeTranslations.put("五段・カ行イ音便", "5-row-cons-k-i-onbin"); inflTypeTranslations.put("五段・サ行", "5-row-cons-s"); inflTypeTranslations.put("一段", "1-row"); inflTypeTranslations.put("五段・ワ行促音便", "5-row-cons-w-cons-onbin"); inflTypeTranslations.put("五段・マ行", "5-row-cons-m"); inflTypeTranslations.put("五段・タ行", "5-row-cons-t"); inflTypeTranslations.put("五段・ラ行", "5-row-cons-r"); inflTypeTranslations.put("サ変・−スル", "irregular-suffix-suru"); inflTypeTranslations.put("五段・ガ行", "5-row-cons-g"); inflTypeTranslations.put("サ変・−ズル", "irregular-suffix-zuru"); inflTypeTranslations.put("五段・バ行", "5-row-cons-b"); inflTypeTranslations.put("五段・ワ行ウ音便", "5-row-cons-w-u-onbin"); inflTypeTranslations.put("下二・ダ行", "2-row-lower-cons-d"); inflTypeTranslations.put("五段・カ行促音便ユク", "5-row-cons-k-cons-onbin-yuku"); inflTypeTranslations.put("上二・ダ行", "2-row-upper-cons-d"); inflTypeTranslations.put("五段・カ行促音便", "5-row-cons-k-cons-onbin"); inflTypeTranslations.put("一段・得ル", "1-row-eru"); inflTypeTranslations.put("四段・タ行", "4-row-cons-t"); inflTypeTranslations.put("五段・ナ行", "5-row-cons-n"); inflTypeTranslations.put("下二・ハ行", "2-row-lower-cons-h"); inflTypeTranslations.put("四段・ハ行", "4-row-cons-h"); inflTypeTranslations.put("四段・バ行", "4-row-cons-b"); inflTypeTranslations.put("サ変・スル", "irregular-suru"); inflTypeTranslations.put("上二・ハ行", "2-row-upper-cons-h"); inflTypeTranslations.put("下二・マ行", "2-row-lower-cons-m"); inflTypeTranslations.put("四段・サ行", "4-row-cons-s"); inflTypeTranslations.put("下二・ガ行", "2-row-lower-cons-g"); inflTypeTranslations.put("カ変・来ル", "kuru-kanji"); inflTypeTranslations.put("一段・クレル", "1-row-kureru"); inflTypeTranslations.put("下二・得", "2-row-lower-u"); inflTypeTranslations.put("カ変・クル", "kuru-kana"); inflTypeTranslations.put("ラ変", "irregular-cons-r"); inflTypeTranslations.put("下二・カ行", "2-row-lower-cons-k"); } /** * Get the english form of inflection type */ public static String getInflectionTypeTranslation(String s) { return inflTypeTranslations.get(s); } // a translation map for inflection forms, only used for reflectWith private static final HashMap<String,String> inflFormTranslations = new HashMap<>(); static { inflFormTranslations.put("*", "*"); inflFormTranslations.put("基本形", "base"); inflFormTranslations.put("文語基本形", "classical-base"); inflFormTranslations.put("未然ヌ接続", "imperfective-nu-connection"); inflFormTranslations.put("未然ウ接続", "imperfective-u-connection"); inflFormTranslations.put("連用タ接続", "conjunctive-ta-connection"); inflFormTranslations.put("連用テ接続", "conjunctive-te-connection"); inflFormTranslations.put("連用ゴザイ接続", "conjunctive-gozai-connection"); inflFormTranslations.put("体言接続", "uninflected-connection"); inflFormTranslations.put("仮定形", "subjunctive"); inflFormTranslations.put("命令e", "imperative-e"); inflFormTranslations.put("仮定縮約1", "conditional-contracted-1"); inflFormTranslations.put("仮定縮約2", "conditional-contracted-2"); inflFormTranslations.put("ガル接続", "garu-connection"); inflFormTranslations.put("未然形", "imperfective"); inflFormTranslations.put("連用形", "conjunctive"); inflFormTranslations.put("音便基本形", "onbin-base"); inflFormTranslations.put("連用デ接続", "conjunctive-de-connection"); inflFormTranslations.put("未然特殊", "imperfective-special"); inflFormTranslations.put("命令i", "imperative-i"); inflFormTranslations.put("連用ニ接続", "conjunctive-ni-connection"); inflFormTranslations.put("命令yo", "imperative-yo"); inflFormTranslations.put("体言接続特殊", "adnominal-special"); inflFormTranslations.put("命令ro", "imperative-ro"); inflFormTranslations.put("体言接続特殊2", "uninflected-special-connection-2"); inflFormTranslations.put("未然レル接続", "imperfective-reru-connection"); inflFormTranslations.put("現代基本形", "modern-base"); inflFormTranslations.put("基本形-促音便", "base-onbin"); // not sure about this } /** * Get the english form of inflected form */ public static String getInflectedFormTranslation(String s) { return inflFormTranslations.get(s); } /** * Romanize katakana with modified hepburn */ public static String getRomanization(String s) { StringBuilder out = new StringBuilder(); try { getRomanization(out, s); } catch (IOException bogus) { throw new RuntimeException(bogus); } return out.toString(); } /** * Romanize katakana with modified hepburn */ // TODO: now that this is used by readingsfilter and not just for // debugging, fix this to really be a scheme that works best with IMEs public static void getRomanization(Appendable builder, CharSequence s) throws IOException { final int len = s.length(); for (int i = 0; i < len; i++) { // maximum lookahead: 3 char ch = s.charAt(i); char ch2 = (i < len - 1) ? s.charAt(i + 1) : 0; char ch3 = (i < len - 2) ? s.charAt(i + 2) : 0; main: switch (ch) { case 'ッ': switch (ch2) { case 'カ': case 'キ': case 'ク': case 'ケ': case 'コ': builder.append('k'); break main; case 'サ': case 'シ': case 'ス': case 'セ': case 'ソ': builder.append('s'); break main; case 'タ': case 'チ': case 'ツ': case 'テ': case 'ト': builder.append('t'); break main; case 'パ': case 'ピ': case 'プ': case 'ペ': case 'ポ': builder.append('p'); break main; } break; case 'ア': builder.append('a'); break; case 'イ': if (ch2 == 'ィ') { builder.append("yi"); i++; } else if (ch2 == 'ェ') { builder.append("ye"); i++; } else { builder.append('i'); } break; case 'ウ': switch(ch2) { case 'ァ': builder.append("wa"); i++; break; case 'ィ': builder.append("wi"); i++; break; case 'ゥ': builder.append("wu"); i++; break; case 'ェ': builder.append("we"); i++; break; case 'ォ': builder.append("wo"); i++; break; case 'ュ': builder.append("wyu"); i++; break; default: builder.append('u'); break; } break; case 'エ': builder.append('e'); break; case 'オ': if (ch2 == 'ウ') { builder.append('ō'); i++; } else { builder.append('o'); } break; case 'カ': builder.append("ka"); break; case 'キ': if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("kyō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("kyū"); i += 2; } else if (ch2 == 'ャ') { builder.append("kya"); i++; } else if (ch2 == 'ョ') { builder.append("kyo"); i++; } else if (ch2 == 'ュ') { builder.append("kyu"); i++; } else if (ch2 == 'ェ') { builder.append("kye"); i++; } else { builder.append("ki"); } break; case 'ク': switch(ch2) { case 'ァ': builder.append("kwa"); i++; break; case 'ィ': builder.append("kwi"); i++; break; case 'ェ': builder.append("kwe"); i++; break; case 'ォ': builder.append("kwo"); i++; break; case 'ヮ': builder.append("kwa"); i++; break; default: builder.append("ku"); break; } break; case 'ケ': builder.append("ke"); break; case 'コ': if (ch2 == 'ウ') { builder.append("kō"); i++; } else { builder.append("ko"); } break; case 'サ': builder.append("sa"); break; case 'シ': if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("shō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("shū"); i += 2; } else if (ch2 == 'ャ') { builder.append("sha"); i++; } else if (ch2 == 'ョ') { builder.append("sho"); i++; } else if (ch2 == 'ュ') { builder.append("shu"); i++; } else if (ch2 == 'ェ') { builder.append("she"); i++; } else { builder.append("shi"); } break; case 'ス': if (ch2 == 'ィ') { builder.append("si"); i++; } else { builder.append("su"); } break; case 'セ': builder.append("se"); break; case 'ソ': if (ch2 == 'ウ') { builder.append("sō"); i++; } else { builder.append("so"); } break; case 'タ': builder.append("ta"); break; case 'チ': if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("chō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("chū"); i += 2; } else if (ch2 == 'ャ') { builder.append("cha"); i++; } else if (ch2 == 'ョ') { builder.append("cho"); i++; } else if (ch2 == 'ュ') { builder.append("chu"); i++; } else if (ch2 == 'ェ') { builder.append("che"); i++; } else { builder.append("chi"); } break; case 'ツ': if (ch2 == 'ァ') { builder.append("tsa"); i++; } else if (ch2 == 'ィ') { builder.append("tsi"); i++; } else if (ch2 == 'ェ') { builder.append("tse"); i++; } else if (ch2 == 'ォ') { builder.append("tso"); i++; } else if (ch2 == 'ュ') { builder.append("tsyu"); i++; } else { builder.append("tsu"); } break; case 'テ': if (ch2 == 'ィ') { builder.append("ti"); i++; } else if (ch2 == 'ゥ') { builder.append("tu"); i++; } else if (ch2 == 'ュ') { builder.append("tyu"); i++; } else { builder.append("te"); } break; case 'ト': if (ch2 == 'ウ') { builder.append("tō"); i++; } else if (ch2 == 'ゥ') { builder.append("tu"); i++; } else { builder.append("to"); } break; case 'ナ': builder.append("na"); break; case 'ニ': if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("nyō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("nyū"); i += 2; } else if (ch2 == 'ャ') { builder.append("nya"); i++; } else if (ch2 == 'ョ') { builder.append("nyo"); i++; } else if (ch2 == 'ュ') { builder.append("nyu"); i++; } else if (ch2 == 'ェ') { builder.append("nye"); i++; } else { builder.append("ni"); } break; case 'ヌ': builder.append("nu"); break; case 'ネ': builder.append("ne"); break; case 'ノ': if (ch2 == 'ウ') { builder.append("nō"); i++; } else { builder.append("no"); } break; case 'ハ': builder.append("ha"); break; case 'ヒ': if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("hyō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("hyū"); i += 2; } else if (ch2 == 'ャ') { builder.append("hya"); i++; } else if (ch2 == 'ョ') { builder.append("hyo"); i++; } else if (ch2 == 'ュ') { builder.append("hyu"); i++; } else if (ch2 == 'ェ') { builder.append("hye"); i++; } else { builder.append("hi"); } break; case 'フ': if (ch2 == 'ャ') { builder.append("fya"); i++; } else if (ch2 == 'ュ') { builder.append("fyu"); i++; } else if (ch2 == 'ィ' && ch3 == 'ェ') { builder.append("fye"); i+=2; } else if (ch2 == 'ョ') { builder.append("fyo"); i++; } else if (ch2 == 'ァ') { builder.append("fa"); i++; } else if (ch2 == 'ィ') { builder.append("fi"); i++; } else if (ch2 == 'ェ') { builder.append("fe"); i++; } else if (ch2 == 'ォ') { builder.append("fo"); i++; } else { builder.append("fu"); } break; case 'ヘ': builder.append("he"); break; case 'ホ': if (ch2 == 'ウ') { builder.append("hō"); i++; } else if (ch2 == 'ゥ') { builder.append("hu"); i++; } else { builder.append("ho"); } break; case 'マ': builder.append("ma"); break; case 'ミ': if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("myō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("myū"); i += 2; } else if (ch2 == 'ャ') { builder.append("mya"); i++; } else if (ch2 == 'ョ') { builder.append("myo"); i++; } else if (ch2 == 'ュ') { builder.append("myu"); i++; } else if (ch2 == 'ェ') { builder.append("mye"); i++; } else { builder.append("mi"); } break; case 'ム': builder.append("mu"); break; case 'メ': builder.append("me"); break; case 'モ': if (ch2 == 'ウ') { builder.append("mō"); i++; } else { builder.append("mo"); } break; case 'ヤ': builder.append("ya"); break; case 'ユ': builder.append("yu"); break; case 'ヨ': if (ch2 == 'ウ') { builder.append("yō"); i++; } else { builder.append("yo"); } break; case 'ラ': if (ch2 == '゜') { builder.append("la"); i++; } else { builder.append("ra"); } break; case 'リ': if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("ryō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("ryū"); i += 2; } else if (ch2 == 'ャ') { builder.append("rya"); i++; } else if (ch2 == 'ョ') { builder.append("ryo"); i++; } else if (ch2 == 'ュ') { builder.append("ryu"); i++; } else if (ch2 == 'ェ') { builder.append("rye"); i++; } else if (ch2 == '゜') { builder.append("li"); i++; } else { builder.append("ri"); } break; case 'ル': if (ch2 == '゜') { builder.append("lu"); i++; } else { builder.append("ru"); } break; case 'レ': if (ch2 == '゜') { builder.append("le"); i++; } else { builder.append("re"); } break; case 'ロ': if (ch2 == 'ウ') { builder.append("rō"); i++; } else if (ch2 == '゜') { builder.append("lo"); i++; } else { builder.append("ro"); } break; case 'ワ': builder.append("wa"); break; case 'ヰ': builder.append("i"); break; case 'ヱ': builder.append("e"); break; case 'ヲ': builder.append("o"); break; case 'ン': switch (ch2) { case 'バ': case 'ビ': case 'ブ': case 'ベ': case 'ボ': case 'パ': case 'ピ': case 'プ': case 'ペ': case 'ポ': case 'マ': case 'ミ': case 'ム': case 'メ': case 'モ': builder.append('m'); break main; case 'ヤ': case 'ユ': case 'ヨ': case 'ア': case 'イ': case 'ウ': case 'エ': case 'オ': builder.append("n'"); break main; default: builder.append("n"); break main; } case 'ガ': builder.append("ga"); break; case 'ギ': if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("gyō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("gyū"); i += 2; } else if (ch2 == 'ャ') { builder.append("gya"); i++; } else if (ch2 == 'ョ') { builder.append("gyo"); i++; } else if (ch2 == 'ュ') { builder.append("gyu"); i++; } else if (ch2 == 'ェ') { builder.append("gye"); i++; } else { builder.append("gi"); } break; case 'グ': switch(ch2) { case 'ァ': builder.append("gwa"); i++; break; case 'ィ': builder.append("gwi"); i++; break; case 'ェ': builder.append("gwe"); i++; break; case 'ォ': builder.append("gwo"); i++; break; case 'ヮ': builder.append("gwa"); i++; break; default: builder.append("gu"); break; } break; case 'ゲ': builder.append("ge"); break; case 'ゴ': if (ch2 == 'ウ') { builder.append("gō"); i++; } else { builder.append("go"); } break; case 'ザ': builder.append("za"); break; case 'ジ': if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("jō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("jū"); i += 2; } else if (ch2 == 'ャ') { builder.append("ja"); i++; } else if (ch2 == 'ョ') { builder.append("jo"); i++; } else if (ch2 == 'ュ') { builder.append("ju"); i++; } else if (ch2 == 'ェ') { builder.append("je"); i++; } else { builder.append("ji"); } break; case 'ズ': if (ch2 == 'ィ') { builder.append("zi"); i++; } else { builder.append("zu"); } break; case 'ゼ': builder.append("ze"); break; case 'ゾ': if (ch2 == 'ウ') { builder.append("zō"); i++; } else { builder.append("zo"); } break; case 'ダ': builder.append("da"); break; case 'ヂ': // TODO: investigate all this if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("jō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("jū"); i += 2; } else if (ch2 == 'ャ') { builder.append("ja"); i++; } else if (ch2 == 'ョ') { builder.append("jo"); i++; } else if (ch2 == 'ュ') { builder.append("ju"); i++; } else if (ch2 == 'ェ') { builder.append("je"); i++; } else { builder.append("ji"); } break; case 'ヅ': builder.append("zu"); break; case 'デ': if (ch2 == 'ィ') { builder.append("di"); i++; } else if (ch2 == 'ュ') { builder.append("dyu"); i++; } else { builder.append("de"); } break; case 'ド': if (ch2 == 'ウ') { builder.append("dō"); i++; } else if (ch2 == 'ゥ') { builder.append("du"); i++; } else { builder.append("do"); } break; case 'バ': builder.append("ba"); break; case 'ビ': if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("byō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("byū"); i += 2; } else if (ch2 == 'ャ') { builder.append("bya"); i++; } else if (ch2 == 'ョ') { builder.append("byo"); i++; } else if (ch2 == 'ュ') { builder.append("byu"); i++; } else if (ch2 == 'ェ') { builder.append("bye"); i++; } else { builder.append("bi"); } break; case 'ブ': builder.append("bu"); break; case 'ベ': builder.append("be"); break; case 'ボ': if (ch2 == 'ウ') { builder.append("bō"); i++; } else { builder.append("bo"); } break; case 'パ': builder.append("pa"); break; case 'ピ': if (ch2 == 'ョ' && ch3 == 'ウ') { builder.append("pyō"); i += 2; } else if (ch2 == 'ュ' && ch3 == 'ウ') { builder.append("pyū"); i += 2; } else if (ch2 == 'ャ') { builder.append("pya"); i++; } else if (ch2 == 'ョ') { builder.append("pyo"); i++; } else if (ch2 == 'ュ') { builder.append("pyu"); i++; } else if (ch2 == 'ェ') { builder.append("pye"); i++; } else { builder.append("pi"); } break; case 'プ': builder.append("pu"); break; case 'ペ': builder.append("pe"); break; case 'ポ': if (ch2 == 'ウ') { builder.append("pō"); i++; } else { builder.append("po"); } break; case 'ヷ': builder.append("va"); break; case 'ヸ': builder.append("vi"); break; case 'ヹ': builder.append("ve"); break; case 'ヺ': builder.append("vo"); break; case 'ヴ': if (ch2 == 'ィ' && ch3 == 'ェ') { builder.append("vye"); i+= 2; } else { builder.append('v'); } break; case 'ァ': builder.append('a'); break; case 'ィ': builder.append('i'); break; case 'ゥ': builder.append('u'); break; case 'ェ': builder.append('e'); break; case 'ォ': builder.append('o'); break; case 'ヮ': builder.append("wa"); break; case 'ャ': builder.append("ya"); break; case 'ュ': builder.append("yu"); break; case 'ョ': builder.append("yo"); break; case 'ー': break; default: builder.append(ch); } } } }