/*
* Copyright (C) 2011 WebSquared Inc. http://websqrd.com
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
*/
package org.fastcatsearch.util.lang.ko;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helpers for working with precomposed Hangul syllables (U+AC00..U+D7A3).
 *
 * <p>A precomposed syllable is addressed as
 * {@code base + (choSung * 21 + jungSung) * 28 + jongSung}, where the three indexes
 * point into {@link #CHOSUNG_LIST}, {@link #JUNGSUNG_LIST} and {@link #JONGSUNG_LIST}.
 * Index 0 of {@link #JONGSUNG_LIST} is a space and stands for "no final consonant".
 *
 * <p>All methods work on UTF-16 {@code char} units; characters outside the precomposed
 * Hangul range are treated as opaque non-Hangul characters.
 */
public class HangulUtil {

    /** Initial consonants (choSung), in syllable-composition order. 19 entries. */
    public static final String CHOSUNG_LIST = "ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ"; //19
    /** Medial vowels (jungSung), in syllable-composition order. 21 entries. */
    public static final String JUNGSUNG_LIST = "ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ"; //21
    /** Final consonants (jongSung); index 0 (a space) means "no final consonant". 28 entries. */
    public static final String JONGSUNG_LIST = " ㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ"; //28

    private static final char HANGUL_BASE = '\uAC00';
    private static final char HANGUL_LAST = '\uD7A3';

    private static final int JUNGSUNG_COUNT = JUNGSUNG_LIST.length();
    private static final int JONGSUNG_COUNT = JONGSUNG_LIST.length();
    /** Number of precomposed syllables sharing one initial consonant (21 * 28). */
    private static final int SYLLABLES_PER_CHOSUNG = JUNGSUNG_COUNT * JONGSUNG_COUNT;

    /** Returns true when {@code ch} lies in the precomposed Hangul syllable range. */
    private static boolean isHangulSyllable(char ch) {
        return ch >= HANGUL_BASE && ch <= HANGUL_LAST;
    }

    /**
     * Decomposes every Hangul syllable of {@code keyword} into its jaso.
     *
     * <p>Whitespace characters are dropped, non-Hangul characters are copied unchanged,
     * and each Hangul syllable is replaced by its initial consonant, medial vowel and,
     * only when present, its final consonant.
     *
     * @param keyword text to decompose; must not be null
     * @return the decomposed text
     * @throws NullPointerException if {@code keyword} is null
     */
    public static String decomposeHangul(String keyword) {
        StringBuilder decomposed = new StringBuilder();
        for (int i = 0; i < keyword.length(); i++) {
            char ch = keyword.charAt(i);
            if (Character.isWhitespace(ch)) {
                continue;
            }
            if (!isHangulSyllable(ch)) {
                decomposed.append(ch);
                continue;
            }
            int syllableIndex = ch - HANGUL_BASE;
            int choSung = syllableIndex / SYLLABLES_PER_CHOSUNG;
            int jungSung = (syllableIndex % SYLLABLES_PER_CHOSUNG) / JONGSUNG_COUNT;
            int jongSung = syllableIndex % JONGSUNG_COUNT;

            decomposed.append(CHOSUNG_LIST.charAt(choSung));
            decomposed.append(JUNGSUNG_LIST.charAt(jungSung));
            // Index 0 means the syllable has no final consonant, so nothing is emitted for it.
            if (jongSung > 0) {
                decomposed.append(JONGSUNG_LIST.charAt(jongSung));
            }
        }
        return decomposed.toString();
    }

    /**
     * Builds the delimiter-separated list of incremental prefixes of {@code keyword},
     * expanding each Hangul syllable through its typing stages.
     *
     * <p>For a non-Hangul character one candidate is produced: the text up to and including
     * that character. For a Hangul syllable the candidates are, in order: the preceding text
     * plus the initial consonant, the preceding text plus the syllable without its final
     * consonant, and (only when the syllable has a final consonant) the preceding text plus
     * the whole syllable. The result has no trailing delimiter.
     *
     * @param keyword   text to expand; must not be null
     * @param delimiter separator written between candidates
     * @return the candidates joined by {@code delimiter}; empty for an empty keyword
     * @throws NullPointerException if {@code keyword} is null
     */
    public static String makeHangulPrefix(String keyword, char delimiter) {
        StringBuilder candidate = new StringBuilder();
        StringBuilder prefix = new StringBuilder();
        int lastIndex = keyword.length() - 1;
        for (int i = 0; i <= lastIndex; i++) {
            char ch = keyword.charAt(i);
            if (!isHangulSyllable(ch)) {
                prefix.append(ch);
                candidate.append(prefix);
            } else {
                int syllableIndex = ch - HANGUL_BASE;
                int choSung = syllableIndex / SYLLABLES_PER_CHOSUNG;
                int jongSung = syllableIndex % JONGSUNG_COUNT;

                // 1. prefix + initial consonant
                candidate.append(prefix).append(CHOSUNG_LIST.charAt(choSung)).append(delimiter);

                // 2. prefix + syllable without final consonant. Subtracting the jongSung index
                //    yields the code point of the same syllable with no final consonant.
                candidate.append(prefix).append((char) (ch - jongSung));

                prefix.append(ch);

                // 3. prefix + whole syllable, only when it differs from candidate 2
                if (jongSung > 0) {
                    candidate.append(delimiter).append(prefix);
                }
            }
            if (i < lastIndex) {
                candidate.append(delimiter);
            }
        }
        return candidate.toString();
    }

    /**
     * Builds the list of proper suffixes of {@code keyword} (starting at index 1, 2, ...).
     *
     * <p>Every suffix, including the last one, is followed by {@code delimiter}; the result
     * therefore ends with a delimiter whenever it is non-empty.
     *
     * @param keyword   text whose suffixes are listed; must not be null
     * @param delimiter separator written after each suffix
     * @return the suffixes, each terminated by {@code delimiter}; empty when the keyword has
     *         fewer than two characters
     * @throws NullPointerException if {@code keyword} is null
     */
    public static String makeHangulSuffix(String keyword, char delimiter) {
        StringBuilder candidate = new StringBuilder();
        for (int i = 1; i < keyword.length(); i++) {
            candidate.append(keyword, i, keyword.length()).append(delimiter);
        }
        return candidate.toString();
    }

    /**
     * Builds the delimiter-separated list of incremental initial-consonant (choSung)
     * sequences of {@code keyword}, for initial-consonant search.
     *
     * <p>Non-Hangul characters are skipped. Each Hangul syllable contributes one candidate:
     * the initial consonants of all Hangul syllables seen so far. The delimiter is written
     * only between candidates, so the result never ends with a delimiter even when the
     * keyword ends with non-Hangul characters.
     *
     * @param keyword   text to process; must not be null
     * @param delimiter separator written between candidates
     * @return the candidates joined by {@code delimiter}; empty when the keyword contains no
     *         Hangul syllable
     * @throws NullPointerException if {@code keyword} is null
     */
    public static String makeHangulChosung(String keyword, char delimiter) {
        StringBuilder candidate = new StringBuilder();
        StringBuilder prefix = new StringBuilder();
        for (int i = 0; i < keyword.length(); i++) {
            char ch = keyword.charAt(i);
            if (!isHangulSyllable(ch)) {
                continue;
            }
            char initial = CHOSUNG_LIST.charAt((ch - HANGUL_BASE) / SYLLABLES_PER_CHOSUNG);
            // Separate from the previous candidate rather than guessing whether another
            // Hangul syllable follows; this avoids a dangling delimiter.
            if (candidate.length() > 0) {
                candidate.append(delimiter);
            }
            candidate.append(prefix).append(initial);
            prefix.append(initial);
        }
        return candidate.toString();
    }

    /**
     * Composes a precomposed Hangul syllable from its jaso.
     *
     * <p>Each argument is located with {@link String#indexOf(String)} in the corresponding
     * jaso list, so an empty string resolves to index 0; for {@code jongSung} that means
     * "no final consonant" (a single space has the same effect).
     *
     * @param choSung  initial consonant, an entry of {@link #CHOSUNG_LIST}
     * @param jungSung medial vowel, an entry of {@link #JUNGSUNG_LIST}
     * @param jongSung final consonant, an entry of {@link #JONGSUNG_LIST}
     * @return the composed syllable
     * @throws IllegalArgumentException if any argument is not found in its jaso list
     * @throws NullPointerException     if any argument is null
     */
    public static char mergeJaso(String choSung, String jungSung, String jongSung) {
        int choSungPos = jasoIndex(CHOSUNG_LIST, choSung, "choSung");
        int jungSungPos = jasoIndex(JUNGSUNG_LIST, jungSung, "jungSung");
        int jongSungPos = jasoIndex(JONGSUNG_LIST, jongSung, "jongSung");
        int unicode = HANGUL_BASE + (choSungPos * JUNGSUNG_COUNT + jungSungPos) * JONGSUNG_COUNT + jongSungPos;
        return (char) unicode;
    }

    /** Looks up {@code jaso} in {@code jasoList}, rejecting values that are not present. */
    private static int jasoIndex(String jasoList, String jaso, String role) {
        int pos = jasoList.indexOf(jaso);
        if (pos < 0) {
            throw new IllegalArgumentException("Unknown " + role + " jaso: " + jaso);
        }
        return pos;
    }

    /**
     * Splits a character into its jaso.
     *
     * @param hanChar character to analyse
     * @return a {@link HangulInfo} holding the character; for a Hangul syllable
     *         {@code hanParts} contains exactly three entries (initial, medial, final), the
     *         final being a space when the syllable has no final consonant; for any other
     *         character {@code isHangul} is false and {@code hanParts} is null
     */
    public static HangulInfo devideJaso(char hanChar) {
        HangulInfo hangulInfo = new HangulInfo();
        hangulInfo.hanChar = hanChar;
        if (!isHangulSyllable(hanChar)) {
            hangulInfo.isHangul = false;
            hangulInfo.hanParts = null;
            return hangulInfo;
        }
        int syllableIndex = hanChar - HANGUL_BASE;
        int choSung = syllableIndex / SYLLABLES_PER_CHOSUNG;
        int jungSung = (syllableIndex % SYLLABLES_PER_CHOSUNG) / JONGSUNG_COUNT;
        int jongSung = syllableIndex % JONGSUNG_COUNT;

        hangulInfo.isHangul = true;
        hangulInfo.hanParts = new char[] {
            CHOSUNG_LIST.charAt(choSung),
            JUNGSUNG_LIST.charAt(jungSung),
            JONGSUNG_LIST.charAt(jongSung)
        };
        return hangulInfo;
    }

    /** Result of {@link HangulUtil#devideJaso(char)}. */
    public static class HangulInfo {
        /** True when {@link #hanChar} is a precomposed Hangul syllable. */
        public boolean isHangul;
        /** The analysed character. */
        public char hanChar;
        /** Initial, medial and final jaso of {@link #hanChar}; null when it is not Hangul. */
        public char[] hanParts;

        /** Renders as {@code [HangulInfo]<isHangul>:<hanChar>:<parts joined by commas>}. */
        @Override
        public String toString() {
            StringBuilder parts = new StringBuilder();
            if (hanParts != null) {
                for (int i = 0; i < hanParts.length; i++) {
                    if (i > 0) {
                        parts.append(',');
                    }
                    parts.append(hanParts[i]);
                }
            }
            return "[HangulInfo]" + isHangul + ":" + hanChar + ":" + parts;
        }
    }
}