/*
* Copyright 2011-2013 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.analysis.kr.utils;
import org.apache.lucene.analysis.kr.morph.AnalysisOutput;
import org.apache.lucene.analysis.kr.morph.MorphException;
import org.apache.lucene.analysis.kr.morph.PatternConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.List;
@SuppressWarnings( "unchecked" )
public class EomiUtil {
private static final Logger log = LoggerFactory.getLogger(EomiUtil.class);
public static final String RESULT_FAIL = "0";
public static final String RESULT_SUCCESS = "1";
public static final String[] verbSuffix = {
"이", "하", "되", "스럽", "스러우", "시키", "있", "없", "같", "당하", "만하", "드리", "받", "나", "내"
};
/**
* 가장 길이가 긴 어미를 분리한다.
*
* @throws org.apache.lucene.analysis.kr.morph.MorphException
*
*/
public static String[] longestEomi(final String term) throws MorphException {
log.trace("가장 길이가 긴 어미를 분리합니다. term=[{}]", term);
String[] result = new String[2];
result[0] = term;
String stem;
String eomi;
char[] efeature;
for (int i = term.length(); i > 0; i--) {
stem = term.substring(0, i);
if (i != term.length()) {
eomi = term.substring(i);
efeature = SyllableUtil.getFeature(eomi.charAt(0));
} else {
efeature = SyllableUtil.getFeature(stem.charAt(i - 1));
eomi = "";
}
if (SyllableUtil.isAlpanumeric(stem.charAt(i - 1))) break;
char[] jasos = MorphUtil.decompose(stem.charAt(i - 1));
if (!eomi.equals("") && !DictionaryUtil.existEomi(eomi)) {
// do not anything.
} else if (jasos.length > 2 &&
(jasos[2] == 'ㄴ' || jasos[2] == 'ㄹ' || jasos[2] == 'ㅁ' || jasos[2] == 'ㅂ') &&
DictionaryUtil.combineAndEomiCheck(jasos[2], eomi) != null) {
result[0] = Character.toString(MorphUtil.makeChar(stem.charAt(i - 1), 0));
if (i != 0) result[0] = stem.substring(0, i - 1) + result[0];
result[1] = Character.toString(jasos[2]);
} else if (i > 0 && (stem.endsWith("하") && "여".equals(eomi)) ||
(stem.endsWith("가") && "거라".equals(eomi)) ||
(stem.endsWith("오") && "너라".equals(eomi))) {
result[0] = stem;
result[1] = eomi;
} else if (jasos.length == 2 && (!stem.endsWith("아") && !stem.endsWith("어")) &&
(jasos[1] == 'ㅏ' || jasos[1] == 'ㅓ' || jasos[1] == 'ㅔ' || jasos[1] == 'ㅐ') &&
(DictionaryUtil.combineAndEomiCheck('어', eomi) != null)) {
// char[] chs = MorphUtil.decompose(stem.charAt(stem.length() - 1));
result[0] = stem;
result[1] = "어" + eomi;
} else if ((jasos[1] == 'ㅘ' || jasos[1] == 'ㅝ' || jasos[1] == 'ㅕ' || jasos[1] == 'ㅐ' || jasos[1] == 'ㅒ') &&
(DictionaryUtil.combineAndEomiCheck('어', eomi) != null)) {
String end = "";
if (jasos[1] == 'ㅘ')
end = MorphUtil.makeChar(stem.charAt(i - 1), 8, 0) + "아";
else if (jasos[1] == 'ㅝ')
end = MorphUtil.makeChar(stem.charAt(i - 1), 13, 0) + "어";
else if (jasos[1] == 'ㅕ')
end = Character.toString(MorphUtil.makeChar(stem.charAt(i - 1), 6, 0));
else if (jasos[1] == 'ㅐ')
end = MorphUtil.makeChar(stem.charAt(i - 1), 0, 0) + "어";
else if (jasos[1] == 'ㅒ')
end = MorphUtil.makeChar(stem.charAt(i - 1), 20, 0) + "애";
if (jasos.length == 3) {
end = end.substring(0, end.length() - 1) + MorphUtil.replaceJongsung(end.charAt(end.length() - 1), stem.charAt(i - 1));
}
if (stem.length() < 2) result[0] = end;
else result[0] = stem.substring(0, stem.length() - 1) + end;
result[1] = eomi;
} else if (efeature != null && efeature[SyllableUtil.IDX_EOMI1] != '0' &&
DictionaryUtil.existEomi(eomi)) {
if (!(((jasos.length == 2 && jasos[0] == 'ㄹ') || (jasos.length == 3 && jasos[2] == 'ㄹ')) && eomi.equals("러"))) { // ㄹ 불규칙은 예외
result[0] = stem;
result[1] = eomi;
}
}
if (efeature != null && efeature[SyllableUtil.IDX_EOMI2] == '0') break;
}
log.trace("가장 긴 어미를 분리했습니다. term=[{}], result=[{}]", term, StringUtil.join(result, ","));
return result;
}
/** 선어말어미를 분석한다. */
public static String[] splitPomi(String stem) throws MorphException {
log.trace("선어말어미를 분석합니다. stem=[{}]", stem);
// results[0]:성공(1)/실패(0), results[1]: 어근, results[2]: 선어말어미
String[] results = new String[2];
results[0] = stem;
if (stem == null || stem.length() == 0 || "있".equals(stem)) return results;
char[] chrs = stem.toCharArray();
int len = chrs.length;
String pomi = "";
int index = len - 1;
char[] jaso = MorphUtil.decompose(chrs[index]);
if (chrs[index] != '시' && chrs[index] != 'ㅆ' && jaso[jaso.length - 1] != 'ㅆ') return results; // 선어말어미가 발견되지 않았다
if (chrs[index] == '겠') {
pomi = "겠";
setPomiResult(results, stem.substring(0, index), pomi);
if (--index <= 0 ||
(chrs[index] != '시' && chrs[index] != 'ㅆ' && jaso[jaso.length - 1] != 'ㅆ'))
return results; // 다음이거나 선어말어미가 없다면...
jaso = MorphUtil.decompose(chrs[index]);
}
if (chrs[index] == '었') { // 시었, ㅆ었, 었
pomi = chrs[index] + pomi;
setPomiResult(results, stem.substring(0, index), pomi);
if (--index <= 0 ||
(chrs[index] != '시' && chrs[index] != 'ㅆ' && jaso[jaso.length - 1] != 'ㅆ'))
return results; // 다음이거나 선어말어미가 없다면...
jaso = MorphUtil.decompose(chrs[index]);
}
if (chrs[index] == '였') {
pomi = MorphUtil.replaceJongsung('어', chrs[index]) + pomi;
if (index > 0 && chrs[index - 1] == '하')
stem = stem.substring(0, index);
else
stem = stem.substring(0, index) + "이";
setPomiResult(results, stem, pomi);
} else if (chrs[index] == '셨') {
pomi = MorphUtil.replaceJongsung('어', chrs[index]) + pomi;
stem = stem.substring(0, index);
setPomiResult(results, stem, "시" + pomi);
} else if (chrs[index] == '았' || chrs[index] == '었') {
pomi = chrs[index] + pomi;
setPomiResult(results, stem.substring(0, index), pomi);
if (--index <= 0 ||
(chrs[index] != '시' && chrs[index] != '으')) return results; // 다음이거나 선어말어미가 없다면...
jaso = MorphUtil.decompose(chrs[index]);
} else if (jaso.length == 3 && jaso[2] == 'ㅆ') {
if (jaso[0] == 'ㅎ' && jaso[1] == 'ㅐ') {
pomi = MorphUtil.replaceJongsung('어', chrs[index]) + pomi;
stem = stem.substring(0, index) + "하";
} else if (jaso[0] != 'ㅇ' && (jaso[1] == 'ㅏ' || jaso[1] == 'ㅓ' || jaso[1] == 'ㅔ' || jaso[1] == 'ㅐ')) {
pomi = "었" + pomi;
stem = stem.substring(0, index) + MorphUtil.makeChar(chrs[index], 0);
} else if (jaso[0] != 'ㅇ' && (jaso[1] == 'ㅙ')) {
pomi = "었" + pomi;
stem = stem.substring(0, index) + MorphUtil.makeChar(chrs[index], 11, 0);
} else if (jaso[1] == 'ㅘ') {
pomi = MorphUtil.replaceJongsung('아', chrs[index]) + pomi;
stem = stem.substring(0, index) + MorphUtil.makeChar(chrs[index], 8, 0);
} else if (jaso[1] == 'ㅝ') {
pomi = MorphUtil.replaceJongsung('어', chrs[index]) + pomi;
stem = stem.substring(0, index) + MorphUtil.makeChar(chrs[index], 13, 0);
} else if (jaso[1] == 'ㅕ') {
pomi = MorphUtil.replaceJongsung('어', chrs[index]) + pomi;
stem = stem.substring(0, index) + MorphUtil.makeChar(chrs[index], 20, 0);
} else if (jaso[1] == 'ㅐ') {
pomi = MorphUtil.replaceJongsung('어', chrs[index]) + pomi;
stem = stem.substring(0, index);
} else if (jaso[1] == 'ㅒ') {
pomi = MorphUtil.replaceJongsung('애', chrs[index]) + pomi;
stem = stem.substring(0, index);
} else {
pomi = "었" + pomi;
}
setPomiResult(results, stem, pomi);
if (chrs[index] != '시' && chrs[index] != '으') return results; // 다음이거나 선어말어미가 없다면...
jaso = MorphUtil.decompose(chrs[index]);
}
char[] nChrs = null;
if (index > 0) nChrs = MorphUtil.decompose(chrs[index - 1]);
else nChrs = new char[2];
if (nChrs.length == 2 && chrs[index] == '시' && (chrs.length <= index + 1 ||
(chrs.length > index + 1 && chrs[index + 1] != '셨'))) {
if (DictionaryUtil.getWord(results[0]) != null) return results; //'시'가 포함된 단어가 있다. 성가시다/도시다/들쑤시다
pomi = chrs[index] + pomi;
setPomiResult(results, stem.substring(0, index), pomi);
if (--index == 0 || chrs[index] != '으') return results; // 다음이거나 선어말어미가 없다면...
jaso = MorphUtil.decompose(chrs[index]);
}
if (index > 0) nChrs = MorphUtil.decompose(chrs[index - 1]);
else nChrs = new char[2];
if (chrs.length > index + 1 && nChrs.length == 3 && (chrs[index + 1] == '셨' || chrs[index + 1] == '시') && chrs[index] == '으') {
pomi = chrs[index] + pomi;
setPomiResult(results, stem.substring(0, index), pomi);
}
log.trace("선어말어미를 분석합니다. stem=[{}], result=[{}]", stem, StringUtil.join(results, ","));
return results;
}
/**
* 불규칙 용언의 원형을 구한다.
*
* @throws org.apache.lucene.analysis.kr.morph.MorphException
*
*/
public static List<AnalysisOutput> irregular(AnalysisOutput output) throws MorphException {
List<AnalysisOutput> results = new ArrayList<AnalysisOutput>();
if (output.getStem() == null || output.getStem().length() == 0)
return results;
String ending = output.getEomi();
if (output.getPomi() != null) ending = output.getPomi();
List<String[]> irrs = new ArrayList<String[]>();
irregularStem(irrs, output.getStem(), ending);
irregularEnding(irrs, output.getStem(), ending);
irregularAO(irrs, output.getStem(), ending);
try {
for (String[] irr : irrs) {
AnalysisOutput result = output.clone();
result.setStem(irr[0]);
if (output.getPatn() == PatternConstants.PTN_VM) {
if (output.getPomi() == null) result.setEomi(irr[1]);
else result.setPomi(irr[1]);
}
results.add(result);
}
} catch (CloneNotSupportedException e) {
throw new MorphException(e.getMessage(), e);
}
return results;
}
/** 어간만 변하는 경우 */
private static void irregularStem(List<String[]> results, String stem, String ending) {
char feCh = ending.charAt(0);
char[] fechJaso = MorphUtil.decompose(feCh);
char ls = stem.charAt(stem.length() - 1);
char[] lsJaso = MorphUtil.decompose(ls);
if (feCh == '아' || feCh == '어' || feCh == '으') {
if (lsJaso[lsJaso.length - 1] == 'ㄹ') { // ㄷ 불규칙
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 7)
, ending
, String.valueOf(PatternConstants.IRR_TYPE_DI) });
} else if (lsJaso.length == 2) { // ㅅ 불규칙
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 19)
, ending
, String.valueOf(PatternConstants.IRR_TYPE_SI) });
}
}
if ((fechJaso[0] == 'ㄴ' || fechJaso[0] == 'ㄹ' || fechJaso[0] == 'ㅁ' || feCh == '오' || feCh == '시')
&& (ls == '우')) { // ㅂ 불규칙
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 17)
, ending
, String.valueOf(PatternConstants.IRR_TYPE_BI) });
}
if ((fechJaso[0] == 'ㄴ' || fechJaso[0] == 'ㅂ' || fechJaso[0] == 'ㅅ' || feCh == '오')
&& (lsJaso.length == 2)) { // ㄹ 탈락
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 8)
, ending
, String.valueOf(PatternConstants.IRR_TYPE_LI) });
}
if (lsJaso.length == 2
&& (fechJaso[0] == 'ㄴ' || fechJaso[0] == 'ㄹ' || fechJaso[0] == 'ㅁ' || fechJaso[0] == 'ㅂ' ||
lsJaso[1] == 'ㅏ' || lsJaso[1] == 'ㅓ' || lsJaso[1] == 'ㅑ' || lsJaso[1] == 'ㅕ')
&& !"나".equals(stem)) { // ㅎ 불규칙, 그러나 [낳다]는 ㅎ 불규칙이 아니다.
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 27)
, ending
, String.valueOf(PatternConstants.IRR_TYPE_HI) });
}
}
/** 어미만 변하는 경우 */
private static void irregularEnding(List<String[]> results, String stem, String ending) {
if (ending.startsWith("ㅆ")) return;
char feCh = ending.charAt(0);
char ls = stem.charAt(stem.length() - 1);
if (feCh == '러' && ls == '르') { // '러' 불규칙
results.add(
new String[] { stem
, "어" + ending.substring(1)
, String.valueOf(PatternConstants.IRR_TYPE_RO) });
} else if ("라".equals(ending) && "가거".equals(stem)) { // '거라' 불규칙
results.add(
new String[] { stem.substring(0, stem.length() - 1)
, "어라"
, String.valueOf(PatternConstants.IRR_TYPE_GU) });
} else if ("라".equals(ending) && "오너".equals(stem)) { // '너라' 불규칙
results.add(
new String[] { stem.substring(0, stem.length() - 1)
, "어라"
, String.valueOf(PatternConstants.IRR_TYPE_NU) });
}
if ("여".equals(ending) && ls == '하') { // '여' 불규칙
results.add(
new String[] { stem
, "어"
, String.valueOf(PatternConstants.IRR_TYPE_NU) });
}
}
/** 어간과 어미가 모두 변하는 경우 */
private static void irregularAO(List<String[]> results, String stem, String ending) {
char ls = stem.charAt(stem.length() - 1);
char[] lsJaso = MorphUtil.decompose(ls);
if (lsJaso.length < 2) return;
if (lsJaso[1] == 'ㅘ') {
if (stem.endsWith("도와") || stem.endsWith("고와")) { // '곱다', '돕다'의 'ㅂ' 불규칙
results.add(
new String[] { stem.substring(0, stem.length() - 2) +
MorphUtil.makeChar(stem.charAt(stem.length() - 2), 17) // + 'ㅂ'
, makeTesnseEomi("아", ending)
, String.valueOf(PatternConstants.IRR_TYPE_BI) });
} else { // '와' 축약
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 8, 0) // 자음 + ㅗ
, makeTesnseEomi("아", ending)
, String.valueOf(PatternConstants.IRR_TYPE_WA) });
}
} else if (stem.endsWith("퍼")) {
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 18, 0) // 자음 + -
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_WA) });
} else if (lsJaso[1] == 'ㅝ') {
if (stem.length() >= 2) // 'ㅂ' 불규칙
results.add(
new String[] { stem.substring(0, stem.length() - 2) +
MorphUtil.makeChar(stem.charAt(stem.length() - 2), 17) // + 'ㅂ'
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_BI) });
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 13, 0) // 자음 + ㅗ
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_WA) });
} else if (stem.length() >= 2 && ls == '라') {
char[] ns = MorphUtil.decompose(stem.charAt(stem.length() - 2));
if (ns.length == 3 && ns[2] == 'ㄹ') { // 르 불규칙
results.add(
new String[] { stem.substring(0, stem.length() - 2) +
MorphUtil.makeChar(stem.charAt(stem.length() - 2), 0) + "르"
, makeTesnseEomi("아", ending)
, String.valueOf(PatternConstants.IRR_TYPE_RO) });
}
} else if (stem.length() >= 2 && ls == '러') {
char[] ns = MorphUtil.decompose(stem.charAt(stem.length() - 2));
if (stem.charAt(stem.length() - 2) == '르') { // 러 불규칙
results.add(
new String[] { stem.substring(0, stem.length() - 1)
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_LO) });
} else if (ns.length == 3 && ns[2] == 'ㄹ') { // 르 불규칙
results.add(
new String[] { stem.substring(0, stem.length() - 2) +
MorphUtil.makeChar(stem.charAt(stem.length() - 2), 0) + "르"
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_RO) });
}
} else if (stem.endsWith("펴") || stem.endsWith("켜")) {
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 20, 0)
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_EI) });
} else if (stem.endsWith("해")) {
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 0, 0)
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_EI) });
} else if (lsJaso.length == 2 && lsJaso[1] == 'ㅏ') {
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 18, 0)
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_UO) });
} else if (lsJaso.length == 2 && lsJaso[1] == 'ㅓ') {
// 으 탈락
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 18, 0)
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_UO) });
// 아 불규칙
results.add(
new String[] { stem
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_AH) });
} else if (lsJaso[1] == 'ㅕ') {
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 20, 0)
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_EI) });
} else if (lsJaso[1] == 'ㅙ') {
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 11, 0)
, makeTesnseEomi("어", ending)
, String.valueOf(PatternConstants.IRR_TYPE_OE) });
} else if (lsJaso[1] == 'ㅐ') {
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 0, 27)
, makeTesnseEomi("아", ending)
, String.valueOf(PatternConstants.IRR_TYPE_HI) });
} else if (lsJaso[1] == 'ㅒ') {
results.add(
new String[] { stem.substring(0, stem.length() - 1) +
MorphUtil.makeChar(stem.charAt(stem.length() - 1), 2, 27)
, makeTesnseEomi("아", ending)
, String.valueOf(PatternConstants.IRR_TYPE_HI) });
}
}
/**
* 시제 선어미말을 만들어서 반환한다.
*
* @param preword '아' 또는 '어'
* @param endword 어미[선어미말을 포함]
* @return '았' 또는 '었'을 만들어서 반환한다.
*/
public static String makeTesnseEomi(String preword, String endword) {
if (preword == null || preword.length() == 0) return endword;
if (endword == null || endword.length() == 0) return preword;
if (endword.charAt(0) == 'ㅆ') {
return preword.substring(0, preword.length() - 1) +
MorphUtil.makeChar(preword.charAt(preword.length() - 1), 20) + endword.substring(1, endword.length());
} else if (endword.charAt(0) == 'ㄴ') {
return preword.substring(0, preword.length() - 1) +
MorphUtil.makeChar(preword.charAt(preword.length() - 1), 4) + endword.substring(1, endword.length());
} else if (endword.charAt(0) == 'ㄹ') {
return preword.substring(0, preword.length() - 1) +
MorphUtil.makeChar(preword.charAt(preword.length() - 1), 8) + endword.substring(1, endword.length());
} else if (endword.charAt(0) == 'ㅁ') {
return preword.substring(0, preword.length() - 1) +
MorphUtil.makeChar(preword.charAt(preword.length() - 1), 16) + endword.substring(1, endword.length());
} else if (endword.charAt(0) == 'ㅂ') {
return preword.substring(0, preword.length() - 1) +
MorphUtil.makeChar(preword.charAt(preword.length() - 1), 17) + endword.substring(1, endword.length());
}
return preword + endword;
}
/** '음/기' + '이' + 어미, '에서/부터/에서부터' + '이' + 어미 인지 조사한다. */
public static boolean endsWithEEomi(String stem) {
int len = stem.length();
if (len < 2 || !stem.endsWith("이")) return false;
char[] jasos = MorphUtil.decompose(stem.charAt(len - 2));
if (jasos.length == 3 && jasos[2] == 'ㅁ')
return true;
else {
int index = stem.lastIndexOf("기");
if (index == -1) index = stem.lastIndexOf("에서");
if (index == -1) index = stem.lastIndexOf("부터");
if (index == -1) return false;
}
return true;
}
private static void setPomiResult(String[] results, String stem, String pomi) {
results[0] = stem;
results[1] = pomi;
}
public static boolean IsNLMBSyl(char ech, char lch) throws MorphException {
char[] features = SyllableUtil.getFeature(ech);
switch (lch) {
case 'ㄴ':
return (features[SyllableUtil.IDX_YNPNA] == '1' || features[SyllableUtil.IDX_YNPLN] == '1');
case 'ㄹ':
return (features[SyllableUtil.IDX_YNPLA] == '1');
case 'ㅁ':
return (features[SyllableUtil.IDX_YNPMA] == '1');
case 'ㅂ':
return (features[SyllableUtil.IDX_YNPBA] == '1');
}
return false;
}
/**
* 어미를 분리한다.
* <p/>
* 1. 규칙용언과 어간만 바뀌는 불규칙 용언
* 2. 어미가 종성 'ㄴ/ㄹ/ㅁ/ㅂ'으로 시작되는 어절
* 3. '여/거라/너라'의 불규칙 어절
* 4. 어미 '아/어'가 탈락되는 어절
* 5. '아/어'의 변이체 분리
*
* @throws org.apache.lucene.analysis.kr.morph.MorphException
*
*/
public static String[] splitEomi(String stem, String end) throws MorphException {
log.trace("어미를 분리한다. stem=[{}], end=[{}]", stem, end);
String[] strs = new String[2];
int strlen = stem.length();
if (strlen == 0) return strs;
char estem = stem.charAt(strlen - 1);
char[] chrs = MorphUtil.decompose(estem);
if (chrs.length == 1) return strs; // 한글이 아니라면...
if ((chrs.length == 3) && (chrs[2] == 'ㄴ' || chrs[2] == 'ㄹ' || chrs[2] == 'ㅁ' || chrs[2] == 'ㅂ') &&
EomiUtil.IsNLMBSyl(estem, chrs[2]) &&
DictionaryUtil.combineAndEomiCheck(chrs[2], end) != null) {
strs[1] = Character.toString(chrs[2]);
if (end.length() > 0) strs[1] += end;
strs[0] = stem.substring(0, strlen - 1) + MorphUtil.makeChar(estem, 0);
} else if (estem == '해' && DictionaryUtil.existEomi("어" + end)) {
strs[0] = stem.substring(0, strlen - 1) + "하";
strs[1] = "어" + end;
} else if (estem == '히' && DictionaryUtil.existEomi("이" + end)) {
strs[0] = stem.substring(0, strlen - 1) + "하";
strs[1] = "이" + end;
} else if (chrs[0] != 'ㅇ' &&
(chrs[1] == 'ㅏ' || chrs[1] == 'ㅓ' || chrs[1] == 'ㅔ' || chrs[1] == 'ㅐ') &&
(chrs.length == 2 || SyllableUtil.getFeature(estem)[SyllableUtil.IDX_YNPAH] == '1') &&
(DictionaryUtil.combineAndEomiCheck('어', end) != null)) {
strs[0] = stem;
if (chrs.length == 2) strs[1] = "어" + end;
else strs[1] = end;
} else if (stem.endsWith("하") && "여".equals(end)) {
strs[0] = stem;
strs[1] = "어";
} else if ((chrs.length == 2) && (chrs[1] == 'ㅘ' || chrs[1] == 'ㅙ' || chrs[1] == 'ㅝ' || chrs[1] == 'ㅕ' || chrs[1] == 'ㅐ' || chrs[1] == 'ㅒ') &&
(DictionaryUtil.combineAndEomiCheck('어', end) != null)) {
StringBuilder sb = new StringBuilder();
if (strlen > 1) sb.append(stem.substring(0, strlen - 1));
if (chrs[1] == 'ㅘ')
sb.append(MorphUtil.makeChar(estem, 8, 0)).append(MorphUtil.replaceJongsung('아', estem));
else if (chrs[1] == 'ㅝ')
sb.append(MorphUtil.makeChar(estem, 13, 0)).append(MorphUtil.replaceJongsung('어', estem));
else if (chrs[1] == 'ㅙ')
sb.append(MorphUtil.makeChar(estem, 11, 0)).append(MorphUtil.replaceJongsung('어', estem));
else if (chrs[1] == 'ㅕ')
sb.append(Character.toString(MorphUtil.makeChar(estem, 20, 0))).append(MorphUtil.replaceJongsung('어', estem));
else if (chrs[1] == 'ㅐ')
sb.append(MorphUtil.makeChar(estem, 0, 0)).append(MorphUtil.replaceJongsung('어', estem));
else if (chrs[1] == 'ㅒ')
sb.append(MorphUtil.makeChar(estem, 20, 0)).append(MorphUtil.replaceJongsung('애', estem));
strs[0] = sb.toString();
end = strs[0].substring(strs[0].length() - 1) + end;
strs[0] = strs[0].substring(0, strs[0].length() - 1);
strs[1] = end;
} else if (!"".equals(end) && DictionaryUtil.existEomi(end)) {
strs = new String[] { stem, end };
}
log.trace("어미를 분리했습니다. stem=[{}], end=[{}], strs=[{}]", stem, end, StringUtil.join(strs, ","));
return strs;
}
}