/* * Copyright 2011-2013 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.lucene.analysis.kr.utils; import org.apache.lucene.analysis.kr.morph.MorphException; import org.apache.lucene.analysis.kr.morph.WordEntry; /** * 동사의 불규칙 변형을 처리하는 Utility Class * * @author S.M.Lee */ public class IrregularUtil { // ㅂ 불규칙 public static final char IRR_TYPE_BIUP = 'B'; // ㅎ 불규칙 public static final char IRR_TYPE_HIOOT = 'H'; // ㄹ 불규칙 public static final char IRR_TYPE_LIUL = 'U'; // 르 불규칙 public static final char IRR_TYPE_LOO = 'L'; // ㅅ 불규칙 public static final char IRR_TYPE_SIUT = 'S'; // ㄷ 불규칙 public static final char IRR_TYPE_DI = 'D'; // 러 불규칙 public static final char IRR_TYPE_RU = 'R'; // 으 탈락 public static final char IRR_TYPE_UI = 'X'; // 규칙형 public static final char IRR_TYPE_REGULAR = 'X'; public static String[] restoreIrregularVerb(String start, String end) throws MorphException { if (end == null) end = ""; char[] jasos = new char[0]; if (end.length() > 0) jasos = MorphUtil.decompose(end.charAt(0)); if (end.startsWith("ㄴ")) { String[] irrs = restoreBIrregular(start, end); if (irrs != null) return irrs; irrs = restoreHIrregular(start, end); if (irrs != null) return irrs; irrs = restoreELIrregular(start, end); if (irrs != null) return irrs; } else if (end.startsWith("ㄹ")) { String[] irrs = restoreBIrregular(start, end); if (irrs != null) return irrs; irrs = restoreHIrregular(start, end); if (irrs != null) return irrs; irrs = restoreELIrregular(start, end); if (irrs != null) return irrs; } else if (end.startsWith("ㅁ")) { String[] irrs = restoreBIrregular(start, end); if (irrs != null) return irrs; irrs = restoreHIrregular(start, end); if (irrs != null) return irrs; } else if (end.startsWith("ㅂ")) { String[] irrs = restoreBIrregular(start, end); if (irrs != null) return irrs; irrs = restoreHIrregular(start, end); if (irrs != null) return irrs; irrs = restoreELIrregular(start, end); if (irrs != null) return irrs; } else if (start.endsWith("우") || start.endsWith("오")) { String[] irrs = restoreBIrregular(start, end); if (irrs != null) return irrs; } else if (end.startsWith("오")) { String[] irrs = restoreBIrregular(start, end); if (irrs != null) return irrs; } else if (end.startsWith("시")) { String[] irrs = restoreBIrregular(start, end); if (irrs != null) return irrs; irrs = restoreELIrregular(start, end); if (irrs != null) return irrs; } else if (end.startsWith("으")) { String[] irrs = restoreBIrregular(start, end); if (irrs != null) return irrs; } else if (jasos.length > 1 && jasos[0] == 'ㅇ' && (jasos[1] == 'ㅓ' || jasos[1] == 'ㅏ')) { String[] irrs = restoreDIrregular(start, end); if (irrs != null) return irrs; irrs = restoreSIrregular(start, end); if (irrs != null) return irrs; irrs = restoreLIrregular(start, end); if (irrs != null) return irrs; irrs = restoreHIrregular(start, end); if (irrs != null) return irrs; irrs = restoreUIrregular(start, end); if (irrs != null) return irrs; irrs = restoreRUIrregular(start, end); if (irrs != null) return irrs; } else if (jasos.length > 1 && jasos[0] == 'ㅇ' && jasos[1] == 'ㅡ') { String[] irrs = restoreDIrregular(start, end); if (irrs != null) return irrs; irrs = restoreSIrregular(start, end); if (irrs != null) return irrs; } else if (("가".equals(start) && "거라".equals(end)) || ("오".equals(start) && "너라".equals(end))) { return new String[] { start, end }; } return null; } /** * ㅂ 불규칙 원형을 복원한다. (돕다, 곱다) * * @throws org.apache.lucene.analysis.kr.morph.MorphException * */ private static String[] restoreBIrregular(String start, String end) throws MorphException { if (start == null || "".equals(start) || end == null) return null; if (start.length() < 2) return null; if (!(start.endsWith("오") || start.endsWith("우"))) return null; char convEnd = MorphUtil.makeChar(end.charAt(0), 0); if ("ㅁ".equals(end) || "ㄴ".equals(end) || "ㄹ".equals(end) || convEnd == '아' || convEnd == '어') { // 도우(돕), 고오(곱), 스러우(스럽) 등으로 변형되므로 반드시 2자 이상임 char ch = start.charAt(start.length() - 2); ch = MorphUtil.makeChar(ch, 17); if (start.length() > 2) start = Utilities.arrayToString(new String[] { start.substring(0, start.length() - 2), Character.toString(ch) }); else start = Character.toString(ch); WordEntry entry = DictionaryUtil.getVerb(start); if (entry != null && entry.getFeature(WordEntry.IDX_REGURA) == IRR_TYPE_BIUP) return new String[] { start, end }; } return null; } /** * ㄷ 불규칙 원형을 복원한다. (깨닫다, 묻다) * * @throws org.apache.lucene.analysis.kr.morph.MorphException * */ private static String[] restoreDIrregular(String start, String end) throws MorphException { if (start == null || "".equals(start)) return null; char ch = start.charAt(start.length() - 1); char[] jasos = MorphUtil.decompose(ch); if (jasos.length != 3 || jasos[2] != 'ㄹ') return null; ch = MorphUtil.makeChar(ch, 7); if (start.length() > 1) start = Utilities.arrayToString(new String[] { start.substring(0, start.length() - 1), Character.toString(ch) }); else start = Character.toString(ch); WordEntry entry = DictionaryUtil.getVerb(start); if (entry != null && entry.getFeature(WordEntry.IDX_REGURA) == IRR_TYPE_DI) return new String[] { start, end }; return null; } /** * ㅅ 불규칙 원형을 복원한다. (긋다--그어) * * @throws org.apache.lucene.analysis.kr.morph.MorphException * */ private static String[] restoreSIrregular(String start, String end) throws MorphException { if (start == null || "".equals(start)) return null; char ch = start.charAt(start.length() - 1); char[] jasos = MorphUtil.decompose(ch); if (jasos.length != 2) return null; ch = MorphUtil.makeChar(ch, 19); if (start.length() > 1) start = start.substring(0, start.length() - 1) + ch; else start = Character.toString(ch); WordEntry entry = DictionaryUtil.getVerb(start); if (entry != null && entry.getFeature(WordEntry.IDX_REGURA) == IRR_TYPE_SIUT) return new String[] { start, end }; return null; } /** * 르 불규칙 원형을 복원한다. (흐르다-->흘러) * "따르다"는 ㄹ불규칙이 아니지만.. 인 것처럼 처리한다. * * @throws org.apache.lucene.analysis.kr.morph.MorphException * */ private static String[] restoreLIrregular(String start, String end) throws MorphException { if (start.length() < 2) return null; char ch1 = start.charAt(start.length() - 2); char ch2 = start.charAt(start.length() - 1); char[] jasos1 = MorphUtil.decompose(ch1); if (((jasos1.length == 3 && jasos1[2] == 'ㄹ') || jasos1.length == 2) && (ch2 == '러' || ch2 == '라')) { StringBuilder sb = new StringBuilder(); ch1 = MorphUtil.makeChar(ch1, 0); if (start.length() > 2) sb.append(start.substring(0, start.length() - 2)).append(ch1).append("르"); else sb.append(Character.toString(ch1)).append("르"); WordEntry entry = DictionaryUtil.getVerb(sb.toString()); if (entry != null && entry.getFeature(WordEntry.IDX_REGURA) == IRR_TYPE_LOO) return new String[] { sb.toString(), end }; } return null; } /** * ㄹ불규칙 원형을 복원한다. (길다-->긴, 알다-->안) * 어간의 끝소리인 ‘ㄹ’이 ‘ㄴ’, ‘ㄹ’, ‘ㅂ’, ‘오’, ‘시’ 앞에서 탈락하는 활용의 형식 * * @throws org.apache.lucene.analysis.kr.morph.MorphException * */ private static String[] restoreELIrregular(String start, String end) throws MorphException { if (start == null || start.length() == 0 || end == null || end.length() == 0) return null; if (!(end.charAt(0) == 'ㄴ' || end.charAt(0) == 'ㄹ' || end.charAt(0) == 'ㅂ' || end.charAt(0) == '오' || end.charAt(0) == '시')) return null; char convEnd = MorphUtil.makeChar(start.charAt(start.length() - 1), 8); start = start.substring(0, start.length() - 1) + convEnd; WordEntry entry = DictionaryUtil.getVerb(start); if (entry != null) return new String[] { start, end }; return null; } /** * 러 불규칙 원형을 복원한다. (이르다->이르러, 푸르다->푸르러) * * @throws org.apache.lucene.analysis.kr.morph.MorphException * */ private static String[] restoreRUIrregular(String start, String end) throws MorphException { if (start.length() < 2) return null; char ch1 = start.charAt(start.length() - 1); char ch2 = start.charAt(start.length() - 2); char[] jasos1 = MorphUtil.decompose(ch1); char[] jasos2 = MorphUtil.decompose(ch2); if (jasos1[0] != 'ㄹ' || jasos2[0] != 'ㄹ') return null; ch2 = MorphUtil.makeChar(ch2, 0); if (start.length() > 2) start = start.substring(0, start.length() - 1); else start = Character.toString(ch2); WordEntry entry = DictionaryUtil.getVerb(start); if (entry != null && entry.getFeature(WordEntry.IDX_REGURA) == IRR_TYPE_RU) return new String[] { start, end }; return null; } /** * ㅎ 탈락 원형을 복원한다. (까맣다-->까만,까매서) * * @throws org.apache.lucene.analysis.kr.morph.MorphException * */ private static String[] restoreHIrregular(String start, String end) throws MorphException { if (start == null || "".equals(start) || end == null || "".equals(end)) return null; char ch1 = end.charAt(0); char ch2 = start.charAt(start.length() - 1); char[] jasos1 = MorphUtil.decompose(ch1); char[] jasos2 = MorphUtil.decompose(ch2); if (jasos1.length == 1) { ch2 = MorphUtil.makeChar(ch2, 27); } else { if (jasos2.length != 2 || jasos2[1] != 'ㅐ') return null; ch2 = MorphUtil.makeChar(ch2, 0, 27); } if (start.length() > 1) start = start.substring(0, start.length() - 1) + ch2; else start = Character.toString(ch2); WordEntry entry = DictionaryUtil.getVerb(start); if (entry != null && entry.getFeature(WordEntry.IDX_REGURA) == IRR_TYPE_HIOOT) return new String[] { start, end }; return null; } /** * 으 탈락 원형을 복원한다. (뜨다->더, 크다-커) * * @throws org.apache.lucene.analysis.kr.morph.MorphException * */ private static String[] restoreUIrregular(String start, String end) throws MorphException { if (start == null || "".equals(start)) return null; char ch = start.charAt(start.length() - 1); char[] jasos = MorphUtil.decompose(ch); if (!(jasos.length == 2 && jasos[1] == 'ㅓ')) return null; ch = MorphUtil.makeChar(ch, 18, 0); if (start.length() > 1) start = start.substring(0, start.length() - 1) + ch; else start = Character.toString(ch); WordEntry entry = DictionaryUtil.getVerb(start); if (entry != null) return new String[] { start, end }; return null; } }