package org.apache.lucene.analysis.kr.utils; /** * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import org.apache.lucene.analysis.kr.morph.MorphException; import org.apache.lucene.analysis.kr.morph.WordEntry; /** * * 동사의 불규칙 변형을 처리하는 Utility Class * * @author S.M.Lee * */ public class IrregularUtil { // ㅂ 불규칙 public static final char IRR_TYPE_BIUP = 'B'; // ㅎ 불규칙 public static final char IRR_TYPE_HIOOT = 'H'; // ㄹ 불규칙 public static final char IRR_TYPE_LIUL = 'U'; // 르 불규칙 public static final char IRR_TYPE_LOO = 'L'; // ㅅ 불규칙 public static final char IRR_TYPE_SIUT = 'S'; // ㄷ 불규칙 public static final char IRR_TYPE_DI = 'D'; // 러 불규칙 public static final char IRR_TYPE_RU = 'R'; // 으 탈락 public static final char IRR_TYPE_UI = 'X'; // 규칙형 public static final char IRR_TYPE_REGULAR = 'X'; public static String[] restoreIrregularVerb(String start, String end) throws MorphException { if(end==null) end=""; char[] jasos = new char[0]; if(end.length()>0) jasos = MorphUtil.decompose(end.charAt(0)); if(end.startsWith("ㄴ")) { String[] irrs = restoreBIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreHIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreELIrregular(start,end); if(irrs!=null) return irrs; }else if(end.startsWith("ㄹ")) { String[] irrs = restoreBIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreHIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreELIrregular(start,end); if(irrs!=null) return irrs; }else if(end.startsWith("ㅁ")) { String[] irrs = restoreBIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreHIrregular(start,end); if(irrs!=null) return irrs; }else if(end.startsWith("ㅂ")) { String[] irrs = restoreBIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreHIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreELIrregular(start,end); if(irrs!=null) return irrs; }else if(start.endsWith("우")||start.endsWith("오")) { String[] irrs = restoreBIrregular(start,end); if(irrs!=null) return irrs; }else if(end.startsWith("오")) { String[] irrs = restoreBIrregular(start,end); if(irrs!=null) return irrs; }else if(end.startsWith("시")) { String[] irrs = restoreBIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreELIrregular(start,end); if(irrs!=null) return irrs; }else if(end.startsWith("으")) { String[] irrs = restoreBIrregular(start,end); if(irrs!=null) return irrs; }else if(jasos.length>1&&jasos[0]=='ㅇ'&&(jasos[1]=='ㅓ'||jasos[1]=='ㅏ')) { String[] irrs = restoreDIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreSIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreLIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreHIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreUIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreRUIrregular(start,end); if(irrs!=null) return irrs; }else if(jasos.length>1&&jasos[0]=='ㅇ'&&jasos[1]=='ㅡ') { String[] irrs = restoreDIrregular(start,end); if(irrs!=null) return irrs; irrs = restoreSIrregular(start,end); if(irrs!=null) return irrs; }else if(("가".equals(start)&&"거라".equals(end))|| ("오".equals(start)&&"너라".equals(end))) { return new String[]{start,end}; } return null; } /** * ㅂ 불규칙 원형을 복원한다. (돕다, 곱다) * @param start * @param end * @return * @throws MorphException */ private static String[] restoreBIrregular(String start, String end) throws MorphException { if(start==null||"".equals(start)||end==null) return null; if(start.length()<2) return null; if(!(start.endsWith("오")||start.endsWith("우"))) return null; char convEnd = MorphUtil.makeChar(end.charAt(0), 0); if("ㅁ".equals(end)||"ㄴ".equals(end)||"ㄹ".equals(end)|| convEnd=='아'||convEnd=='어') { // 도우(돕), 고오(곱), 스러우(스럽) 등으로 변형되므로 반드시 2자 이상임 char ch = start.charAt(start.length()-2); ch = MorphUtil.makeChar(ch, 17); if(start.length()>2) start = Utilities.arrayToString(new String[]{start.substring(0,start.length()-2),Character.toString(ch)}); else start = Character.toString(ch); WordEntry entry = DictionaryUtil.getVerb(start); if(entry!=null&&entry.getFeature(WordEntry.IDX_REGURA)==IRR_TYPE_BIUP) return new String[]{start,end}; } return null; } /** * ㄷ 불규칙 원형을 복원한다. (깨닫다, 묻다) * @param start * @param end * @return * @throws MorphException */ private static String[] restoreDIrregular(String start, String end) throws MorphException { if(start==null||"".equals(start)) return null; char ch = start.charAt(start.length()-1); char[] jasos = MorphUtil.decompose(ch); if(jasos.length!=3||jasos[2]!='ㄹ') return null; ch = MorphUtil.makeChar(ch, 7); if(start.length()>1) start = Utilities.arrayToString(new String[]{start.substring(0,start.length()-1),Character.toString(ch)}); else start = Character.toString(ch); WordEntry entry = DictionaryUtil.getVerb(start); if(entry!=null&&entry.getFeature(WordEntry.IDX_REGURA)==IRR_TYPE_DI) return new String[]{start,end}; return null; } /** * ㅅ 불규칙 원형을 복원한다. (긋다--그어) * @param start * @param end * @return * @throws MorphException */ private static String[] restoreSIrregular(String start, String end) throws MorphException { if(start==null||"".equals(start)) return null; char ch = start.charAt(start.length()-1); char[] jasos = MorphUtil.decompose(ch); if(jasos.length!=2) return null; ch = MorphUtil.makeChar(ch, 19); if(start.length()>1) start = start.substring(0,start.length()-1)+ch; else start = Character.toString(ch); WordEntry entry = DictionaryUtil.getVerb(start); if(entry!=null&&entry.getFeature(WordEntry.IDX_REGURA)==IRR_TYPE_SIUT) return new String[]{start,end}; return null; } /** * 르 불규칙 원형을 복원한다. (흐르다-->흘러) * "따르다"는 ㄹ불규칙이 아니지만.. 인 것처럼 처리한다. * @param start * @param end * @return * @throws MorphException */ private static String[] restoreLIrregular(String start, String end) throws MorphException { if(start.length()<2) return null; char ch1 = start.charAt(start.length()-2); char ch2 = start.charAt(start.length()-1); char[] jasos1 = MorphUtil.decompose(ch1); if(((jasos1.length==3&&jasos1[2]=='ㄹ')||jasos1.length==2)&&(ch2=='러'||ch2=='라')) { StringBuffer sb = new StringBuffer(); ch1 = MorphUtil.makeChar(ch1, 0); if(start.length()>2) sb.append(start.substring(0,start.length()-2)).append(ch1).append("르"); else sb.append(Character.toString(ch1)).append("르"); WordEntry entry = DictionaryUtil.getVerb(sb.toString()); if(entry!=null&&entry.getFeature(WordEntry.IDX_REGURA)==IRR_TYPE_LOO) return new String[]{sb.toString(),end}; } return null; } /** * ㄹ불규칙 원형을 복원한다. (길다-->긴, 알다-->안) * 어간의 끝소리인 ‘ㄹ’이 ‘ㄴ’, ‘ㄹ’, ‘ㅂ’, ‘오’, ‘시’ 앞에서 탈락하는 활용의 형식 * @param start * @param end * @return * @throws MorphException */ private static String[] restoreELIrregular(String start, String end) throws MorphException { if(start==null || start.length()==0 || end==null||end.length()==0) return null; if(!(end.charAt(0)=='ㄴ'||end.charAt(0)=='ㄹ'||end.charAt(0)=='ㅂ'||end.charAt(0)=='오'||end.charAt(0)=='시')) return null; char convEnd = MorphUtil.makeChar(start.charAt(start.length()-1), 8); start = start.substring(0,start.length()-1)+convEnd; WordEntry entry = DictionaryUtil.getVerb(start); if(entry!=null) return new String[]{start,end}; return null; } /** * 러 불규칙 원형을 복원한다. (이르다->이르러, 푸르다->푸르러) * @param start * @param end * @return * @throws MorphException */ private static String[] restoreRUIrregular(String start, String end) throws MorphException { if(start.length()<2) return null; char ch1 = start.charAt(start.length()-1); char ch2 = start.charAt(start.length()-2); char[] jasos1 = MorphUtil.decompose(ch1); char[] jasos2 = MorphUtil.decompose(ch2); if(jasos1[0]!='ㄹ'||jasos2[0]!='ㄹ') return null; ch2 = MorphUtil.makeChar(ch2, 0); if(start.length()>2) start = start.substring(0,start.length()-1); else start = Character.toString(ch2); WordEntry entry = DictionaryUtil.getVerb(start); if(entry!=null&&entry.getFeature(WordEntry.IDX_REGURA)==IRR_TYPE_RU) return new String[]{start,end}; return null; } /** * ㅎ 탈락 원형을 복원한다. (까맣다-->까만,까매서) * @param start * @param end * @return * @throws MorphException */ private static String[] restoreHIrregular(String start, String end) throws MorphException { if(start==null||"".equals(start)||end==null||"".equals(end)) return null; char ch1 = end.charAt(0); char ch2 = start.charAt(start.length()-1); char[] jasos1 = MorphUtil.decompose(ch1); char[] jasos2 = MorphUtil.decompose(ch2); if(jasos1.length==1) { ch2 = MorphUtil.makeChar(ch2, 27); }else { if(jasos2.length!=2||jasos2[1]!='ㅐ') return null; ch2 = MorphUtil.makeChar(ch2, 0, 27); } if(start.length()>1) start = start.substring(0,start.length()-1)+ch2; else start = Character.toString(ch2); WordEntry entry = DictionaryUtil.getVerb(start); if(entry!=null&&entry.getFeature(WordEntry.IDX_REGURA)==IRR_TYPE_HIOOT) return new String[]{start,end}; return null; } /** * 으 탈락 원형을 복원한다. (뜨다->더, 크다-커) * @param start * @param end * @return * @throws MorphException */ private static String[] restoreUIrregular(String start, String end) throws MorphException { if(start==null||"".equals(start)) return null; char ch = start.charAt(start.length()-1); char[] jasos = MorphUtil.decompose(ch); if(!(jasos.length==2&&jasos[1]=='ㅓ')) return null; ch = MorphUtil.makeChar(ch, 18,0); if(start.length()>1) start = start.substring(0,start.length()-1)+ch; else start = Character.toString(ch); WordEntry entry = DictionaryUtil.getVerb(start); if(entry!=null) return new String[]{start,end}; return null; } }