/*
 * Copyright 2011-2013 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.lucene.analysis.kr.utils;

import com.google.common.base.Joiner;
import com.google.common.collect.SetMultimap;
import com.google.common.collect.TreeMultimap;
import org.apache.commons.lang3.StringUtils;
import org.apache.lucene.analysis.kr.morph.MorphException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Synonym lookup backed by a dictionary file that is loaded once, when this class is initialized.
 *
 * <p>Each dictionary line is a comma-separated list of words. The first word of a line is the
 * group key, and every word on the line (the key included) is a member of that group.
 *
 * @author Sunghyouk Bae (sunghyouk.bae@gmail.com)
 * @since 2013-04-27, 12:31 AM
 */
public class SynonymUtil {

    private static final Logger log = LoggerFactory.getLogger(SynonymUtil.class);

    /** Synonym dictionary: group key -> all words of the group, both in natural (sorted) order. */
    private static final SetMultimap<String, String> synonymMap = TreeMultimap.create();

    /**
     * Case-insensitive lookup index: lower-cased word -> key of the group it belongs to.
     * Written only by the static initializer below and read-only afterwards.
     */
    private static final Map<String, String> groupKeyByWord = new HashMap<String, String>();

    static {
        final String filename = KoreanEnv.getInstance().getValue(KoreanEnv.FILE_SYNONYM);

        log.info("동의어 사전에서 동의어 정보를 로드합니다... filename=[{}]", filename);
        List<String> lines = FileUtil.readLines(filename, "UTF-8");

        log.info("동의어 사전을 빌드합니다...");
        // Evaluate the level once per load (not once per line); the guard avoids the join cost.
        final boolean traceEnabled = log.isTraceEnabled();
        for (String line : lines) {
            List<String> words = splitWords(line);
            // A line needs a key plus at least one more word to define a synonym group.
            if (words.size() > 1) {
                synonymMap.putAll(words.get(0), words);
                if (traceEnabled) {
                    log.trace("동의어를 추가합니다. words=[{}]", Joiner.on(",").join(words));
                }
            }
        }

        // Build the index in sorted key order and never overwrite an existing entry, so a word
        // that appears in several groups resolves to the first group in key order.
        for (String key : synonymMap.keySet()) {
            for (String member : synonymMap.get(key)) {
                String normalized = member.toLowerCase(Locale.ROOT);
                if (!groupKeyByWord.containsKey(normalized)) {
                    groupKeyByWord.put(normalized, key);
                }
            }
        }

        log.info("동의어 사전을 빌드했습니다. 라인수=[{}], 동의어수=[{}]", lines.size(), synonymMap.values().size());
    }

    /**
     * Splits one dictionary line on commas, trimming each token and dropping blank ones.
     *
     * @param line a raw dictionary line; may be {@code null}
     * @return the trimmed, non-empty tokens in their original order; empty if there are none
     */
    private static List<String> splitWords(String line) {
        List<String> words = new ArrayList<String>();
        String[] tokens = StringUtils.split(line, ",");
        if (tokens == null) {
            return words;
        }
        for (String token : tokens) {
            String trimmed = token.trim();
            if (trimmed.length() > 0) {
                words.add(trimmed);
            }
        }
        return words;
    }

    /**
     * Returns every word of the synonym group that contains the given word.
     *
     * <p>Matching is case-insensitive and locale-independent. The result is always a fresh set
     * owned by the caller: modifying it does not affect the dictionary or later lookups.
     *
     * @param word the word to look up; may be {@code null} or empty
     * @return the words of the matching group (including the group key), sorted in natural
     *         order; an empty set if {@code word} is {@code null}, empty, or has no synonyms
     * @throws org.apache.lucene.analysis.kr.morph.MorphException
     *          never thrown by this implementation; kept in the signature so existing callers
     *          that catch it keep compiling
     */
    public static Set<String> getSynonym(String word) throws MorphException {
        if (word == null || word.length() == 0) {
            return new HashSet<String>();
        }

        // Locale.ROOT keeps the lookup key independent of the JVM's default locale.
        final String normalized = word.toLowerCase(Locale.ROOT);
        log.trace("동의어를 찾습니다... word=[{}]", normalized);

        final String groupKey = groupKeyByWord.get(normalized);
        if (groupKey == null) {
            log.trace("동의어가 없습니다.");
            return new HashSet<String>();
        }

        // Copy instead of returning the multimap's live view, so callers cannot alter the dictionary.
        Set<String> synonyms = new TreeSet<String>(synonymMap.get(groupKey));
        if (log.isTraceEnabled()) {
            log.trace("동의어를 찾았습니다. word=[{}], synonyms=[{}]", normalized, StringUtil.join(synonyms, ","));
        }
        return synonyms;
    }
}