/**
 * Copyright 2013-2014 Guoqiang Chen, Shanghai, China. All rights reserved.
 *
 * Email: subchen@gmail.com
 * URL: http://subchen.github.io/
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package jetbrick.text.pinyin;

import java.io.*;
import java.util.HashMap;
import java.util.Map;
import jetbrick.io.IoUtils;
import jetbrick.lang.StringUtils;

/**
 * Looks up the full pinyin of Chinese characters (GBK supported).
 *
 * <p>The lookup table is read once, in the private constructor, from the
 * classpath resource {@code ChinesePinyin.dat} located next to this class.
 * Each line of that resource has the form {@code HEX=reading1,reading2,...};
 * lookups use the upper-case hexadecimal value of the {@code char} as the key.
 * The table is never modified after construction.
 *
 * @author Guoqiang Chen
 */
public class ChinesePinyin {
    /** Name of the table resource, resolved relative to this class. */
    private static final String RESOURCE_NAME = "ChinesePinyin.dat";
    /** Charset used to decode the table resource. */
    private static final String RESOURCE_CHARSET = "ISO-8859-1";

    private static final ChinesePinyin instance = new ChinesePinyin();

    /** Upper-case hex code unit -> readings, in the order listed in the resource. */
    private final Map<String, String[]> pinyinTable = new HashMap<String, String[]>(21000);

    /**
     * Returns the shared instance.
     */
    public static ChinesePinyin getInstance() {
        return instance;
    }

    /**
     * Loads the pinyin table from the classpath.
     *
     * @throws IllegalStateException if the table resource cannot be found
     * @throws RuntimeException if the table resource cannot be read
     */
    private ChinesePinyin() {
        InputStream is = getClass().getResourceAsStream(RESOURCE_NAME);
        if (is == null) {
            // getResourceAsStream signals "not found" with null; fail with a clear message
            // instead of a NullPointerException from the reader below.
            throw new IllegalStateException("Pinyin table resource not found: " + RESOURCE_NAME);
        }
        try {
            BufferedReader reader = new BufferedReader(new InputStreamReader(is, RESOURCE_CHARSET));
            String line;
            while ((line = reader.readLine()) != null) {
                String hex = StringUtils.substringBefore(line, "=");
                String pinyin = StringUtils.substringAfter(line, "=");
                // Entries with a blank right-hand side carry no reading and are skipped.
                if (StringUtils.isNotBlank(pinyin)) {
                    pinyinTable.put(hex, pinyin.split(","));
                }
            }
        } catch (IOException e) {
            throw new RuntimeException("Failed to load pinyin table: " + RESOURCE_NAME, e);
        } finally {
            // Closing the underlying stream is enough; the reader holds no other resource.
            IoUtils.closeQuietly(is);
        }
    }

    /**
     * Returns the pinyin readings of the given Chinese character, as stored in the
     * table (the readings include the tone).
     *
     * <p>The returned array is a copy, so callers may modify it freely without
     * affecting the shared table.
     *
     * @param c the character to look up
     * @return the readings, or {@code null} if the character is not recognized
     */
    public String[] getPinyinFromChar(char c) {
        String[] readings = lookup(c);
        return readings == null ? null : readings.clone();
    }

    /**
     * Returns only the recognized Chinese characters of {@code str}; every character
     * that has no entry in the table is removed.
     *
     * @param str the input text, may be {@code null}
     * @return the filtered text, or {@code null} if {@code str} is {@code null}
     */
    public String getChinese(String str) {
        if (str == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder(str.length());
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            if (lookup(ch) != null) {
                sb.append(ch);
            }
        }
        return sb.toString();
    }

    /**
     * Returns the pinyin of {@code str} without tones, using the first reading of each
     * character. Unrecognized characters are copied through unchanged.
     *
     * @param str the input text, may be {@code null}
     * @return the converted text, or {@code null} if {@code str} is {@code null}
     */
    public String getFullPinyin(String str) {
        return convert(str, false);
    }

    /**
     * Returns the first pinyin letter of every character of {@code str}, using the first
     * reading of each character. Unrecognized characters are copied through unchanged.
     *
     * @param str the input text, may be {@code null}
     * @return the converted text, or {@code null} if {@code str} is {@code null}
     */
    public String getFirstPinyin(String str) {
        return convert(str, true);
    }

    /** Looks up the table entry for {@code c} without copying it; {@code null} if absent. */
    private String[] lookup(char c) {
        // Hex digits are ASCII only, so the default-locale toUpperCase() is safe here.
        return pinyinTable.get(Integer.toHexString(c).toUpperCase());
    }

    /** Returns the first non-empty reading of {@code c}, or {@code null} if there is none. */
    private String primaryReading(char c) {
        String[] readings = lookup(c);
        if (readings == null || readings.length == 0 || readings[0].length() == 0) {
            return null;
        }
        return readings[0];
    }

    /** Replaces every recognized character by its first reading (or that reading's initial). */
    private String convert(String str, boolean initialOnly) {
        if (str == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder(str.length() * 2);
        for (int i = 0; i < str.length(); i++) {
            char ch = str.charAt(i);
            String reading = primaryReading(ch);
            if (reading == null) {
                sb.append(ch);
            } else if (initialOnly) {
                sb.append(reading.charAt(0));
            } else {
                // Drops the last character of the reading, which is assumed to be the tone
                // marker (e.g. "zhong1" -> "zhong") -- depends on the data file's format.
                sb.append(reading, 0, reading.length() - 1);
            }
        }
        return sb.toString();
    }
}