/* * JEF - Copyright 2009-2010 Jiyi (mr.jiyi@gmail.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package jef.tools.chinese; import java.io.BufferedReader; import java.io.File; import java.io.IOException; import java.lang.ref.SoftReference; import java.util.ArrayList; import jef.tools.IOUtils; import jef.tools.TextFileCallback; import jef.tools.TextFileCallback.Dealwith; import org.apache.commons.lang.StringUtils; /** * 繁简体转换工具 * <p> * 备注:从繁体转换到简体是可靠的,安全的。 * 但是简体字转换到繁体是不推荐的做法。因为简体字将很多繁体字合并了, * 反向转换时不可能根据上下文的含义来决定使用哪个字,因此可能会转换到别字上 * <p> * Usage:<pre> * CFJUtil.getInstance().fan2jan("中華民國"); //返回 "中华民国" * </pre> * @author Jiyi * */ public class CFJUtil { private static CFJUtil instance; private SoftReference<Mapping[]> jan2fanMapping; private SoftReference<Mapping[]> fan2janMapping; /** * 获得繁简转换工具的实例 * @return 繁简转换工具 */ public static CFJUtil getInstance() { if (instance == null) { try { instance = new CFJUtil(); } catch (IOException e) { throw new RuntimeException(e.getMessage()); } } return instance; } private CFJUtil() throws IOException { checkAndInit(); } private void checkAndInit() { if (jan2fanMapping == null || jan2fanMapping.get() == null) { jan2fanMapping = new SoftReference<Mapping[]>(loadMapping( "jf_map_utf8.properties", 2700)); } if (fan2janMapping == null || fan2janMapping.get() == null) { fan2janMapping = new SoftReference<Mapping[]>(loadMapping( "fj_map_utf8.properties", 3180)); } } static class Mapping { int jId; int fId; public Mapping(int jChar, int fChar) { this.jId = jChar; this.fId = fChar; } } /** * 将文字繁体转为简体 * @param input * @return String */ public String fan2jan(String input) { if (input == null) return null; checkAndInit(); Mapping[] a=fan2janMapping.get(); int len = input.length(); char[] result = new char[len]; for (int n = 0; n < len; n++) { int ch=input.charAt(n); int index=f2jSearch(a,ch); if(index<0){ result[n]=(char)ch; }else{ result[n]=(char)a[index].jId; } } return new String(result); } /** * 简体转为繁体 * @param input 简体 * @return 繁体 */ public String jan2fan(String input) { if (input == null) return null; checkAndInit(); Mapping[] a=jan2fanMapping.get(); int len = input.length(); char[] result = new char[len]; for (int n = 0; n < len; n++) { int ch=input.charAt(n); int index=j2fSearch(a,ch); if(index<0){ result[n]=(char)ch; }else{ result[n]=(char)a[index].fId; } } return new String(result); } private static int j2fSearch(Mapping[] a, int key) { int low = 0; int high = a.length - 1; while (low <= high) { int mid = (low + high) >>> 1; Mapping midVal = a[mid]; if (midVal.jId < key) low = mid + 1; else if (midVal.jId > key) high = mid - 1; else return mid; // key found } return -(low + 1); // key not found. } private static int f2jSearch(Mapping[] a, int key) { int low = 0; int high = a.length - 1; while (low <= high) { int mid = (low + high) >>> 1; Mapping midVal = a[mid]; if (midVal.fId < key) low = mid + 1; else if (midVal.fId > key) high = mid - 1; else return mid; // key found } return -(low + 1); // key not found. } private static Mapping[] loadMapping(String fileName, int size) { String line = null; BufferedReader br = null; int num = 0; try { br = IOUtils.getReader(CFJUtil.class, fileName, "UTF-8"); ArrayList<Mapping> list = new ArrayList<Mapping>(size); while ((line = br.readLine()) != null) { if (line.startsWith("#") || StringUtils.isBlank(line)) { continue; } char fChar = line.charAt(0); char jChar = line.charAt(2); list.add(new Mapping(jChar, fChar)); num++; } return list.toArray(new Mapping[list.size()]); } catch (IOException e) { throw new RuntimeException(e.getMessage()); } finally { IOUtils.closeQuietly(br); } } /** * 繁体文件转为简体 * @param from 输入文件 * @param fromCharset 文件编码 * @param to 输出文件 * @param toCharset 输出编码 * @throws IOException */ public void fan2Jan(File from, String fromCharset, final File to, String toCharset) throws IOException { IOUtils.processFile(from, new TextFileCallback(fromCharset,toCharset,Dealwith.NONE) { @Override protected String processLine(String line) { return fan2jan(line); } @Override protected File getTarget(File source) { return to; } }); } }