/** * Copyright (c)2010-2011 Enterprise Website Content Management System(EWCMS), All rights reserved. * EWCMS PROPRIETARY/CONFIDENTIAL. Use is subject to license terms. * http://www.ewcms.com */ package com.ewcms.content.document.util.analyzer.help; /** * <ul> * 字符集识别辅助工具类 * </ul> * * @author 吴智俊 */ public class CharacterHelper { public static boolean isSpaceLetter(char input){ return input == 8 || input == 9 || input == 10 || input == 13 || input == 32 || input == 160; } public static boolean isEnglishLetter(char input){ return (input >= 'a' && input <= 'z') || (input >= 'A' && input <= 'Z'); } public static boolean isArabicNumber(char input){ return input >= '0' && input <= '9'; } public static boolean isCJKCharacter(char input){ Character.UnicodeBlock ub = Character.UnicodeBlock.of(input); if (ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS || ub == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS || ub == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A //全角数字字符和日韩字符 || ub == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS //韩文字符集 || ub == Character.UnicodeBlock.HANGUL_SYLLABLES || ub == Character.UnicodeBlock.HANGUL_JAMO || ub == Character.UnicodeBlock.HANGUL_COMPATIBILITY_JAMO //日文字符集 || ub == Character.UnicodeBlock.HIRAGANA //平假名 || ub == Character.UnicodeBlock.KATAKANA //片假名 || ub == Character.UnicodeBlock.KATAKANA_PHONETIC_EXTENSIONS ) { return true; }else{ return false; } //其他的CJK标点符号,可以不做处理 //|| ub == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION //|| ub == Character.UnicodeBlock.GENERAL_PUNCTUATION } /** * 进行字符规格化(全角转半角,大写转小写处理) * @param input * @return char */ public static char regularize(char input){ if (input == 12288) { input = (char) 32; }else if (input > 65280 && input < 65375) { input = (char) (input - 65248); }else if (input >= 'A' && input <= 'Z') { input += 32; } return input; } }