/**
 * Copyright © 2010-2012 Atilika Inc.  All rights reserved.
 *
 * Atilika Inc. licenses this file to you under the Apache License, Version
 * 2.0 (the "License"); you may not use this file except in compliance with
 * the License.  A copy of the License is distributed with this work in the
 * LICENSE.txt file.  You may also obtain a copy of the License from
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */
package org.atilika.kuromoji.dict;

import java.io.Serializable;
import java.util.Arrays;
import java.util.EnumMap;

/**
 * Records which character class (alphabet, kanji, etc.) each {@code char}
 * value belongs to, together with the per-class invoke/group/length
 * definition.
 *
 * @author Masaru Hasegawa
 * @author Christian Moen
 */
public final class CharacterDefinition implements Serializable {

    private static final long serialVersionUID = -1436753619176638532L;

    /** Position of the "invoke" flag inside an invoke-definition array. */
    private static final int INVOKE_INDEX = 0;

    /** Position of the "group" flag inside an invoke-definition array. */
    private static final int GROUP_INDEX = 1;

    /** Character class for every possible {@code char} value, indexed by the char itself. */
    private final CharacterClass[] characterCategoryMap = new CharacterClass[65536];

    /** Per-class definition arrays laid out as { invoke, group, length }. */
    private final EnumMap<CharacterClass, int[]> invokeDefinitionMap =
            new EnumMap<CharacterClass, int[]>(CharacterClass.class); // invoke, group, length

    /**
     * Character classes a {@code char} can be assigned to.
     */
    public enum CharacterClass {
        NGRAM, DEFAULT, SPACE, SYMBOL, NUMERIC, ALPHA, CYRILLIC, GREEK, HIRAGANA, KATAKANA, KANJI, KANJINUMERIC;

        /**
         * Returns the ordinal of this enum constant (its position in the enum
         * declaration, where the first constant has ordinal zero); see
         * {@link #ordinal()}.
         *
         * @return the ordinal of this constant
         */
        public int getId() {
            return ordinal();
        }
    }

    /**
     * Creates a definition in which every character is mapped to
     * {@link CharacterClass#DEFAULT} and no invoke definitions are registered.
     */
    public CharacterDefinition() {
        Arrays.fill(characterCategoryMap, CharacterClass.DEFAULT);
    }

    /**
     * Returns the class of the given character as a numeric id.
     *
     * @param c character to look up
     * @return {@link CharacterClass#getId()} of the character's class
     */
    public int lookup(char c) {
        return characterCategoryMap[c].getId();
    }

    /**
     * Returns the class of the given character.
     *
     * @param c character to look up
     * @return the character's class
     */
    public CharacterClass getCharacterClass(char c) {
        return characterCategoryMap[c];
    }

    /**
     * Tells whether the "invoke" flag is set for the class of the given
     * character.
     *
     * @param c character to look up
     * @return {@code true} if the invoke value registered for the character's
     *         class is 1; {@code false} otherwise, including when no invoke
     *         definition has been registered for that class
     */
    public boolean isInvoke(char c) {
        return isDefinitionFlagSet(c, INVOKE_INDEX);
    }

    /**
     * Tells whether the "group" flag is set for the class of the given
     * character.
     *
     * @param c character to look up
     * @return {@code true} if the group value registered for the character's
     *         class is 1; {@code false} otherwise, including when no invoke
     *         definition has been registered for that class
     */
    public boolean isGroup(char c) {
        return isDefinitionFlagSet(c, GROUP_INDEX);
    }

    /**
     * Tells whether the given character is a kanji.
     *
     * @param c character to look up
     * @return {@code true} if the character's class is
     *         {@link CharacterClass#KANJI} or {@link CharacterClass#KANJINUMERIC}
     */
    public boolean isKanji(char c) {
        CharacterClass characterClass = characterCategoryMap[c];
        return characterClass == CharacterClass.KANJI
                || characterClass == CharacterClass.KANJINUMERIC;
    }

    /**
     * Put mapping from unicode code point to character class.
     *
     * @param codePoint
     *            code point; used directly as an index into a 65536-entry
     *            table
     * @param characterClassName
     *            character class name; when several space-separated names are
     *            given, only the first one is used
     * @throws ArrayIndexOutOfBoundsException
     *             if {@code codePoint} is negative or greater than 65535
     * @throws IllegalArgumentException
     *             if the name is not a {@link CharacterClass} constant
     */
    public void putCharacterCategory(int codePoint, String characterClassName) {
        // Use the first category class only.
        characterClassName = characterClassName.split(" ")[0];

        // Override Nakaguro
        if (codePoint == 0x30FB) {
            characterClassName = "SYMBOL";
        }
        characterCategoryMap[codePoint] = CharacterClass.valueOf(characterClassName);
    }

    /**
     * Registers the invoke definition for a character class, replacing any
     * definition previously registered for that class.
     *
     * @param characterClassName
     *            name of a {@link CharacterClass} constant
     * @param invoke
     *            invoke value; {@link #isInvoke(char)} reports {@code true}
     *            when this is 1
     * @param group
     *            group value; {@link #isGroup(char)} reports {@code true}
     *            when this is 1
     * @param length
     *            length value, stored alongside the two flags
     * @throws IllegalArgumentException
     *             if the name is not a {@link CharacterClass} constant
     */
    public void putInvokeDefinition(String characterClassName, int invoke, int group, int length) {
        CharacterClass characterClass = CharacterClass.valueOf(characterClassName);
        int[] values = { invoke, group, length };
        invokeDefinitionMap.put(characterClass, values);
    }

    /**
     * Checks one flag of the invoke definition registered for the class of
     * the given character.
     *
     * @param c character to look up
     * @param index position of the flag inside the definition array
     * @return {@code true} if a definition exists and its value at
     *         {@code index} is 1
     */
    private boolean isDefinitionFlagSet(char c, int index) {
        int[] invokeDefinition = invokeDefinitionMap.get(characterCategoryMap[c]);
        // A class without a registered definition is treated as "flag not set"
        // rather than failing with a NullPointerException.
        return invokeDefinition != null && invokeDefinition[index] == 1;
    }
}