/*-*
 * Copyright © 2010-2015 Atilika Inc. and contributors (see CONTRIBUTORS.md)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.  A copy of the
 * License is distributed with this work in the LICENSE.md file.  You may
 * also obtain a copy of the License from
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.atilika.kuromoji.compile;

import com.atilika.kuromoji.dict.CharacterDefinitions;
import com.atilika.kuromoji.io.IntegerArrayIO;
import com.atilika.kuromoji.io.StringArrayIO;
import org.junit.Before;
import org.junit.Test;

import java.io.*;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.junit.Assert.*;

/**
 * Round-trip test for {@link CharacterDefinitionsCompiler}: the {@code char.def}
 * classpath resource is compiled into a temporary file, the file is read back
 * into a {@link CharacterDefinitions} instance, and the category lookups of that
 * instance are checked.
 */
public class CharacterDefinitionsCompilerTest {

    /** Temporary file that receives the compiled character definitions. */
    private File charDef;

    /** Category id to category name, i.e. the compiler's category map inverted. */
    private Map<Integer, String> categoryIdMap;

    /** Definitions rebuilt from the compiled file; the object under test. */
    private CharacterDefinitions characterDefinition;

    /**
     * Compiles {@code char.def} into a temporary file and loads the result.
     *
     * @throws IOException if the temporary file cannot be created, written or read
     */
    @Before
    public void setUp() throws IOException {
        charDef = File.createTempFile("kuromoji-chardef-", ".bin");
        charDef.deleteOnExit();

        InputStream charDefResource =
            CharacterDefinitionsCompilerTest.class.getClassLoader().getResourceAsStream("char.def");
        // getResourceAsStream() returns null for a missing resource; fail with a clear message.
        assertNotNull("char.def was not found on the test classpath", charDefResource);

        // Closing the output here guarantees the compiled bytes are flushed before
        // they are read back, whether or not compile() closes the stream itself.
        try (BufferedInputStream definitionInput = new BufferedInputStream(charDefResource);
             BufferedOutputStream compiledOutput = new BufferedOutputStream(new FileOutputStream(charDef))) {
            CharacterDefinitionsCompiler compiler = new CharacterDefinitionsCompiler(compiledOutput);
            compiler.readCharacterDefinition(definitionInput, "euc-jp");
            categoryIdMap = invert(compiler.makeCharacterCategoryMap());
            compiler.compile();
        }

        // The three sections are read in this order: definitions, mappings, symbols.
        try (InputStream input = new BufferedInputStream(new FileInputStream(charDef))) {
            int[][] definitions = IntegerArrayIO.readSparseArray2D(input);
            int[][] mappings = IntegerArrayIO.readSparseArray2D(input);
            String[] symbols = StringArrayIO.readArray(input);
            characterDefinition = new CharacterDefinitions(definitions, mappings, symbols);
        }
    }

    @Test
    public void testCharacterCategories() throws IOException {
        // Non-defined characters get the default definition
        assertCharacterCategories(characterDefinition, '\u0000', "DEFAULT");
        assertCharacterCategories(characterDefinition, '〇', "SYMBOL", "KANJI", "KANJINUMERIC");
        assertCharacterCategories(characterDefinition, ' ', "SPACE");
        assertCharacterCategories(characterDefinition, '。', "SYMBOL");
        // NOTE(review): the next two assertions look identical here; one of them may
        // have been meant to use a different (e.g. full-width) character - confirm.
        assertCharacterCategories(characterDefinition, 'A', "ALPHA");
        assertCharacterCategories(characterDefinition, 'A', "ALPHA");
    }

    @Test
    public void testAddCategoryDefinitions() {
        assertCharacterCategories(characterDefinition, '・', "KATAKANA");
        characterDefinition.setCategories('・', new String[] {"SYMBOL", "KATAKANA"});
        // The helper ignores order, so both orderings of the expected names must pass.
        assertCharacterCategories(characterDefinition, '・', "KATAKANA", "SYMBOL");
        assertCharacterCategories(characterDefinition, '・', "SYMBOL", "KATAKANA");
    }

    /**
     * Asserts that looking up {@code c} yields exactly as many categories as
     * expected and that every returned category is one of the expected names
     * (order is not significant).
     *
     * @param characterDefinition definitions to query
     * @param c character to look up
     * @param categories expected category names; must be {@code null} when the
     *                   lookup itself returns {@code null}
     */
    public void assertCharacterCategories(CharacterDefinitions characterDefinition, char c, String... categories) {
        int[] categoryIds = characterDefinition.lookupCategories(c);
        String codePoint = String.format("U+%04X", (int) c);

        if (categoryIds == null) {
            assertNull("No categories returned for " + codePoint, categories);
            return;
        }

        assertEquals("Number of categories for " + codePoint, categories.length, categoryIds.length);

        List<String> expected = Arrays.asList(categories);
        for (int categoryId : categoryIds) {
            String category = categoryIdMap.get(categoryId);
            assertTrue(
                "Unexpected category " + category + " (id " + categoryId + ") for " + codePoint
                    + ", expected one of " + expected,
                expected.contains(category)
            );
        }
    }

    /**
     * Returns a new map with the keys and values of {@code map} swapped. If
     * several keys share a value, the one iterated last wins.
     *
     * @param map category name to category id
     * @return category id to category name
     */
    private static Map<Integer, String> invert(Map<String, Integer> map) {
        Map<Integer, String> inverted = new HashMap<>();
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            inverted.put(entry.getValue(), entry.getKey());
        }
        return inverted;
    }
}