/*-*
* Copyright © 2010-2015 Atilika Inc. and contributors (see CONTRIBUTORS.md)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. A copy of the
* License is distributed with this work in the LICENSE.md file. You may
* also obtain a copy of the License from
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.atilika.kuromoji.dict;
import org.junit.Test;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.List;
import static org.junit.Assert.assertEquals;
/**
 * Tests for {@link UserDictionary}: locating user dictionary entries inside a text and
 * rendering the feature string of an entry for several feature layouts.
 *
 * <p>Judging from the expected feature strings below, the three integer constructor
 * arguments appear to be the total number of features, the index of the reading
 * feature and the index of the part-of-speech feature. NOTE(review): confirm against
 * the {@code UserDictionary} constructor.
 */
public class UserDictionaryTest {

    /** Classpath resource holding the user dictionary fixture shared by most tests. */
    private static final String USER_DICTIONARY_RESOURCE = "userdict.txt";

    /**
     * Word id queried by the feature tests.
     * NOTE(review): presumably the id given to the first user dictionary entry - confirm.
     */
    private static final int WORD_ID = 100000000;

    /** Sample sentence containing アクロ starting at index 4 and クロ starting at index 5. */
    private static final String ACROPOLIS_TEXT = "この丘はアクロポリスと呼ばれている";

    /** Checks the number, start offsets and lengths of the matches found in a sentence. */
    @Test
    public void testLookup() throws IOException {
        UserDictionary dictionary = loadUserDictionary(9, 7, 0);
        List<UserDictionary.UserDictionaryMatch> matches =
            dictionary.findUserDictionaryMatches("関西国際空港に行った");

        // Length should be three 関西, 国際, 空港
        assertEquals(3, matches.size());

        // Test positions
        assertEquals(0, matches.get(0).getMatchStartIndex()); // index of 関西
        assertEquals(2, matches.get(1).getMatchStartIndex()); // index of 国際
        assertEquals(4, matches.get(2).getMatchStartIndex()); // index of 空港

        // Test lengths
        assertEquals(2, matches.get(0).getMatchLength()); // length of 関西
        assertEquals(2, matches.get(1).getMatchLength()); // length of 国際
        assertEquals(2, matches.get(2).getMatchLength()); // length of 空港

        // The same compound appearing twice yields twice as many matches
        List<UserDictionary.UserDictionaryMatch> matches2 =
            dictionary.findUserDictionaryMatches("関西国際空港と関西国際空港に行った");
        assertEquals(6, matches2.size());
    }

    /** Nine features, reading at index 7. */
    @Test
    public void testIpadicFeatures() throws IOException {
        UserDictionary dictionary = loadUserDictionary(9, 7, 0);
        assertEquals("カスタム名詞,*,*,*,*,*,*,ニホン,*", dictionary.getAllFeatures(WORD_ID));
    }

    /** Seven features, reading at index 5. */
    @Test
    public void testJumanDicFeatures() throws IOException {
        UserDictionary dictionary = loadUserDictionary(7, 5, 0);
        assertEquals("カスタム名詞,*,*,*,*,ニホン,*", dictionary.getAllFeatures(WORD_ID));
    }

    /** Eleven features, reading at index 7. */
    @Test
    public void testNaistJDicFeatures() throws IOException {
        UserDictionary dictionary = loadUserDictionary(11, 7, 0);
        // This is a sample naist-jdic entry:
        //
        //   葦登,1358,1358,4975,名詞,一般,*,*,*,*,葦登,ヨシノボリ,ヨシノボリ,,
        //
        // How should we treat the last features in the user dictionary?
        // They seem empty, but we return * for them...
        assertEquals("カスタム名詞,*,*,*,*,*,*,ニホン,*,*,*", dictionary.getAllFeatures(WORD_ID));
    }

    /** Thirteen features, reading at index 7. */
    @Test
    public void testUniDicFeatures() throws IOException {
        UserDictionary dictionary = loadUserDictionary(13, 7, 0);
        assertEquals("カスタム名詞,*,*,*,*,*,*,ニホン,*,*,*,*,*", dictionary.getAllFeatures(WORD_ID));
    }

    /** Twenty-two features, reading at index 13. */
    @Test
    public void testUniDicExtendedFeatures() throws IOException {
        UserDictionary dictionary = loadUserDictionary(22, 13, 0);
        assertEquals(
            "カスタム名詞,*,*,*,*,*,*,*,*,*,*,*,*,ニホン,*,*,*,*,*,*,*,*",
            dictionary.getAllFeatures(WORD_ID));
    }

    /** A single entry that occurs in the middle of a longer katakana word is still matched. */
    @Test
    public void testUserDictionaryEntries() throws IOException {
        UserDictionary dictionary = parseUserDictionary("クロ,クロ,クロ,カスタム名詞");
        List<UserDictionary.UserDictionaryMatch> matches =
            dictionary.findUserDictionaryMatches(ACROPOLIS_TEXT);

        assertEquals(1, matches.size());
        assertEquals(5, matches.get(0).getMatchStartIndex());
    }

    /** Two entries whose surface forms overlap in the text are both reported. */
    @Test
    public void testOverlappingUserDictionaryEntries() throws IOException {
        String userDictionaryEntries = ""
            + "クロ,クロ,クロ,カスタム名詞\n"
            + "アクロ,アクロ,アクロ,カスタム名詞";
        UserDictionary dictionary = parseUserDictionary(userDictionaryEntries);
        List<UserDictionary.UserDictionaryMatch> positions =
            dictionary.findUserDictionaryMatches(ACROPOLIS_TEXT);

        // Check the size first so that an empty result is reported as an assertion
        // failure rather than an IndexOutOfBoundsException from get(0).
        assertEquals(2, positions.size());
        assertEquals(4, positions.get(0).getMatchStartIndex());
    }

    /**
     * Builds a {@link UserDictionary} from the shared classpath fixture and closes the
     * underlying stream afterwards.
     *
     * @param totalFeatures first integer constructor argument
     * @param readingFeature second integer constructor argument
     * @param partOfSpeechFeature third integer constructor argument
     * @return the dictionary built from the fixture
     * @throws IOException if the fixture is missing or cannot be read
     */
    private UserDictionary loadUserDictionary(int totalFeatures, int readingFeature, int partOfSpeechFeature)
        throws IOException {
        // NOTE(review): closing right after construction assumes the constructor reads
        // the stream eagerly - confirm against UserDictionary.
        try (InputStream input = getResource(USER_DICTIONARY_RESOURCE)) {
            if (input == null) {
                // getResourceAsStream signals a missing resource with null
                throw new IOException("Test resource not found on classpath: " + USER_DICTIONARY_RESOURCE);
            }
            return new UserDictionary(input, totalFeatures, readingFeature, partOfSpeechFeature);
        }
    }

    /**
     * Builds a {@link UserDictionary} from in-memory entries, using the same
     * {@code 9, 7, 0} arguments as the IPADIC feature test.
     *
     * @param entries user dictionary lines separated by {@code \n}
     * @return the dictionary built from {@code entries}
     * @throws IOException if the dictionary cannot be read
     */
    private static UserDictionary parseUserDictionary(String entries) throws IOException {
        byte[] encoded = entries.getBytes(StandardCharsets.UTF_8);
        return new UserDictionary(new ByteArrayInputStream(encoded), 9, 7, 0);
    }

    /**
     * Opens a classpath resource through this class's class loader.
     *
     * @param resource resource name
     * @return the opened stream, or {@code null} if the resource does not exist
     */
    private InputStream getResource(String resource) {
        return this.getClass().getClassLoader().getResourceAsStream(resource);
    }
}