/*-*
* Copyright © 2010-2015 Atilika Inc. and contributors (see CONTRIBUTORS.md)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. A copy of the
* License is distributed with this work in the LICENSE.md file. You may
* also obtain a copy of the License from
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.atilika.kuromoji.trie;
import org.junit.Test;
import java.util.*;
import static org.junit.Assert.*;
public class PatriciaTrieTest {
@Test
public void testRomaji() {
PatriciaTrie<String> trie = new PatriciaTrie<>();
trie.put("a", "a");
trie.put("b", "b");
trie.put("ab", "ab");
trie.put("bac", "bac");
assertEquals("a", trie.get("a"));
assertEquals("bac", trie.get("bac"));
assertEquals("b", trie.get("b"));
assertEquals("ab", trie.get("ab"));
assertNull(trie.get("nonexistant"));
}
@Test
public void testJapanese() {
PatriciaTrie<String> trie = new PatriciaTrie<>();
trie.put("寿司", "sushi");
trie.put("刺身", "sashimi");
assertEquals("sushi", trie.get("寿司"));
assertEquals("sashimi", trie.get("刺身"));
}
@Test(expected = NullPointerException.class)
public void testNull() {
PatriciaTrie<String> trie = new PatriciaTrie<>();
trie.put("null", null);
assertEquals(null, trie.get("null"));
trie.put(null, "null"); // Throws NullPointerException
assertTrue(false);
}
@Test
public void testRandom() {
// Generate random strings
List<String> randoms = new ArrayList<>();
for (int i = 0; i < 100000; i++) {
randoms.add(UUID.randomUUID().toString());
}
// Insert them
PatriciaTrie<String> trie = new PatriciaTrie<>();
for (String random : randoms) {
trie.put(random, random);
}
// Get and test them
for (String random : randoms) {
assertEquals(random, trie.get(random));
assertTrue(trie.containsKey(random));
}
}
@Test
public void testPutAll() {
// Generate random strings
Map<String, String> randoms = new HashMap<>();
for (int i = 0; i < 10000; i++) {
String random = UUID.randomUUID().toString();
randoms.put(random, random);
}
// Insert them
PatriciaTrie<String> trie = new PatriciaTrie<>();
trie.putAll(randoms);
// Get and test them
for (Map.Entry<String, String> random : randoms.entrySet()) {
assertEquals(random.getValue(), trie.get(random.getKey()));
assertTrue(trie.containsKey(random.getKey()));
}
}
@Test
public void testLongString() {
String longMovieTitle = "マルキ・ド・サドの演出のもとにシャラントン精神病院患者たちによって演じられたジャン=ポール・マラーの迫害と暗殺";
PatriciaTrie<String> trie = new PatriciaTrie<>();
trie.put(longMovieTitle, "found it");
assertEquals("found it", trie.get(longMovieTitle));
}
@Test(expected = ClassCastException.class)
public void testUnsupportedType() {
PatriciaTrie<String> trie = new PatriciaTrie<>();
trie.put("hello", "world");
assertTrue(trie.containsKey("hello"));
trie.containsKey(new Integer(1));
assertTrue(false);
}
@Test
public void testEmpty() {
PatriciaTrie<String> trie = new PatriciaTrie<>();
assertTrue(trie.isEmpty());
trie.put("hello", "world");
assertFalse(trie.isEmpty());
}
@Test
public void testEmptyInsert() {
PatriciaTrie<String> trie = new PatriciaTrie<>();
assertTrue(trie.isEmpty());
trie.put("", "i am empty bottle of beer!");
assertFalse(trie.isEmpty());
assertEquals("i am empty bottle of beer!", trie.get(""));
trie.put("", "...and i'm an empty bottle of sake");
assertEquals("...and i'm an empty bottle of sake", trie.get(""));
}
@Test
public void testClear() {
PatriciaTrie<String> trie = new PatriciaTrie<>();
assertTrue(trie.isEmpty());
assertEquals(0, trie.size());
trie.put("hello", "world");
trie.put("world", "hello");
assertFalse(trie.isEmpty());
trie.clear();
assertTrue(trie.isEmpty());
assertEquals(0, trie.size());
}
@Test
public void testNaiveCollections() {
PatriciaTrie<String> trie = new PatriciaTrie<>();
trie.put("寿司", "sushi");
trie.put("刺身", "sashimi");
trie.put("そば", "soba");
trie.put("ラーメン", "ramen");
// Test keys
assertEquals(4, trie.keySet().size());
assertTrue(trie.keySet().containsAll(Arrays.asList(new String[] {"寿司", "そば", "ラーメン", "刺身"})));
// Test values
assertEquals(4, trie.values().size());
assertTrue(trie.values().containsAll(Arrays.asList(new String[] {"sushi", "soba", "ramen", "sashimi"})));
}
@Test
public void testEscapeChars() {
PatriciaTrie<String> trie = new PatriciaTrie<>();
trie.put("new", "no error");
assertFalse(trie.containsKeyPrefix("new\na"));
assertFalse(trie.containsKeyPrefix("\n"));
assertFalse(trie.containsKeyPrefix("\t"));
}
@Test
public void testPrefix() {
PatriciaTrie<String> trie = new PatriciaTrie<>();
String[] tokyoPlaces = new String[] {"Hachiōji", "Tachikawa", "Musashino", "Mitaka", "Ōme", "Fuchū", "Akishima",
"Chōfu", "Machida", "Koganei", "Kodaira", "Hino", "Higashimurayama", "Kokubunji", "Kunitachi",
"Fussa", "Komae", "Higashiyamato", "Kiyose", "Higashikurume", "Musashimurayama", "Tama",
"Inagi", "Hamura", "Akiruno", "Nishitōkyō"};
for (int i = 0; i < tokyoPlaces.length; i++) {
trie.put(tokyoPlaces[i], tokyoPlaces[i]);
}
// Prefixes of Kodaira
assertTrue(trie.containsKeyPrefix("K"));
assertTrue(trie.containsKeyPrefix("Ko"));
assertTrue(trie.containsKeyPrefix("Kod"));
assertTrue(trie.containsKeyPrefix("Koda"));
assertTrue(trie.containsKeyPrefix("Kodai"));
assertTrue(trie.containsKeyPrefix("Kodair"));
assertTrue(trie.containsKeyPrefix("Kodaira"));
assertFalse(trie.containsKeyPrefix("Kodaira "));
assertFalse(trie.containsKeyPrefix("Kodaira "));
assertTrue(trie.get("Kodaira") != null);
// Prefixes of Fussa
assertFalse(trie.containsKeyPrefix("fu"));
assertTrue(trie.containsKeyPrefix("Fu"));
assertTrue(trie.containsKeyPrefix("Fus"));
}
@Test
public void testTextScan() {
PatriciaTrie<String> trie = new PatriciaTrie<>();
String[] terms = new String[] {"お寿司", "sushi", "美味しい", "tasty", "日本", "japan", "だと思います", "i think", "料理",
"food", "日本料理", "japanese food", "一番", "first and foremost",};
for (int i = 0; i < terms.length; i += 2) {
trie.put(terms[i], terms[i + 1]);
}
String text = "日本料理の中で、一番美味しいのはお寿司だと思います。すぐ日本に帰りたいです。";
StringBuilder builder = new StringBuilder();
int startIndex = 0;
while (startIndex < text.length()) {
int matchLength = 0;
while (trie.containsKeyPrefix(text.substring(startIndex, startIndex + matchLength + 1))) {
matchLength++;
}
if (matchLength > 0) {
String match = text.substring(startIndex, startIndex + matchLength);
builder.append("[");
builder.append(match);
builder.append("|");
builder.append(trie.get(match));
builder.append("]");
startIndex += matchLength;
} else {
builder.append(text.charAt(startIndex));
startIndex++;
}
}
assertEquals("[日本料理|japanese food]の中で、[一番|first and foremost][美味しい|tasty]のは[お寿司|sushi][だと思います|i think]。すぐ[日本|japan]に帰りたいです。",
builder.toString());
}
@Test
public void testMultiThreadedTrie() throws InterruptedException {
final int numThreads = 10;
final int perThreadRuns = 500000;
final int keySetSize = 1000;
final List<Thread> threads = new ArrayList<>();
final List<String> randoms = new ArrayList<>();
final PatriciaTrie<Integer> trie = new PatriciaTrie<>();
for (int i = 0; i < keySetSize; i++) {
String random = UUID.randomUUID().toString();
randoms.add(random);
trie.put(random, i);
}
for (int i = 0; i < numThreads; i++) {
Thread thread = new Thread(new Runnable() {
@Override
public void run() {
for (int run = 0; run < perThreadRuns; run++) {
int randomIndex = (int) (Math.random() * randoms.size());
String random = randoms.get(randomIndex);
// Test retrieve
assertEquals(randomIndex, (int) trie.get(random));
int randomPrefixLength = (int) (Math.random() * random.length());
// Test random prefix length prefix match
assertTrue(trie.containsKeyPrefix(random.substring(0, randomPrefixLength)));
}
}
});
threads.add(thread);
thread.start();
}
for (Thread thread : threads) {
thread.join();
}
assertTrue(true);
}
@Test
public void testSimpleKey() {
PatriciaTrie.KeyMapper<String> keyMapper = new PatriciaTrie.StringKeyMapper();
String key = "abc";
// a = U+0061 = 0000 0000 0110 0001
assertFalse(keyMapper.isSet(0, key));
assertFalse(keyMapper.isSet(1, key));
assertFalse(keyMapper.isSet(2, key));
assertFalse(keyMapper.isSet(3, key));
assertFalse(keyMapper.isSet(4, key));
assertFalse(keyMapper.isSet(5, key));
assertFalse(keyMapper.isSet(6, key));
assertFalse(keyMapper.isSet(7, key));
assertFalse(keyMapper.isSet(8, key));
assertTrue(keyMapper.isSet(9, key));
assertTrue(keyMapper.isSet(10, key));
assertFalse(keyMapper.isSet(11, key));
assertFalse(keyMapper.isSet(12, key));
assertFalse(keyMapper.isSet(13, key));
assertFalse(keyMapper.isSet(14, key));
assertTrue(keyMapper.isSet(15, key));
// b = U+0062 = 0000 0000 0110 0010
assertFalse(keyMapper.isSet(16, key));
assertFalse(keyMapper.isSet(17, key));
assertFalse(keyMapper.isSet(18, key));
assertFalse(keyMapper.isSet(19, key));
assertFalse(keyMapper.isSet(20, key));
assertFalse(keyMapper.isSet(21, key));
assertFalse(keyMapper.isSet(22, key));
assertFalse(keyMapper.isSet(23, key));
assertFalse(keyMapper.isSet(24, key));
assertTrue(keyMapper.isSet(25, key));
assertTrue(keyMapper.isSet(26, key));
assertFalse(keyMapper.isSet(27, key));
assertFalse(keyMapper.isSet(28, key));
assertFalse(keyMapper.isSet(29, key));
assertTrue(keyMapper.isSet(30, key));
assertFalse(keyMapper.isSet(31, key));
// c = U+0063 = 0000 0000 0110 0011
assertFalse(keyMapper.isSet(32, key));
assertFalse(keyMapper.isSet(33, key));
assertFalse(keyMapper.isSet(34, key));
assertFalse(keyMapper.isSet(35, key));
assertFalse(keyMapper.isSet(36, key));
assertFalse(keyMapper.isSet(37, key));
assertFalse(keyMapper.isSet(38, key));
assertFalse(keyMapper.isSet(39, key));
assertFalse(keyMapper.isSet(40, key));
assertTrue(keyMapper.isSet(41, key));
assertTrue(keyMapper.isSet(42, key));
assertFalse(keyMapper.isSet(43, key));
assertFalse(keyMapper.isSet(44, key));
assertFalse(keyMapper.isSet(45, key));
assertTrue(keyMapper.isSet(46, key));
assertTrue(keyMapper.isSet(47, key));
}
@Test
public void testNullKeyMap() {
PatriciaTrie.KeyMapper<String> keyMapper = new PatriciaTrie.StringKeyMapper();
assertFalse(keyMapper.isSet(0, null));
assertFalse(keyMapper.isSet(100, null));
assertFalse(keyMapper.isSet(1000, null));
}
@Test
public void testEmptyKeyMap() {
PatriciaTrie.KeyMapper<String> keyMapper = new PatriciaTrie.StringKeyMapper();
// Note: this is a special case handled in PatriciaTrie
assertTrue(keyMapper.isSet(0, ""));
assertTrue(keyMapper.isSet(100, ""));
assertTrue(keyMapper.isSet(1000, ""));
}
@Test
public void testOverflowBit() {
PatriciaTrie.KeyMapper<String> keyMapper = new PatriciaTrie.StringKeyMapper();
String key = "a";
// a = U+0061 = 0000 0000 0110 0001
assertFalse(keyMapper.isSet(0, key));
assertFalse(keyMapper.isSet(1, key));
assertFalse(keyMapper.isSet(2, key));
assertFalse(keyMapper.isSet(3, key));
assertFalse(keyMapper.isSet(4, key));
assertFalse(keyMapper.isSet(5, key));
assertFalse(keyMapper.isSet(6, key));
assertFalse(keyMapper.isSet(7, key));
assertFalse(keyMapper.isSet(8, key));
assertTrue(keyMapper.isSet(9, key));
assertTrue(keyMapper.isSet(10, key));
assertFalse(keyMapper.isSet(11, key));
assertFalse(keyMapper.isSet(12, key));
assertFalse(keyMapper.isSet(13, key));
assertFalse(keyMapper.isSet(14, key));
assertTrue(keyMapper.isSet(15, key));
// Asking for overflow bits should return 1
assertTrue(keyMapper.isSet(16, key));
assertTrue(keyMapper.isSet(17, key));
assertTrue(keyMapper.isSet(100, key));
}
}