/*-*
 * Copyright © 2010-2015 Atilika Inc. and contributors (see CONTRIBUTORS.md)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License. A copy of the
 * License is distributed with this work in the LICENSE.md file. You may
 * also obtain a copy of the License from
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.atilika.kuromoji.util;

import org.junit.Test;

import java.util.Arrays;

import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;

/**
 * Tests for {@link DictionaryEntryLineParser}: splitting comma-separated dictionary
 * entry lines into fields via {@code parseLine}, and the static {@code escape} /
 * {@code unescape} helpers for double-quoted values.
 */
public class DictionaryEntryLineParserTest {

    /** Parser under test; never reassigned, hence final. */
    private final DictionaryEntryLineParser parser = new DictionaryEntryLineParser();

    /** An unquoted line is expected to split on commas, keeping spaces inside fields. */
    @Test
    public void testTrivial() {
        assertArrayEquals(
            new String[] {"日本経済新聞", "日本 経済 新聞", "ニホン ケイザイ シンブン", "カスタム名詞"},
            parser.parseLine("日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞"));
    }

    /** Commas inside a double-quoted field are expected to stay in the field; the quotes are dropped. */
    @Test
    public void testQuotes() {
        assertArrayEquals(
            new String[] {"Java Platform, Standard Edition", "Java Platform, Standard Edition", "Java Platform, Standard Edition", "カスタム名詞"},
            parser.parseLine(
                "\"Java Platform, Standard Edition\",\"Java Platform, Standard Edition\",\"Java Platform, Standard Edition\",カスタム名詞"));
    }

    /** A doubled quote inside a quoted field is expected to yield one literal quote. */
    @Test
    public void testQuotedQuotes() {
        assertArrayEquals(
            new String[] {"Java \"Platform\"", "Java \"Platform\"", "Java \"Platform\"", "カスタム名詞"},
            parser.parseLine("\"Java \"\"Platform\"\"\",\"Java \"\"Platform\"\"\",\"Java \"\"Platform\"\"\",カスタム名詞"));
    }

    /** A quoted field consisting solely of an escaped quote is expected to parse to a single quote character. */
    @Test
    public void testEmptyQuotedQuotes() {
        assertArrayEquals(
            new String[] {"\"", "\"", "quote", "punctuation"},
            parser.parseLine("\"\"\"\",\"\"\"\",quote,punctuation"));
    }

    /** Quoted fields containing '#' are expected to come back verbatim without the quotes. */
    @Test
    public void testCSharp() {
        assertArrayEquals(
            new String[] {"C#", "C #", "シーシャープ", "プログラミング言語"},
            parser.parseLine("\"C#\",\"C #\",シーシャープ,プログラミング言語"));
    }

    /** A tab character inside a field is expected to be preserved rather than treated as a separator. */
    @Test
    public void testTab() {
        assertArrayEquals(
            new String[] {"A\tB", "A B", "A B", "tab"},
            parser.parseLine("A\tB,A B,A B,tab"));
    }

    /** Escaped quotes embedded in the middle of a quoted non-ASCII field are expected to be unescaped. */
    @Test
    public void testFrancoisWhiteBuffaloBota() {
        assertArrayEquals(
            new String[] {"フランソワ\"ザホワイトバッファロー\"ボタ", "フランソワ\"ザホワイトバッファロー\"ボタ", "フランソワ\"ザホワイトバッファロー\"ボタ", "名詞"},
            parser.parseLine(
                "\"フランソワ\"\"ザホワイトバッファロー\"\"ボタ\",\"フランソワ\"\"ザホワイトバッファロー\"\"ボタ\",\"フランソワ\"\"ザホワイトバッファロー\"\"ボタ\",名詞"));
    }

    /** A line containing one lone quote is expected to be rejected with a {@link RuntimeException}. */
    @Test(expected = RuntimeException.class)
    public void testSingleQuote() {
        parser.parseLine("this is an entry with \"unmatched quote");
    }

    /** A line containing three consecutive quotes and no closing quote is expected to be rejected with a {@link RuntimeException}. */
    @Test(expected = RuntimeException.class)
    public void testUnmatchedQuote() {
        parser.parseLine("this is an entry with \"\"\"unmatched quote");
    }

    /** {@code escape} is expected to quote the value and double its quotes; {@code unescape} is expected to invert it. */
    @Test
    public void testEscapeRoundTrip() {
        String original = "3,\"14";

        assertEquals("\"3,\"\"14\"", DictionaryEntryLineParser.escape(original));
        assertEquals(original, DictionaryEntryLineParser.unescape(DictionaryEntryLineParser.escape(original)));
    }

    /** Pins the {@code unescape} results for values made of progressively longer runs of quotes. */
    @Test
    public void testUnescape() {
        assertEquals("A", DictionaryEntryLineParser.unescape("\"A\""));
        assertEquals("\"A\"", DictionaryEntryLineParser.unescape("\"\"\"A\"\"\""));

        assertEquals("\"", DictionaryEntryLineParser.unescape("\"\"\"\""));
        assertEquals("\"\"", DictionaryEntryLineParser.unescape("\"\"\"\"\"\""));
        assertEquals("\"\"\"", DictionaryEntryLineParser.unescape("\"\"\"\"\"\"\"\""));
        assertEquals("\"\"\"\"\"", DictionaryEntryLineParser.unescape("\"\"\"\"\"\"\"\"\"\"\"\""));
    }

    // TODO: these tests should be checked, right now they are documenting what is happening.

    /** Documents the current field split of a full 13-column dictionary entry line. */
    @Test
    public void testParseInputString() {
        String input = "日本経済新聞,1292,1292,4980,名詞,固有名詞,組織,*,*,*,日本経済新聞,ニホンケイザイシンブン,ニホンケイザイシンブン";
        String expected = Arrays.deepToString(new String[] {"日本経済新聞", "1292", "1292", "4980",
            "名詞", "固有名詞", "組織", "*", "*", "*", "日本経済新聞", "ニホンケイザイシンブン", "ニホンケイザイシンブン"});
        assertEquals(expected, given(input));
    }

    /** Documents the current field split when one column is a quoted value containing a comma. */
    @Test
    public void testParseInputStringWithQuotes() {
        String input = "日本経済新聞,1292,1292,4980,名詞,固有名詞,組織,*,*,\"1,0\",日本経済新聞,ニホンケイザイシンブン,ニホンケイザイシンブン";
        String expected = Arrays.deepToString(new String[] {"日本経済新聞", "1292", "1292", "4980",
            "名詞", "固有名詞", "組織", "*", "*", "1,0", "日本経済新聞", "ニホンケイザイシンブン", "ニホンケイザイシンブン"});
        assertEquals(expected, given(input));
    }

    /** Documents the current result of escaping a whole line that itself contains a quoted column. */
    @Test
    public void testQuoteEscape() {
        String input = "日本経済新聞,1292,1292,4980,名詞,固有名詞,組織,*,*,\"1,0\",日本経済新聞,ニホンケイザイシンブン,ニホンケイザイシンブン";
        String expected = "\"日本経済新聞,1292,1292,4980,名詞,固有名詞,組織,*,*,\"\"1,0\"\",日本経済新聞,ニホンケイザイシンブン,ニホンケイザイシンブン\"";
        // escape is static (see testEscapeRoundTrip); qualify it with the class, not the instance.
        assertEquals(expected, DictionaryEntryLineParser.escape(input));
    }

    /**
     * Parses {@code input} and renders the resulting fields as a single string.
     *
     * @param input raw dictionary entry line
     * @return {@link Arrays#deepToString(Object[])} of the parsed fields
     */
    private String given(String input) {
        return Arrays.deepToString(parser.parseLine(input));
    }
}