/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.addthis.hydra.data.util;
import java.util.List;
import org.junit.Before;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
/**
 * Unit tests for {@link Tokenizer}, exercising separator handling, grouping
 * delimiters and the packing flag through {@code tokenize(String)}.
 */
public class TestTokenizer {

    /**
     * Pairs one input line with the exact token sequence the test expects
     * back from {@code Tokenizer.tokenize}.
     *
     * Declared static because it never touches the enclosing test instance.
     */
    private static final class Tester {

        private final String line;
        private final String[] expected;

        /**
         * @param line   the raw input handed to the tokenizer
         * @param tokens the tokens expected back, in order
         */
        Tester(String line, String[] tokens) {
            this.line = line;
            this.expected = tokens;
        }

        /**
         * Tokenizes the stored line with {@code t} and asserts that the result
         * has the expected size and the expected token at every position.
         *
         * @param t the tokenizer configuration under test
         */
        public void test(Tokenizer t) {
            List<String> actual = t.tokenize(line);
            // Fail with a readable message rather than an NPE on the size() call below.
            assertNotNull("tokenize returned null for line=(" + line + ")", actual);
            assertEquals("token count for line=(" + line + ") result=" + actual,
                    expected.length, actual.size());
            for (int i = 0; i < expected.length; i++) {
                // assertEquals is null-safe and reports both values on mismatch.
                assertEquals("token " + i + " for line=(" + line + ") result=" + actual,
                        expected[i], actual.get(i));
            }
        }
    }

    /**
     * Table-style checks for two configurations: separators {@code ",;"} with
     * packing disabled (empty tokens are expected to be kept), then separator
     * {@code ","} with packing enabled (empty tokens are expected to be dropped).
     * Both use {@code '}, {@code []} and {@code {}} as grouping delimiters.
     */
    @Test
    public void testTokenizer1() {
        Tokenizer t = new Tokenizer().setSeparator(",;").setGrouping(new String[]{"'", "[]", "{}"}).setPacking(false);
        new Tester("a1", new String[]{"a1"}).test(t);
        new Tester("a1,", new String[]{"a1", ""}).test(t);
        new Tester("a1,c1,", new String[]{"a1", "c1", ""}).test(t);
        new Tester("a1,,c1", new String[]{"a1", "", "c1"}).test(t);
        new Tester("a1,b1,c1", new String[]{"a1", "b1", "c1"}).test(t);
        new Tester("'abc','def','g h i'", new String[]{"abc", "def", "g h i"}).test(t);
        new Tester(",,", new String[]{"", "", ""}).test(t);
        // A backslash-escaped quote is expected to come back as a literal quote token.
        new Tester(";\\';abc def; ghi", new String[]{"", "'", "abc def", " ghi"}).test(t);
        // Separators and nested group characters inside a group are expected to be kept verbatim.
        new Tester("[a b c],def,{g,[h] i},jkl", new String[]{"a b c", "def", "g,[h] i", "jkl"}).test(t);

        t = new Tokenizer().setSeparator(",").setGrouping(new String[]{"'", "[]", "{}"}).setPacking(true);
        new Tester("a1", new String[]{"a1"}).test(t);
        new Tester("a1,", new String[]{"a1"}).test(t);
        new Tester("a1,c1,", new String[]{"a1", "c1"}).test(t);
        new Tester("a1,,c1", new String[]{"a1", "c1"}).test(t);
    }

    /** Tokenizer under test; rebuilt before every test by {@link #setUp()}. */
    protected Tokenizer tokens;

    /** Result of the most recent {@code tokenize} call in a test. */
    protected List<String> line;

    /**
     * Builds the default fixture: pipe and tab as separators, {@code []} and
     * double quote as grouping delimiters, packing disabled.
     */
    @Before
    public void setUp() throws Exception {
        tokens = new Tokenizer().setSeparator("|\t").setGrouping(new String[]{"[]", "\""}).setPacking(false);
    }

    /** Expects {@code null} for both a null input and a single-space input. */
    @Test
    public void testNull() throws Exception {
        line = tokens.tokenize(null);
        assertNull(line);
        line = tokens.tokenize(" ");
        assertNull(line);
    }

    /** Expects three tab-separated fields to come back as three tokens. */
    @Test
    public void testSuccess() throws Exception {
        line = tokens.tokenize("a\tb\tc");
        assertNotNull(line);
        assertEquals(3, line.size());
        assertEquals("a", line.get(0));
        assertEquals("b", line.get(1));
        assertEquals("c", line.get(2));
    }

    /** Expects two adjacent separators to produce an empty middle token when packing is off. */
    @Test
    public void testEmptyFields() throws Exception {
        line = tokens.tokenize("a\t\tc");
        assertNotNull(line);
        assertEquals(3, line.size());
        assertEquals("a", line.get(0));
        assertEquals("", line.get(1));
        assertEquals("c", line.get(2));
    }

    /** Expects a tab inside a double-quoted group to be kept and the quotes removed. */
    @Test
    public void testGroup1() throws Exception {
        line = tokens.tokenize("a|\"b\tb\"|c");
        assertNotNull(line);
        assertEquals(3, line.size());
        assertEquals("a", line.get(0));
        assertEquals("b\tb", line.get(1));
        assertEquals("c", line.get(2));
    }

    /** Expects a tab inside a bracketed group to be kept and the brackets removed. */
    @Test
    public void testGroup2() throws Exception {
        line = tokens.tokenize("a|[b\tb]|c");
        assertNotNull(line);
        assertEquals(3, line.size());
        assertEquals("a", line.get(0));
        assertEquals("b\tb", line.get(1));
        assertEquals("c", line.get(2));
    }

    /** Expects two adjacent separators to yield no empty token when packing is on. */
    @Test
    public void testPack() throws Exception {
        tokens = new Tokenizer().setSeparator("\t|").setGrouping(new String[]{"[]", "\""}).setPacking(true);
        line = tokens.tokenize("a||c");
        assertNotNull(line);
        assertEquals("line=" + line, 2, line.size());
        assertEquals("a", line.get(0));
        assertEquals("c", line.get(1));
    }
}