/***************************************************************************
* Copyright 2010 Global Biodiversity Information Facility Secretariat
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
***************************************************************************/
package org.gbif.io;
import java.io.IOException;
import org.apache.commons.lang3.text.StrTokenizer;
import org.junit.Test;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNull;
public class StrTokenizerTest {
@Test
public void testCsvQuoted() throws IOException {
StrTokenizer tokenizer = new StrTokenizer();
tokenizer.setDelimiterString(",");
tokenizer.setQuoteChar('"');
tokenizer.setEmptyTokenAsNull(true);
tokenizer.setIgnoreEmptyTokens(false);
tokenizer.reset("121,432423, 9099053,\"Frieda karla L.,DC.\",Ahrens");
String[] columns = tokenizer.getTokenArray();
assertEquals("121", columns[0]);
assertEquals("432423", columns[1]);
assertEquals(" 9099053", columns[2]);
assertEquals("Frieda karla L.,DC.", columns[3]);
assertEquals("Ahrens", columns[4]);
tokenizer.reset(" ,4321");
columns = tokenizer.getTokenArray();
assertEquals(" ", columns[0]);
assertEquals("4321", columns[1]);
tokenizer.reset(" ,,,,zzz ");
columns = tokenizer.getTokenArray();
assertEquals(" ", columns[0]);
assertNull(columns[1]);
assertNull(columns[2]);
assertNull(columns[3]);
assertEquals("zzz ", columns[4]);
tokenizer.reset(",,,,zzz ");
columns = tokenizer.getTokenArray();
assertNull(columns[0]);
assertNull(columns[1]);
assertNull(columns[2]);
assertNull(columns[3]);
assertEquals("zzz ", columns[4]);
}
@Test
public void testCsvUnquoted() throws IOException {
StrTokenizer tokenizer = new StrTokenizer();
tokenizer.setDelimiterString(",");
tokenizer.setEmptyTokenAsNull(true);
tokenizer.setIgnoreEmptyTokens(false);
tokenizer.reset("121,432423, 9099053,Frieda karla L.,DC.,Ahrens");
String[] columns = tokenizer.getTokenArray();
assertEquals("121", columns[0]);
assertEquals("432423", columns[1]);
assertEquals(" 9099053", columns[2]);
assertEquals("Frieda karla L.", columns[3]);
assertEquals("DC.", columns[4]);
assertEquals("Ahrens", columns[5]);
tokenizer.reset(",,,,zzz ");
columns = tokenizer.getTokenArray();
assertNull(columns[0]);
assertNull(columns[1]);
assertNull(columns[2]);
assertNull(columns[3]);
assertEquals("zzz ", columns[4]);
}
@Test
public void testPipes() throws IOException {
StrTokenizer tokenizer = new StrTokenizer();
tokenizer.setDelimiterChar('|');
tokenizer.setQuoteChar('"');
tokenizer.setEmptyTokenAsNull(true);
tokenizer.setIgnoreEmptyTokens(false);
tokenizer.reset("121|432423| 9099053|\"Frieda karla L.|DC.\"|Ahrens");
String[] columns = tokenizer.getTokenArray();
assertEquals("121", columns[0]);
assertEquals("432423", columns[1]);
assertEquals(" 9099053", columns[2]);
assertEquals("Frieda karla L.|DC.", columns[3]);
assertEquals("Ahrens", columns[4]);
tokenizer.reset(" |4321");
columns = tokenizer.getTokenArray();
assertEquals(" ", columns[0]);
assertEquals("4321", columns[1]);
tokenizer.reset(" ||||zzz ");
columns = tokenizer.getTokenArray();
assertEquals(" ", columns[0]);
assertNull(columns[1]);
assertNull(columns[2]);
assertNull(columns[3]);
assertEquals("zzz ", columns[4]);
tokenizer.reset("||||zzz ");
columns = tokenizer.getTokenArray();
assertNull(columns[0]);
assertNull(columns[1]);
assertNull(columns[2]);
assertNull(columns[3]);
assertEquals("zzz ", columns[4]);
}
@Test
public void testTabQuoted() throws IOException {
StrTokenizer tokenizer = new StrTokenizer();
tokenizer.setDelimiterString("\t");
tokenizer.setQuoteChar('"');
tokenizer.setEmptyTokenAsNull(true);
tokenizer.setIgnoreEmptyTokens(false);
tokenizer.reset("121\t432423\t 9099053\t\"Frieda karla L.,DC.\"\tAhrens");
String[] columns = tokenizer.getTokenArray();
assertEquals("121", columns[0]);
assertEquals("432423", columns[1]);
assertEquals(" 9099053", columns[2]);
assertEquals("Frieda karla L.,DC.", columns[3]);
assertEquals("Ahrens", columns[4]);
tokenizer.reset(" \t4321");
columns = tokenizer.getTokenArray();
assertEquals(" ", columns[0]);
assertEquals("4321", columns[1]);
tokenizer.reset(" \t\t\t\tzzz ");
columns = tokenizer.getTokenArray();
assertEquals(" ", columns[0]);
assertNull(columns[1]);
assertNull(columns[2]);
assertNull(columns[3]);
assertEquals("zzz ", columns[4]);
tokenizer.reset("\t\t\t\tzzz ");
columns = tokenizer.getTokenArray();
assertNull(columns[0]);
assertNull(columns[1]);
assertNull(columns[2]);
assertNull(columns[3]);
assertEquals("zzz ", columns[4]);
}
@Test
public void testTabUnquoted() throws IOException {
StrTokenizer tokenizer = new StrTokenizer();
tokenizer.setDelimiterString("\t");
tokenizer.setEmptyTokenAsNull(true);
tokenizer.setIgnoreEmptyTokens(false);
tokenizer.reset("121\t432423\t 9099053\t\"Frieda karla L.,DC.\"\tAhrens");
String[] columns = tokenizer.getTokenArray();
assertEquals("121", columns[0]);
assertEquals("432423", columns[1]);
assertEquals(" 9099053", columns[2]);
assertEquals("\"Frieda karla L.,DC.\"", columns[3]);
assertEquals("Ahrens", columns[4]);
tokenizer.reset(" \t4321");
columns = tokenizer.getTokenArray();
assertEquals(" ", columns[0]);
assertEquals("4321", columns[1]);
tokenizer.reset(" \t\t\t\tzzz ");
columns = tokenizer.getTokenArray();
assertEquals(" ", columns[0]);
assertNull(columns[1]);
assertNull(columns[2]);
assertNull(columns[3]);
assertEquals("zzz ", columns[4]);
tokenizer.reset("\t\t\t\tzzz ");
columns = tokenizer.getTokenArray();
assertNull(columns[0]);
assertNull(columns[1]);
assertNull(columns[2]);
assertNull(columns[3]);
assertEquals("zzz ", columns[4]);
}
}