/* * Copyright 2006-2014 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.springframework.batch.item.file.transform; import org.junit.Test; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; public class DelimitedLineTokenizerTests { private static final String TOKEN_MATCHES = "token equals the expected string"; private DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(); @Test public void testTokenizeRegularUse() { FieldSet tokens = tokenizer.tokenize("sfd,\"Well,I have no idea what to do in the afternoon\",sFj, asdf,,as\n"); assertEquals(6, tokens.getFieldCount()); assertTrue(TOKEN_MATCHES, tokens.readString(0).equals("sfd")); assertTrue(TOKEN_MATCHES, tokens.readString(1).equals("Well,I have no idea what to do in the afternoon")); assertTrue(TOKEN_MATCHES, tokens.readString(2).equals("sFj")); assertTrue(TOKEN_MATCHES, tokens.readString(3).equals("asdf")); assertTrue(TOKEN_MATCHES, tokens.readString(4).equals("")); assertTrue(TOKEN_MATCHES, tokens.readString(5).equals("as")); tokens = tokenizer.tokenize("First string,"); assertEquals(2, tokens.getFieldCount()); assertTrue(TOKEN_MATCHES, tokens.readString(0).equals("First string")); assertTrue(TOKEN_MATCHES, tokens.readString(1).equals("")); } @Test public void testInvalidConstructorArgument() { try { new DelimitedLineTokenizer(String.valueOf(DelimitedLineTokenizer.DEFAULT_QUOTE_CHARACTER)); fail("Quote character can't be used as delimiter for delimited line tokenizer!"); } catch (Exception e) { assertTrue(true); } } @Test public void testDelimitedLineTokenizer() { FieldSet line = tokenizer.tokenize("a,b,c"); assertEquals(3, line.getFieldCount()); } @Test public void testNames() { tokenizer.setNames(new String[] {"A", "B", "C"}); FieldSet line = tokenizer.tokenize("a,b,c"); assertEquals(3, line.getFieldCount()); assertEquals("a", line.readString("A")); } @Test public void testTooFewNames() { tokenizer.setNames(new String[] {"A", "B"}); try { tokenizer.tokenize("a,b,c"); fail("Expected IncorrectTokenCountException"); } catch (IncorrectTokenCountException e) { assertEquals(2, e.getExpectedCount()); assertEquals(3, e.getActualCount()); assertEquals("a,b,c", e.getInput()); } } @Test public void testTooFewNamesNotStrict() { tokenizer.setNames(new String[] {"A", "B"}); tokenizer.setStrict(false); FieldSet tokens = tokenizer.tokenize("a,b,c"); assertTrue(TOKEN_MATCHES, tokens.readString(0).equals("a")); assertTrue(TOKEN_MATCHES, tokens.readString(1).equals("b")); } @Test public void testTooManyNames() { tokenizer.setNames(new String[] {"A", "B", "C", "D"}); try{ tokenizer.tokenize("a,b,c"); } catch(IncorrectTokenCountException e){ assertEquals(4, e.getExpectedCount()); assertEquals(3, e.getActualCount()); assertEquals("a,b,c", e.getInput()); } } @Test public void testTooManyNamesNotStrict() { tokenizer.setNames(new String[] {"A", "B", "C", "D","E"}); tokenizer.setStrict( false ); FieldSet tokens = tokenizer.tokenize("a,b,c"); assertTrue(TOKEN_MATCHES, tokens.readString(0).equals("a")); assertTrue(TOKEN_MATCHES, tokens.readString(1).equals("b")); assertTrue(TOKEN_MATCHES, tokens.readString(2).equals("c")); assertTrue(TOKEN_MATCHES, tokens.readString(3).equals("")); assertTrue(TOKEN_MATCHES, tokens.readString(4).equals("")); } @Test public void testDelimitedLineTokenizerChar() { AbstractLineTokenizer tokenizer = new DelimitedLineTokenizer(" "); FieldSet line = tokenizer.tokenize("a b c"); assertEquals(3, line.getFieldCount()); } @Test(expected=IllegalArgumentException.class) public void testDelimitedLineTokenizerNullDelimiter() { AbstractLineTokenizer tokenizer = new DelimitedLineTokenizer(null); tokenizer.tokenize("a b c"); } @Test(expected=IllegalArgumentException.class) public void testDelimitedLineTokenizerEmptyString() throws Exception { DelimitedLineTokenizer tokenizer = new DelimitedLineTokenizer(""); tokenizer.afterPropertiesSet(); tokenizer.tokenize("a b c"); } @Test public void testDelimitedLineTokenizerString() { AbstractLineTokenizer tokenizer = new DelimitedLineTokenizer(" b "); FieldSet line = tokenizer.tokenize("a b c"); assertEquals(2, line.getFieldCount()); assertEquals("a", line.readString(0)); assertEquals("c", line.readString(1)); } @Test public void testDelimitedLineTokenizerStringBeginningOfLine() { AbstractLineTokenizer tokenizer = new DelimitedLineTokenizer(" | "); FieldSet line = tokenizer.tokenize(" | a | b"); assertEquals(3, line.getFieldCount()); assertEquals("", line.readString(0)); assertEquals("a", line.readString(1)); assertEquals("b", line.readString(2)); } @Test public void testDelimitedLineTokenizerStringEndOfLine() { AbstractLineTokenizer tokenizer = new DelimitedLineTokenizer(" | "); FieldSet line = tokenizer.tokenize("a | b | "); assertEquals(3, line.getFieldCount()); assertEquals("a", line.readString(0)); assertEquals("b", line.readString(1)); assertEquals("", line.readString(2)); } @Test public void testDelimitedLineTokenizerStringsOverlap() { AbstractLineTokenizer tokenizer = new DelimitedLineTokenizer(" | "); FieldSet line = tokenizer.tokenize("a | | | b"); assertEquals(3, line.getFieldCount()); assertEquals("a", line.readString(0)); assertEquals("|", line.readString(1)); assertEquals("b", line.readString(2)); } @Test public void testDelimitedLineTokenizerStringsOverlapWithoutSeparation() { AbstractLineTokenizer tokenizer = new DelimitedLineTokenizer(" | "); FieldSet line = tokenizer.tokenize("a | | b"); assertEquals(2, line.getFieldCount()); assertEquals("a", line.readString(0)); assertEquals("| b", line.readString(1)); } @Test public void testDelimitedLineTokenizerNewlineToken() { AbstractLineTokenizer tokenizer = new DelimitedLineTokenizer("\n"); FieldSet line = tokenizer.tokenize("a b\n c"); assertEquals(2, line.getFieldCount()); assertEquals("a b", line.readString(0)); assertEquals("c", line.readString(1)); } @Test public void testDelimitedLineTokenizerWrappedToken() { AbstractLineTokenizer tokenizer = new DelimitedLineTokenizer("\nrap"); FieldSet line = tokenizer.tokenize("a b\nrap c"); assertEquals(2, line.getFieldCount()); assertEquals("a b", line.readString(0)); assertEquals("c", line.readString(1)); } @Test public void testTokenizeWithQuotes() { FieldSet line = tokenizer.tokenize("a,b,\"c\""); assertEquals(3, line.getFieldCount()); assertEquals("c", line.readString(2)); } @Test public void testTokenizeWithNotDefaultQuotes() { tokenizer.setQuoteCharacter('\''); FieldSet line = tokenizer.tokenize("a,b,'c'"); assertEquals(3, line.getFieldCount()); assertEquals("c", line.readString(2)); } @Test public void testTokenizeWithEscapedQuotes() { FieldSet line = tokenizer.tokenize("a,\"\"b,\"\"\"c\""); assertEquals(3, line.getFieldCount()); assertEquals("\"\"b", line.readString(1)); assertEquals("\"c", line.readString(2)); } @Test public void testTokenizeWithUnclosedQuotes() { tokenizer.setQuoteCharacter('\''); FieldSet line = tokenizer.tokenize("a,\"b,c"); assertEquals(3, line.getFieldCount()); assertEquals("\"b", line.readString(1)); assertEquals("c", line.readString(2)); } @Test public void testTokenizeWithSpaceInField() { FieldSet line = tokenizer.tokenize("a,b ,c"); assertEquals(3, line.getFieldCount()); assertEquals("b ", line.readRawString(1)); } @Test public void testTokenizeWithSpaceAtEnd() { FieldSet line = tokenizer.tokenize("a,b,c "); assertEquals(3, line.getFieldCount()); assertEquals("c ", line.readRawString(2)); } @Test public void testTokenizeWithQuoteAndSpaceAtEnd() { FieldSet line = tokenizer.tokenize("a,b,\"c\" "); assertEquals(3, line.getFieldCount()); assertEquals("c", line.readString(2)); } @Test public void testTokenizeWithQuoteAndSpaceBeforeDelimiter() { FieldSet line = tokenizer.tokenize("a,\"b\" ,c"); assertEquals(3, line.getFieldCount()); assertEquals("b", line.readString(1)); } @Test public void testTokenizeWithDelimiterAtEnd() { FieldSet line = tokenizer.tokenize("a,b,c,"); assertEquals(4, line.getFieldCount()); assertEquals("c", line.readString(2)); assertEquals("", line.readString(3)); } @Test public void testEmptyLine() throws Exception { FieldSet line = tokenizer.tokenize(""); assertEquals(0, line.getFieldCount()); } @Test public void testEmptyLineWithNames(){ tokenizer.setNames(new String[]{"A", "B"}); try{ tokenizer.tokenize(""); } catch(IncorrectTokenCountException ex){ assertEquals(2, ex.getExpectedCount()); assertEquals(0, ex.getActualCount()); assertEquals("", ex.getInput()); } } @Test public void testWhitespaceLine() throws Exception { FieldSet line = tokenizer.tokenize(" "); // whitespace counts as text assertEquals(1, line.getFieldCount()); } @Test public void testNullLine() throws Exception { FieldSet line = tokenizer.tokenize(null); // null doesn't... assertEquals(0, line.getFieldCount()); } @Test public void testMultiLineField() throws Exception { FieldSet line = tokenizer.tokenize("a,b,c\nrap"); assertEquals(3, line.getFieldCount()); assertEquals("c\nrap", line.readString(2)); } @Test public void testMultiLineFieldWithQuotes() throws Exception { FieldSet line = tokenizer.tokenize("a,b,\"c\nrap\""); assertEquals(3, line.getFieldCount()); assertEquals("c\nrap", line.readString(2)); } @Test public void testTokenizeWithQuotesEmptyValue() { FieldSet line = tokenizer.tokenize("\"a\",\"b\",\"\",\"d\""); assertEquals(4, line.getFieldCount()); assertEquals("", line.readString(2)); } @Test public void testTokenizeWithIncludedFields() { tokenizer.setIncludedFields(new int[] {1,2}); FieldSet line = tokenizer.tokenize("\"a\",\"b\",\"c\",\"d\""); assertEquals(2, line.getFieldCount()); assertEquals("c", line.readString(1)); } @Test public void testTokenizeWithIncludedFieldsAndEmptyEnd() { tokenizer.setIncludedFields(new int[] {1,3}); FieldSet line = tokenizer.tokenize("\"a\",\"b\",\"c\","); assertEquals(2, line.getFieldCount()); assertEquals("", line.readString(1)); } @Test public void testTokenizeWithIncludedFieldsAndNames() { tokenizer.setIncludedFields(new int[] {1,2}); tokenizer.setNames(new String[] {"foo", "bar"}); FieldSet line = tokenizer.tokenize("\"a\",\"b\",\"c\",\"d\""); assertEquals(2, line.getFieldCount()); assertEquals("c", line.readString("bar")); } @Test(expected=IncorrectTokenCountException.class) public void testTokenizeWithIncludedFieldsAndTooFewNames() { tokenizer.setIncludedFields(new int[] {1,2}); tokenizer.setNames(new String[] {"foo"}); FieldSet line = tokenizer.tokenize("\"a\",\"b\",\"c\",\"d\""); assertEquals(2, line.getFieldCount()); assertEquals("c", line.readString("bar")); } @Test(expected=IncorrectTokenCountException.class) public void testTokenizeWithIncludedFieldsAndTooManyNames() { tokenizer.setIncludedFields(new int[] {1,2}); tokenizer.setNames(new String[] {"foo", "bar", "spam"}); FieldSet line = tokenizer.tokenize("\"a\",\"b\",\"c\",\"d\""); assertEquals(2, line.getFieldCount()); assertEquals("c", line.readString("bar")); } @Test public void testTokenizeOverMultipleLines() { tokenizer = new DelimitedLineTokenizer(";"); FieldSet line = tokenizer.tokenize("value1;\"value2\nvalue2cont\";value3;value4"); assertEquals(4, line.getFieldCount()); assertEquals("value2\nvalue2cont", line.readString(1)); } }