/**
 * AnalyzerBeans
 * Copyright (C) 2014 Neopost - Customer Information Management
 *
 * This copyrighted material is made available to anyone wishing to use, modify,
 * copy, or redistribute it subject to the terms and conditions of the GNU
 * Lesser General Public License, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
 * for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this distribution; if not, write to:
 * Free Software Foundation, Inc.
 * 51 Franklin Street, Fifth Floor
 * Boston, MA 02110-1301 USA
 */
package org.eobjects.analyzer.beans.stringpattern;

import java.util.List;

import junit.framework.TestCase;

/**
 * Tests for {@code DefaultTokenizer.preliminaryTokenize(...)} and
 * {@code DefaultTokenizer.flattenMixedTokens(...)}.
 *
 * Expected tokens are compared through {@code SimpleToken.toString()}, which
 * these tests expect to render as {@code Token['<text>' (<TYPE>)]}.
 */
public class DefaultTokenizerTest extends TestCase {

    // NOTE(review): judging by the expectations in testDecimals, the three
    // characters appear to act as decimal separator ('.'), thousands separator
    // (',') and minus sign ('-'); the meaning of the leading boolean is not
    // visible from this file - confirm against TokenizerConfiguration.
    private final TokenizerConfiguration conf = new TokenizerConfiguration(false, '.', ',', '-');

    /**
     * Runs the preliminary tokenization of {@code input} using the shared
     * configuration of this test.
     */
    private List<SimpleToken> tokenize(String input) {
        return DefaultTokenizer.preliminaryTokenize(input, conf);
    }

    /**
     * Asserts that {@code actual} holds exactly {@code expected.length} tokens
     * and that each token's string form equals the expectation at the same
     * position. The size is checked first, then the tokens in order.
     */
    private static void assertTokens(List<SimpleToken> actual, String... expected) {
        assertEquals(expected.length, actual.size());
        for (int i = 0; i < expected.length; i++) {
            assertEquals(expected[i], actual.get(i).toString());
        }
    }

    /** An empty input yields no tokens. */
    public void testTokenizeEmptyString() throws Exception {
        List<SimpleToken> result = tokenize("");
        assertTrue(result.isEmpty());
    }

    /** A minus sign directly between two numbers is reported as a delimiter. */
    public void testMinusSignAsDelimOrAsMinus() throws Exception {
        assertTokens(tokenize("123-456"),
                "Token['123' (NUMBER)]",
                "Token['-' (DELIM)]",
                "Token['456' (NUMBER)]");
    }

    /**
     * Preliminary tokenization keeps adjacent text and number runs as separate
     * tokens; flattening afterwards merges them into a single MIXED token.
     */
    public void testPreliminaryTokenizeAndMixedTokens() throws Exception {
        List<SimpleToken> sentence = tokenize("hi \t123there - yay -10");
        assertTokens(sentence,
                "Token['hi' (TEXT)]",
                "Token[' \t' (WHITESPACE)]",
                "Token['123' (NUMBER)]",
                "Token['there' (TEXT)]",
                "Token[' ' (WHITESPACE)]",
                "Token['-' (DELIM)]",
                "Token[' ' (WHITESPACE)]",
                "Token['yay' (TEXT)]",
                "Token[' ' (WHITESPACE)]",
                "Token['-10' (NUMBER)]");

        // Only the total count and the merged token are checked after flattening.
        List<SimpleToken> flattenedSentence = DefaultTokenizer.flattenMixedTokens(sentence);
        assertEquals(9, flattenedSentence.size());
        assertEquals("Token['123there' (MIXED)]", flattenedSentence.get(2).toString());

        List<SimpleToken> word = tokenize("w00p");
        assertTokens(word,
                "Token['w' (TEXT)]",
                "Token['00' (NUMBER)]",
                "Token['p' (TEXT)]");

        List<SimpleToken> flattenedWord = DefaultTokenizer.flattenMixedTokens(word);
        assertTokens(flattenedWord, "Token['w00p' (MIXED)]");
    }

    /** A minus sign preceded by whitespace and followed by digits is part of the number. */
    public void testNegativeNumbers() throws Exception {
        assertTokens(tokenize("10 -4"),
                "Token['10' (NUMBER)]",
                "Token[' ' (WHITESPACE)]",
                "Token['-4' (NUMBER)]");
    }

    /**
     * Covers numbers containing '.' and ',' characters, including separators
     * that trail, lead or repeat and therefore end up as delimiters.
     */
    public void testDecimals() throws Exception {
        assertTokens(tokenize("yay 10.1 whut 20,632. hmm"),
                "Token['yay' (TEXT)]",
                "Token[' ' (WHITESPACE)]",
                "Token['10.1' (NUMBER)]",
                "Token[' ' (WHITESPACE)]",
                "Token['whut' (TEXT)]",
                "Token[' ' (WHITESPACE)]",
                "Token['20,632' (NUMBER)]",
                "Token['.' (DELIM)]",
                "Token[' ' (WHITESPACE)]",
                "Token['hmm' (TEXT)]");

        assertTokens(tokenize("20,632.20213"),
                "Token['20,632.20213' (NUMBER)]");

        // For this input only the token count and the token type are verified.
        List<SimpleToken> multipleSeparators = tokenize("20,632,123.20213");
        assertEquals(1, multipleSeparators.size());
        assertEquals(TokenType.NUMBER, multipleSeparators.get(0).getType());

        assertTokens(tokenize("20,632,.20213"),
                "Token['20,632' (NUMBER)]",
                "Token[',.' (DELIM)]",
                "Token['20213' (NUMBER)]");

        assertTokens(tokenize(",-20,632.20213"),
                "Token[',' (DELIM)]",
                "Token['-20,632.20213' (NUMBER)]");

        assertTokens(tokenize("20,632.20213,"),
                "Token['20,632.20213' (NUMBER)]",
                "Token[',' (DELIM)]");

        assertTokens(tokenize("20,632-20213,"),
                "Token['20,632' (NUMBER)]",
                "Token['-' (DELIM)]",
                "Token['20213' (NUMBER)]",
                "Token[',' (DELIM)]");
    }

    /**
     * With all three configuration characters set to {@code null}, the ',',
     * '-' and '.' characters are never absorbed into a number and only show
     * up as delimiters.
     */
    public void testNumberParsingWithoutSeparatorChars() throws Exception {
        TokenizerConfiguration noSeparators = new TokenizerConfiguration(false, null, null, null);
        List<SimpleToken> result = DefaultTokenizer.preliminaryTokenize("20,-632.20213", noSeparators);
        assertTokens(result,
                "Token['20' (NUMBER)]",
                "Token[',-' (DELIM)]",
                "Token['632' (NUMBER)]",
                "Token['.' (DELIM)]",
                "Token['20213' (NUMBER)]");
    }
}