/*
 * File: LetterNumberTokenizerTest.java
 * Authors: Justin Basilico
 * Company: Sandia National Laboratories
 * Project: Cognitive Foundry
 *
 * Copyright March 02, 2009, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government. Export
 * of this program may require a license from the United States Government.
 * See CopyrightHistory.txt for complete details.
 *
 */

package gov.sandia.cognition.text.token;

import gov.sandia.cognition.collection.CollectionUtil;
import org.junit.Test;
import static org.junit.Assert.*;

/**
 * Unit tests for class LetterNumberTokenizer.
 *
 * @author Justin Basilico
 * @since 3.0
 */
public class LetterNumberTokenizerTest {

    /**
     * Creates a new test.
     */
    public LetterNumberTokenizerTest() {
    }

    /**
     * Test of tokenize method, of class LetterNumberTokenizer.
     *
     * Each case feeds one input string to a freshly constructed tokenizer and
     * checks the produced tokens against the listed expected texts. In every
     * case here the expected tokens are the runs of letters and digits in the
     * input, with the surrounding punctuation and whitespace left out.
     */
    @Test
    public void testTokenize() {
        final LetterNumberTokenizer instance = new LetterNumberTokenizer();

        // Mixed punctuation, whitespace, letters and digits.
        assertTokenizesTo(instance,
            "...Mr. Taco? Help? 123\n AbC ?~!@\t Yes7.",
            "Mr", "Taco", "Help", "123", "AbC", "Yes7");

        // A single token spanning the whole input.
        assertTokenizesTo(instance, "aB3", "aB3");

        // A single token padded with whitespace.
        assertTokenizesTo(instance, " aB3 ", "aB3");

        // A lone digit padded with whitespace.
        assertTokenizesTo(instance, " 4 ", "4");

        // Empty input: no expected tokens (varargs yields a zero-length array).
        assertTokenizesTo(instance, "");
    }

    /**
     * Tokenizes the given input with the given tokenizer and checks the
     * result against the expected token texts via
     * {@link #assertEqualTokens(String, String[], Iterable)}.
     *
     * @param tokenizer
     *      The tokenizer under test.
     * @param input
     *      The string to tokenize.
     * @param expected
     *      The expected token texts, in order. May be empty.
     */
    private void assertTokenizesTo(
        final LetterNumberTokenizer tokenizer,
        final String input,
        final String... expected) {
        assertEqualTokens(input, expected, tokenizer.tokenize(input));
    }

    /**
     * Asserts that the given tokens match the expected texts. The number of
     * tokens must equal the number of expected texts. For each token, in
     * iteration order, its text and length must match the expected text, and
     * the span of the input described by the token's start and length must
     * also equal the expected text.
     *
     * @param input
     *      The original string that the tokens were created from.
     * @param expected
     *      The expected token texts, in order.
     * @param tokens
     *      The tokens to check.
     */
    public void assertEqualTokens(
        final String input,
        final String[] expected,
        final Iterable<? extends Token> tokens) {
        final int actualCount = CollectionUtil.size(tokens);
        assertEquals(expected.length, actualCount);

        int position = 0;
        for (final Token token : tokens) {
            final String wanted = expected[position];
            position++;

            assertEquals(wanted, token.getText());
            assertEquals(wanted.length(), token.getLength());

            // The token's offsets must point back at the same text in the input.
            final int begin = token.getStart();
            final int end = begin + token.getLength();
            assertEquals(wanted, input.substring(begin, end));
        }
    }

}