/* * This file is part of the Heritrix web crawler (crawler.archive.org). * * Licensed to the Internet Archive (IA) by one or more individual * contributors. * * The IA licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.archive.util; import java.util.regex.Matcher; import junit.framework.Test; import junit.framework.TestCase; import junit.framework.TestSuite; /** * JUnit test suite for TextUtils * * @author gojomo * @version $ Id$ */ public class TextUtilsTest extends TestCase { /** * Create a new TextUtilsTest object * * @param testName * the name of the test */ public TextUtilsTest(final String testName) { super(testName); } /** * run all the tests for TextUtilsTest * * @param argv * the command line arguments */ public static void main(String argv[]) { junit.textui.TestRunner.run(suite()); } /** * return the suite of tests for MemQueueTest * * @return the suite of test */ public static Test suite() { return new TestSuite(TextUtilsTest.class); } public void testMatcherRecycling() { String pattern = "f.*"; Matcher m1 = TextUtils.getMatcher(pattern,"foo"); assertTrue("matcher against 'foo' problem", m1.matches()); TextUtils.recycleMatcher(m1); Matcher m2 = TextUtils.getMatcher(pattern,""); assertFalse("matcher against '' problem", m2.matches()); assertTrue("matcher not recycled",m1==m2); // now verify proper behavior without recycling Matcher m3 = TextUtils.getMatcher(pattern,"fuggedaboutit"); assertTrue("matcher against 'fuggedaboutit' problem",m3.matches()); assertFalse("matcher was recycled",m3==m2); } public void testGetFirstWord() { final String firstWord = "one"; String tmpStr = TextUtils.getFirstWord(firstWord + " two three"); assertTrue("Failed to get first word 1 " + tmpStr, tmpStr.equals(firstWord)); tmpStr = TextUtils.getFirstWord(firstWord); assertTrue("Failed to get first word 2 " + tmpStr, tmpStr.equals(firstWord)); } public void testUnescapeHtml() { final String abc = "abc"; CharSequence cs = TextUtils.unescapeHtml("abc"); assertEquals(cs, abc); final String backwards = "aaa;lt&aaa"; cs = TextUtils.unescapeHtml(backwards); assertEquals(cs, backwards); final String ampersand = "aaa&aaa"; cs = TextUtils.unescapeHtml(ampersand); assertEquals(cs, ampersand); final String encodedAmpersand = "aaa&aaa"; cs = TextUtils.unescapeHtml(encodedAmpersand); assertEquals(cs, ampersand); final String encodedQuote = "aaa'aaa"; cs = TextUtils.unescapeHtml(encodedQuote); assertEquals(cs, "aaa'aaa"); final String entityQuote = "aaa"aaa"; cs = TextUtils.unescapeHtml(entityQuote); assertEquals(cs, "aaa\"aaa"); final String hexencoded = "aaa aaa"; cs = TextUtils.unescapeHtml(hexencoded); assertEquals(cs, "aaa\naaa"); final String zeroPos = "&aaa"; cs = TextUtils.unescapeHtml(zeroPos); assertEquals(cs, "&aaa"); } public void testUnescapeHtmlWithDanglingAmpersand() { final String mixedEncodedAmpersand1 = "aaa&aaa&aaa"; CharSequence cs = TextUtils.unescapeHtml(mixedEncodedAmpersand1); assertEquals("aaa&aaa&aaa",cs); final String mixedEncodedAmpersand2 = "aaa&aaa&aaa&aaa"; cs = TextUtils.unescapeHtml(mixedEncodedAmpersand2); assertEquals("aaa&aaa&aaa&aaa",cs); } }