// BlogBridge -- RSS feed reader, manager, and web based service // Copyright (C) 2002-2006 by R. Pito Salas // // This program is free software; you can redistribute it and/or modify it under // the terms of the GNU General Public License as published by the Free Software Foundation; // either version 2 of the License, or (at your option) any later version. // // This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; // without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. // See the GNU General Public License for more details. // // You should have received a copy of the GNU General Public License along with this program; // if not, write to the Free Software Foundation, Inc., 59 Temple Place, // Suite 330, Boston, MA 02111-1307 USA // // Contact: R. Pito Salas // mailto:pitosalas@users.sourceforge.net // More information: about BlogBridge // http://www.blogbridge.com // http://sourceforge.net/projects/blogbridge // // $Id: TestTextProcessor.java,v 1.12 2008/04/04 14:03:27 spyromus Exp $ // package com.salas.bb.utils.swinghtml; import com.salas.bb.utils.Constants; import junit.framework.TestCase; /** * This suite contains tests for <code>TextProcessor</code> unit. * It covers: * TODO put list */ public class TestTextProcessor extends TestCase { /** * Tests conversion of HTML entities in the text. */ public void testConvertEntities() { assertEquals("& & ' '\" \"", TextProcessor.convertHTMLEntities("& &   &apos '" "")); assertEquals("&", TextProcessor.convertHTMLEntities("&")); assertEquals("&", TextProcessor.convertHTMLEntities("&")); assertEquals("'", TextProcessor.convertHTMLEntities("&apos")); assertEquals("'", TextProcessor.convertHTMLEntities("'")); assertEquals(" ", TextProcessor.convertHTMLEntities(" ")); assertEquals(" ", TextProcessor.convertHTMLEntities(" ")); assertEquals("\"", TextProcessor.convertHTMLEntities(""")); assertEquals("\"", TextProcessor.convertHTMLEntities(""")); assertEquals("", TextProcessor.convertHTMLEntities("")); assertNull("NULL should be returned.", TextProcessor.convertHTMLEntities(null)); } /** * Tests conversion of some numeric entities into strings. */ public void testConvertNumericEntities() { assertEquals("Apple's and ' Shmapples'", TextProcessor.convertNumericHTMLEntities("Apple's and ' Shmapples'")); assertEquals("\\", TextProcessor.convertNumericHTMLEntities("\")); assertEquals("$", TextProcessor.convertNumericHTMLEntities("$")); assertNull("NULL should be returned.", TextProcessor.convertNumericHTMLEntities(null)); } /** * Tests removing of HTML entities from text. */ public void testRemoveHTMLEntities() { assertEquals("Entities weren't remove.", "a b c", TextProcessor.removeHTMLEntities("a&b c")); assertNull("NULL should be returned.", TextProcessor.removeHTMLEntities(null)); } /** * Tests removing of tags from the text. */ public void testRemoveTags() { assertEquals("Tags weren't removed.", "abc", TextProcessor.removeTags("<p>a<br/><i >b</i>c</p>")); assertNull("NULL should be returned.", TextProcessor.removeTags(null)); } /** * Tests getting the excerpt from the text. */ public void testGetExcerpt() { assertEquals("First words should be selected", "a bb, c.", TextProcessor.getExcerpt("a bb, c. d e", 4)); assertEquals("First words should be selected", "a b, c!", TextProcessor.getExcerpt(" a b, c! d e", 4)); assertEquals("First words should be selected", "[Quote] a b, c?", TextProcessor.getExcerpt(" [Quote] a b, c? d e", 5)); assertEquals("First words on the second line should be selected.", "a b c...", TextProcessor.getExcerpt("\na b c d\n e", 3)); assertEquals("First line is empty.", "a b...", TextProcessor.getExcerpt("\na b\n c d e", 3)); assertEquals("First line is empty.", "a b", TextProcessor.getExcerpt("\na b", 3)); // No text assertNull("No text given.", TextProcessor.getExcerpt(null, 3)); // No words assertEquals("No words in the text.", "", TextProcessor.getExcerpt("", 3)); assertEquals("No words in the text.", "", TextProcessor.getExcerpt("\n", 3)); // Using excerpt from the text, but skipping tags and converting entities assertEquals("Art Mobs: \"If a paintings could speak, what would they...", TextProcessor.getExcerpt("Art Mobs: \"If a paintings could speak, " + "what would they say?\"", 10)); // One word assertEquals("The word should be returned.", "First", TextProcessor.getExcerpt("First", 3)); } /** * Tests how the text is filtered from the garbage and undesired staff. * Target: inline scripts. */ public void testFilterTextScripts() { assertEquals("test", TextProcessor.filterText("te<script>\n" + "<!--\nalert('test');\n--></script>st")); assertEquals("", TextProcessor.filterText( "<P><SPAN lang=EN-US>\n" + "<SCRIPT>\n" + "<!--\n" + "D(['mb','The',1]\n" + ");\n" + "\n" + "<a href='//'>//</a>-->\n" + "</SCRIPT>\n" + "</SPAN>")); } /** * Tests how the text is filtered from the garbage and undesired staff. * Target: tags and entities. */ public void testFilterTextConversions() { assertEquals(null, TextProcessor.filterText(null)); // Extra spaces trimming assertEquals("test", TextProcessor.filterText(" test ")); // Conversion of STRONG into B tags assertEquals("<b>test</b>", TextProcessor.filterText("<strong>test</strong>")); // Removing extra P tags assertEquals("test", TextProcessor.filterText("<p> <p> <p/>test</p>")); // Handling empty tags (BR and IMG in our case) assertEquals("test<br>", TextProcessor.filterText("<p>test<br/>")); assertEquals("test<img src='aaa'>", TextProcessor.filterText("<p>test<img src='aaa'/>")); // Converting numeric HTML entities to text and handling known HTML entities assertEquals("test's", TextProcessor.filterText("test's")); assertEquals("test &<>'"", TextProcessor.filterText("test &<>'"")); } /** * Tests converting unknown entities into Unicode chars. */ public void testFilterTextEntities() { assertEquals("\u2014 \u2014 \u2014", TextProcessor.filterText("— &mdash &mdash")); } /** * Tests filtering the title. */ public void testFilterTitle() { // Converting HTML entities to plain text assertEquals("a b", TextProcessor.filterTitle("a b", null)); // Skipping unknown entities assertEquals("Q&A:", TextProcessor.filterTitle("Q&A:", null)); // Incorrect numeric HTML entity -- kipping assertEquals("test&12;1", TextProcessor.filterTitle("test&12;1", null)); // Extra long (more than 6 chars) HTML entity -- skipping assertEquals("test&qwertyu;1", TextProcessor.filterTitle("test&qwertyu;1", null)); // Converting numeric HTML entities to string assertEquals("Apple's", TextProcessor.filterTitle("Apple's", null)); // Converting unknown, but looking valid HTML entities to space assertEquals("test 1", TextProcessor.filterTitle("test&nb;1", null)); assertEquals("test 1", TextProcessor.filterTitle("test&qwerty;1", null)); // Using excerpt from the text, but skipping tags and converting entities assertEquals("Art Mobs: \"If a paintings could speak, what would they...", TextProcessor.filterTitle(null, "<a>Art Mobs</a>: \"If a paintings could speak, " + "what would they say?\"")); } /** * Tests filtering of the title when there's not title, but there's text. */ public void testFilterTitleNull() { String textString; StringBuffer buf = new StringBuffer(); for (int i = 0; i < Constants.WORDS_IN_EXCERPT; i++) buf.append(i).append(" "); // Prepare the look of the title String ethalon = buf.toString(); ethalon = ethalon.substring(0, ethalon.length() - 1) + "..."; // Add some more "words" to mess things up buf.append("a b c"); textString = buf.toString(); assertEquals("Title should be taken from excerpt.", ethalon, TextProcessor.filterTitle(null, textString)); assertEquals("Title should be taken from excerpt.", null, TextProcessor.filterTitle(null, null)); } /** * Tests removing buggy background attribute where there's no actual link specified. */ public void testRemoveBackgroundAttr() { assertEquals("BACKGROUND should be removed", "<i >a</i>", TextProcessor.processHTML("<i background=''>a</i>", -1)); assertEquals("BACKGROUND should be removed", "<i >a</i>", TextProcessor.processHTML("<i background = ' '>a</i>", -1)); assertEquals("BACKGROUND should be removed", "<i >a</i>", TextProcessor.processHTML("<i bAckGroUnd=\" \">a</i>", -1)); assertEquals("BACKGROUND should stay", "<i BACKGROUND='a'>a</i>", TextProcessor.processHTML("<i BACKGROUND='a'>a</i>", -1)); } /** * Tests removing heading paragraph signs. */ public void testRemoveLeadingParagraphs() { assertEquals("a", TextProcessor.removeLeadingParagraphs("<p>a")); assertEquals("a", TextProcessor.removeLeadingParagraphs(" <p> a")); assertEquals("a<p>", TextProcessor.removeLeadingParagraphs(" <p> a<p>")); assertEquals("a<p>", TextProcessor.removeLeadingParagraphs("<p> <p> a<p>")); } /** * Tests processing text like this: "<p><strong>Some</strong> text". * Those STRONG tags should be converted into B tags. */ public void testProcessingStrongText() { assertEquals("<b>Some</b> text", TextProcessor.processHTML("<strong>Some</strong> text", -1)); assertEquals("<p><b>Some</b> text", TextProcessor.processHTML("<p><strong>Some</strong> text", -1)); } /** * Tests converting entities to plain text. */ public void testToPlainText() { assertEquals(" ", TextProcessor.toPlainText(" ")); assertEquals("<", TextProcessor.toPlainText("<")); assertEquals(">", TextProcessor.toPlainText(">")); assertEquals("\"", TextProcessor.toPlainText(""")); assertEquals("'", TextProcessor.toPlainText("'")); assertEquals("&", TextProcessor.toPlainText("&")); } /** * Tests converting entities to plain text. */ public void testToPlainTextComposite() { assertEquals("a b<c", TextProcessor.toPlainText("a b<c")); } }