// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Somik Raha // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/RemarkNodeParserTest.java,v $ // $Author: derrickoswald $ // $Date: 2006/05/27 14:02:28 $ // $Revision: 1.49 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.tests.parserHelperTests; import org.htmlparser.PrototypicalNodeFactory; import org.htmlparser.Remark; import org.htmlparser.Tag; import org.htmlparser.Text; import org.htmlparser.lexer.Lexer; import org.htmlparser.tests.ParserTestCase; import org.htmlparser.util.ParserException; public class RemarkNodeParserTest extends ParserTestCase { static { System.setProperty ("org.htmlparser.tests.parserHelperTests.RemarkParserTest", "RemarkParserTest"); } public RemarkNodeParserTest (String name) { super(name); } /** * Test unparsed remark node. * The bug being reproduced is this : <BR> * <!-- saved from url=(0022)http://internet.e-mail --> * <HTML> * <HEAD><META name="title" content="Training Introduction"> * <META name="subject" content=""> * <!-- Whats gonna happen now ? * --> * <TEST> * </TEST> * * The above line is incorrectly parsed - the remark is not correctly identified. * This bug was reported by Serge Kruppa (2002-Feb-08). */ public void testRemarkBug() throws ParserException { createParser( "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ "<HTML>\n"+ "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ "<META name=\"subject\" content=\"\">\n"+ "<!--\n"+ " Whats gonna happen now ?\n"+ "-->\n"+ "<TEST>\n"+ "</TEST>\n"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(15); // The first node should be a Remark assertTrue("First node should be a Remark",node[0] instanceof Remark); Remark Remark = (Remark)node[0]; assertEquals("Text of the Remark #1"," saved from url=(0022)http://internet.e-mail ",Remark.getText()); // The tenth node should be a Remark assertTrue("Tenth node should be a Remark",node[9] instanceof Remark); Remark = (Remark)node[9]; assertEquals("Text of the Remark #10","\n Whats gonna happen now ?\n",Remark.getText()); } public void testGetText () throws ParserException { createParser( "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ "<HTML>\n"+ "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ "<META name=\"subject\" content=\"\">\n"+ "<!--\n"+ " Whats gonna happen now ?\n"+ "-->\n"+ "<TEST>\n"+ "</TEST>\n"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(15); // The first node should be a Remark assertTrue("First node should be a Remark",node[0] instanceof Remark); Remark Remark = (Remark)node[0]; assertEquals("Plain Text of the Remark #1"," saved from url=(0022)http://internet.e-mail ",Remark.getText ()); // The tenth node should be a Remark assertTrue("Tenth node should be a Remark",node[9] instanceof Remark); Remark = (Remark)node[9]; assertEquals("Plain Text of the Remark #10","\n Whats gonna happen now ?\n",Remark.getText()); } public void testToRawString() throws ParserException { createParser( "<!-- saved from url=(0022)http://internet.e-mail -->\n"+ "<HTML>\n"+ "<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+ "<META name=\"subject\" content=\"\">\n"+ "<!--\n"+ " Whats gonna happen now ?\n"+ "-->\n"+ "<TEST>\n"+ "</TEST>\n"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(15); // The first node should be a Remark assertTrue("First node should be a Remark",node[0] instanceof Remark); Remark Remark = (Remark)node[0]; assertStringEquals("Raw String of the Remark #1","<!-- saved from url=(0022)http://internet.e-mail -->",Remark.toHtml()); // The tenth node should be a Remark assertTrue("Tenth node should be a Remark",node[9] instanceof Remark); Remark = (Remark)node[9]; assertStringEquals("Raw String of the Remark #6","<!--\n Whats gonna happen now ?\n-->",Remark.toHtml()); } public void testNonRemark() throws ParserException { createParser(" <![endif]>"); parseAndAssertNodeCount(2); // The first node should be a Remark assertTrue("First node should be a string node",node[0] instanceof Text); assertTrue("Second node should be a Tag",node[1] instanceof Tag); Text stringNode = (Text)node[0]; Tag tag = (Tag)node[1]; assertEquals("Text contents"," ",stringNode.getText()); assertEquals("Tag Contents","![endif]",tag.getText()); } /** * This is the simulation of bug report 586756, submitted * by John Zook. * If all the comment contains is a blank line, it breaks * the state */ public void testRemarkWithBlankLine() throws ParserException { createParser("<!--\n"+ "\n"+ "-->"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertTrue("Node should be a Remark",node[0] instanceof Remark); Remark Remark = (Remark)node[0]; assertEquals("Expected contents","\n\n",Remark.getText()); } /** * This is the simulation of a bug report submitted * by Claude Duguay. * If it is a comment with nothing in it, parser crashes */ public void testRemarkWithNothing() throws ParserException { createParser("<!-->"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertTrue("Node should be a Remark",node[0] instanceof Remark); Remark Remark = (Remark)node[0]; assertEquals("Expected contents","",Remark.getText()); } /** * Test tag within remark. * Reproduction of bug reported by John Zook [594301] * When we have tags like : * <!-- <A> --> * it doesent get parsed correctly */ public void testTagWithinRemark() throws ParserException { createParser("<!-- \n"+ "<A>\n"+ "bcd -->"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertTrue("Node should be a Remark",node[0] instanceof Remark); Remark Remark = (Remark)node[0]; assertStringEquals("Expected contents"," \n<A>\nbcd ",Remark.getText()); } /** * Bug reported by John Zook [594301], invalid remark nodes are accepted as remark nodes. * <<br> * -<br> * -<br> * ssd --><br> * This is not supposed to be a Remark */ public void testInvalidTag() throws ParserException { createParser("<!\n"+ "-\n"+ "-\n"+ "ssd -->"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertTrue("Node should be a Tag but was "+node[0],node[0] instanceof Tag); Tag tag = (Tag)node[0]; assertStringEquals("Expected contents","!\n"+ "-\n"+ "-\n"+ "ssd --",tag.getText()); } /** * Bug reported by John Zook [594301] * If dashes exist in a comment, they dont get added to the comment text */ public void testDashesInComment() throws ParserException{ createParser("<!-- -- -->"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertTrue("Node should be a Remark but was "+node[0],node[0] instanceof Remark); Remark Remark = (Remark)node[0]; assertEquals("Remark Node contents"," -- ",Remark.getText()); } // from http://www.w3.org/MarkUp/html-spec/html-spec_3.html //Comments // //To include comments in an HTML document, use a comment declaration. //A comment declaration consists of `<!' followed by zero or more comments //followed by `>'. Each comment starts with `--' and includes all text up to //and including the next occurrence of `--'. In a comment declaration, white //space is allowed after each comment, but not before the first comment. The //entire comment declaration is ignored. (10) // //For example: // //<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN"> //<HEAD> //<TITLE>HTML Comment Example</TITLE> //<!-- Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp --> //<!-- another -- -- comment --> //<!> //</HEAD> //<BODY> //<p> <!- not a comment, just regular old data characters -> /** * Test a comment declaration with a comment. */ public void testSingleComment () throws ParserException { createParser( "<HTML>\n" + "<HEAD>\n" + "<TITLE>HTML Comment Test</TITLE>\n" + "</HEAD>\n" + "<BODY>\n" + "<!-- Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp -->\n" + "</BODY>\n" + "</HTML>\n" ); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(18); assertTrue("Node should be a Remark but was "+node[12],node[12] instanceof Remark); Remark Remark = (Remark)node[12]; assertEquals("Remark Node contents"," Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",Remark.getText()); } /** * Test a comment declaration with two comments. */ public void testDoubleComment () throws ParserException { createParser( "<HTML>\n" + "<HEAD>\n" + "<TITLE>HTML Comment Test</TITLE>\n" + "</HEAD>\n" + "<BODY>\n" + "<!-- another -- -- comment -->\n" + "</BODY>\n" + "</HTML>\n" ); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(18); assertTrue("Node should be a Remark but was "+node[12],node[12] instanceof Remark); Remark Remark = (Remark)node[12]; assertEquals("Remark Node contents"," another -- -- comment ",Remark.getText()); } /** * Test a comment declaration without any comments. */ public void testEmptyComment () throws ParserException { createParser( "<HTML>\n" + "<HEAD>\n" + "<TITLE>HTML Comment Test 'testEmptyComment'</TITLE>\n" + "</HEAD>\n" + "<BODY>\n" + "<!>\n" + "</BODY>\n" + "</HTML>\n" ); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(18); assertTrue("Node should be a Remark but was "+node[12],node[12] instanceof Remark); Remark Remark = (Remark)node[12]; assertEquals("Remark Node contents","",Remark.getText()); } // /** // * Test what the specification calls data characters. // * Actually, no browser I've tried handles this correctly (as text). // * Some handle it as a comment and others handle it as a tag. // * So for now we leave this test case out. // */ // public void testNotAComment () // throws // HTMLParserException // { // createParser( // "<HTML>\n" // + "<HEAD>\n" // + "<TITLE>HTML Comment Test 'testNotAComment'</TITLE>\n" // + "</HEAD>\n" // + "<BODY>\n" // + "<!- not a comment, just regular old data characters ->\n" // + "</BODY>\n" // + "</HTML>\n" // ); // parseAndAssertNodeCount(10); // assertTrue("Node should not be a Remark",!(node[7] instanceof Remark)); // assertTrue("Node should be a HTMLText but was "+node[7],node[7].getType()==HTMLText.TYPE); // HTMLText stringNode = (HTMLText)node[7]; // assertEquals("String Node contents","<!- not a comment, just regular old data characters ->\n",stringNode.getText()); // } /** * Test exclamation mark ending. * Test a comment ending with !--. * See bug #788746 parser crashes on comments like <!-- foobar --!> */ public void testExclamationComment () throws ParserException { boolean old_remark_handling = Lexer.STRICT_REMARKS; try { // handling this requires non-strict handling Lexer.STRICT_REMARKS = false; createParser ( "<html>\n" + "<head>\n" + "<title>foobar</title>\n" + "</head>\n" + "<body>\n" + "<!-- foobar --!>\n" + "</body>\n" + "</html>\n" ); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount (18); assertTrue("Node should be a Remark but was " + node[12], node[12] instanceof Remark); assertStringEquals ("remark text", "<!-- foobar --!>", node[12].toHtml ()); } finally { Lexer.STRICT_REMARKS = old_remark_handling; } } /** * Test a comment ending with -. * See also the Acid2 test at http://www.webstandards.org/act/acid2/test.html. */ public void testDashEnding () throws ParserException { String preamble = "<div class=\"parser\">"; String remark = "<!-- ->ERROR<!- -->"; String rest = "</div></div> <!-- two dashes is what delimits a comment, so the text \"->ERROR<!-\" earlier on this line is actually part of a comment -->"; createParser (preamble + remark + rest); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount (6); assertTrue("Node should be a Remark but was " + node[1], node[1] instanceof Remark); assertStringEquals ("remark text", remark, node[1].toHtml ()); } /** * Test a comment ending with ---. * See bug #1345049 HTMLParser should not terminate a comment with ---> * See also the Acid2 test at http://www.webstandards.org/act/acid2/test.html. */ public void test3DashesEnding () throws ParserException { String preamble = "<div class=\"parser\">"; String remark = "<!-- --->ERROR<!- -->"; String rest = "</div></div> <!-- two dashes is what delimits a comment, so the text \"->ERROR<!-\" earlier on this line is actually part of a comment -->"; createParser (preamble + remark + rest); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount (6); assertTrue("Node should be a Remark but was " + node[1], node[1] instanceof Remark); assertStringEquals ("remark text", remark, node[1].toHtml ()); } }