// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/parserHelperTests/RemarkNodeParserTest.java,v $
// $Author: derrickoswald $
// $Date: 2006/05/27 14:02:28 $
// $Revision: 1.49 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
package org.htmlparser.tests.parserHelperTests;
import org.htmlparser.PrototypicalNodeFactory;
import org.htmlparser.Remark;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.lexer.Lexer;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.ParserException;
public class RemarkNodeParserTest extends ParserTestCase
{
static
{
System.setProperty ("org.htmlparser.tests.parserHelperTests.RemarkParserTest", "RemarkParserTest");
}
public RemarkNodeParserTest (String name) {
super(name);
}
/**
* Test unparsed remark node.
* The bug being reproduced is this : <BR>
* <!-- saved from url=(0022)http://internet.e-mail -->
* <HTML>
* <HEAD><META name="title" content="Training Introduction">
* <META name="subject" content="">
* <!--
Whats gonna happen now ?
* -->
* <TEST>
* </TEST>
*
* The above line is incorrectly parsed - the remark is not correctly identified.
* This bug was reported by Serge Kruppa (2002-Feb-08).
*/
public void testRemarkBug() throws ParserException
{
createParser(
"<!-- saved from url=(0022)http://internet.e-mail -->\n"+
"<HTML>\n"+
"<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+
"<META name=\"subject\" content=\"\">\n"+
"<!--\n"+
" Whats gonna happen now ?\n"+
"-->\n"+
"<TEST>\n"+
"</TEST>\n");
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount(15);
// The first node should be a Remark
assertTrue("First node should be a Remark",node[0] instanceof Remark);
Remark Remark = (Remark)node[0];
assertEquals("Text of the Remark #1"," saved from url=(0022)http://internet.e-mail ",Remark.getText());
// The tenth node should be a Remark
assertTrue("Tenth node should be a Remark",node[9] instanceof Remark);
Remark = (Remark)node[9];
assertEquals("Text of the Remark #10","\n Whats gonna happen now ?\n",Remark.getText());
}
public void testGetText () throws ParserException {
createParser(
"<!-- saved from url=(0022)http://internet.e-mail -->\n"+
"<HTML>\n"+
"<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+
"<META name=\"subject\" content=\"\">\n"+
"<!--\n"+
" Whats gonna happen now ?\n"+
"-->\n"+
"<TEST>\n"+
"</TEST>\n");
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount(15);
// The first node should be a Remark
assertTrue("First node should be a Remark",node[0] instanceof Remark);
Remark Remark = (Remark)node[0];
assertEquals("Plain Text of the Remark #1"," saved from url=(0022)http://internet.e-mail ",Remark.getText ());
// The tenth node should be a Remark
assertTrue("Tenth node should be a Remark",node[9] instanceof Remark);
Remark = (Remark)node[9];
assertEquals("Plain Text of the Remark #10","\n Whats gonna happen now ?\n",Remark.getText());
}
public void testToRawString() throws ParserException {
createParser(
"<!-- saved from url=(0022)http://internet.e-mail -->\n"+
"<HTML>\n"+
"<HEAD><META name=\"title\" content=\"Training Introduction\">\n"+
"<META name=\"subject\" content=\"\">\n"+
"<!--\n"+
" Whats gonna happen now ?\n"+
"-->\n"+
"<TEST>\n"+
"</TEST>\n");
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount(15);
// The first node should be a Remark
assertTrue("First node should be a Remark",node[0] instanceof Remark);
Remark Remark = (Remark)node[0];
assertStringEquals("Raw String of the Remark #1","<!-- saved from url=(0022)http://internet.e-mail -->",Remark.toHtml());
// The tenth node should be a Remark
assertTrue("Tenth node should be a Remark",node[9] instanceof Remark);
Remark = (Remark)node[9];
assertStringEquals("Raw String of the Remark #6","<!--\n Whats gonna happen now ?\n-->",Remark.toHtml());
}
public void testNonRemark() throws ParserException {
createParser(" <![endif]>");
parseAndAssertNodeCount(2);
// The first node should be a Remark
assertTrue("First node should be a string node",node[0] instanceof Text);
assertTrue("Second node should be a Tag",node[1] instanceof Tag);
Text stringNode = (Text)node[0];
Tag tag = (Tag)node[1];
assertEquals("Text contents"," ",stringNode.getText());
assertEquals("Tag Contents","![endif]",tag.getText());
}
/**
* This is the simulation of bug report 586756, submitted
* by John Zook.
* If all the comment contains is a blank line, it breaks
* the state
*/
public void testRemarkWithBlankLine() throws ParserException {
createParser("<!--\n"+
"\n"+
"-->");
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount(1);
assertTrue("Node should be a Remark",node[0] instanceof Remark);
Remark Remark = (Remark)node[0];
assertEquals("Expected contents","\n\n",Remark.getText());
}
/**
* This is the simulation of a bug report submitted
* by Claude Duguay.
* If it is a comment with nothing in it, parser crashes
*/
public void testRemarkWithNothing() throws ParserException {
createParser("<!-->");
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount(1);
assertTrue("Node should be a Remark",node[0] instanceof Remark);
Remark Remark = (Remark)node[0];
assertEquals("Expected contents","",Remark.getText());
}
/**
* Test tag within remark.
* Reproduction of bug reported by John Zook [594301]
* When we have tags like :
* <!-- <A> -->
* it doesent get parsed correctly
*/
public void testTagWithinRemark() throws ParserException {
createParser("<!-- \n"+
"<A>\n"+
"bcd -->");
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount(1);
assertTrue("Node should be a Remark",node[0] instanceof Remark);
Remark Remark = (Remark)node[0];
assertStringEquals("Expected contents"," \n<A>\nbcd ",Remark.getText());
}
/**
* Bug reported by John Zook [594301], invalid remark nodes are accepted as remark nodes.
* <<br>
* -<br>
* -<br>
* ssd --><br>
* This is not supposed to be a Remark
*/
public void testInvalidTag() throws ParserException {
createParser("<!\n"+
"-\n"+
"-\n"+
"ssd -->");
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount(1);
assertTrue("Node should be a Tag but was "+node[0],node[0] instanceof Tag);
Tag tag = (Tag)node[0];
assertStringEquals("Expected contents","!\n"+
"-\n"+
"-\n"+
"ssd --",tag.getText());
}
/**
* Bug reported by John Zook [594301]
* If dashes exist in a comment, they dont get added to the comment text
*/
public void testDashesInComment() throws ParserException{
createParser("<!-- -- -->");
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount(1);
assertTrue("Node should be a Remark but was "+node[0],node[0] instanceof Remark);
Remark Remark = (Remark)node[0];
assertEquals("Remark Node contents"," -- ",Remark.getText());
}
// from http://www.w3.org/MarkUp/html-spec/html-spec_3.html
//Comments
//
//To include comments in an HTML document, use a comment declaration.
//A comment declaration consists of `<!' followed by zero or more comments
//followed by `>'. Each comment starts with `--' and includes all text up to
//and including the next occurrence of `--'. In a comment declaration, white
//space is allowed after each comment, but not before the first comment. The
//entire comment declaration is ignored. (10)
//
//For example:
//
//<!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
//<HEAD>
//<TITLE>HTML Comment Example</TITLE>
//<!-- Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp -->
//<!-- another -- -- comment -->
//<!>
//</HEAD>
//<BODY>
//<p> <!- not a comment, just regular old data characters ->
/**
* Test a comment declaration with a comment.
*/
public void testSingleComment ()
throws
ParserException
{
createParser(
"<HTML>\n"
+ "<HEAD>\n"
+ "<TITLE>HTML Comment Test</TITLE>\n"
+ "</HEAD>\n"
+ "<BODY>\n"
+ "<!-- Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp -->\n"
+ "</BODY>\n"
+ "</HTML>\n"
);
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount(18);
assertTrue("Node should be a Remark but was "+node[12],node[12] instanceof Remark);
Remark Remark = (Remark)node[12];
assertEquals("Remark Node contents"," Id: html-sgml.sgm,v 1.5 1995/05/26 21:29:50 connolly Exp ",Remark.getText());
}
/**
* Test a comment declaration with two comments.
*/
public void testDoubleComment ()
throws
ParserException
{
createParser(
"<HTML>\n"
+ "<HEAD>\n"
+ "<TITLE>HTML Comment Test</TITLE>\n"
+ "</HEAD>\n"
+ "<BODY>\n"
+ "<!-- another -- -- comment -->\n"
+ "</BODY>\n"
+ "</HTML>\n"
);
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount(18);
assertTrue("Node should be a Remark but was "+node[12],node[12] instanceof Remark);
Remark Remark = (Remark)node[12];
assertEquals("Remark Node contents"," another -- -- comment ",Remark.getText());
}
/**
* Test a comment declaration without any comments.
*/
public void testEmptyComment ()
throws
ParserException
{
createParser(
"<HTML>\n"
+ "<HEAD>\n"
+ "<TITLE>HTML Comment Test 'testEmptyComment'</TITLE>\n"
+ "</HEAD>\n"
+ "<BODY>\n"
+ "<!>\n"
+ "</BODY>\n"
+ "</HTML>\n"
);
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount(18);
assertTrue("Node should be a Remark but was "+node[12],node[12] instanceof Remark);
Remark Remark = (Remark)node[12];
assertEquals("Remark Node contents","",Remark.getText());
}
// /**
// * Test what the specification calls data characters.
// * Actually, no browser I've tried handles this correctly (as text).
// * Some handle it as a comment and others handle it as a tag.
// * So for now we leave this test case out.
// */
// public void testNotAComment ()
// throws
// HTMLParserException
// {
// createParser(
// "<HTML>\n"
// + "<HEAD>\n"
// + "<TITLE>HTML Comment Test 'testNotAComment'</TITLE>\n"
// + "</HEAD>\n"
// + "<BODY>\n"
// + "<!- not a comment, just regular old data characters ->\n"
// + "</BODY>\n"
// + "</HTML>\n"
// );
// parseAndAssertNodeCount(10);
// assertTrue("Node should not be a Remark",!(node[7] instanceof Remark));
// assertTrue("Node should be a HTMLText but was "+node[7],node[7].getType()==HTMLText.TYPE);
// HTMLText stringNode = (HTMLText)node[7];
// assertEquals("String Node contents","<!- not a comment, just regular old data characters ->\n",stringNode.getText());
// }
/**
* Test exclamation mark ending.
* Test a comment ending with !--.
* See bug #788746 parser crashes on comments like <!-- foobar --!>
*/
public void testExclamationComment ()
throws
ParserException
{
boolean old_remark_handling = Lexer.STRICT_REMARKS;
try
{
// handling this requires non-strict handling
Lexer.STRICT_REMARKS = false;
createParser (
"<html>\n"
+ "<head>\n"
+ "<title>foobar</title>\n"
+ "</head>\n"
+ "<body>\n"
+ "<!-- foobar --!>\n"
+ "</body>\n"
+ "</html>\n"
);
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount (18);
assertTrue("Node should be a Remark but was " + node[12], node[12] instanceof Remark);
assertStringEquals ("remark text", "<!-- foobar --!>", node[12].toHtml ());
}
finally
{
Lexer.STRICT_REMARKS = old_remark_handling;
}
}
/**
* Test a comment ending with -.
* See also the Acid2 test at http://www.webstandards.org/act/acid2/test.html.
*/
public void testDashEnding ()
throws
ParserException
{
String preamble = "<div class=\"parser\">";
String remark = "<!-- ->ERROR<!- -->";
String rest = "</div></div> <!-- two dashes is what delimits a comment, so the text \"->ERROR<!-\" earlier on this line is actually part of a comment -->";
createParser (preamble + remark + rest);
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount (6);
assertTrue("Node should be a Remark but was " + node[1], node[1] instanceof Remark);
assertStringEquals ("remark text", remark, node[1].toHtml ());
}
/**
* Test a comment ending with ---.
* See bug #1345049 HTMLParser should not terminate a comment with --->
* See also the Acid2 test at http://www.webstandards.org/act/acid2/test.html.
*/
public void test3DashesEnding ()
throws
ParserException
{
String preamble = "<div class=\"parser\">";
String remark = "<!-- --->ERROR<!- -->";
String rest = "</div></div> <!-- two dashes is what delimits a comment, so the text \"->ERROR<!-\" earlier on this line is actually part of a comment -->";
createParser (preamble + remark + rest);
parser.setNodeFactory (new PrototypicalNodeFactory (true));
parseAndAssertNodeCount (6);
assertTrue("Node should be a Remark but was " + node[1], node[1] instanceof Remark);
assertStringEquals ("remark text", remark, node[1].toHtml ());
}
}