// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Somik Raha
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/tagTests/LinkTagTest.java,v $
// $Author: derrickoswald $
// $Date: 2006/06/04 19:17:20 $
// $Revision: 1.54 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
package org.htmlparser.tests.tagTests;
import org.htmlparser.Node;
import org.htmlparser.PrototypicalNodeFactory;
import org.htmlparser.Tag;
import org.htmlparser.Text;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.HeadTag;
import org.htmlparser.tags.Html;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import org.htmlparser.util.SimpleNodeIterator;
public class LinkTagTest extends ParserTestCase {
static
{
System.setProperty ("org.htmlparser.tests.tagTests.LinkTagTest", "LinkTagTest");
}
public LinkTagTest(String name) {
super(name);
}
/**
* The bug being reproduced is this : <BR>
* <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR>
* vLink=#551a8b>
* The above line is incorrectly parsed in that, the BODY tag is not identified.
* Creation date: (6/17/2001 4:01:06 PM)
*/
public void testLinkNodeBug() throws ParserException
{
createParser("<A HREF=\"../test.html\">abcd</A>","http://www.google.com/test/index.html");
parseAndAssertNodeCount(1);
// The node should be an LinkTag
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
LinkTag linkNode = (LinkTag)node[0];
assertEquals("The image locn","http://www.google.com/test.html",linkNode.getLink());
}
/**
* The bug being reproduced is this : <BR>
* <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR>
* vLink=#551a8b>
* The above line is incorrectly parsed in that, the BODY tag is not identified.
* Creation date: (6/17/2001 4:01:06 PM)
*/
public void testLinkNodeBug2() throws ParserException
{
createParser("<A HREF=\"../../test.html\">abcd</A>","http://www.google.com/test/test/index.html");
parseAndAssertNodeCount(1);
// The node should be an LinkTag
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
LinkTag linkNode = (LinkTag)node[0];
assertEquals("The image locn","http://www.google.com/test.html",linkNode.getLink());
}
/**
* The bug being reproduced is this : <BR>
* When a url ends with a slash, and the link begins with a slash,the parser puts two slashes
* This bug was submitted by Roget Kjensrud
* Creation date: (6/17/2001 4:01:06 PM)
*/
public void testLinkNodeBug3() throws ParserException
{
createParser("<A HREF=\"/mylink.html\">abcd</A>","http://www.cj.com/");
parseAndAssertNodeCount(1);
// The node should be an LinkTag
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
LinkTag linkNode = (LinkTag)node[0];
assertEquals("Link incorrect","http://www.cj.com/mylink.html",linkNode.getLink());
}
/**
* The bug being reproduced is this : <BR>
* Simple url without index.html, doesent get appended to link
* This bug was submitted by Roget Kjensrud
* Creation date: (6/17/2001 4:01:06 PM)
*/
public void testLinkNodeBug4() throws ParserException
{
createParser("<A HREF=\"/mylink.html\">abcd</A>","http://www.cj.com");
parseAndAssertNodeCount(1);
// The node should be an LinkTag
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
LinkTag linkNode = (LinkTag)node[0];
assertEquals("Link incorrect!!","http://www.cj.com/mylink.html",linkNode.getLink());
}
public void testLinkNodeBug5() throws ParserException
{
String link1 = "http://note.kimo.com.tw/";
String link2 = "http://photo.kimo.com.tw/";
String link3 = "http://address.kimo.com.tw/";
createParser("<a href=" + link1 + ">���O</a> <a \n"+
"href=" + link2 + ">��ï</a> <a\n"+
"href=" + link3 + ">�q�T��</a> ","http://www.cj.com");
parseAndAssertNodeCount(6);
assertTrue("Node should be a LinkTag",node[2] instanceof LinkTag);
LinkTag linkNode = (LinkTag)node[2];
assertStringEquals("Link incorrect!!",link2,linkNode.getLink());
assertTrue("Node should be a LinkTag",node[4] instanceof LinkTag);
LinkTag linkNode2 = (LinkTag)node[4];
assertStringEquals("Link incorrect!!",link3,linkNode2.getLink());
}
/**
* This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner.
* Creation date: (7/1/2001 2:42:13 PM)
*/
public void testLinkNodeBugNullPointerException() throws ParserException
{
createParser("<FORM action=http://search.yahoo.com/bin/search name=f><MAP name=m><AREA\n"+
"coords=0,0,52,52 href=\"http://www.yahoo.com/r/c1\" shape=RECT><AREA"+
"coords=53,0,121,52 href=\"http://www.yahoo.com/r/p1\" shape=RECT><AREA"+
"coords=122,0,191,52 href=\"http://www.yahoo.com/r/m1\" shape=RECT><AREA"+
"coords=441,0,510,52 href=\"http://www.yahoo.com/r/wn\" shape=RECT>","http://www.cj.com/");
parser.setNodeFactory (new PrototypicalNodeFactory (new LinkTag ()));
parseAndAssertNodeCount(6);
}
/**
* This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner.
* Creation date: (7/1/2001 2:42:13 PM)
*/
public void testLinkNodeMailtoBug() throws ParserException
{
createParser("<A HREF='mailto:somik@yahoo.com'>hello</A>","http://www.cj.com/");
parseAndAssertNodeCount(1);
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
LinkTag linkNode = (LinkTag)node[0];
assertStringEquals("Link incorrect","somik@yahoo.com",linkNode.getLink());
assertEquals("Link Type",new Boolean(true),new Boolean(linkNode.isMailLink()));
}
/**
* This bug occurs when there is a null pointer exception thrown while scanning a tag using LinkScanner.
* Creation date: (7/1/2001 2:42:13 PM)
*/
public void testLinkNodeSingleQuoteBug() throws ParserException
{
createParser("<A HREF='abcd.html'>hello</A>","http://www.cj.com/");
parseAndAssertNodeCount(1);
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
LinkTag linkNode = (LinkTag)node[0];
assertEquals("Link incorrect","http://www.cj.com/abcd.html",linkNode.getLink());
}
/**
* The bug being reproduced is this : <BR>
* <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR>
* vLink=#551a8b>
* The above line is incorrectly parsed in that, the BODY tag is not identified.
* Creation date: (6/17/2001 4:01:06 PM)
*/
public void testLinkTag() throws ParserException
{
createParser("<A HREF=\"test.html\">abcd</A>","http://www.google.com/test/index.html");
parseAndAssertNodeCount(1);
// The node should be an LinkTag
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
LinkTag LinkTag = (LinkTag)node[0];
assertEquals("The image locn","http://www.google.com/test/test.html",LinkTag.getLink());
}
/**
* The bug being reproduced is this : <BR>
* <BODY aLink=#ff0000 bgColor=#ffffff link=#0000cc onload=setfocus() text=#000000 <BR>
* vLink=#551a8b>
* The above line is incorrectly parsed in that, the BODY tag is not identified.
* Creation date: (6/17/2001 4:01:06 PM)
*/
public void testLinkTagBug() throws ParserException
{
createParser("<A HREF=\"../test.html\">abcd</A>","http://www.google.com/test/index.html");
parseAndAssertNodeCount(1);
// The node should be an LinkTag
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
LinkTag LinkTag = (LinkTag)node[0];
assertEquals("The image locn","http://www.google.com/test.html",LinkTag.getLink());
}
/**
* The bug being reproduced is this : <BR>
* <A HREF=>Something<A><BR>
* vLink=#551a8b>
* The above line is incorrectly parsed in that, the BODY tag is not identified.
* Creation date: (6/17/2001 4:01:06 PM)
*/
public void testNullTagBug() throws ParserException
{
createParser("<A HREF=>Something</A>","http://www.google.com/test/index.html");
parseAndAssertNodeCount(1);
// The node should be an LinkTag
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertEquals("The link location","",linkTag.getLink());
assertEquals("The link text","Something",linkTag.getLinkText());
}
public void testToPlainTextString() throws ParserException {
createParser("<A HREF='mailto:somik@yahoo.com'>hello</A>","http://www.cj.com/");
parseAndAssertNodeCount(1);
assertTrue("Node should be a LinkTag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertEquals("Link Plain Text","hello",linkTag.toPlainTextString());
}
public void testToHTML() throws ParserException {
String link1 = "<A HREF='mailto:somik@yahoo.com'>hello</A>";
String link2 = "<a \n"+
"href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"+
"nical.html\"> Journalism 3.0</a>";
createParser(link1 + "\n"+
"<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font>" +
link2 + " by Rajesh Jain","http://www.cj.com/");
parser.setNodeFactory (new PrototypicalNodeFactory (new LinkTag ()));
parseAndAssertNodeCount(10);
assertTrue("First Node should be a LinkTag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertStringEquals("Link Raw Text",link1,linkTag.toHtml());
assertTrue("Ninth Node should be a LinkTag",node[8] instanceof LinkTag);
linkTag = (LinkTag)node[8];
assertStringEquals("Link Raw Text",link2,linkTag.toHtml());
}
public void testTypeHttps() throws ParserException
{
LinkTag link;
createParser ("<A HREF='https://www.someurl.com'>Try https.</A>","http://sourceforge.net");
parseAndAssertNodeCount (1);
assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
link = (LinkTag)node[0];
assertTrue("This is a https link",link.isHTTPSLink());
}
public void testTypeFtp() throws ParserException
{
LinkTag link;
createParser ("<A HREF='ftp://www.someurl.com'>Try ftp.</A>","http://sourceforge.net");
parseAndAssertNodeCount (1);
assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
link = (LinkTag)node[0];
assertTrue("This is an ftp link",link.isFTPLink());
}
public void testTypeJavaScript() throws ParserException
{
LinkTag link;
createParser ("<A HREF='javascript://www.someurl.com'>Try javascript.</A>","http://sourceforge.net");
parseAndAssertNodeCount (1);
assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
link = (LinkTag)node[0];
assertTrue("This is a javascript link",link.isJavascriptLink());
}
public void testTypeHttpLink() throws ParserException
{
LinkTag link;
createParser ("<A HREF='http://www.someurl.com'>Try http.</A>","http://sourceforge.net");
parseAndAssertNodeCount (1);
assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
link = (LinkTag)node[0];
assertTrue("This is a http link : "+link.getLink(),link.isHTTPLink());
}
public void testRelativeTypeHttpLink() throws ParserException
{
LinkTag link;
createParser ("<A HREF='somePage.html'>Try relative http.</A>","http://sourceforge.net");
parseAndAssertNodeCount (1);
assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
link = (LinkTag)node[0];
assertTrue("This relative link is also a http link : "+link.getLink(),link.isHTTPLink());
}
public void testTypeNonHttp() throws ParserException
{
LinkTag link;
createParser ("<A HREF='ftp://www.someurl.com'>Try non-http.</A>","http://sourceforge.net");
parseAndAssertNodeCount (1);
assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
link = (LinkTag)node[0];
assertTrue("This is not a http link : "+link.getLink(),!link.isHTTPLink());
}
public void testTypeHttpLikeLink() throws ParserException
{
LinkTag link;
createParser ("<A HREF='http://'>Try basic http.</A>","http://sourceforge.net");
parseAndAssertNodeCount (1);
assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
link = (LinkTag)node[0];
assertTrue("This is a http link",link.isHTTPLikeLink());
createParser ("<A HREF='https://www.someurl.com'>Try https.</A>","http://sourceforge.net");
parseAndAssertNodeCount (1);
assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
link = (LinkTag)node[0];
assertTrue("This is a https link",link.isHTTPLikeLink());
}
/**
* Test mail link.
* Bug #738504 MailLink != HTTPLink
*/
public void testMailToIsNotAHTTPLink () throws ParserException
{
LinkTag link;
createParser ("<A HREF='mailto:derrickoswald@users.sourceforge.net'>Derrick</A>","http://sourceforge.net");
parseAndAssertNodeCount (1);
assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
link = (LinkTag)node[0];
assertTrue ("bug #738504 MailLink != HTTPLink", !link.isHTTPLink ());
assertTrue ("bug #738504 MailLink != HTTPSLink", !link.isHTTPSLink ());
}
/**
* Bug #784767 irc://server/channel urls are HTTPLike?
*/
public void testIrcIsNotAHTTPLink () throws ParserException
{
LinkTag link;
createParser ("<A HREF='irc://server/channel'>Try irc.</A>","http://sourceforge.net");
parseAndAssertNodeCount (1);
assertTrue ("Node should be a LinkTag", node[0] instanceof LinkTag);
link = (LinkTag)node[0];
assertTrue("This is not a http link", !link.isHTTPLikeLink ());
}
public void testAccessKey() throws ParserException {
createParser("<a href=\"http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph\" accessKey=1>Click Here</A>");
parseAndAssertNodeCount(1);
assertTrue("The node should be a link tag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertEquals("Link URL of link tag","http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph",linkTag.getLink());
assertEquals("Link Text of link tag","Click Here",linkTag.getLinkText());
assertEquals("Access key","1",linkTag.getAccessKey());
}
public void testErroneousLinkBug() throws ParserException {
createParser(
"Site Comments?<br>" +
"<a href=\"mailto:sam@neurogrid.com?subject=Site Comments\">" +
"Mail Us" +
"<a>"
);
parseAndAssertNodeCount(4);
// The first node should be a Text
assertTrue("First node should be a Text",node[0] instanceof Text);
Text stringNode = (Text)node[0];
assertEquals("Text of the Text","Site Comments?",stringNode.getText());
assertTrue("Second node should be a tag",node[1] instanceof Tag);
assertTrue("Third node should be a link",node[2] instanceof LinkTag);
// LinkScanner.evaluate() says no HREF means it isn't a link:
assertTrue("Fourth node should be a tag",node[3] instanceof Tag);
}
/**
* Test case based on a report by Raghavender Srimantula, of the parser giving out of memory exceptions. Found to occur
* on the following piece of html
* <pre>
* <a href=s/8741><img src="http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif" height=16 width=16 border=0></img></td><td nowrap>
* <a href=s/7509>
* </pre>
*/
public void testErroneousLinkBugFromYahoo2() throws ParserException {
String link = "<a href=s/8741>" +
"<img src=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" height=16 width=16 border=0>";
createParser(
"<td>" +
link +
"</td>" +
"<td nowrap> \n"+
"<a href=s/7509><b>Yahoo! Movies</b></a>" +
"</td>","http://www.yahoo.com");
NodeList linkNodes = parser.extractAllNodesThatMatch (new NodeClassFilter (LinkTag.class));
assertEquals("number of links", 2, linkNodes.size ());
LinkTag linkTag = (LinkTag)linkNodes.elementAt (0);
assertStringEquals("Link","http://www.yahoo.com/s/8741",linkTag.getLink());
// Verify the link data
assertStringEquals("Link Text","",linkTag.getLinkText());
// Verify the reconstruction html
assertStringEquals("toHTML",link + "</a>",linkTag.toHtml());
}
/**
* Test case based on a report by Raghavender Srimantula, of the parser giving out of memory exceptions. Found to occur
* on the following piece of html
* <pre>
* <a href=s/8741><img src="http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif" height=16 width=16 border=0></img>This is test
* <a href=s/7509>
* </pre>
*/
public void testErroneousLinkBugFromYahoo() throws ParserException {
String link =
"<a href=s/8741>" +
"<img src=\"http://us.i1.yimg.com/us.yimg.com/i/i16/mov_popc.gif\" " +
"height=16 " +
"width=16 " +
"border=0>" +
"This is a test\n";
createParser(
link +
"<a href=s/7509>" +
"<b>Yahoo! Movies</b>" +
"</a>",
"http://www.yahoo.com"
);
parseAndAssertNodeCount(2);
assertTrue("First node should be a LinkTag",node[0] instanceof LinkTag);
assertTrue("Second node should be a LinkTag",node[1] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertEquals("Link","http://www.yahoo.com/s/8741",linkTag.getLink());
// Verify the link data
assertEquals("Link Text","This is a test\n",linkTag.getLinkText());
// Verify the reconstruction html
assertStringEquals("toHTML()",link + "</a>",linkTag.toHtml());
}
/**
* This is the reproduction of a bug which produces multiple text copies.
*/
public void testExtractLinkInvertedCommasBug2() throws ParserException
{
createParser("<a href=\"http://cbc.ca/artsCanada/stories/greatnorth271202\" class=\"lgblacku\">Vancouver schools plan 'Great Northern Way'</a>");
parseAndAssertNodeCount(1);
assertTrue("The node should be a link tag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertStringEquals("Extracted Text","Vancouver schools plan 'Great Northern Way'", linkTag.getLinkText ());
}
/**
* Bug pointed out by Sam Joseph (sam@neurogrid.net)
* Links with spaces in them will get their spaces absorbed
*/
public void testLinkSpacesBug() throws ParserException{
createParser("<a href=\"http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph\">Click Here</A>");
parseAndAssertNodeCount(1);
assertTrue("The node should be a link tag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertEquals("Link URL of link tag","http://www.kizna.com/servlets/SomeServlet?name=Sam Joseph",linkTag.getLink());
assertEquals("Link Text of link tag","Click Here",linkTag.getLinkText());
}
/**
* Bug reported by Raj Sharma,5-Apr-2002, upon parsing
* http://www.samachar.com, the entire page could not be picked up.
* The problem was occurring after parsing a particular link
* after which the parsing would not proceed. This link was spread over three lines.
* The bug has been reproduced and fixed.
*/
public void testMultipleLineBug() throws ParserException {
createParser("<LI><font color=\"FF0000\" size=-1><b>Tech Samachar:</b></font><a \n"+
"href=\"http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/tech\n"+
"nical.html\"> Journalism 3.0</a> by Rajesh Jain");
parser.setNodeFactory (new PrototypicalNodeFactory (new LinkTag ()));
parseAndAssertNodeCount(8);
assertTrue("Seventh node should be a link tag",node[6] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[6];
String exp = new String("http://ads.samachar.com/bin/redirect/tech.txt?http://www.samachar.com/technical.html");
//assertEquals("Length of link tag",exp.length(), linkTag.getLink().length());
assertStringEquals("Link URL of link tag",exp,linkTag.getLink());
assertEquals("Link Text of link tag"," Journalism 3.0",linkTag.getLinkText());
assertTrue("Eight node should be a string node",node[7] instanceof Text);
Text stringNode = (Text)node[7];
assertEquals("String node contents"," by Rajesh Jain",stringNode.getText());
}
public void testRelativeLinkScan() throws ParserException {
createParser("<A HREF=\"mytest.html\"> Hello World</A>","http://www.yahoo.com");
parseAndAssertNodeCount(1);
assertTrue("Node identified should be HTMLLinkTag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertEquals("Expected Link","http://www.yahoo.com/mytest.html",linkTag.getLink());
}
public void testRelativeLinkScan2() throws ParserException {
createParser("<A HREF=\"abc/def/mytest.html\"> Hello World</A>","http://www.yahoo.com");
parseAndAssertNodeCount(1);
assertTrue("Node identified should be HTMLLinkTag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertStringEquals("Expected Link","http://www.yahoo.com/abc/def/mytest.html",linkTag.getLink());
}
public void testRelativeLinkScan3() throws ParserException {
createParser("<A HREF=\"../abc/def/mytest.html\"> Hello World</A>","http://www.yahoo.com/ghi");
parseAndAssertNodeCount(1);
assertTrue("Node identified should be HTMLLinkTag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertStringEquals("Expected Link","http://www.yahoo.com/abc/def/mytest.html",linkTag.getLink());
}
/**
* Test scan with data which is of diff nodes type
*/
public void testScan() throws ParserException
{
createParser("<A HREF=\"mytest.html\"><IMG SRC=\"abcd.jpg\">Hello World</A>","http://www.yahoo.com");
parser.setNodeFactory (
new PrototypicalNodeFactory (
new Tag[] {
new LinkTag (),
new ImageTag (),
}));
parseAndAssertNodeCount(1);
assertTrue("Node should be a link node",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
// Get the link data and cross-check
Node [] dataNode= new Node[10];
int i = 0;
for (SimpleNodeIterator e = linkTag.children();e.hasMoreNodes();)
{
dataNode[i++] = e.nextNode();
}
assertEquals("Number of data nodes",new Integer(2),new Integer(i));
assertTrue("First data node should be an Image Node",dataNode[0] instanceof ImageTag);
assertTrue("Second data node shouls be a String Node",dataNode[1] instanceof Text);
// Check the contents of each data node
ImageTag imageTag = (ImageTag)dataNode[0];
assertEquals("Image URL","http://www.yahoo.com/abcd.jpg",imageTag.getImageURL());
Text stringNode = (Text)dataNode[1];
assertEquals("String Contents","Hello World",stringNode.getText());
}
/**
* A bug in the freshmeat page - really bad html
* tag - <A>Revision<\a>
* Reported by Mazlan Mat
* Note: Actually, this is completely legal HTML - Derrick
*/
public void testFreshMeatBug() throws ParserException
{
String html = "<a>Revision</a>";
createParser(html,"http://www.yahoo.com");
parseAndAssertNodeCount(1);
assertTrue("Node 0 should be a tag",node[0] instanceof Tag);
Tag tag = (Tag)node[0];
assertEquals("Tag Contents",html,tag.toHtml());
assertEquals("Node 0 should have one child", 1, tag.getChildren ().size ());
assertTrue("The child should be a string node", tag.getChildren ().elementAt (0) instanceof Text);
Text stringNode = (Text)tag.getChildren ().elementAt (0);
assertEquals("Text Contents","Revision",stringNode.getText());
}
/**
* Test suggested by Cedric Rosa
* A really bad link tag sends parser into infinite loop
*/
public void testBrokenLink() throws ParserException {
createParser(
"<a href=\"faq.html\">" +
"<br>\n"+
"<img src=\"images/46revues.gif\" " +
"width=\"100\" " +
"height=\"46\" " +
"border=\"0\" " +
"alt=\"Rejoignez revues.org!\" " +
"align=\"middle\">",
"http://www.yahoo.com"
);
parseAndAssertNodeCount(1);
assertTrue("Node 0 should be a link tag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertNotNull(linkTag.toString());
}
public void testLinkDataContents() throws ParserException {
createParser("<a href=\"http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689\" target=\"_new\"><img src=\"http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif\" width=468 height=60 border=\"0\" alt=\"See Signs in Theaters 8-2 - Starring Mel Gibson\" align=><font face=\"verdana,arial,helvetica\" SIZE=\"1\"><b></b></font></a>","http://transfer.go.com");
parser.setNodeFactory (
new PrototypicalNodeFactory (
new Tag[] {
new LinkTag (),
new ImageTag (),
}));
parseAndAssertNodeCount(1);
assertTrue("Node 0 should be a link tag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertEquals("Link URL","http://transfer.go.com/cgi/atransfer.pl?goto=http://www.signs.movies.com&name=114332&srvc=nws&context=283&guid=4AD5723D-C802-4310-A388-0B24E1A79689",linkTag.getLink());
assertEquals("Link Text","",linkTag.getLinkText());
Node [] containedNodes = new Node[10];
int i=0;
for (SimpleNodeIterator e = linkTag.children();e.hasMoreNodes();) {
containedNodes[i++] = e.nextNode();
}
assertEquals("There should be 5 contained nodes in the link tag",5,i);
assertTrue("First contained node should be an image tag",containedNodes[0] instanceof ImageTag);
ImageTag imageTag = (ImageTag)containedNodes[0];
assertEquals("Image Location","http://ad.abcnews.com/ad/sponsors/buena_vista_pictures/bvpi-ban0003.gif",imageTag.getImageURL());
assertEquals("Image Height","60",imageTag.getAttribute("HEIGHT"));
assertEquals("Image Width","468",imageTag.getAttribute("WIDTH"));
assertEquals("Image Border","0",imageTag.getAttribute("BORDER"));
assertEquals("Image Alt","See Signs in Theaters 8-2 - Starring Mel Gibson",imageTag.getAttribute("ALT"));
assertTrue("Second contained node should be Tag",containedNodes[1] instanceof Tag);
Tag tag1 = (Tag)containedNodes[1];
assertEquals("Tag Contents","font face=\"verdana,arial,helvetica\" SIZE=\"1\"",tag1.getText());
assertTrue("Third contained node should be Tag",containedNodes[2] instanceof Tag);
Tag tag2 = (Tag)containedNodes[2];
assertEquals("Tag Contents","b",tag2.getText());
assertTrue("Fourth contained node should be a Tag",containedNodes[3] instanceof Tag);
Tag tag = (Tag)containedNodes[3];
assertTrue("Fourth contained node should be an EndTag",tag.isEndTag ());
assertEquals("Fourth Tag contents","/b",tag.getText());
assertTrue("Fifth contained node should be a Tag",containedNodes[4] instanceof Tag);
tag = (Tag)containedNodes[4];
assertTrue("Fifth contained node should be an EndTag",tag.isEndTag ());
assertEquals("Fifth Tag contents","/font",tag.getText());
}
public void testBaseRefLink() throws ParserException {
createParser("<html>\n"+
"<head>\n"+
"<TITLE>test page</TITLE>\n"+
"<BASE HREF=\"http://www.abc.com/\">\n"+
"<a href=\"home.cfm\">Home</a>\n"+
"...\n"+
"</html>","http://transfer.go.com");
parseAndAssertNodeCount(1);
assertTrue("Node 1 should be a HTML tag", node[0] instanceof Html);
Html html = (Html)node[0];
assertTrue("Html tag should have 2 children", 2 == html.getChildCount ());
assertTrue("Html 2nd child should be HEAD tag", html.getChild (1) instanceof HeadTag);
HeadTag head = (HeadTag)html.getChild (1);
assertTrue("Head tag should have 7 children", 7 == head.getChildCount ());
assertTrue("Head 6th child should be a link tag", head.getChild (5) instanceof LinkTag);
LinkTag linkTag = (LinkTag)head.getChild (5);
assertEquals("Resolved Link","http://www.abc.com/home.cfm",linkTag.getLink());
assertEquals("Resolved Link Text","Home",linkTag.getLinkText());
}
/**
* This is a reproduction of bug 617228, reported by
* Stephen J. Harrington. When faced with a link like :
* <A
* HREF="/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://loc
* al
* host/Testing/Report
* 1.html">20020702 Report 1</A>
*
* parser is unable to handle the link correctly due to the greater than
* symbol being confused to be the end of the tag.
*/
public void testQueryLink() throws ParserException {
createParser("<A \n"+
"HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>","http://transfer.go.com");
parseAndAssertNodeCount(1);
assertTrue("Node 1 should be a link tag",node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag)node[0];
assertStringEquals("Resolved Link","http://transfer.go.com/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html",linkTag.getLink());
assertEquals("Resolved Link Text","20020702 Report 1",linkTag.getLinkText());
}
public void testNotMailtoLink() throws ParserException {
createParser("<A HREF=\"mailto.html\">not@for.real</A>","http://www.cj.com/");
parseAndAssertNodeCount(1);
assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag) node[0];
assertEquals("Link Plain Text", "not@for.real", linkTag.toPlainTextString());
assertTrue("Link is not a mail link", !linkTag.isMailLink());
}
public void testMailtoLink() throws ParserException {
createParser("<A HREF=\"mailto:this@is.real\">this@is.real</A>","http://www.cj.com/");
parseAndAssertNodeCount(1);
assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag) node[0];
assertEquals("Link Plain Text", "this@is.real", linkTag.toPlainTextString());
assertTrue("Link is a mail link", linkTag.isMailLink());
}
public void testJavascriptLink() throws ParserException {
createParser("<A HREF=\"javascript:alert('hello');\">say hello</A>","http://www.cj.com/");
parseAndAssertNodeCount(1);
assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag) node[0];
assertEquals("Link Plain Text", "say hello", linkTag.toPlainTextString());
assertTrue("Link is a Javascript command", linkTag.isJavascriptLink());
}
public void testNotJavascriptLink() throws ParserException {
createParser("<A HREF=\"javascript_not.html\">say hello</A>","http://www.cj.com/");
parseAndAssertNodeCount(1);
assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag) node[0];
assertEquals("Link Plain Text", "say hello", linkTag.toPlainTextString());
assertTrue("Link is not a Javascript command", !linkTag.isJavascriptLink());
}
public void testFTPLink() throws ParserException {
createParser("<A HREF=\"ftp://some.where.it\">my ftp</A>","http://www.cj.com/");
parseAndAssertNodeCount(1);
assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag) node[0];
assertEquals("Link Plain Text", "my ftp", linkTag.toPlainTextString());
assertTrue("Link is a FTP site", linkTag.isFTPLink());
}
public void testNotFTPLink() throws ParserException {
createParser("<A HREF=\"ftp.html\">my ftp</A>","http://www.cj.com/");
parseAndAssertNodeCount(1);
assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag) node[0];
assertEquals("Link Plain Text", "my ftp", linkTag.toPlainTextString());
assertTrue("Link is not a FTP site", !linkTag.isFTPLink());
}
public void testRelativeLinkNotHTMLBug() throws ParserException {
createParser("<A HREF=\"newpage.html\">New Page</A>","http://www.mysite.com/books/some.asp");
parseAndAssertNodeCount(1);
assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag) node[0];
assertEquals("Link","http://www.mysite.com/books/newpage.html",linkTag.getLink());
}
public void testBadImageInLinkBug() throws ParserException {
createParser("<a href=\"registration.asp?EventID=1272\"><img border=\"0\" src=\"\\images\\register.gif\"</a>","http://www.fedpage.com/Event.asp?EventID=1272");
parseAndAssertNodeCount(1);
assertTrue("Node should be a HTMLLinkTag", node[0] instanceof LinkTag);
LinkTag linkTag = (LinkTag) node[0];
// Get the image tag from the link
Node insideNodes [] = new Node[10];
int j =0 ;
for (SimpleNodeIterator e = linkTag.children();e.hasMoreNodes();) {
insideNodes[j++]= e.nextNode();
}
assertEquals("Number of contained internal nodes",1,j);
assertTrue(insideNodes[0] instanceof ImageTag);
ImageTag imageTag = (ImageTag)insideNodes[0];
assertEquals("Image Tag Location","http://www.fedpage.com/images\\register.gif",imageTag.getImageURL());
}
/**
* This is an attempt to reproduce bug 677874
* reported by James Moliere. A link tag of the form
* <code>
* <a class=rlbA href=/news/866201.asp?0sl=-
* 32>Shoe bomber handed life sentence</a>
* </code>
* is not parsed correctly. The second '=' sign in the link causes
* the parser to treat it as a seperate attribute
*/
public void testLinkContainsEqualTo() throws Exception {
createParser(
"<a class=rlbA href=/news/866201.asp?0sl=-" +
"32>Shoe bomber handed life sentence</a>"
);
parseAndAssertNodeCount(1);
assertType("node type",LinkTag.class,node[0]);
LinkTag linkTag = (LinkTag)node[0];
assertStringEquals(
"link text",
"Shoe bomber handed life sentence",
linkTag.getLinkText()
);
assertStringEquals(
"link url",
"/news/866201.asp?0sl=-32",
linkTag.getLink()
);
}
/**
* Bug report by Cory Seefurth
* @throws Exception
*/
public void _testLinkWithJSP() throws Exception {
createParser(
"<a href=\"<%=Application(\"sURL\")% " +
">/literature/index.htm\">Literature</a>"
);
parseAndAssertNodeCount(1);
assertType("should be link tag",LinkTag.class,node[0]);
LinkTag linkTag = (LinkTag)node[0];
assertStringEquals("expected link","<%=Application(\"sURL\")%>/literature/index.htm",linkTag.getLink());
}
public void testTagSymbolsInLinkText() throws Exception {
createParser(
"<a href=\"/cataclysm/Langy-AnEmpireReborn-Ch2.shtml#story\"" +
"><< An Empire Reborn: Chapter 2 <<</a>"
);
parseAndAssertNodeCount(1);
assertType("node",LinkTag.class, node[0]);
LinkTag linkTag = (LinkTag)node[0];
assertEquals("link text","<< An Empire Reborn: Chapter 2 <<",linkTag.getLinkText());
}
/**
* See bug #813838 links not parsed correctly
*/
public void testPlainText() throws Exception
{
String html = "<a href=Cities/><b>Cities</b></a>";
createParser (html);
parseAndAssertNodeCount (1);
assertType("node", LinkTag.class, node[0]);
LinkTag linkTag = (LinkTag)node[0];
assertEquals ("plain text", "Cities", linkTag.toPlainTextString ());
}
/**
* See bug #982175 False Positives on ® entity
*/
public void testCharacterReferenceInLink() throws Exception
{
String html = "<a href=\"http://www.someplace.com/somepage.html?®ion=us\">Search By Region</a>" +
"<a href=\"http://www.someplace.com/somepage.html?®ion=&destination=184\">Search by Destination</a>";
createParser (html);
parseAndAssertNodeCount (2);
assertType("node", LinkTag.class, node[0]);
LinkTag linkTag = (LinkTag)node[0];
assertEquals ("link", "http://www.someplace.com/somepage.html?®ion=us", linkTag.getLink());
assertType("node", LinkTag.class, node[1]);
linkTag = (LinkTag)node[1];
assertEquals ("link", "http://www.someplace.com/somepage.html?®ion=&destination=184", linkTag.getLink());
}
}