// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Derrick Oswald // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/TagTests.java,v $ // $Author: derrickoswald $ // $Date: 2006/06/04 19:17:21 $ // $Revision: 1.15 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.tests.lexerTests; import org.htmlparser.Node; import org.htmlparser.Parser; import org.htmlparser.PrototypicalNodeFactory; import org.htmlparser.Tag; import org.htmlparser.filters.NodeClassFilter; import org.htmlparser.tags.LinkTag; import org.htmlparser.tags.MetaTag; import org.htmlparser.tests.ParserTestCase; import org.htmlparser.util.ParserException; public class TagTests extends ParserTestCase { static { System.setProperty ("org.htmlparser.tests.lexerTests.TagTests", "TagTests"); } private static final String TEST_HTML = "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.0 Transitional//EN\">" + "<!-- Server: sf-web2 -->\n" + "<html lang=\"en\">\n" + " <head><link rel=\"stylesheet\" type=\"text/css\" href=\"http://sourceforge.net/cssdef.php\">\n" + " <meta http-equiv=\"Content-Type\" content=\"text/html; charset=iso-8859-1\">\n" + " <TITLE>SourceForge.net: Modify: 711073 - HTMLTagParser not threadsafe as a static variable in Tag</TITLE>\n" + " <SCRIPT language=\"JavaScript\" type=\"text/javascript\">\n" + " <!--\n" + " function help_window(helpurl) {\n" + " HelpWin = window.open( 'http://sourceforge.net' + helpurl,'HelpWindow','scrollbars=yes,resizable=yes,toolbar=no,height=400,width=400');\n" + " }\n" + " // -->\n" + " </SCRIPT>\n" + " <link rel=\"SHORTCUT ICON\" href=\"/images/favicon.ico\">\n" + "<!-- This is temp javascript for the jump button. If we could actually have a jump script on the server side that would be ideal -->\n" + "<script language=\"JavaScript\" type=\"text/javascript\">\n" + "<!--\n" + " function jump(targ,selObj,restore){ //v3.0\n" + " if (selObj.options[selObj.selectedIndex].value)\n" + " eval(targ+\".location='\"+selObj.options[selObj.selectedIndex].value+\"'\");\n" + " if (restore) selObj.selectedIndex=0;\n" + " }\n" + " //-->\n" + "</script>\n" + "<a href=\"http://normallink.com/sometext.html\">\n" + "<style type=\"text/css\">\n" + "<!--\n" + "A:link { text-decoration:none }\n" + "A:visited { text-decoration:none }\n" + "A:active { text-decoration:none }\n" + "A:hover { text-decoration:underline; color:#0066FF; }\n" + "-->\n" + "</style>\n" + "</head>\n" + "<body bgcolor=\"#FFFFFF\" text=\"#000000\" leftmargin=\"0\" topmargin=\"0\" marginwidth=\"0\" marginheight=\"0\" link=\"#003399\" vlink=\"#003399\" alink=\"#003399\">\n"; private int testProgress; public TagTests (String name) { super(name); } public void testTagWithQuotes() throws Exception { String testHtml = "<img src=\"http://g-images.amazon.com/images/G/01/merchants/logos/marshall-fields-logo-20.gif\" width=87 height=20 border=0 alt=\"Marshall Field's\">"; createParser(testHtml); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("alt","Marshall Field's",tag.getAttribute("ALT")); assertStringEquals( "html", testHtml, tag.toHtml() ); } public void testEmptyTag() throws Exception { String html = "<custom/>"; createParser(html); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("tag name","CUSTOM",tag.getTagName()); assertTrue("empty tag",tag.isEmptyXmlTag()); assertStringEquals( "html", html, tag.toHtml() ); } public void testTagWithCloseTagSymbolInAttribute() throws ParserException { createParser("<tag att=\"a>b\">"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("attribute","a>b",tag.getAttribute("att")); } public void testTagWithOpenTagSymbolInAttribute() throws ParserException { createParser("<tag att=\"a<b\">"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("attribute","a<b",tag.getAttribute("att")); } public void testTagWithSingleQuote() throws ParserException { String html = "<tag att=\'a<b\'>"; createParser(html); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount(1); assertType("should be Tag",Tag.class,node[0]); Tag tag = (Tag)node[0]; assertStringEquals("html",html,tag.toHtml()); assertStringEquals("attribute","a<b",tag.getAttribute("att")); } /** * The following multi line test cases are from * bug #725749 Parser does not handle < and > in multi-line attributes * submitted by Joe Robins (zorblak) */ public void testMultiLine1 () throws ParserException { String html = "<meta name=\"foo\" content=\"foo<bar>\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foo<bar>", attribute2); } public void testMultiLine2 () throws ParserException { String html = "<meta name=\"foo\" content=\"foo<bar\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foo<bar", attribute2); } public void testMultiLine3 () throws ParserException { String html = "<meta name=\"foo\" content=\"foobar>\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foobar>", attribute2); } public void testMultiLine4 () throws ParserException { String html = "<meta name=\"foo\" content=\"foo\nbar>\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foo\nbar>", attribute2); } /** * Test multiline tag like attribute. * See feature request #725749 Handle < and > in multi-line attributes. */ public void testMultiLine5 () throws ParserException { // <meta name="foo" content="<foo> // bar"> String html = "<meta name=\"foo\" content=\"<foo>\nbar\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","<foo>\nbar", attribute2); } /** * Test multiline broken tag like attribute. * See feature request #725749 Handle < and > in multi-line attributes. */ public void testMultiLine6 () throws ParserException { // <meta name="foo" content="foo> // bar"> String html = "<meta name=\"foo\" content=\"foo>\nbar\">"; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html, tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","foo>\nbar", attribute2); } /** * Test multiline split tag like attribute. * See feature request #725749 Handle < and > in multi-line attributes. */ public void testMultiLine7 () throws ParserException { // <meta name="foo" content="<foo // bar"> String html = "<meta name=\"foo\" content=\"<foo\nbar\""; createParser(html); parseAndAssertNodeCount (1); assertType ("should be MetaTag", MetaTag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html",html + ">", tag.toHtml ()); String attribute1 = tag.getAttribute ("NAME"); assertStringEquals ("attribute 1","foo", attribute1); String attribute2 = tag.getAttribute ("CONTENT"); assertStringEquals ("attribute 2","<foo\nbar", attribute2); } /** * End of multi line test cases. */ /** * Test multiple threads running against the parser. * See feature request #736144 Handle multi-threaded operation. */ public void testThreadSafety() throws Exception { createParser("<html></html>"); parser.setNodeFactory (new PrototypicalNodeFactory (true)); String testHtml1 = "<a HREF=\"/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html\">20020702 Report 1</A>" + TEST_HTML; String testHtml2 = "<a href=\"http://normallink.com/sometext.html\">" + TEST_HTML; ParsingThread parsingThread [] = new ParsingThread[100]; testProgress = 0; for (int i=0;i<parsingThread.length;i++) { if (i<parsingThread.length/2) parsingThread[i] = new ParsingThread(i,testHtml1,parsingThread.length); else parsingThread[i] = new ParsingThread(i,testHtml2,parsingThread.length); Thread thread = new Thread(parsingThread[i]); thread.start(); } int completionValue = computeCompletionValue(parsingThread.length); do { try { Thread.sleep(500); } catch (InterruptedException e) { } } while (testProgress!=completionValue); for (int i=0;i<parsingThread.length;i++) { if (!parsingThread[i].passed()) { assertNotNull("Thread "+i+" link 1",parsingThread[i].getLink1()); assertNotNull("Thread "+i+" link 2",parsingThread[i].getLink2()); if (i<parsingThread.length/2) { assertStringEquals( "Thread "+i+", link 1:", "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html", parsingThread[i].getLink1().getLink() ); assertStringEquals( "Thread "+i+", link 2:", "http://normallink.com/sometext.html", parsingThread[i].getLink2().getLink() ); } else { assertStringEquals( "Thread "+i+", link 1:", "http://normallink.com/sometext.html", parsingThread[i].getLink1().getLink() ); assertNotNull("Thread "+i+" link 2",parsingThread[i].getLink2()); assertStringEquals( "Thread "+i+", link 2:", "/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html", parsingThread[i].getLink2().getLink() ); } } } } private int computeCompletionValue(int numThreads) { return numThreads * (numThreads - 1) / 2; } class ParsingThread implements Runnable { Parser mParser; int mId; LinkTag mLink1; LinkTag mLink2; boolean mResult; int mMax; ParsingThread(int id, String testHtml, int max) { mId = id; mMax = max; mParser = Parser.createParser(testHtml, null); } public void run() { try { mResult = false; Node linkTag [] = mParser.extractAllNodesThatMatch (new NodeClassFilter (LinkTag.class)).toNodeArray (); mLink1 = (LinkTag)linkTag[0]; mLink2 = (LinkTag)linkTag[1]; if (mId < mMax / 2) { if (mLink1.getLink().equals("/cgi-bin/view_search?query_text=postdate>20020701&txt_clr=White&bg_clr=Red&url=http://localhost/Testing/Report1.html") && mLink2.getLink().equals("http://normallink.com/sometext.html")) mResult = true; } else { if (mLink1.getLink().equals("http://normallink.com/sometext.html") && mLink2.getLink().equals("http://normallink.com/sometext.html")) mResult = true; } } catch (ParserException e) { System.err.println("Parser Exception"); e.printStackTrace(); } finally { testProgress += mId; } } public LinkTag getLink1() { return (mLink1); } public LinkTag getLink2() { return (mLink2); } public boolean passed() { return (mResult); } } /** * Test the toHTML method for a standalone attribute. */ public void testStandAloneToHTML () throws ParserException { String html = "<input disabled>"; createParser(html); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html", html, tag.toHtml ()); } /** * Test the toHTML method for a missing value attribute. */ public void testMissingValueToHTML () throws ParserException { String html = "<input disabled=>"; createParser(html); parser.setNodeFactory (new PrototypicalNodeFactory (true)); parseAndAssertNodeCount (1); assertType ("should be Tag", Tag.class, node[0]); Tag tag = (Tag)node[0]; assertStringEquals ("html", html, tag.toHtml ()); } }