// HTMLParser Library $Name: v1_6 $ - A java-based parser for HTML
// http://sourceforge.org/projects/htmlparser
// Copyright (C) 2004 Derrick Oswald
//
// Revision Control Information
//
// $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/tests/lexerTests/PageTests.java,v $
// $Author: derrickoswald $
// $Date: 2006/04/07 00:58:19 $
// $Revision: 1.20 $
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
package org.htmlparser.tests.lexerTests;
import java.io.IOException;
import java.net.URL;
import java.net.URLConnection;
import org.htmlparser.lexer.Page;
import org.htmlparser.tests.ParserTestCase;
import org.htmlparser.util.ParserException;
/**
 * Tests for the lexer {@link Page} class.
 * Covers construction from <code>null</code>, live and unreachable
 * connections, and resolution of relative links against a fixed base URI
 * using the examples from Appendix C of RFC 2396.
 * Tests are discovered by name (public <code>testXxx</code> methods), so the
 * private helper in this class is deliberately not named <code>test...</code>.
 */
public class PageTests extends ParserTestCase
{
    static
    {
        System.setProperty ("org.htmlparser.tests.lexerTests.PageTests", "PageTests");
    }

    /**
     * The default charset.
     * This should be <code>ISO-8859-1</code>,
     * see RFC 2616 (http://www.ietf.org/rfc/rfc2616.txt?number=2616) section 3.7.1
     * Another alias is "8859_1".
     */
    public static final String DEFAULT_CHARSET = "ISO-8859-1";

    /**
     * Base URI for absolute URL tests.
     */
    static final String BASEURI = "http://a/b/c/d;p?q";

    /**
     * Page for absolute URL tests.
     * Shared by every resolution test; its base URL is set to {@link #BASEURI}.
     */
    public static Page mPage;

    static
    {
        mPage = new Page ();
        mPage.setBaseUrl (BASEURI);
    }

    /**
     * Test the third level page class.
     * @param name The name of the test case, passed to the superclass.
     */
    public PageTests (String name)
    {
        super (name);
    }

    /**
     * Resolve a link against the shared page and compare with the expected URL.
     * The failure message is the test name followed by " failed".
     * @param test The name of the calling test, used in the failure message.
     * @param expected The absolute URL the link should resolve to.
     * @param link The (possibly relative) link to resolve.
     * @exception ParserException Declared so callers keep their original
     * signature whether or not resolution can raise it.
     */
    private void checkResolution (String test, String expected, String link)
        throws ParserException
    {
        assertEquals (test + " failed", expected, mPage.getAbsoluteURL (link));
    }

    /**
     * Test initialization with a null value.
     * Both the connection and the string constructors must reject
     * <code>null</code> with an <code>IllegalArgumentException</code>.
     */
    public void testNull () throws ParserException
    {
        try
        {
            new Page ((URLConnection)null);
            fail ("null value in constructor");
        }
        catch (IllegalArgumentException iae)
        {
            // expected outcome
        }

        try
        {
            new Page ((String)null);
            fail ("null value in constructor");
        }
        catch (IllegalArgumentException iae)
        {
            // expected outcome
        }
    }

    /**
     * Test initialization with a real value.
     * Passes as long as constructing the page raises no exception.
     * NOTE(review): this needs network access to the host below.
     */
    public void testURLConnection () throws ParserException, IOException
    {
        URL url;

        url = new URL ("http://www.ibm.com/jp/");
        new Page (url.openConnection ());
    }

    /**
     * Test initialization with a nonexistent URL.
     * Constructing a page on a connection to a host that cannot be resolved
     * must raise a <code>ParserException</code>; the test fails if the
     * constructor returns normally.
     * The host uses the reserved <code>.invalid</code> top level domain so the
     * outcome does not depend on a real-world domain staying unregistered.
     */
    public void testBadURLConnection () throws IOException
    {
        URL url;

        url = new URL ("http://www.bigbogosity.invalid/");
        try
        {
            new Page (url.openConnection ());
            fail ("connection to a nonexistent host should raise a ParserException");
        }
        catch (ParserException pe)
        {
            // expected response
        }
    }

    //
    // Tests from Appendix C Examples of Resolving Relative URI References
    // RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax
    // T. Berners-Lee et al.
    // http://www.ietf.org/rfc/rfc2396.txt
    // Within an object with a well-defined base URI of
    //    http://a/b/c/d;p?q
    // the relative URI would be resolved as follows:
    // C.1. Normal Examples
    //    g:h           =  g:h
    //    g             =  http://a/b/c/g
    //    ./g           =  http://a/b/c/g
    //    g/            =  http://a/b/c/g/
    //    /g            =  http://a/g
    //    //g           =  http://g
    //    ?y            =  http://a/b/c/?y
    //    g?y           =  http://a/b/c/g?y
    //    #s            =  (current document)#s
    //    g#s           =  http://a/b/c/g#s
    //    g?y#s         =  http://a/b/c/g?y#s
    //    ;x            =  http://a/b/c/;x
    //    g;x           =  http://a/b/c/g;x
    //    g;x?y#s       =  http://a/b/c/g;x?y#s
    //    .             =  http://a/b/c/
    //    ./            =  http://a/b/c/
    //    ..            =  http://a/b/
    //    ../           =  http://a/b/
    //    ../g          =  http://a/b/g
    //    ../..         =  http://a/
    //    ../../        =  http://a/
    //    ../../g       =  http://a/g
    //
    // NOTE(review): the test numbers below do not line up one-to-one with the
    // list above. test1 uses "https:h" in place of the RFC's "g:h" (presumably
    // because "g" is not a scheme the URL machinery recognizes - confirm),
    // test9 and test10 repeat test1, and the "#s" and "../../" examples have
    // no test of their own.

    public void test1 () throws ParserException
    {
        checkResolution ("test1", "https:h", "https:h");
    }

    public void test2 () throws ParserException
    {
        checkResolution ("test2", "http://a/b/c/g", "g");
    }

    public void test3 () throws ParserException
    {
        checkResolution ("test3", "http://a/b/c/g", "./g");
    }

    public void test4 () throws ParserException
    {
        checkResolution ("test4", "http://a/b/c/g/", "g/");
    }

    public void test5 () throws ParserException
    {
        checkResolution ("test5", "http://a/g", "/g");
    }

    public void test6 () throws ParserException
    {
        checkResolution ("test6", "http://g", "//g");
    }

    /**
     * A link consisting only of a query is resolved in two ways: the call
     * labelled strict expects the RFC 2396 result, while the default call
     * expects the last path segment of the base to be kept.
     */
    public void test7 () throws ParserException
    {
        assertEquals ("test7 strict failed", "http://a/b/c/?y", mPage.getAbsoluteURL ("?y", true));
        assertEquals ("test7 non-strict failed", "http://a/b/c/d;p?y", mPage.getAbsoluteURL ("?y"));
    }

    public void test8 () throws ParserException
    {
        checkResolution ("test8", "http://a/b/c/g?y", "g?y");
    }

    // Same input and expectation as test1; see the note above.
    public void test9 () throws ParserException
    {
        checkResolution ("test9", "https:h", "https:h");
    }

    // Same input and expectation as test1; see the note above.
    public void test10 () throws ParserException
    {
        checkResolution ("test10", "https:h", "https:h");
    }

    //    #s            =  (current document)#s
    public void test11 () throws ParserException
    {
        checkResolution ("test11", "http://a/b/c/g#s", "g#s");
    }

    public void test12 () throws ParserException
    {
        checkResolution ("test12", "http://a/b/c/g?y#s", "g?y#s");
    }

    public void test13 () throws ParserException
    {
        checkResolution ("test13", "http://a/b/c/;x", ";x");
    }

    public void test14 () throws ParserException
    {
        checkResolution ("test14", "http://a/b/c/g;x", "g;x");
    }

    public void test15 () throws ParserException
    {
        checkResolution ("test15", "http://a/b/c/g;x?y#s", "g;x?y#s");
    }

    public void test16 () throws ParserException
    {
        checkResolution ("test16", "http://a/b/c/", ".");
    }

    public void test17 () throws ParserException
    {
        checkResolution ("test17", "http://a/b/c/", "./");
    }

    public void test18 () throws ParserException
    {
        checkResolution ("test18", "http://a/b/", "..");
    }

    public void test19 () throws ParserException
    {
        checkResolution ("test19", "http://a/b/", "../");
    }

    public void test20 () throws ParserException
    {
        checkResolution ("test20", "http://a/b/g", "../g");
    }

    public void test21 () throws ParserException
    {
        checkResolution ("test21", "http://a/", "../..");
    }

    public void test22 () throws ParserException
    {
        checkResolution ("test22", "http://a/g", "../../g");
    }

    // C.2. Abnormal Examples
    // Although the following abnormal examples are unlikely to occur in
    // normal practice, all URI parsers should be capable of resolving them
    // consistently. Each example uses the same base as above.
    //
    // An empty reference refers to the start of the current document.
    //
    //    <>            =  (current document)
    //
    // Parsers must be careful in handling the case where there are more
    // relative path ".." segments than there are hierarchical levels in the
    // base URI's path. Note that the ".." syntax cannot be used to change
    // the authority component of a URI.
    //
    //    ../../../g    =  http://a/../g
    //    ../../../../g =  http://a/../../g
    //
    // In practice, some implementations strip leading relative symbolic
    // elements (".", "..") after applying a relative URI calculation, based
    // on the theory that compensating for obvious author errors is better
    // than allowing the request to fail. Thus, the above two references
    // will be interpreted as "http://a/g" by some implementations.
    //
    // Similarly, parsers must avoid treating "." and ".." as special when
    // they are not complete components of a relative path.
    //
    //    /./g          =  http://a/./g
    //    /../g         =  http://a/../g
    //    g.            =  http://a/b/c/g.
    //    .g            =  http://a/b/c/.g
    //    g..           =  http://a/b/c/g..
    //    ..g           =  http://a/b/c/..g
    //
    // Less likely are cases where the relative URI uses unnecessary or
    // nonsensical forms of the "." and ".." complete path segments.
    //
    //    ./../g        =  http://a/b/g
    //    ./g/.         =  http://a/b/c/g/
    //    g/./h         =  http://a/b/c/g/h
    //    g/../h        =  http://a/b/c/h
    //    g;x=1/./y     =  http://a/b/c/g;x=1/y
    //    g;x=1/../y    =  http://a/b/c/y
    //
    // All client applications remove the query component from the base URI
    // before resolving relative URI. However, some applications fail to
    // separate the reference's query and/or fragment components from a
    // relative path before merging it with the base path. This error is
    // rarely noticed, since typical usage of a fragment never includes the
    // hierarchy ("/") character, and the query component is not normally
    // used within relative references.
    //
    //    g?y/./x       =  http://a/b/c/g?y/./x
    //    g?y/../x      =  http://a/b/c/g?y/../x
    //    g#s/./x       =  http://a/b/c/g#s/./x
    //    g#s/../x      =  http://a/b/c/g#s/../x
    //
    // Some parsers allow the scheme name to be present in a relative URI if
    // it is the same as the base URI scheme. This is considered to be a
    // loophole in prior specifications of partial URI [RFC1630]. Its use
    // should be avoided.
    //
    //    http:g        =  http:g           ; for validating parsers
    //                  |  http://a/b/c/g   ; for backwards compatibility

    // Strict RFC 2396 variants of test23 and test24, kept for reference.
    // They are disabled because the active tests below expect the excess
    // ".." segments to be stripped instead.
    //    public void test23 () throws HTMLParserException
    //    {
    //        assertEquals ("test23 failed", "http://a/../g", mPage.getAbsoluteURL ("../../../g"));
    //    }
    //    public void test24 () throws HTMLParserException
    //    {
    //        assertEquals ("test24 failed", "http://a/../../g", mPage.getAbsoluteURL ("../../../../g"));
    //    }

    public void test23 () throws ParserException
    {
        checkResolution ("test23", "http://a/g", "../../../g");
    }

    public void test24 () throws ParserException
    {
        checkResolution ("test24", "http://a/g", "../../../../g");
    }

    public void test25 () throws ParserException
    {
        checkResolution ("test25", "http://a/./g", "/./g");
    }

    public void test26 () throws ParserException
    {
        checkResolution ("test26", "http://a/../g", "/../g");
    }

    public void test27 () throws ParserException
    {
        checkResolution ("test27", "http://a/b/c/g.", "g.");
    }

    public void test28 () throws ParserException
    {
        checkResolution ("test28", "http://a/b/c/.g", ".g");
    }

    public void test29 () throws ParserException
    {
        checkResolution ("test29", "http://a/b/c/g..", "g..");
    }

    public void test30 () throws ParserException
    {
        checkResolution ("test30", "http://a/b/c/..g", "..g");
    }

    public void test31 () throws ParserException
    {
        checkResolution ("test31", "http://a/b/g", "./../g");
    }

    public void test32 () throws ParserException
    {
        checkResolution ("test32", "http://a/b/c/g/", "./g/.");
    }

    public void test33 () throws ParserException
    {
        checkResolution ("test33", "http://a/b/c/g/h", "g/./h");
    }

    public void test34 () throws ParserException
    {
        checkResolution ("test34", "http://a/b/c/h", "g/../h");
    }

    public void test35 () throws ParserException
    {
        checkResolution ("test35", "http://a/b/c/g;x=1/y", "g;x=1/./y");
    }

    public void test36 () throws ParserException
    {
        checkResolution ("test36", "http://a/b/c/y", "g;x=1/../y");
    }

    public void test37 () throws ParserException
    {
        checkResolution ("test37", "http://a/b/c/g?y/./x", "g?y/./x");
    }

    public void test38 () throws ParserException
    {
        checkResolution ("test38", "http://a/b/c/g?y/../x", "g?y/../x");
    }

    public void test39 () throws ParserException
    {
        checkResolution ("test39", "http://a/b/c/g#s/./x", "g#s/./x");
    }

    public void test40 () throws ParserException
    {
        checkResolution ("test40", "http://a/b/c/g#s/../x", "g#s/../x");
    }

    // Validating-parser variant of test41, kept for reference. It is disabled
    // because the active test below expects the backwards compatible result.
    //    public void test41 () throws HTMLParserException
    //    {
    //        assertEquals ("test41 failed", "http:g", mPage.getAbsoluteURL ("http:g"));
    //    }

    public void test41 () throws ParserException
    {
        checkResolution ("test41", "http://a/b/c/g", "http:g");
    }
}