/*********************************************************************** * * $CVSHeader$ * * This file is part of WebScarab, an Open Web Application Security * Project utility. For details, please see http://www.owasp.org/ * * Copyright (c) 2002 - 2004 Rogan Dawes * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version 2 * of the License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. * * Getting Source * ============== * * Source for this application is maintained at Sourceforge.net, a * repository for free software projects. * * For details, please see http://www.sourceforge.net/projects/owasp * */ /* * Created on Jul 13, 2004 * */ package org.owasp.webscarab.model; import java.util.ArrayList; import java.net.MalformedURLException; /** * Represents a http or https url * @author knoppix */ public class HttpUrl implements Comparable<HttpUrl> { private static final HttpUrl[] nullPath = new HttpUrl[0]; private String _scheme; private String _host; private int _port; private String _path; private String _fragment = null; private String _query = null; private int _hashcode; /** * creates an HttpUrl by parsing the supplied string * @param url a String representation of the URL * @throws MalformedURLException if the url is not well-formed */ public HttpUrl(String url) throws MalformedURLException { if (url.indexOf('\n') > -1 || url.indexOf(' ') > -1) throw new MalformedURLException("Illegal characters in url: " + url); parseUrl(url); _hashcode = this.toString().hashCode(); } /** * Creates a new url, basing the supplied relative path against the supplied HttpUrl * @param url the existing base url * @param relative the relative path * @throws MalformedURLException if the url is malformed */ public HttpUrl(HttpUrl url, String relative) throws MalformedURLException { if (relative.indexOf('\n') > -1 || relative.indexOf(' ') > -1) throw new MalformedURLException("Illegal characters in relative : " + relative); // relative could be a fully qualified URL if (url == null || relative.startsWith("http://") || relative.startsWith("https://")) { parseUrl(relative); _hashcode = this.toString().hashCode(); return; } _scheme = url.getScheme(); _host = url.getHost(); _port = url.getPort(); if (relative.startsWith("/")) { // an absolute path _path = relative; } else { _path = relativePath(url.getPath(), relative); } splitFragQuery(); _path = _path.replaceAll(" ", "%20"); if (_query != null) _query = _query.replace(' ', '+'); if (_fragment != null) _fragment = _fragment.replaceAll(" ", "%20"); _hashcode = this.toString().hashCode(); } private void parseUrl(String url) throws MalformedURLException { int pos = url.indexOf("://"); if (pos == -1) throw new MalformedURLException("An URL must have a scheme!"); _scheme = url.substring(0, pos).toLowerCase(); if (!_scheme.equals("http") && !_scheme.equals("https")) throw new MalformedURLException("This class only supports HTTP or HTTPS schemes: '"+_scheme+"'"); int prev = pos + 3; pos = url.indexOf("/", prev); if (pos == -1) pos = url.length(); String hp = url.substring(prev, pos); int colon = hp.indexOf(":"); if (colon == -1) { _host = hp; if (_scheme.equals("http")) { _port = 80; } else if (_scheme.equals("https")) { _port = 443; } } else { try { _host = hp.substring(0, colon); _port = Integer.parseInt(hp.substring(colon + 1)); } catch (NumberFormatException nfe) { throw new MalformedURLException("Error parsing the port number: " + nfe); } } if ("".equals(_host)) throw new MalformedURLException("Host cannot be empty"); if (_port < 1 || _port > 65535) throw new MalformedURLException("Port out of range: " + _port); if (pos == url.length()) { _path = "/"; } else { _path = url.substring(pos); splitFragQuery(); } } private String relativePath(String oldPath, String relative) { if (!oldPath.endsWith("/")) { // trim the file part oldPath = parentPath(oldPath); } while (relative.startsWith("../") || relative.startsWith("./")) { if (relative.startsWith("./")) { // trim meaningless self-ref relative = relative.substring(2); } else { relative = relative.substring(3); if (oldPath.length()>1) { oldPath = parentPath(oldPath); } } } return oldPath + relative; } private void splitFragQuery() { // Anchors are meaningless to us in this context int hash = _path.indexOf("#"); if (hash > -1) _path = _path.substring(0, hash); int ques = _path.indexOf("?"); if (ques > -1) { _query = _path.substring(ques + 1); _path = _path.substring(0, ques); } int semi = _path.indexOf(";"); if (semi > -1) { _fragment = _path.substring(semi + 1); _path = _path.substring(0, semi); } } /** * returns the schem of the url * @return Returns the scheme. */ public String getScheme() { return _scheme; } /** * returns the host part of the url * @return Returns the host. */ public String getHost() { return _host; } /** * returns the port * @return Returns the port. */ public int getPort() { return _port; } /** * returns the "file path" of the URL, excluding any fragments or queries * @return Returns the path. */ public String getPath() { return _path; } /** * returns the fragment part of the url, or null if none exists * @return Returns the fragment. */ public String getFragment() { return _fragment; } /** * returns the query part of the url, or null if none exists * @return Returns the query. */ public String getQuery() { return _query; } /** * returns a string representation of the url, excluding any fragments * or query parts * @return the string representation of the URL, excluding any fragments or query */ public String getSHPP() { StringBuffer buff = new StringBuffer(); buff.append(_scheme).append("://"); buff.append(_host).append(":").append(_port); buff.append(_path); return buff.toString(); } /** * returns a string representation of the parameters passed to the url * @return the string representation of the parameters */ public String getParameters() { if (_fragment == null && _query == null) return null; StringBuffer buff = new StringBuffer(); if (_fragment != null) buff.append(";").append(_fragment); if (_query != null) buff.append("?").append(_query); return buff.toString(); } private String parentPath(String path) { int secondlast = path.lastIndexOf("/",path.length()-2); return path.substring(0,secondlast+1); } /** * returns the parent of this Url. * @return the parent of this Url, or null if this is a top-level Url */ public HttpUrl getParentUrl() { if (_scheme.equals("")) throw new NullPointerException("Should not be trying to get the parent of NULL URL"); try { if (_fragment != null || _query != null) { return new HttpUrl(getSHPP()); } else if (_path != null && _path.length() > 1) { String url = getSHPP(); int secondLast = url.lastIndexOf("/",url.length()-2); return new HttpUrl(url.substring(0, secondLast+1)); } else { return null; } } catch (MalformedURLException mue) { System.err.println("Malformed URL calculating parent path of " + toString()); return null; } } /** * returns an array containing the Url hierarchy, including this Url * @return an array of the Url hierarchy */ public HttpUrl[] getUrlHierarchy() { ArrayList<HttpUrl> list = new ArrayList<HttpUrl>(); list.add(this); HttpUrl url = getParentUrl(); while (url != null) { list.add(0, url); url = url.getParentUrl(); } return list.toArray(nullPath); } /** * returns a string representation of the URL, in fully qualified form * @return the fully qualifed url */ public String toString() { if (_scheme.equals("")) return "NULL URL"; StringBuffer buff = new StringBuffer(); buff.append(_scheme).append("://"); buff.append(_host).append(":").append(_port); return direct(buff).toString(); } /** * appends the /path;fragment?query part of the URL to the supplied buffer * @param buff a StrinBuffer to append the URL to * @return the buffer */ public StringBuffer direct(StringBuffer buff) { buff.append(_path); if (_fragment != null) buff.append(";").append(_fragment); if (_query != null) buff.append("?").append(_query); return buff; } /** * returns only the /path;fragment?query part of the URL * @return the /path;fragment?query part of the URL */ public String direct() { return direct(new StringBuffer()).toString(); } /** * Indicates whether some other object is "equal to" this one. * <p> * The <code>equals</code> method implements an equivalence relation * on non-null object references: * <ul> * <li>It is <i>reflexive</i>: for any non-null reference value * <code>x</code>, <code>x.equals(x)</code> should return * <code>true</code>. * <li>It is <i>symmetric</i>: for any non-null reference values * <code>x</code> and <code>y</code>, <code>x.equals(y)</code> * should return <code>true</code> if and only if * <code>y.equals(x)</code> returns <code>true</code>. * <li>It is <i>transitive</i>: for any non-null reference values * <code>x</code>, <code>y</code>, and <code>z</code>, if * <code>x.equals(y)</code> returns <code>true</code> and * <code>y.equals(z)</code> returns <code>true</code>, then * <code>x.equals(z)</code> should return <code>true</code>. * <li>It is <i>consistent</i>: for any non-null reference values * <code>x</code> and <code>y</code>, multiple invocations of * <tt>x.equals(y)</tt> consistently return <code>true</code> * or consistently return <code>false</code>, provided no * information used in <code>equals</code> comparisons on the * objects is modified. * <li>For any non-null reference value <code>x</code>, * <code>x.equals(null)</code> should return <code>false</code>. * </ul> * <p> * The <tt>equals</tt> method for class <code>Object</code> implements * the most discriminating possible equivalence relation on objects; * that is, for any non-null reference values <code>x</code> and * <code>y</code>, this method returns <code>true</code> if and only * if <code>x</code> and <code>y</code> refer to the same object * (<code>x == y</code> has the value <code>true</code>). * <p> * Note that it is generally necessary to override the <tt>hashCode</tt> * method whenever this method is overridden, so as to maintain the * general contract for the <tt>hashCode</tt> method, which states * that equal objects must have equal hash codes. * * @param o the reference object with which to compare. * @return <code>true</code> if this object is the same as the obj * argument; <code>false</code> otherwise. * @see #hashCode() * @see java.util.Hashtable */ public boolean equals(Object o) { if (! (o instanceof HttpUrl)) return false; if (_hashcode != o.hashCode()) return false; return compareTo((HttpUrl) o) == 0; } /** * Compares this object with the specified object for order. Returns a * negative integer, zero, or a positive integer as this object is less * than, equal to, or greater than the specified object.<p> * * In the foregoing description, the notation * <tt>sgn(</tt><i>expression</i><tt>)</tt> designates the mathematical * <i>signum</i> function, which is defined to return one of <tt>-1</tt>, * <tt>0</tt>, or <tt>1</tt> according to whether the value of <i>expression</i> * is negative, zero or positive. * * The implementor must ensure <tt>sgn(x.compareTo(y)) == * -sgn(y.compareTo(x))</tt> for all <tt>x</tt> and <tt>y</tt>. (This * implies that <tt>x.compareTo(y)</tt> must throw an exception iff * <tt>y.compareTo(x)</tt> throws an exception.)<p> * * The implementor must also ensure that the relation is transitive: * <tt>(x.compareTo(y)>0 && y.compareTo(z)>0)</tt> implies * <tt>x.compareTo(z)>0</tt>.<p> * * Finally, the implementer must ensure that <tt>x.compareTo(y)==0</tt> * implies that <tt>sgn(x.compareTo(z)) == sgn(y.compareTo(z))</tt>, for * all <tt>z</tt>.<p> * * It is strongly recommended, but <i>not</i> strictly required that * <tt>(x.compareTo(y)==0) == (x.equals(y))</tt>. Generally speaking, any * class that implements the <tt>Comparable</tt> interface and violates * this condition should clearly indicate this fact. The recommended * language is "Note: this class has a natural ordering that is * inconsistent with equals." * @param o the Object to be compared. * @return a negative integer, zero, or a positive integer as this object * is less than, equal to, or greater than the specified object. */ public int compareTo(HttpUrl url) { if (url == null) return 1; int result; result = _scheme.compareTo(url.getScheme()); if (result != 0) return result; result = _host.compareTo(url.getHost()); if (result != 0) return result; result = _port - url.getPort(); if (result != 0) return result; result = _path.compareTo(url.getPath()); if (result != 0) return result; if (_fragment == null) { if (url.getFragment() == null) { result = 0; } else { result = -1; } } else { if (url.getFragment() == null) { result = 1; } else { result = _fragment.compareTo(url.getFragment()); } } if (result != 0) return result; if (_query == null) { if (url.getQuery() == null) { result = 0; } else { result = -1; } } else { if (url.getQuery() == null) { result = 1; } else { result = _query.compareTo(url.getQuery()); } } return result; } /** * Returns a hash code value for the object. This method is * supported for the benefit of hashtables such as those provided by * <code>java.util.Hashtable</code>. * <p> * The general contract of <code>hashCode</code> is: * <ul> * <li>Whenever it is invoked on the same object more than once during * an execution of a Java application, the <tt>hashCode</tt> method * must consistently return the same integer, provided no information * used in <tt>equals</tt> comparisons on the object is modified. * This integer need not remain consistent from one execution of an * application to another execution of the same application. * <li>If two objects are equal according to the <tt>equals(Object)</tt> * method, then calling the <code>hashCode</code> method on each of * the two objects must produce the same integer result. * <li>It is <em>not</em> required that if two objects are unequal * according to the {@link java.lang.Object#equals(java.lang.Object)} * method, then calling the <tt>hashCode</tt> method on each of the * two objects must produce distinct integer results. However, the * programmer should be aware that producing distinct integer results * for unequal objects may improve the performance of hashtables. * </ul> * <p> * As much as is reasonably practical, the hashCode method defined by * class <tt>HttpUrl</tt> does return distinct integers for distinct * objects. (This is implemented by converting the url to a String, and * returning the String's hashCode() ) * * @return a hash code value for this object. * @see java.lang.Object#equals(java.lang.Object) * @see java.util.Hashtable */ public int hashCode() { return _hashcode; } }