/*
*
*
* Copyright 1990-2009 Sun Microsystems, Inc. All Rights Reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License version
* 2 only, as published by the Free Software Foundation.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License version 2 for more details (a copy is
* included at /legal/license.txt).
*
* You should have received a copy of the GNU General Public License
* version 2 along with this work; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA
*
* Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa
* Clara, CA 95054 or visit www.sun.com if you need additional
* information or have any questions.
*/
package com.sun.midp.io;
import java.io.IOException;
/**
* A parsed HTTP (or subclass of) URL. Based on RFC 2396.
* <p>
* Handles IPv6 hosts, check host[0] for a "[".
* Can be used for relative URLs that do not have authorities.
* Can be used for FTP URLs that do not have the user name and password.
* <p>
* Any elements not specified are represented by null, except a
* non-specified port, which is represented by a -1.
*/
public class HttpUrl {
    /** Scheme of the URL or null. */
    public String scheme;
    /** Authority (host [port]) of the URL or null. */
    public String authority;
    /** Path of the URL or null. */
    public String path;
    /** Query of the URL (without the leading "?") or null. */
    public String query;
    /** Fragment of the URL (without the leading "#") or null. */
    public String fragment;
    /** Host of the authority or null. */
    public String host;
    /** Port of the authority or -1 for not specified. */
    public int port = -1;
    /** Machine of the host or null. */
    public String machine;
    /** Domain of the host or null. */
    public String domain;

    /**
     * Construct a HttpUrl.
     * <p>
     * A null or empty URL leaves every element unspecified. A URL that
     * ends with its first ":" is treated as a bare scheme.
     *
     * @param url HTTP URL to parse
     *
     * @exception IllegalArgumentException if there is a space in the URL,
     *            the port is not numeric or out of range, or the host is
     *            malformed
     */
    public HttpUrl(String url) {
        int afterScheme = 0;
        int length;
        int endOfScheme;

        if (url == null) {
            return;
        }

        length = url.length();
        if (length == 0) {
            return;
        }

        // ":" can mark the scheme in an absolute URL which has a "//".
        endOfScheme = url.indexOf(':');
        if (endOfScheme != -1) {
            if (endOfScheme == length - 1) {
                // just a scheme
                scheme = url.substring(0, endOfScheme);
                return;
            }

            if (endOfScheme < length - 2
                    && url.charAt(endOfScheme + 1) == '/'
                    && url.charAt(endOfScheme + 2) == '/') {
                // found "://", get the scheme
                scheme = url.substring(0, endOfScheme);
                afterScheme = endOfScheme + 1;
            }
        }

        parseAfterScheme(url, afterScheme, length);
    }

    /**
     * Construct a HttpUrl from a scheme and partial HTTP URL.
     *
     * @param theScheme the protocol component of an HTTP URL
     * @param partialUrl HTTP URL to parse
     *
     * @exception IllegalArgumentException if there is a space in the URL,
     *            the port is not numeric or out of range, or the host is
     *            malformed
     */
    public HttpUrl(String theScheme, String partialUrl) {
        int length;

        scheme = theScheme;

        if (partialUrl == null) {
            return;
        }

        length = partialUrl.length();
        if (length == 0) {
            return;
        }

        parseAfterScheme(partialUrl, 0, length);
    }

    /**
     * Parse the part of the HTTP URL after the scheme and fill in the
     * fragment, query, path and authority fields (and, through
     * {@link #parseAuthority()}, the host, port, machine and domain).
     * <p>
     * The URL is rejected when it contains a space, a carriage return, a
     * line feed or the U+0007 character.
     * NOTE(review): U+0007 is BEL; a tab (U+0009) may have been intended -
     * confirm before changing.
     *
     * @param url the part of the HTTP URL after the ":" of the scheme
     * @param afterScheme index of the first char after the scheme
     * @param length length of the url
     *
     * @exception IllegalArgumentException if there is a space in the URL,
     *            the port is not numeric or out of range, or the host is
     *            malformed
     */
    private void parseAfterScheme(String url, int afterScheme, int length) {
        int start;
        int startOfAuthority;
        int endOfUrl;
        int endOfAuthority;
        int endOfPath;
        int endOfQuery;

        if (url.indexOf(' ') != -1 || url.indexOf('\r') != -1
                || url.indexOf('\n') != -1 || url.indexOf('\u0007') != -1) {
            throw new IllegalArgumentException("Space character in URL");
        }

        endOfUrl = length;
        endOfAuthority = endOfUrl;
        endOfPath = endOfUrl;
        endOfQuery = endOfUrl;

        if (url.startsWith("//", afterScheme)) {
            // do not include the "//"
            startOfAuthority = afterScheme + 2;
        } else {
            // no authority, the path starts at 0 and may not begin with "/"
            startOfAuthority = afterScheme;
        }

        /*
         * All of the elements after the authority are optional and each
         * can contain the delimiter of the element before it. Work
         * backwards: the end of the last item is known, and the end of
         * the next item becomes known once the start of the current item
         * has been found.
         */
        start = url.indexOf('#', startOfAuthority);
        if (start != -1) {
            endOfAuthority = start;
            endOfPath = start;
            endOfQuery = start;

            // do not include the "#"
            start++;

            // do not parse an empty fragment
            if (start < endOfUrl) {
                fragment = url.substring(start, endOfUrl);
            }
        }

        start = url.indexOf('?', startOfAuthority);
        if (start != -1 && start < endOfQuery) {
            endOfAuthority = start;
            endOfPath = start;

            // do not include the "?"
            start++;

            // do not parse an empty query
            if (start < endOfQuery) {
                query = url.substring(start, endOfQuery);
            }
        }

        if (startOfAuthority == afterScheme) {
            // no authority, the path starts after scheme
            start = afterScheme;
        } else {
            // this is not a relative URL so the path must begin with "/"
            int searchFrom = startOfAuthority;

            /*
             * An IPv6 literal may carry a "/" inside its brackets
             * ("[IPv6/60]"), so the path search has to start at the
             * closing "]". Only honour a "]" that closes a bracket opened
             * at the start of the authority and that lies before the
             * query/fragment; a "]" elsewhere (for example in a query
             * such as "?ids[]=1") must not influence where the path
             * starts.
             */
            if (startOfAuthority < endOfPath
                    && url.charAt(startOfAuthority) == '[') {
                int posSqBr = url.indexOf(']', startOfAuthority);
                if (posSqBr != -1 && posSqBr < endOfPath) {
                    searchFrom = posSqBr;
                }
            }

            start = url.indexOf('/', searchFrom);
        }

        // do not parse an empty path
        if (start != -1 && start < endOfPath) {
            endOfAuthority = start;
            path = url.substring(start, endOfPath);
        }

        if (startOfAuthority >= endOfAuthority) {
            return;
        }

        authority = url.substring(startOfAuthority, endOfAuthority);
        parseAuthority();
    }

    /**
     * Splits the already extracted {@link #authority} into port and host,
     * validates the host and, for named hosts, derives machine and domain.
     *
     * @exception IllegalArgumentException if the port is not numeric or is
     *            outside 1..65535, or the host is not a valid host name,
     *            IPv4 address or bracketed IPv6 address
     */
    private void parseAuthority() {
        int endOfPort = authority.length();
        int endOfHost;
        int startOfPort;
        int startOfDomain;
        int hostLength;

        // get the port first, to find the end of the host
        // IPv6 addresses have brackets around them and can have ":"'s
        int closingBracket = authority.indexOf(']');
        if (closingBracket == -1) {
            startOfPort = authority.indexOf(':');
        } else {
            startOfPort = authority.indexOf(':', closingBracket);
        }

        if (startOfPort != -1) {
            endOfHost = startOfPort;

            // do not include the ":"
            startOfPort++;

            // do not try to parse an empty port
            if (startOfPort < endOfPort) {
                try {
                    port = Integer.parseInt(
                            authority.substring(startOfPort, endOfPort));
                } catch (NumberFormatException nfe) {
                    throw new IllegalArgumentException("invalid port format");
                }

                if (port < 0) {
                    throw new IllegalArgumentException("invalid port format");
                }

                if (port == 0 || port > 0xFFFF) {
                    throw new IllegalArgumentException(
                            "port out of legal range");
                }
            }
        } else {
            endOfHost = endOfPort;
        }

        // there could be a port but no host
        if (endOfHost < 1) {
            return;
        }

        // get the host
        host = authority.substring(0, endOfHost);

        // the last char of the host must not be a minus sign or period
        hostLength = host.length();
        if ((host.lastIndexOf('.') == hostLength - 1)
                || (host.lastIndexOf('-') == hostLength - 1)) {
            throw new IllegalArgumentException("invalid host format");
        }

        // machine and domain are only derived when host is not an IP address
        if (host.charAt(0) == '[') {
            if (!isValidIPv6Address(host)) {
                throw new IllegalArgumentException("invalid IPv6 format");
            }
            return;
        }

        if (Character.isDigit(host.charAt(0))) {
            if (!isValidIPv4Address(host)) {
                throw new IllegalArgumentException("invalid IPv4 format");
            }
            return;
        }

        if (!isValidHostName(host)) {
            throw new IllegalArgumentException("invalid host format");
        }

        startOfDomain = host.indexOf('.');
        if (startOfDomain != -1) {
            // do not include the "."
            startOfDomain++;
            if (startOfDomain < hostLength) {
                domain = host.substring(startOfDomain, hostLength);
            }

            machine = host.substring(0, startOfDomain - 1);
        } else {
            machine = host;
        }
    }

    /**
     * Adds a base URL to this URL if this URL is a relative one.
     * Afterwards this URL will be an absolute URL.
     *
     * @param baseUrl an absolute URL
     *
     * @exception IllegalArgumentException if there is a space in the URL or
     *            the port is not numeric
     * @exception IOException if an I/O error occurs processing the URL
     */
    public void addBaseUrl(String baseUrl) throws IOException {
        addBaseUrl(new HttpUrl(baseUrl));
    }

    /**
     * Adds a base URL to this URL if this URL is a relative one.
     * Afterwards this URL will be an absolute URL.
     * <p>
     * Nothing happens when this URL already has an authority. Otherwise the
     * scheme, the authority and the elements parsed from the authority
     * (host, port, machine, domain) are taken from the base URL. A missing
     * path is replaced by the base path; a relative path is appended to the
     * directory part of the base path.
     *
     * @param baseUrl a parsed absolute URL
     */
    public void addBaseUrl(HttpUrl baseUrl) {
        String basePath;

        if (authority != null) {
            return;
        }

        scheme = baseUrl.scheme;
        authority = baseUrl.authority;

        // keep the parsed pieces of the authority consistent with it
        host = baseUrl.host;
        port = baseUrl.port;
        machine = baseUrl.machine;
        domain = baseUrl.domain;

        if (path == null) {
            path = baseUrl.path;
            return;
        }

        if (path.charAt(0) == '/' || baseUrl.path == null
                || baseUrl.path.charAt(0) != '/') {
            return;
        }

        // find the base path (everything before the last "/")
        basePath = baseUrl.path.substring(0, baseUrl.path.lastIndexOf('/'));

        path = basePath + '/' + path;
    }

    /**
     * Converts this URL into a string.
     *
     * @return string representation of this URL
     */
    public String toString() {
        StringBuffer url = new StringBuffer();

        if (scheme != null) {
            url.append(scheme);
            url.append(':');
        }

        if (authority != null || scheme != null) {
            url.append('/');
            url.append('/');
        }

        if (authority != null) {
            url.append(authority);
        }

        if (path != null) {
            url.append(path);
        }

        if (query != null) {
            url.append('?');
            url.append(query);
        }

        if (fragment != null) {
            url.append('#');
            url.append(fragment);
        }

        return url.toString();
    }

    /**
     * Checks if a bracketed IPv6 address has a valid format. An optional
     * "/prefix-length" (1..128) inside the brackets is accepted.
     *
     * @param address the string representation of IPv6 address,
     *                including the surrounding "[" and "]"
     * @return true when IPv6 address has valid format else false
     */
    private static boolean isValidIPv6Address(String address) {
        int addressLength = address.length();

        if (addressLength < 4) { // empty IPv6
            return false;
        }

        if (address.charAt(0) != '['
                || address.charAt(addressLength - 1) != ']') {
            return false;
        }

        String ipv6 = address.substring(1, addressLength - 1);

        // Format according to RFC 3513
        int ipv6Length = addressLength - 2;
        int ptrChar = 0;
        int numHexPieces = 0; // number of ":" separators seen so far
        char currChar = 0;
        String hexString = null;
        String separator = null;
        int length;
        boolean isDoubleColon = false;
        boolean lastSeparator = true;

        while (ptrChar < ipv6Length) {
            currChar = ipv6.charAt(ptrChar);
            if (isHex(currChar)) {
                hexString = getNextHexValue(ipv6, ptrChar, false);
                length = hexString.length();
                if (length > 4) {
                    // a 16-bit value cannot have more than 4 hex digits
                    return false;
                }
                ptrChar += length;
                lastSeparator = false;
            } else if (currChar == ':') {
                if (++numHexPieces > 7) { // more than 8 hex pieces
                    return false;
                }
                separator = getNextHexValue(ipv6, ptrChar, true);
                length = separator.length();
                if (separator.equals("::")) {
                    if (isDoubleColon) { // double colon twice
                        return false;
                    }
                    isDoubleColon = true;
                } else if (length > 1) { // wrong separator
                    return false;
                } else { // separator is equal ":"
                    if (ptrChar == 0) { // first symbol is ":"
                        return false;
                    }
                    if (isDoubleColon && numHexPieces > 6) {
                        // no more than 7 pieces when "::" is used
                        return false;
                    }
                }
                ptrChar += length;
                lastSeparator = true;
            } else if (currChar == '.') { // IPv4 suffix
                if (hexString == null || !isDecimal(hexString)) {
                    // previous hex piece must be start of IPv4
                    return false;
                }
                if (!((!isDoubleColon && numHexPieces == 6)
                        || (isDoubleColon && numHexPieces < 6))) {
                    return false;
                }
                // step back to the start of the first IPv4 octet
                ptrChar -= hexString.length();
                return isValidIPv4Address(ipv6.substring(ptrChar));
            } else if (currChar == '/') { // bit prefix
                break;
            } else { // wrong symbol
                return false;
            }
        } // end of while

        if (lastSeparator) {
            if (separator == null) {
                // nothing was parsed before the "/" (e.g. "[/64]")
                return false;
            }
            if (separator.equals(":")) {
                // address must not end with a single ":"
                return false;
            }
        }

        if (!((!isDoubleColon && numHexPieces == 7)
                || (isDoubleColon && numHexPieces < 7))) {
            return false;
        }

        if (currChar == '/') { // bit prefix
            ptrChar++;
            String decString = getNextDecValue(ipv6, ptrChar, false);
            length = decString.length();
            if (length == 0 || ptrChar + length < ipv6Length) {
                return false;
            }

            int prefix;
            try {
                prefix = Integer.parseInt(decString);
            } catch (NumberFormatException nfe) {
                // too many digits to fit an int: not a valid prefix
                return false;
            }

            if (prefix < 1 || prefix > 128) {
                return false;
            }
        }

        return true;
    }

    /**
     * Checks if an IPv4 address has a valid dotted-decimal format: four
     * pieces, each in the range 0..255, separated by single periods.
     *
     * @param address the string representation of IPv4 address
     * @return true when IPv4 address has valid format else false
     */
    private static boolean isValidIPv4Address(String address) {
        int ipv4Length = address.length();

        if (ipv4Length < 7) { // less than 0.0.0.0
            return false;
        }

        int ptrChar = 0;
        int numDecPieces = 0; // number of "." separators seen so far
        char currChar;
        String decString;
        String separator;
        int length;
        int value;
        boolean lastSeparator = true;

        while (ptrChar < ipv4Length) {
            currChar = address.charAt(ptrChar);
            if (Character.isDigit(currChar)) {
                decString = getNextDecValue(address, ptrChar, false);
                try {
                    value = Integer.parseInt(decString);
                } catch (NumberFormatException nfe) {
                    // too many digits to fit an int: not a valid octet
                    return false;
                }
                if (value < 0 || value > 255) {
                    return false;
                }
                ptrChar += decString.length();
                lastSeparator = false;
            } else if (currChar == '.') {
                if (++numDecPieces > 3) { // more than 4 decimal pieces
                    return false;
                }
                separator = getNextDecValue(address, ptrChar, true);
                length = separator.length();
                if (length > 1) {
                    return false;
                }
                ptrChar += length;
                lastSeparator = true;
            } else { // wrong symbol
                return false;
            }
        } // end of while

        if (lastSeparator || numDecPieces < 3) {
            return false;
        }

        return true;
    }

    /**
     * Checks if a host name has a valid format (RFC 2396): labels
     * separated by periods, each label non-empty, starting with a letter
     * and containing only letters, digits and "-".
     *
     * @param host the host name for checking
     * @return true when the host name has a valid format
     */
    private static boolean isValidHostName(String host) {
        char currChar;
        int ptrChar = 0;
        int lenDomain = 0; // length of the label currently being scanned

        while (ptrChar < host.length()) {
            currChar = host.charAt(ptrChar++);
            if (currChar == '.') {
                if (lenDomain == 0) {
                    return false;
                }
                lenDomain = 0;
            } else if (currChar == '-' || Character.isDigit(currChar)) {
                // a label must not start with "-" or a digit
                if (lenDomain == 0) {
                    return false;
                }
                lenDomain++;
            } else if (Character.isLowerCase(currChar)
                    || Character.isUpperCase(currChar)) {
                lenDomain++;
            } else {
                return false;
            }
        }

        return true;
    }

    /**
     * Checks if the given symbol is a hex digit.
     *
     * @param sym the given symbol
     * @return true when symbol is hex
     */
    private static boolean isHex(char sym) {
        return (Character.isDigit(sym) || "ABCDEFabcdef".indexOf(sym) > -1);
    }

    /**
     * Gets the next hex substring, or the next run of separator
     * characters (everything up to the next hex digit or "/").
     *
     * @param str the source string
     * @param offset the start index of substring
     * @param isSeparator false when we need hex value else separator
     * @return the hex substring or separator
     */
    private static String getNextHexValue(String str, int offset,
                                          boolean isSeparator) {
        StringBuffer strOut = new StringBuffer();
        int length = str.length();
        int i = offset;
        char sym;

        while (i < length) {
            sym = str.charAt(i++);
            if ((!isHex(sym) && !isSeparator)
                    || ((isHex(sym) || sym == '/') && isSeparator)) {
                break;
            }
            strOut.append(sym);
        }

        return strOut.toString();
    }

    /**
     * Gets the next decimal substring, or the next run of separator
     * characters (everything up to the next digit).
     *
     * @param str the source string
     * @param offset the start index of substring
     * @param isSeparator false when we need dec value else separator
     * @return the dec substring or separator
     */
    private static String getNextDecValue(String str, int offset,
                                          boolean isSeparator) {
        StringBuffer strOut = new StringBuffer();
        int length = str.length();
        int i = offset;
        char sym;

        while (i < length) {
            sym = str.charAt(i++);
            if ((!Character.isDigit(sym) && !isSeparator)
                    || (Character.isDigit(sym) && isSeparator)) {
                break;
            }
            strOut.append(sym);
        }

        return strOut.toString();
    }

    /**
     * Checks if the given string contains decimal symbols only.
     *
     * @param str the source string
     * @return true when all symbols are decimal else false
     */
    private static boolean isDecimal(String str) {
        for (int i = 0; i < str.length(); i++) {
            if (!Character.isDigit(str.charAt(i))) {
                return false;
            }
        }

        return true;
    }
}