/*
* #!
* Ontopia Engine
* #-
* Copyright (C) 2001 - 2013 The Ontopia Project
* #-
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* !#
*/
package net.ontopia.infoset.impl.basic;
import java.io.Externalizable;
import java.io.File;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.net.MalformedURLException;
import java.net.URL;
import net.ontopia.infoset.core.LocatorIF;
import net.ontopia.utils.OntopiaRuntimeException;
import net.ontopia.utils.StringUtils;
/**
* PUBLIC: A Uniform Resource Identifier locator. Only URI locators
* should be used with this locator class. The notation is 'URI'.<p>
*
* The address is always normalized by the constructor. The address
* given to the constructor <b>must</b> be absolute.<p>
*/
public class URILocator extends AbstractLocator implements Externalizable {
// The normalized address. Assigned by the constructors and by readExternal;
// the no-argument constructor leaves it null.
protected String address;
// The four indexes below are positions within 'address'. They are stored as
// shorts, so every value computed during parsing is narrowed with a (short)
// cast. normalize() resets authorityEnd, lastSlash and fragmentStart to -1
// before parsing, so -1 means "not found / not parsed".
protected short schemeEnd; // the ':' char in the scheme part
protected short authorityEnd; // last char in authority part
protected short lastSlash; // last slash in directory path
protected short fragmentStart; // index of fragment '#'
static {
  // Installing the data-URL handler is best effort: neither failure below
  // is allowed to prevent this class from loading.
  try {
    net.ontopia.net.data.Handler.install();
  } catch (SecurityException ignored) {
    // Security restrictions are in place; carry on silently.
  } catch (NoClassDefFoundError ignored) {
    // Seen on Google AppEngine. Not really a problem, because the
    // data-URL handler is rarely used. See
    // https://github.com/ontopia/ontopia/issues/118
  }
}
/**
* INTERNAL: No-argument constructor used by serialization. Do not
* use this constructor in application code.<p>
*
* This constructor assigns nothing: the address stays
* <code>null</code> until {@link #readExternal} sets it.
*/
public URILocator() {
}
/**
* PUBLIC: Creates a URILocator representing the URI given. Note
* that the URI string should be in external form, and that it
* must be absolute.
*/
public URILocator(String address) throws MalformedURLException {
this.address = normalize(address);
}
/**
* PUBLIC: Creates a URILocator representing the URL given.
*/
public URILocator(URL url) throws MalformedURLException {
this.address = normalize(url.toExternalForm());
}
/**
* PUBLIC: Creates a URILocator containing a file URL referring
* to the file represented by the File object.<p>
*
* @since 1.3.4
*/
public URILocator(File file) {
try {
String path = file.getAbsolutePath();
if (File.separatorChar != '/')
path = path.replace(File.separatorChar, '/');
if (!path.startsWith("/"))
path = "/" + path;
if (!path.endsWith("/") && file.isDirectory())
path = path + "/";
path = "file:" + path;
this.address = normalize(escapeFilePath(path));
} catch (MalformedURLException e) {
throw new OntopiaRuntimeException("INTERNAL ERROR: File " + file +
" produced malformed URL", e);
}
}
/**
* INTERNAL: Special constructor used when resolving a URI relative
* to a base URI. Since the base URI is already normalized we can
* avoid repeating the normalization, and thus save time.<p>
*
* All arguments are stored as given; nothing is parsed or checked
* here, so the caller is responsible for passing indexes that match
* the address.
*
* @param normalized the address to store, taken as already normalized
* @param schemeEnd index of the ':' that ends the scheme
* @param authorityEnd index stored as the end of the authority part
* @param lastSlash index stored as the last slash of the directory path
* @param fragmentStart index of the fragment '#', or -1 for none
*/
protected URILocator(String normalized, short schemeEnd, short authorityEnd,
short lastSlash, short fragmentStart) {
this.address = normalized;
this.schemeEnd = schemeEnd;
this.authorityEnd = authorityEnd;
this.lastSlash = lastSlash;
this.fragmentStart = fragmentStart;
}
/**
* INTERNAL: Normalizes the given URI and returns the normalized
* string. As a side effect the schemeEnd, authorityEnd, lastSlash
* and fragmentStart fields of this locator are recomputed.
*
* @param address the URI to normalize
* @return the normalized URI
* @throws MalformedURLException if the URI has no valid scheme or
*         one of the parsing steps rejects it
*/
protected String normalize(String address) throws MalformedURLException {
  // start from "nothing found"; the parse methods fill these in
  authorityEnd = -1;
  lastSlash = -1;
  fragmentStart = -1;

  // the working buffer has slack so that parsing can grow the URI in place
  final int rawLength = address.length();
  final char[] buffer = new char[rawLength + 100];
  address.getChars(0, rawLength, buffer, 0);
  int length = decodeURI(buffer, rawLength);

  schemeEnd = (short) getScheme(buffer, length);
  if (schemeEnd == -1) {
    throw new MalformedURLException("No valid scheme in URI: " + address);
  }

  final boolean fileLike =
    StringUtils.regionEquals("file", buffer, 0, 4) ||
    StringUtils.regionEquals("jar:file", buffer, 0, 8) ||
    StringUtils.regionEquals("classpath", buffer, 0, 9);

  if (fileLike) {
    length = parseFileUrl(buffer, schemeEnd, length);
  } else if (StringUtils.regionEquals("//", buffer, schemeEnd + 1, 2)) {
    length = parseHierarchicalUrl(buffer, schemeEnd, length);
  }
  // any other URI is kept as decoded, without further parsing

  return new String(buffer, 0, length);
}
// --------------------------------------------------------------------------
// LocatorIF implementation
// --------------------------------------------------------------------------
/**
* PUBLIC: Returns the notation of this locator, which is always the
* string "URI".
*/
public String getNotation() {
return "URI";
}
/**
* PUBLIC: Returns the address stored in this locator, exactly as
* held in the <code>address</code> field (no escaping is applied
* here; see {@link #getExternalForm()} for the escaped form).
*/
public String getAddress() {
return address;
}
/**
* PUBLIC: Resolves the given reference against this locator and
* returns the resulting locator. The cases, in order:
* <ul>
* <li>empty reference: this locator without its fragment;</li>
* <li>"#frag": a fragment locator on this address minus any
*     existing fragment;</li>
* <li>"//...": the reference appended after this locator's scheme;</li>
* <li>"/...": the reference appended after the authority part;</li>
* <li>reference with its own scheme: a new, independent locator;</li>
* <li>reference containing '/', or equal to "." or "..": resolved
*     against the directory part and then normalized;</li>
* <li>anything else: treated as a plain file name replacing the
*     part after the last slash.</li>
* </ul>
*
* @param rel the reference to resolve
* @return the resolved locator
* @throws OntopiaRuntimeException wrapping a MalformedURLException
*         if the result is not a valid URI
*/
public LocatorIF resolveAbsolute(String rel) {
  final int relLength = rel.length();

  // empty reference: same document, fragment dropped
  if (relLength == 0) {
    if (fragmentStart == -1) {
      return this;
    }
    return new URILocator(address.substring(0, fragmentStart),
                          schemeEnd, authorityEnd, lastSlash, (short) -1);
  }

  final char first = rel.charAt(0);

  if (first == '#') {
    final String withoutFragment =
      (fragmentStart == -1) ? address : address.substring(0, fragmentStart);
    return new URIFragmentLocator(withoutFragment.intern(), rel.substring(1),
                                  schemeEnd, authorityEnd, lastSlash);
  }

  if (first == '/') {
    final boolean startsWithDoubleSlash = relLength != 1 && rel.charAt(1) == '/';
    if (!startsWithDoubleSlash) {
      // FIXME: should normalize absolute path
      return new URILocator(address.substring(0, authorityEnd) + rel,
                            schemeEnd, authorityEnd, lastSlash,
                            fragmentStart);
    }
    if (authorityEnd == -1) {
      throw new OntopiaRuntimeException(new MalformedURLException("Base URI is not hierarchical"));
    }
    return new URILocator(address.substring(0, schemeEnd + 1) + rel,
                          schemeEnd, authorityEnd, lastSlash,
                          fragmentStart);
  }

  try {
    // a reference with its own scheme stands alone
    final char[] relChars = rel.toCharArray();
    if (getScheme(relChars, relChars.length) != -1) {
      return new URILocator(rel);
    }

    final boolean baseHasDirectory = lastSlash != -1;

    // Slashes (or a bare "." / "..") mean the normalizing constructor
    // has to resolve the directory part for us.
    final boolean needsNormalizing =
      rel.indexOf('/') != -1 || rel.equals(".") || rel.equals("..");
    if (needsNormalizing) {
      final String prefix;
      if (baseHasDirectory) {
        prefix = address.substring(0, lastSlash + 1);
      } else {
        // the "/" here is important, as it was normalized away and
        // needs to be added back
        prefix = address.substring(0, authorityEnd + 1) + "/";
      }
      return new URILocator(prefix + rel);
    }

    // no slashes at all, so this is a pure file name
    final String resolved =
      baseHasDirectory ? address.substring(0, lastSlash + 1) + rel
                       : address + rel;
    return new URILocator(resolved,
                          schemeEnd, authorityEnd, lastSlash,
                          fragmentStart);
  } catch (MalformedURLException e) {
    throw new OntopiaRuntimeException(e);
  }
}
/**
* PUBLIC: Returns the external form of this locator's address, as
* produced by passing the address to <code>toExternalForm</code>.
*/
public String getExternalForm() {
return toExternalForm(address);
}
/**
* INTERNAL: Returns the given address with every character outside
* the pass-through set percent-escaped. Escaped characters are
* written as the %XX escapes of their UTF-8 bytes (one, two or three
* bytes per char). '%' itself is never escaped.
*
* @param address the address to escape
* @return the escaped address
*/
static String toExternalForm(String address) {
  final int length = address.length();

  // Worst case: a char from U+0800 upwards becomes three UTF-8 bytes, and
  // each byte is written as a three-character %XX escape. That is nine
  // output chars per input char; a smaller buffer overflows on addresses
  // that consist mostly of such characters.
  char[] tmp = new char[length * 9];
  int pos = 0;

  // we don't escape % because if it's present in the URI it's because
  // we didn't unescape it on the way in.
  for (int ix = 0; ix < length; ix++) {
    char ch = address.charAt(ix);

    if ((ch >= 'a' && ch <= 'z') ||  // a-z
        (ch >= '?' && ch <= 'Z') ||  // ? @ A-Z
        (ch >= '%' && ch <= ';') ||  // % & ' ( ) * + , - . / 0-9 : ;
        ch == '#' || ch == '!' || ch == '$' || ch == '=' || ch == '_' || ch == '~' ||
        (ch == '|' && ix == 7)) {    // file:/X|/; special case...
      tmp[pos++] = ch;
    } else { // have to escape
      tmp[pos++] = '%';
      if (ch <= 0x7F) {
        // 0xxxxxxx
        addByte(tmp, pos, ch);
        pos += 2;
      } else if (ch <= 0x07FF) {
        // 110xxxxx 10xxxxxx
        addByte(tmp, pos, (ch >> 6) | 0xC0);
        pos += 2;
        tmp[pos++] = '%';
        addByte(tmp, pos, (ch & 0x3F) | 0x80);
        pos += 2;
      } else {
        // 1110xxxx 10xxxxxx 10xxxxxx
        addByte(tmp, pos, (ch >> 12) | 0xE0);
        pos += 2;
        tmp[pos++] = '%';
        addByte(tmp, pos, ((ch >> 6) & 0x3F) | 0x80);
        pos += 2;
        tmp[pos++] = '%';
        addByte(tmp, pos, (ch & 0x3F) | 0x80);
        pos += 2;
      }
    }
  }
  return new String(tmp, 0, pos);
}
/**
* INTERNAL: Writes the low eight bits of <code>ch</code> as two hex
* digits into <code>tmp[pos]</code> and <code>tmp[pos + 1]</code>.
*/
private static void addByte(char[] tmp, int pos, int ch) {
  final int highNibble = (ch & 0x00F0) >> 4;
  final int lowNibble = ch & 0x000F;
  tmp[pos] = encodeHexDigit(highNibble);
  tmp[pos + 1] = encodeHexDigit(lowNibble);
}
// --------------------------------------------------------------------------
// URI parsing
// --------------------------------------------------------------------------
/**
* INTERNAL: Parses and normalizes a file:/ URL in place. Sets
* authorityEnd and hands the rest over to parseDirectoryPart.
*
* @param uri the working buffer holding the URI
* @param ix The index of the last character in the scheme (':')
* @param length the number of valid characters in the buffer
* @return the value returned by parseDirectoryPart, i.e. the length
*         of the URI after normalization
* @throws MalformedURLException if nothing follows the scheme, or
*         the directory part is rejected
*/
private int parseFileUrl(char[] uri, int ix, int length)
  throws MalformedURLException {
  if (ix + 2 >= length) {
    throw new MalformedURLException("File URL has only scheme name.");
  }

  // STEP 1: deal with hostname and initial slashes
  //   file:///home/          -> file:/home/
  //   file:/home/            -> file:/home/
  //   file://graph/tmp/      -> file://graph/tmp/
  int pos = ix + 1;               // skip ':'
  if (uri[pos] == '/') {
    pos++;                        // skip ':/'
  }

  boolean hasHost = false;
  if (uri[pos] == '/') {
    if (pos + 1 < length && uri[pos + 1] == '/') {
      // it's ':///home/': strip two of the three slashes
      System.arraycopy(uri, pos + 2, uri, pos, length - (pos + 2));
      length -= 2;
    } else {
      // it's '://server/home/': leave it alone
      hasHost = true;
    }
  }

  // STEP 2: deal with directory part
  final int directoryStart = hasHost ? pos : pos - 1;
  authorityEnd = (short) directoryStart;
  return parseDirectoryPart(uri, directoryStart, length);
}
/**
* INTERNAL: Parses and normalizes a hierarchical URL in place: a
* port number equal to the scheme's default is removed, the host is
* lower-cased, and the authority part is made to end with a '/'.
* Sets authorityEnd and hands the rest over to parseDirectoryPart.
*
* @param uri the working buffer holding the URI; it must have room
*        for one more character, since a '/' may be inserted
* @param schemeEnd The index of the last character in the scheme (':')
* @param length the number of valid characters in the buffer
* @return the length of the URI after normalization
* @throws MalformedURLException if the directory part is rejected
*/
private int parseHierarchicalUrl(char[] uri, int schemeEnd, int length)
  throws MalformedURLException {
  // ---parse authority
  // [ [ userinfo "@" ] host [ : port ] ]
  // the only thing we care about is the port number

  // algorithm:
  //   scan outwards, stop on first '/', '?', '#' or the end
  //   after each ':' keep track of where it was and whether it was
  //   followed by non-digits

  int ix = schemeEnd + 3; // skip over the '//'
  int portStart = -1;
  int hostStart = ix;
  String port = null;

  while (ix < length &&
         uri[ix] != '/' && uri[ix] != '?' && uri[ix] != '#') {
    if (uri[ix] == ':') { // may be port number, check out
      ix++;
      portStart = ix;
      while (ix < length && uri[ix] >= '0' && uri[ix] <= '9')
        ix++; // port numbers are pure digits, so scan for those

      if (ix >= length ||
          uri[ix] == '/' || uri[ix] == '?' || uri[ix] == '#') {
        // terminated with correct char, so it's a port number
        port = new String(uri, portStart, ix - portStart);
        break; // this means we're done with the authority part
      }

      // Not a port after all (e.g. the ':' in "user:123@host"). Look at
      // the character that stopped the digit scan instead of stepping
      // over it; otherwise an '@' in that position is missed and the
      // userinfo gets lower-cased together with the host.
      continue;
    } else if (uri[ix] == '@')
      hostStart = ix + 1;

    ix++;
  }

  if (port != null && findPortDefault(uri, schemeEnd).equals(port)) {
    // default port number used; remove it together with its ':'
    int offset = (ix - portStart) + 1;
    System.arraycopy(uri, ix, uri, portStart - 1, length - ix);
    ix -= offset;
    length -= offset;
  }

  StringUtils.downCaseAscii(uri, hostStart, ix - hostStart);

  // Make sure the authority part ends with a slash no matter what. When
  // the authority runs to the end of the URI, uri[ix] lies outside the
  // valid region and may hold stale content left behind by in-place
  // decoding, so it must not be consulted in that case.
  if (ix >= length || uri[ix] != '/') {
    length++; // we just lengthened the URI...
    if (ix+1 < length)
      // have to shift part after '/' out one notch
      System.arraycopy(uri, ix, uri, ix+1, (length - ix) - 1);
    uri[ix++] = '/';
  }
  authorityEnd = (short) ix;

  if (ix+1 >= length)
    return length;

  return parseDirectoryPart(uri, ix, length);
}
/**
* INTERNAL: Normalizes the directory part of the URI in place,
* starting at <code>ix</code>: doubled slashes are collapsed, "./"
* and "../" segments (and a trailing "/." or "/..") are resolved.
* Sets lastSlash, and, if a '#' follows, sets fragmentStart and
* checks the characters of the fragment. Expects authorityEnd to
* have been set already.
*
* @param uri the working buffer holding the URI
* @param ix index at which the directory part starts
* @param length the number of valid characters in the buffer
* @return the length of the URI after normalization
* @throws MalformedURLException if the fragment contains an illegal
*         character
*/
public int parseDirectoryPart(char[] uri, int ix, int length)
  throws MalformedURLException {
  if (ix == length) { // we are at the last character, so just stop
    lastSlash = -1;
    return length;
  }

  // positions of the slashes seen so far; entry 0 is the authority end
  int[] slashpos = new int[(length - authorityEnd) / 2 + 2];
  slashpos[0] = authorityEnd;
  int slashix = 0;

  while (ix < length && uri[ix] != '?' && uri[ix] != '#') {
    if (uri[ix] == '/') {
      if (slashpos[slashix] == ix - 1) {// two successive slashes, remove one
        System.arraycopy(uri, ix, uri, ix - 1, length - ix);
        ix--;
        length--;
      }

      // WARNING: This loop is time-critical in the extreme. Minor
      // rearrangements to the tests here can cause the time needed
      // to create URIs to double. This will then affect import
      // times and other important operations as well. Care, and
      // stopwatches, must be exercised if changes are made.

      if (ix+2 < length && uri[ix+1] == '.') {
        // handling ./ in URI
        if (uri[ix+2] == '/') {
          System.arraycopy(uri, ix+3, uri, ix+1, length - (ix+3));
          length -= 2;
          continue;
        }

        // handling ../ in URI and .. at end of URI
        if (uri[ix+2] == '.' &&
            ((ix+3 < length && uri[ix+3] == '/') ||
             ix+3 == length)) {
          // removing 3 chars if ../, 2 chars if ..
          int chars = 3;
          if (ix+3 == length) chars = 2;

          int offset;
          if (ix == authorityEnd)
            offset = chars;
          else
            offset = (ix+chars) - slashpos[slashix];

          //debugPrint(uri, length, slashpos, slashix+1);
          System.arraycopy(uri, ix+(chars+1), uri, slashpos[slashix] + 1,
                           length - (ix+(chars+1)));
          ix = slashpos[slashix];
          length -= offset;
          if (slashix != 0)
            slashix--;
          continue;
        }
      } // end of ../ and ./ checking

      if (ix != authorityEnd)
        slashpos[++slashix] = ix;
    }

    ix++;
  }

  // last we check for /. at the end of the directory part, and remove it
  if (slashpos[slashix] + 2 == ix && uri[ix-1] == '.') {
    if (slashix != 0)
      slashix--;
    System.arraycopy(uri, ix, uri, ix-1, length - ix);
    length--;
    ix--;
  }

  lastSlash = (short) slashpos[slashix];

  // ---parse query, and fragment
  while (ix < length && uri[ix] != '#')
    ix++;

  if (ix < length && uri[ix] == '#') {
    fragmentStart = (short) ix;

    // fragment syntax, RFC 2396, page 27
    //
    // fragment   = *uric
    // uric       = reserved | unreserved | escaped
    // reserved   = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
    //              "$" | ","
    // unreserved = alphanum | mark
    // mark       = "-" | "_" | "." | "!" | "~" | "*" | "'" |
    //              "(" | ")"

    ix++; // skip the '#' to begin checking
    while (ix < length &&
           ((uri[ix] >= 'a' && uri[ix] <= 'z') ||
            (uri[ix] >= '?' && uri[ix] <= 'Z') || // ? @ A-Z
            (uri[ix] >= '&' && uri[ix] <= '9') || // & ' ( ) * + , - . / 0-9
            uri[ix] == '!' ||
            uri[ix] == '$' ||
            uri[ix] == ':' ||
            uri[ix] == ';' ||
            uri[ix] == '=' ||
            uri[ix] == '_' ||
            uri[ix] == '~' ||
            uri[ix] == '%')) { // to support percent-escaping
      if (uri[ix] == '%') ix += 2; // step over the two hex digits as well
      ix++;
    }

    if (ix < length)
      // Report only the valid region of the buffer. The buffer is larger
      // than the URI, so converting all of it would append the unused
      // tail to the message.
      throw new MalformedURLException("Illegal character in fragment: '" + uri[ix] +
                                      "' at position " + ix + " of: '" +
                                      new String(uri, 0, length) + "'");
  }

  return length;
}
/**
* Parses the scheme part of a URI. Characters accepted in the scheme
* are ASCII letters, digits, '+', '-' and '.'.
*
* @param uri the characters to scan
* @param length the number of valid characters in <code>uri</code>
* @return The index of the ':' that ends the scheme, or -1 if the
*         URI does not start with one or more scheme characters
*         followed by ':'.
*/
private int getScheme(char[] uri, int length) {
  // RFC 2396, section 3.1
  // scheme = alpha *( alpha | digit | "+" | "-" | "." )
  int end = 0;
  for (; end < length; end++) {
    final char c = uri[end];
    final boolean schemeChar =
      (c >= 'a' && c <= 'z') ||    // lowalpha
      (c >= 'A' && c <= 'Z') ||    // upalpha
      (c >= '0' && c <= '9') ||    // digit
      c == '+' || c == '-' || c == '.';
    if (!schemeChar) {
      break;
    }
  }

  final boolean endsWithColon = end > 0 && end < length && uri[end] == ':';
  return endsWithColon ? end : -1;
}
/**
* Decodes escape codes in URIs in place in the character array, and
* strips leading and trailing spaces. '+' is turned into a space.
* Percent-escapes are replaced by the character they encode, except
* the escapes for '#', '&amp;' and '%', which are left as they are.
*
* @param uri the buffer holding the URI; rewritten in place
* @param length the number of valid characters in the buffer
* @return the length of the decoded URI in the character array
* @throws MalformedURLException if a percent-escape is incomplete or
*         contains a character that is not a hex digit
*/
private int decodeURI(char[] uri, int length)
  throws MalformedURLException {
  // drop trailing spaces
  int end = length;
  while (end > 0 && uri[end - 1] == ' ') {
    end--;
  }

  // skip leading spaces
  int read = 0;
  while (read < end && uri[read] == ' ') {
    read++;
  }

  int write = 0;
  while (read < end) {
    final char current = uri[read];

    if (current == '%') {
      if (read + 2 >= end) {
        throw new MalformedURLException("Incomplete percent-escape at end of URI");
      }
      final char decoded = (char) (decodeHexDigit(uri[read + 1]) * 16 +
                                   decodeHexDigit(uri[read + 2]));
      if (decoded == '&' || decoded == '%' || decoded == '#') {
        // must stay escaped: keep the '%' and let the following
        // iterations copy the two hex digits unchanged
        uri[write++] = '%';
      } else {
        uri[write++] = decoded;
        read += 2;
      }
    } else if (current == '+') {
      uri[write++] = ' ';
    } else {
      uri[write++] = current;
    }

    read++;
  }

  return write;
}
/**
* Returns the value (0-15) of the given hex digit. Both upper- and
* lower-case ASCII digits are accepted.
*
* @throws MalformedURLException if the character is not a hex digit
*/
private int decodeHexDigit(char ch) throws MalformedURLException {
  if (ch >= '0' && ch <= '9') {
    return ch - '0';
  }
  if (ch >= 'A' && ch <= 'F') {
    return 10 + (ch - 'A');
  }
  if (ch >= 'a' && ch <= 'f') {
    return 10 + (ch - 'a');
  }
  throw new MalformedURLException("Invalid percent-escape code containing '" + ch + "' as hex digit in");
}
/**
* Returns the default port number, as a string, of the scheme at the
* start of <code>uri</code>. For schemes not listed here a
* placeholder that cannot equal any port number is returned.
*/
private String findPortDefault(char[] uri, int schemeEnd) {
  // { scheme, default port }, probed in this order
  final String[][] defaults = {
    { "http",   "80"  },
    { "https",  "443" },
    { "shttp",  "80"  },
    { "ftp",    "21"  },
    { "ldap",   "389" },
    { "gopher", "70"  },
  };

  for (int i = 0; i < defaults.length; i++) {
    if (StringUtils.regionEquals(defaults[i][0], uri, 0, schemeEnd)) {
      return defaults[i][1];
    }
  }
  return "dummy value";
}
/**
* Escapes the given file path so that illegal characters in the
* path are correctly escaped. ASCII letters, digits and the marks
* <code>- _ . ! ~ * ' ( )</code> are copied; every other character
* is written as the %XX escapes of its UTF-8 bytes. Each char is
* encoded on its own, so the two halves of a surrogate pair are
* escaped separately.
*
* @param path the path to escape
* @return the escaped path
*/
private static String escapeFilePath(String path) {
  // only the following does not need to be escaped
  //   unreserved  = alphanum | mark
  //   mark        = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")"

  final int length = path.length();

  // Worst case is a char that needs three UTF-8 bytes, each written as a
  // three-character %XX escape: nine output chars per input char.
  char[] tmp = new char[length * 9];
  int pos = 0;

  for (int ix = 0; ix < length; ix++) {
    char ch = path.charAt(ix);

    if ((ch >= 'a' && ch <= 'z') ||
        (ch >= 'A' && ch <= 'Z') ||
        (ch >= '0' && ch <= '9') ||
        (ch >= '\'' && ch <= '*') ||
        ch == '!' || ch == '-' || ch == '.' || ch == '_' || ch == '~') {
      tmp[pos++] = ch;
    } else if (ch <= 0x7F) {
      // 0000 0000-0000 007F   0xxxxxxx
      pos = appendFilePathEscape(tmp, pos, ch);
    } else if (ch <= 0x07FF) {
      // 0000 0080-0000 07FF   110xxxxx 10xxxxxx
      pos = appendFilePathEscape(tmp, pos, (ch >> 6) | 0xC0);
      pos = appendFilePathEscape(tmp, pos, (ch & 0x3F) | 0x80);
    } else {
      // 0000 0800-0000 FFFF   1110xxxx 10xxxxxx 10xxxxxx
      // (a char cannot exceed FFFF, so no further case exists)
      pos = appendFilePathEscape(tmp, pos, (ch >> 12) | 0xE0);
      pos = appendFilePathEscape(tmp, pos, ((ch >> 6) & 0x3F) | 0x80);
      // the low six bits go into the last byte unshifted
      pos = appendFilePathEscape(tmp, pos, (ch & 0x3F) | 0x80);
    }
  }

  return new String(tmp, 0, pos);
}

/**
* Writes '%' followed by the two hex digits of <code>octet</code>
* (0-255) at <code>pos</code> and returns the position after them.
*/
private static int appendFilePathEscape(char[] buf, int pos, int octet) {
  buf[pos] = '%';
  buf[pos + 1] = encodeHexDigit(octet >> 4);
  buf[pos + 2] = encodeHexDigit(octet & 0x0F);
  return pos + 3;
}
/**
* Returns the upper-case hex digit for a value in the range 0-15.
*/
private static char encodeHexDigit(int value) {
  // digits count up from '0'; letters count up from 'A', which stands for 10
  final int base = (value <= 9) ? '0' : 'A' - 10;
  return (char) (base + value);
}
// --- Debugging methods
/**
* Debugging aid: prints the first <code>length</code> characters of
* the buffer, and below them a line with a '^' under every position
* listed in <code>indexes</code>.
*
* @param uri the buffer to print
* @param length the number of characters of the buffer to print
* @param indexes the positions to mark, in ascending order
* @param count the number of entries of <code>indexes</code> in use
*/
@SuppressWarnings("unused")
private void debugPrint(char[] uri, int length, int[] indexes, int count) {
  System.out.println("\n" + new String(uri, 0, length));

  // Never look past the entries in use (nor past the array itself): once
  // every marker has been printed, 'next' would otherwise index beyond
  // the end of the array.
  final int limit = Math.min(count, indexes.length);

  int next = 0;
  for (int ix = 0; ix < length; ix++) {
    if (next < limit && indexes[next] == ix) {
      System.out.print("^");
      next++;
    } else {
      System.out.print(" ");
    }
  }
  System.out.println("");
}
// --------------------------------------------------------------------------
// Misc
// --------------------------------------------------------------------------
/**
* Returns the hash code of the address string, which is also what
* equals() compares. Throws a NullPointerException if the address
* has not been set.
*/
public int hashCode() {
return address.hashCode();
}
/**
* Two locators are equal when the other object is a LocatorIF with
* the notation "URI" and the same address as this one. Any other
* object, including <code>null</code>, is not equal.
*/
public boolean equals(Object object) {
  // instanceof is false for null as well as for non-locators
  if (!(object instanceof LocatorIF)) {
    return false;
  }
  // a locator whose address has not been set equals nothing
  if (address == null) {
    return false;
  }
  LocatorIF locator = (LocatorIF) object;
  // constant first, so a null notation compares unequal instead of throwing
  return address.equals(locator.getAddress()) &&
    "URI".equals(locator.getNotation());
}
// --------------------------------------------------------------------------
// Externalization
// --------------------------------------------------------------------------
/**
* Writes the address followed by the four parse indexes. The
* indexes have to travel with the address: resolveAbsolute() relies
* on them, and they cannot be rebuilt by normalizing the address a
* second time, because normalization would decode it again.<p>
*
* NOTE: the address is written with writeUTF, which limits its
* encoded size to 65535 bytes.
*/
public void writeExternal(ObjectOutput out) throws IOException {
  out.writeUTF(address);
  out.writeShort(schemeEnd);
  out.writeShort(authorityEnd);
  out.writeShort(lastSlash);
  out.writeShort(fragmentStart);
}

/**
* Restores the address and the four parse indexes, in the order in
* which writeExternal() wrote them. Data that contains only the
* address (no indexes) cannot be read by this method.
*/
public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException {
  address = in.readUTF();
  schemeEnd = in.readShort();
  authorityEnd = in.readShort();
  lastSlash = in.readShort();
  fragmentStart = in.readShort();
}
// --------------------------------------------------------------------------
// Utility method
// --------------------------------------------------------------------------
/**
* INTERNAL: Parses the URI and returns an instance of URILocator if
* the URI is valid. If the URI is invalid, that is, if the
* constructor throws a MalformedURLException, null is returned.
*
* @param uriAddress the URI to parse
* @return the locator, or null if the URI is invalid
* @since 3.0
*/
public static URILocator create(String uriAddress) {
  URILocator locator;
  try {
    locator = new URILocator(uriAddress);
  } catch (MalformedURLException e) {
    locator = null;
  }
  return locator;
}
}