//
// ========================================================================
// Copyright (c) 1995-2017 Mort Bay Consulting Pty. Ltd.
// ------------------------------------------------------------------------
// All rights reserved. This program and the accompanying materials
// are made available under the terms of the Eclipse Public License v1.0
// and Apache License v2.0 which accompanies this distribution.
//
// The Eclipse Public License is available at
// http://www.eclipse.org/legal/epl-v10.html
//
// The Apache License v2.0 is available at
// http://www.opensource.org/licenses/apache2.0.php
//
// You may elect to redistribute this code under either of these licenses.
// ========================================================================
//
package org.eclipse.jetty.util;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.eclipse.jetty.util.Utf8Appendable.NotUtf8Exception;
import org.eclipse.jetty.util.log.Log;
import org.eclipse.jetty.util.log.Logger;
/**
* URI Utility methods.
* <p>
* This class assists with the decoding and encoding of HTTP URIs.
* It differs from the java.net.URL class as it does not provide
* communications ability, but it does assist with query string
* formatting.
* </p>
*
* @see UrlEncoded
*/
public class URIUtil
implements Cloneable
{
/** Logger used by the path decoder to report input that is not valid UTF-8. */
private static final Logger LOG = Log.getLogger(URIUtil.class);
/** The path separator, as a String. */
public static final String SLASH="/";
/** The "http" scheme name. */
public static final String HTTP="http";
/** The "https" scheme name. */
public static final String HTTPS="https";
// Use UTF-8 as per http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
/** Charset used to turn non-ASCII path characters into bytes before they are percent-encoded. */
public static final Charset __CHARSET=StandardCharsets.UTF_8 ;
/** Private constructor: the methods of this class are static, so no instance is needed. */
private URIUtil()
{}
/* ------------------------------------------------------------ */
/**
 * Encode a URI path.
 * <p>
 * A null or empty path is returned as is. Otherwise the path is handed to the
 * {@code StringBuilder} based encoder (starting at index 0); when that encoder
 * reports that nothing had to be escaped, the original String is returned.
 * </p>
 * @param path The path to encode
 * @return The encoded path, or {@code path} itself when no encoding was required
 */
public static String encodePath(String path)
{
    if (path!=null && path.length()>0)
    {
        StringBuilder encoded = encodePath(null,path,0);
        if (encoded!=null)
            return encoded.toString();
    }
    return path;
}
/* ------------------------------------------------------------ */
/**
 * Encode a whole URI path into a StringBuilder.
 * @param buf StringBuilder to encode the path into (or null)
 * @param path The path to encode
 * @return The StringBuilder, or null if {@code buf} was null and no substitutions were required.
 */
public static StringBuilder encodePath(StringBuilder buf, String path)
{
    // The whole path is encoded, so encoding starts at the first character.
    final int fromStart = 0;
    return encodePath(buf,path,fromStart);
}
/* ------------------------------------------------------------ */
/**
 * Encode a URI path, starting at a given index of the path.
 * <p>
 * The characters listed in {@link #pathEscapeOf(char)} are replaced by their
 * percent escape. From the first character above 127 onwards the path is
 * converted to bytes with {@link #__CHARSET} and every negative (non-ASCII)
 * byte is written as {@code %} followed by its hex value.
 * </p>
 * @param buf StringBuilder to encode path into (or null)
 * @param path The path to encode
 * @param offset index of the first character of {@code path} to encode
 * @return The StringBuilder, or null if {@code buf} was null and no substitutions were required.
 */
private static StringBuilder encodePath(StringBuilder buf, String path, int offset)
{
    final int length = path.length();

    if (buf==null)
    {
        // Scan first, so that nothing is allocated for a path that needs no encoding.
        int scan = offset;
        while (scan<length)
        {
            char c = path.charAt(scan);
            if (c>127 || pathEscapeOf(c)!=null)
                break;
            scan++;
        }
        if (scan>=length)
            return null;
        buf = new StringBuilder(length*2);
    }

    // Character phase: runs until the end of the path or the first non-ASCII character.
    int i = offset;
    for (;i<length;i++)
    {
        char c = path.charAt(i);
        if (c>127)
            break;
        String escape = pathEscapeOf(c);
        if (escape==null)
            buf.append(c);
        else
            buf.append(escape);
    }

    // Byte phase: everything before index i was ASCII (one byte per char),
    // so i is also the index of the first unprocessed byte.
    if (i<length)
    {
        byte[] bytes = path.getBytes(URIUtil.__CHARSET);
        for (;i<bytes.length;i++)
        {
            byte b = bytes[i];
            if (b<0)
            {
                buf.append('%');
                TypeUtil.toHex(b,buf);
                continue;
            }
            String escape = pathEscapeOf((char)b);
            if (escape==null)
                buf.append((char)b);
            else
                buf.append(escape);
        }
    }

    return buf;
}

/**
 * Look up the percent escape of an ASCII character that may not appear literally in an encoded path.
 * @param c the character to look up
 * @return the escape sequence for {@code c}, or null if {@code c} has no fixed escape
 */
private static String pathEscapeOf(char c)
{
    switch(c)
    {
        case '%':
            return "%25";
        case '?':
            return "%3F";
        case ';':
            return "%3B";
        case '#':
            return "%23";
        case '"':
            return "%22";
        case '\'':
            return "%27";
        case '<':
            return "%3C";
        case '>':
            return "%3E";
        case ' ':
            return "%20";
        case '[':
            return "%5B";
        case '\\':
            return "%5C";
        case ']':
            return "%5D";
        case '^':
            return "%5E";
        case '`':
            return "%60";
        case '{':
            return "%7B";
        case '|':
            return "%7C";
        case '}':
            return "%7D";
        default:
            return null;
    }
}
/* ------------------------------------------------------------ */
/**
 * Percent-encode selected characters of a String.
 * <p>
 * Every {@code %} and every character contained in {@code encode} is replaced by
 * {@code %} followed by the base 16 form of the low 8 bits of the character; all
 * other characters are copied unchanged.
 * </p>
 * @param buf StringBuilder to encode path into (or null)
 * @param path The path to encode
 * @param encode String of characters to encode. % is always encoded.
 * @return The StringBuilder, or null if {@code buf} was null and no substitutions were required.
 */
public static StringBuilder encodeString(StringBuilder buf,
                                         String path,
                                         String encode)
{
    final int length = path.length();

    if (buf==null)
    {
        // Look for the first character that has to be escaped before allocating anything.
        int first = 0;
        while (first<length)
        {
            char c = path.charAt(first);
            if (c=='%' || encode.indexOf(c)>=0)
                break;
            first++;
        }
        if (first==length)
            return null;
        buf = new StringBuilder(length<<1);
    }

    for (int i=0;i<length;i++)
    {
        char c = path.charAt(i);
        boolean escape = c=='%' || encode.indexOf(c)>=0;
        if (!escape)
        {
            buf.append(c);
            continue;
        }
        buf.append('%');
        StringUtil.append(buf,(byte)(0xff&c),16);
    }

    return buf;
}
/* ------------------------------------------------------------ */
/**
 * Decode a complete URI path and strip its path parameters.
 * @param path the encoded path
 * @return the decoded path, as produced by {@link #decodePath(String, int, int)} over the whole String
 */
public static String decodePath(String path)
{
    final int length = path.length();
    return decodePath(path,0,length);
}
/* ------------------------------------------------------------ */
/**
 * Decode a region of a URI path as UTF-8 and strip its path parameters.
 * <p>
 * {@code %XX} escapes are appended as bytes to a {@code Utf8StringBuilder},
 * {@code %uXXXX} escapes are appended as characters, and everything from a
 * {@code ;} up to (but excluding) the next {@code /} is dropped. If the builder
 * raises a {@code NotUtf8Exception}, a warning is logged and the region is
 * decoded again by the ISO-8859-1 decoder.
 * </p>
 * @param path the String containing the encoded path
 * @param offset index of the first character of the region
 * @param length number of characters in the region
 * @return the decoded region; the original String when the region is the whole
 *         path and nothing needed decoding
 * @throws IllegalArgumentException if a {@code %} escape is truncated by the end of the region
 */
public static String decodePath(String path, int offset, int length)
{
    try
    {
        Utf8StringBuilder builder=null;
        int end=offset+length;
        for (int i=offset;i<end;i++)
        {
            char c = path.charAt(i);
            switch(c)
            {
                case '%':
                    if (builder==null)
                    {
                        // First escape: switch to the builder, seeded with the literal prefix.
                        builder=new Utf8StringBuilder(path.length());
                        builder.append(path,offset,i-offset);
                    }
                    if ((i+2)>=end)
                        throw new IllegalArgumentException("Bad URI % encoding");

                    char u=path.charAt(i+1);
                    if (u=='u')
                    {
                        // %uXXXX uses the four characters at i+2..i+5; they must all be
                        // inside the region, otherwise text beyond 'end' would be parsed.
                        if ((i+5)>=end)
                            throw new IllegalArgumentException("Bad URI % encoding");
                        int codepoint=0xffff&TypeUtil.parseInt(path,i+2,4,16);
                        char[] chars = Character.toChars(codepoint);
                        for (char ch:chars)
                            builder.append(ch);
                        i+=5;
                    }
                    else
                    {
                        builder.append((byte)(0xff&(TypeUtil.convertHexDigit(u)*16+TypeUtil.convertHexDigit(path.charAt(i+2)))));
                        i+=2;
                    }
                    break;

                case ';':
                    if (builder==null)
                    {
                        builder=new Utf8StringBuilder(path.length());
                        builder.append(path,offset,i-offset);
                    }
                    // Skip the path parameter; only the '/' that ends it is kept.
                    while(++i<end)
                    {
                        if (path.charAt(i)=='/')
                        {
                            builder.append('/');
                            break;
                        }
                    }
                    break;

                default:
                    if (builder!=null)
                        builder.append(c);
                    break;
            }
        }

        if (builder!=null)
            return builder.toString();
        if (offset==0 && length==path.length())
            return path;
        return path.substring(offset,end);
    }
    catch(NotUtf8Exception e)
    {
        LOG.warn(path.substring(offset,offset+length)+" "+e);
        LOG.debug(e);
        return decodeISO88591Path(path,offset,length);
    }
}
/* ------------------------------------------------------------ */
/**
 * Decode a region of a URI path as ISO-8859-1 and strip its path parameters.
 * <p>
 * Each {@code %XX} escape becomes the single character with that value (0-255),
 * each {@code %uXXXX} escape becomes the character with that 16 bit value, and
 * everything from a {@code ;} up to (but excluding) the next {@code /} is dropped.
 * </p>
 * @param path the String containing the encoded path
 * @param offset index of the first character of the region
 * @param length number of characters in the region
 * @return the decoded region; the original String when the region is the whole
 *         path and nothing needed decoding
 * @throws IllegalArgumentException if a {@code %} escape is truncated by the end of the region
 */
private static String decodeISO88591Path(String path, int offset, int length)
{
    StringBuilder builder=null;
    int end=offset+length;
    for (int i=offset;i<end;i++)
    {
        char c = path.charAt(i);
        switch(c)
        {
            case '%':
                if (builder==null)
                {
                    builder=new StringBuilder(path.length());
                    builder.append(path,offset,i-offset);
                }
                if ((i+2)>=end)
                    throw new IllegalArgumentException("Bad URI % encoding");

                char u=path.charAt(i+1);
                if (u=='u')
                {
                    // %uXXXX uses the four characters at i+2..i+5; keep them inside the region.
                    if ((i+5)>=end)
                        throw new IllegalArgumentException("Bad URI % encoding");
                    // TODO this is wrong. This is a codepoint not a char
                    builder.append((char)(0xffff&TypeUtil.parseInt(path,i+2,4,16)));
                    i+=5;
                }
                else
                {
                    // Must be appended as a char: StringBuilder has no append(byte), so a
                    // byte argument would select append(int) and add the decimal digits
                    // of the value (e.g. "65" for %41) instead of the character.
                    builder.append((char)(0xff&(TypeUtil.convertHexDigit(u)*16+TypeUtil.convertHexDigit(path.charAt(i+2)))));
                    i+=2;
                }
                break;

            case ';':
                if (builder==null)
                {
                    builder=new StringBuilder(path.length());
                    builder.append(path,offset,i-offset);
                }
                // Skip the path parameter; only the '/' that ends it is kept.
                while(++i<end)
                {
                    if (path.charAt(i)=='/')
                    {
                        builder.append('/');
                        break;
                    }
                }
                break;

            default:
                if (builder!=null)
                    builder.append(c);
                break;
        }
    }

    if (builder!=null)
        return builder.toString();
    if (offset==0 && length==path.length())
        return path;
    return path.substring(offset,end);
}
/* ------------------------------------------------------------ */
/**
 * Add two encoded URI path segments.
 * <p>
 * Handles null and empty paths, and avoids a duplicate or missing '/' at the join.
 * If {@code p1} carries path parameters or a query (eg ;JSESSIONID=xxx or ?a=b),
 * {@code p2} is inserted in front of whichever of ';' and '?' comes first, so
 * that a ';' inside a query string is never mistaken for a path parameter.
 * </p>
 * @param p1 URI path segment (should be encoded)
 * @param p2 URI path segment (should be encoded)
 * @return Legally combined path segments.
 */
public static String addEncodedPaths(String p1, String p2)
{
    if (p1==null || p1.length()==0)
    {
        if (p1!=null && p2==null)
            return p1;
        return p2;
    }
    if (p2==null || p2.length()==0)
        return p1;

    // The path part of p1 ends at the first ';' or '?', whichever comes first.
    int split=p1.indexOf(';');
    int query=p1.indexOf('?');
    if (split<0 || (query>=0 && query<split))
        split=query;

    if (split==0)
        return p2+p1;
    if (split<0)
        split=p1.length();

    boolean p1EndsWithSlash = p1.charAt(split-1)=='/';
    boolean p2StartsWithSlash = p2.charAt(0)=='/';

    StringBuilder buf = new StringBuilder(p1.length()+p2.length()+2);
    // Drop the trailing '/' of p1's path when p2 brings its own.
    buf.append(p1,0,(p1EndsWithSlash && p2StartsWithSlash)?split-1:split);
    if (!p1EndsWithSlash && !p2StartsWithSlash)
        buf.append('/');
    buf.append(p2);
    // Re-attach the parameters / query of p1 after the combined path.
    buf.append(p1,split,p1.length());
    return buf.toString();
}
/* ------------------------------------------------------------ */
/**
 * Add two decoded URI path segments.
 * <p>
 * Null and empty segments are handled, and exactly one '/' is left at the join.
 * Path and query params (eg ?a=b or ;JSESSIONID=xxx) are not handled.
 * </p>
 * @param p1 URI path segment (should be decoded)
 * @param p2 URI path segment (should be decoded)
 * @return Legally combined path segments.
 */
public static String addPaths(String p1, String p2)
{
    if (p1==null || p1.length()==0)
        return (p1!=null && p2==null) ? p1 : p2;
    if (p2==null || p2.length()==0)
        return p1;

    final boolean trailing = p1.endsWith(SLASH);
    final boolean leading = p2.startsWith(SLASH);

    if (trailing && leading)
    {
        // One side is just "/": the other side already is the answer.
        if (p2.length()==1)
            return p1;
        if (p1.length()==1)
            return p2;
    }

    StringBuilder joined = new StringBuilder(p1.length()+p2.length()+2);
    joined.append(p1);
    if (trailing && leading)
        joined.setLength(joined.length()-1);
    else if (!trailing && !leading)
        joined.append(SLASH);
    joined.append(p2);
    return joined.toString();
}
/* ------------------------------------------------------------ */
/**
 * Return the parent path.
 * <p>
 * The path is treated like a directory path: the result is the path up to and
 * including the last '/' found before the final character.
 * </p>
 * @param p the path to return a parent reference to
 * @return the parent path, or null if {@code p} is null, is "/" or has no such '/'
 */
public static String parentPath(String p)
{
    if (p!=null && !URIUtil.SLASH.equals(p))
    {
        // Start one before the last character so that a trailing '/' is not matched.
        int lastSeparator = p.lastIndexOf('/',p.length()-2);
        if (lastSeparator>=0)
            return p.substring(0,lastSeparator+1);
    }
    return null;
}
/* ------------------------------------------------------------ */
/**
 * Convert a decoded path to a canonical form.
 * <p>
 * All "." and ".." segments are factored out. Segments that merely start with
 * dots (".foo", "...") are kept.
 * </p>
 * <p>
 * Null is returned if the path tries to ".." above its root.
 * </p>
 * @param path the path to convert, decoded, with path separators '/' and no queries.
 * @return the canonical path, or null if path traversal above root.
 */
public static String canonicalPath(String path)
{
    if (path == null || path.isEmpty())
        return path;

    final int length = path.length();

    // Fast scan: a path without any segment starting with '.' is already canonical.
    boolean segmentStart = true;
    int pos = 0;
    for (;pos<length;pos++)
    {
        char ch = path.charAt(pos);
        if (ch=='.' && segmentStart)
            break;
        segmentStart = ch=='/';
    }
    if (pos==length)
        return path;

    // Slow path: copy what was scanned and resolve the rest. pendingDots counts
    // the dots seen since the start of the current segment that are not yet written.
    StringBuilder out = new StringBuilder(length);
    out.append(path,0,pos);
    int pendingDots = 1;

    for (int idx=pos+1;idx<=length;idx++)
    {
        // '\0' stands for the end of the path, which terminates a segment like '/'.
        char ch = idx<length ? path.charAt(idx) : '\0';

        if (ch=='/' || ch=='\0')
        {
            if (pendingDots==2)
            {
                // ".." : remove the previous segment, or fail at the root.
                if (out.length()<2)
                    return null;
                out.setLength(out.length()-1);
                out.setLength(out.lastIndexOf("/")+1);
            }
            else if (pendingDots!=1)
            {
                // Either no pending dots or three and more, which form an ordinary name.
                for (;pendingDots>0;pendingDots--)
                    out.append('.');
                if (ch!='\0')
                    out.append(ch);
            }
            // pendingDots==1 is "." : the segment simply disappears.
            segmentStart = true;
            pendingDots = 0;
        }
        else if (ch=='.')
        {
            if (pendingDots>0)
                pendingDots++;
            else if (segmentStart)
                pendingDots = 1;
            else
                out.append('.');
            segmentStart = false;
        }
        else
        {
            // The dots belonged to a normal name such as ".foo" or "..bar".
            for (;pendingDots>0;pendingDots--)
                out.append('.');
            out.append(ch);
            pendingDots = 0;
            segmentStart = false;
        }
    }

    return out.toString();
}
/* ------------------------------------------------------------ */
/**
 * Convert an encoded path to a canonical form.
 * <p>
 * All "." and ".." segments of the path are factored out. A query string
 * (everything from the first '?') is never interpreted: it is copied to the
 * result exactly as given, even if it contains "/../" or "/./" sequences.
 * </p>
 * <p>
 * Null is returned if the path tries to ".." above its root.
 * </p>
 * @param path the path to convert (expects URI/URL form, encoded, and with path separators '/')
 * @return the canonical path, or null if path traversal above root.
 */
public static String canonicalEncodedPath(String path)
{
    if (path == null || path.isEmpty())
        return path;

    boolean slash = true;
    int end = path.length();
    int i = 0;

    // Fast scan: stop at the first segment that starts with '.'; a path that
    // reaches its query or its end first is already canonical.
    loop:
    while (i<end)
    {
        char c = path.charAt(i);
        switch(c)
        {
            case '/':
                slash = true;
                break;

            case '.':
                if (slash)
                    break loop;
                slash = false;
                break;

            case '?':
                return path;

            default:
                slash = false;
        }
        i++;
    }

    if(i==end)
        return path;

    StringBuilder canonical = new StringBuilder(path.length());
    canonical.append(path,0,i);

    // dots counts the dots seen since the start of the segment that are not yet written.
    int dots = 1;
    i++;
    while (i<=end)
    {
        // '\0' stands for the end of the path.
        char c = i<end?path.charAt(i):'\0';
        switch(c)
        {
            case '\0':
            case '/':
            case '?':
                switch(dots)
                {
                    case 0:
                        if (c!='\0')
                            canonical.append(c);
                        break;

                    case 1:
                        // "." segment: dropped, but the '?' that ended it is kept.
                        if (c=='?')
                            canonical.append(c);
                        break;

                    case 2:
                        // ".." segment: remove the previous segment, or fail at the root.
                        if (canonical.length()<2)
                            return null;
                        canonical.setLength(canonical.length()-1);
                        canonical.setLength(canonical.lastIndexOf("/")+1);
                        if (c=='?')
                            canonical.append(c);
                        break;

                    default:
                        // Three or more dots form an ordinary segment name.
                        while (dots-->0)
                            canonical.append('.');
                        if (c!='\0')
                            canonical.append(c);
                }

                if (c=='?')
                {
                    // The query is opaque: copy the remainder verbatim and stop, so that
                    // dots inside the query can never remove segments of the path.
                    canonical.append(path,i+1,end);
                    return canonical.toString();
                }

                slash = true;
                dots = 0;
                break;

            case '.':
                if (dots>0)
                    dots++;
                else if (slash)
                    dots = 1;
                else
                    canonical.append('.');
                slash = false;
                break;

            default:
                // The dots belonged to a normal name such as ".foo".
                while (dots-->0)
                    canonical.append('.');
                canonical.append(c);
                dots = 0;
                slash = false;
        }
        i++;
    }

    return canonical.toString();
}
/* ------------------------------------------------------------ */
/**
 * Convert a path to a compact form.
 * <p>
 * Every run of consecutive '/' characters ("//", "///", ...) in the path is
 * reduced to a single "/". Anything from the first '?' onwards is left untouched.
 * </p>
 * @param path the path to compact
 * @return the compacted path, or {@code path} itself if there was nothing to compact
 */
public static String compactPath(String path)
{
    if (path==null || path.length()==0)
        return path;

    final int length = path.length();

    // Find the first doubled slash; run is the number of consecutive slashes just seen.
    int run = 0;
    int idx = 0;
    for (;idx<length;idx++)
    {
        char ch = path.charAt(idx);
        if (ch=='?')
            return path;
        if (ch!='/')
        {
            run = 0;
            continue;
        }
        if (++run==2)
            break;
    }
    if (run<2)
        return path;

    // idx is at the second slash of the first run: keep everything before it.
    StringBuilder compact = new StringBuilder(length);
    compact.append(path,0,idx);

    for (;idx<length;idx++)
    {
        char ch = path.charAt(idx);
        if (ch=='?')
        {
            compact.append(path,idx,length);
            break;
        }
        if (ch=='/')
        {
            // Only the first slash of a run is copied.
            if (run==0)
                compact.append(ch);
            run++;
        }
        else
        {
            run = 0;
            compact.append(ch);
        }
    }

    return compact.toString();
}
/* ------------------------------------------------------------ */
/**
 * Test whether a URI String starts with a scheme.
 * <p>
 * A scheme is a letter, followed by any number of letters, digits, '.', '+'
 * or '-', and terminated by ':'. An empty scheme (a URI starting with ':')
 * is not accepted.
 * </p>
 * @param uri URI (may be null)
 * @return True if the uri has a scheme
 */
public static boolean hasScheme(String uri)
{
    if (uri==null)
        return false;

    for (int i=0;i<uri.length();i++)
    {
        char c=uri.charAt(i);
        if (c==':')
            return i>0; // at least one scheme character is required before the ':'

        boolean letter = (c>='a'&&c<='z') || (c>='A'&&c<='Z');
        // Digits and punctuation are legal anywhere except the first position.
        boolean other = i>0 && ((c>='0'&&c<='9') || c=='.' || c=='+' || c=='-');
        if (!letter && !other)
            return false;
    }
    return false;
}
/* ------------------------------------------------------------ */
/**
 * Create a new URI from the arguments, handling IPv6 host encoding and default ports
 * <p>
 * The scheme, server and port prefix is built by
 * {@link #newURIBuilder(String, String, int)}; the path is appended to it when it
 * is not null, followed by '?' and the query when the query is not null or empty.
 * </p>
 * @param scheme the URI scheme
 * @param server the URI server
 * @param port the URI port
 * @param path the URI path (may be null)
 * @param query the URI query (may be null)
 * @return A String URI
 */
public static String newURI(String scheme,String server, int port,String path,String query)
{
    StringBuilder builder = newURIBuilder(scheme, server, port);
    // Appending a null String would add the literal text "null" to the URI.
    if (path!=null)
        builder.append(path);
    if (query!=null && query.length()>0)
        builder.append('?').append(query);
    return builder.toString();
}
/* ------------------------------------------------------------ */
/**
 * Create a new URI StringBuilder holding the scheme, host and port prefix of a URI.
 * <p>
 * The prefix is written by {@link #appendSchemeHostPort(StringBuilder, String, String, int)}.
 * </p>
 * @param scheme the URI scheme
 * @param server the URI server
 * @param port the URI port
 * @return a StringBuilder containing URI prefix
 */
public static StringBuilder newURIBuilder(String scheme,String server, int port)
{
    final StringBuilder prefix = new StringBuilder();
    appendSchemeHostPort(prefix, scheme, server, port);
    return prefix;
}
/* ------------------------------------------------------------ */
/**
 * Append the scheme, host and port prefix of a URI.
 * <p>
 * Writes {@code scheme://host}, where the host is the result of
 * {@code HostPort.normalizeHost(server)}. A positive port is appended as
 * {@code :port} unless it is 80 for "http" or 443 for "https".
 * </p>
 * @param url StringBuilder to append to
 * @param scheme the URI scheme
 * @param server the URI server
 * @param port the URI port
 */
public static void appendSchemeHostPort(StringBuilder url,String scheme,String server, int port)
{
    url.append(scheme);
    url.append("://");
    url.append(HostPort.normalizeHost(server));

    if (port <= 0)
        return;

    // The default port of the scheme is implied and therefore left out.
    boolean defaultPort = (scheme.equals("http") && port==80)
        || (scheme.equals("https") && port==443);
    if (!defaultPort)
        url.append(':').append(port);
}
/* ------------------------------------------------------------ */
/**
 * Append the scheme, host and port prefix of a URI to a StringBuffer.
 * <p>
 * Writes {@code scheme://host}, where the host is the result of
 * {@code HostPort.normalizeHost(server)}. A positive port is appended as
 * {@code :port} unless it is 80 for "http" or 443 for "https". All appends are
 * made while holding the monitor of {@code url}.
 * </p>
 * @param url StringBuffer to append to
 * @param scheme the URI scheme
 * @param server the URI server
 * @param port the URI port
 */
public static void appendSchemeHostPort(StringBuffer url,String scheme,String server, int port)
{
    synchronized (url)
    {
        url.append(scheme);
        url.append("://");
        url.append(HostPort.normalizeHost(server));

        if (port <= 0)
            return;

        // The default port of the scheme is implied and therefore left out.
        boolean defaultPort = (scheme.equals("http") && port==80)
            || (scheme.equals("https") && port==443);
        if (!defaultPort)
            url.append(':').append(port);
    }
}
/**
 * Compare two encoded URI Strings, treating a character and its {@code %XX} escape as equal.
 * <p>
 * Both Strings are walked in parallel, decoding {@code %XX} escapes on the fly.
 * A '/' only matches when both sides use the same raw form (literal or escaped).
 * On the first other difference the result is decided by comparing the fully
 * decoded forms of both Strings.
 * </p>
 * @param uriA the first encoded URI
 * @param uriB the second encoded URI
 * @return true if the two URIs are considered equal
 */
public static boolean equalsIgnoreEncodings(String uriA, String uriB)
{
    final int endA = uriA.length();
    final int endB = uriB.length();
    int posA = 0;
    int posB = 0;

    while (posA<endA && posB<endB)
    {
        // raw* is the character as written, val* its value after %XX decoding.
        final int rawA = uriA.charAt(posA++);
        int valA = rawA;
        if (rawA=='%')
        {
            int high = TypeUtil.convertHexDigit(uriA.charAt(posA++));
            int low = TypeUtil.convertHexDigit(uriA.charAt(posA++));
            valA = high*16+low;
        }

        final int rawB = uriB.charAt(posB++);
        int valB = rawB;
        if (rawB=='%')
        {
            int high = TypeUtil.convertHexDigit(uriB.charAt(posB++));
            int low = TypeUtil.convertHexDigit(uriB.charAt(posB++));
            valB = high*16+low;
        }

        if (valA=='/' && rawA!=rawB)
            return false;

        if (valA!=valB)
            return URIUtil.decodePath(uriA).equals(URIUtil.decodePath(uriB));
    }

    return posA==endA && posB==endB;
}
/**
 * Compare two URIs, ignoring differences in how their paths are encoded.
 * <p>
 * URIs that are {@code equals} are equal. Otherwise the schemes and the
 * authorities must match exactly (both null, or equal Strings) and the paths
 * are compared with {@link #equalsIgnoreEncodings(String, String)}.
 * </p>
 * @param uriA the first URI
 * @param uriB the second URI
 * @return true if the two URIs are considered equal
 */
public static boolean equalsIgnoreEncodings(URI uriA, URI uriB)
{
    if (uriA.equals(uriB))
        return true;

    final String schemeA = uriA.getScheme();
    final boolean sameScheme = schemeA==null
        ? uriB.getScheme()==null
        : schemeA.equals(uriB.getScheme());
    if (!sameScheme)
        return false;

    final String authorityA = uriA.getAuthority();
    final boolean sameAuthority = authorityA==null
        ? uriB.getAuthority()==null
        : authorityA.equals(uriB.getAuthority());
    if (!sameAuthority)
        return false;

    return equalsIgnoreEncodings(uriA.getPath(),uriB.getPath());
}
/**
 * Add a decoded path element to a URI.
 * <p>
 * The path is encoded and appended to the ASCII form of the URI, with exactly
 * one '/' between the two. A null or empty path adds nothing and the URI is
 * returned unchanged.
 * </p>
 * @param uri A URI to add the path to
 * @param path A decoded path element
 * @return URI with path added.
 */
public static URI addPath(URI uri, String path)
{
    // Nothing to add; this also keeps path.charAt(0) below from failing on "".
    if (path==null || path.isEmpty())
        return uri;

    String base = uri.toASCIIString();
    StringBuilder buf = new StringBuilder(base.length()+path.length()*3);
    buf.append(base);
    if (buf.charAt(base.length()-1)!='/')
        buf.append('/');

    // The separator is already in place, so a leading '/' of the path is skipped.
    int offset=path.charAt(0)=='/'?1:0;
    encodePath(buf,path,offset);

    return URI.create(buf.toString());
}
/**
 * Get the URI of the jar file that a "jar" URI refers into.
 * <p>
 * For a URI with the scheme "jar", the raw scheme specific part up to (and
 * excluding) the first "!/" is parsed as a URI of its own. Any other URI is
 * returned unchanged.
 * </p>
 * @param uri the URI to examine
 * @return the URI of the jar source, or {@code uri} itself if it is not a "jar" URI
 * @throws IllegalArgumentException if the jar source is not a valid URI
 */
public static URI getJarSource(URI uri)
{
    if (!"jar".equals(uri.getScheme()))
        return uri;

    // Work on the raw scheme specific part, so that the encoded form is retained.
    String source = uri.getRawSchemeSpecificPart();
    int separator = source.indexOf("!/");
    if (separator>=0)
        source = source.substring(0,separator);

    try
    {
        return new URI(source);
    }
    catch(URISyntaxException e)
    {
        throw new IllegalArgumentException(e);
    }
}
/**
 * Get the location of the jar file that a "jar:" URI String refers into.
 * <p>
 * For a String starting with "jar:", that prefix is removed together with
 * everything from the first "!/" onwards. Any other String is returned unchanged.
 * </p>
 * @param uri the URI String to examine
 * @return the jar source, or {@code uri} itself if it does not start with "jar:"
 */
public static String getJarSource(String uri)
{
    if (!uri.startsWith("jar:"))
        return uri;

    int separator = uri.indexOf("!/");
    // Without a "!/" separator the source runs to the end of the String.
    int stop = separator>=0 ? separator : uri.length();
    return uri.substring(4,stop);
}
}