/*
* eXist Open Source Native XML Database
* Copyright (C) 2001-05 The eXist Project
* http://exist-db.org
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*
* $Id$
*/
package org.exist.xquery.util;
import java.io.UnsupportedEncodingException;
import java.net.URISyntaxException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import org.exist.xmldb.XmldbURI;
/**
* Utilities for URI related functions
*
* @author Pierrick Brihaye <pierrick.brihaye@free.fr>
*/
public class URIUtils {

    /** Characters that <code>encodeForURI</code> restores after URL encoding. */
    private static final String ENCODE_FOR_URI_LITERALS = "-_.~";

    /**
     * Punctuation that <code>iriToURI</code> leaves unescaped (letters and
     * digits are never escaped by the underlying encoder in the first place).
     */
    private static final String IRI_TO_URI_LITERALS = "#-_.!~*'();/?:@&=+$,[]%";

    /** Every printable US-ASCII character, i.e. the octets 32 to 126. */
    private static final String PRINTABLE_ASCII = buildPrintableAscii();

    /**
     * Escapes a string so that it can be used as a single URI path segment:
     * everything except ASCII letters, digits and <code>- _ . ~</code> is
     * percent-encoded as UTF-8 (a space becomes <code>%20</code>, a
     * <code>*</code> becomes <code>%2A</code>).
     *
     * @param uriPart The string to escape
     * @return The escaped string
     */
    public static String encodeForURI(String uriPart) {
        // urlEncodeUtf8 already turned '+' (space) into "%20"; only '~' has to
        // be restored and '*' (which URLEncoder leaves alone) escaped.
        final String restored = unescapeOctets(urlEncodeUtf8(uriPart), ENCODE_FOR_URI_LITERALS);
        return restored.replace("*", "%2A");
    }

    /**
     * Converts an IRI into a URI by percent-encoding (as UTF-8) every
     * character except ASCII letters, digits and
     * <code># - _ . ! ~ * ' ( ) ; / ? : @ &amp; = + $ , [ ] %</code>.
     * Because <code>%</code> is preserved, existing escape sequences in the
     * input are left untouched.
     *
     * @param uriPart The IRI (or part of it) to convert
     * @return The converted string
     */
    public static String iriToURI(String uriPart) {
        return unescapeOctets(urlEncodeUtf8(uriPart), IRI_TO_URI_LITERALS);
    }

    /**
     * Escapes a URI the way HTML user agents do: every character outside the
     * printable US-ASCII range (octets 32 to 126) is percent-encoded as UTF-8,
     * every printable ASCII character - including the space and characters
     * such as <code>&lt; &gt; " { } | \ ^ `</code> - is left unchanged.
     *
     * @param uri The URI to escape
     * @return The escaped URI
     */
    public static String escapeHtmlURI(String uri) {
        return unescapeOctets(urlEncodeUtf8(uri), PRINTABLE_ASCII);
    }

    /**
     * This method is a wrapper for {@link java.net.URLEncoder#encode(java.lang.String,java.lang.String)}.
     * It calls that method, supplying the uri parameter as
     * the first parameter, and "UTF-8" (the W3C recommended
     * encoding) as the second. UnsupportedEncodingExceptions
     * are wrapped in a runtime exception.
     *
     * IMPORTANT: the java.net.URLEncoder class encodes a space (" ")
     * as a "+". The proper method of encoding spaces in the path of
     * a URI is with "%20", so this method will replace all instances of "+"
     * in the encoded string with "%20" before returning. This means that
     * XmldbURIs constructed from java.net.URLEncoder#encoded strings
     * will not be String equivalents of XmldbURIs created with the result of
     * calls to this function.
     *
     * @param uri The uri to encode
     * @return The UTF-8 encoded value of the supplied uri
     */
    public static String urlEncodeUtf8(String uri) {
        try {
            // A literal '+' in the input has already become "%2B", so any '+'
            // left in the encoder output stands for a space.
            return URLEncoder.encode(uri, "UTF-8").replace("+", "%20");
        } catch (UnsupportedEncodingException e) {
            // wrap with a runtime Exception
            throw new RuntimeException(e);
        }
    }

    /**
     * This method decodes the provided uri for human readability. The
     * method simply wraps URLDecoder.decode(uri, "UTF-8"). It is placed here
     * to provide a friendly way to decode URIs encoded by urlEncodeUtf8().
     * Note that URLDecoder also turns a literal "+" into a space.
     *
     * @param uri The uri to decode
     * @return The decoded value of the supplied uri
     */
    public static String urlDecodeUtf8(String uri) {
        try {
            return URLDecoder.decode(uri, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            // wrap with a runtime Exception
            throw new RuntimeException(e);
        }
    }

    /**
     * This method decodes the string form of the provided uri for human
     * readability. It is equivalent to calling
     * {@link #urlDecodeUtf8(String)} with <code>uri.toString()</code>.
     *
     * @param uri The uri to decode
     * @return The decoded value of the supplied uri
     */
    public static String urlDecodeUtf8(XmldbURI uri) {
        return urlDecodeUtf8(uri.toString());
    }

    /**
     * This method splits the supplied url on the character
     * '/' then URL encodes the segments between, returning
     * a URL encoded version of the passed url, leaving any
     * occurrence of '/' as it is.
     *
     * @param url The path to encode
     * @return A UTF-8 URL encoded string
     */
    public static String urlEncodePartsUtf8(String url) {
        // limit -1 keeps trailing empty segments, so trailing slashes survive
        final String[] segments = url.split("/", -1);
        final StringBuilder encoded = new StringBuilder(url.length());
        for (int i = 0; i < segments.length; i++) {
            if (i > 0) {
                encoded.append('/');
            }
            encoded.append(urlEncodeUtf8(segments[i]));
        }
        return encoded.toString();
    }

    /**
     * This method ensures that a collection path (e.g. /db/[])
     * is properly URL encoded. Uses W3C recommended UTF-8
     * encoding. If the path can be parsed by XmldbURI as it is, the raw
     * collection path of the parsed URI is returned; otherwise the path is
     * encoded with <code>urlEncodePartsUtf8</code>.
     *
     * @param path The path to check
     * @return A UTF-8 URL encoded string
     */
    public static String ensureUrlEncodedUtf8(String path) {
        try {
            final XmldbURI uri = XmldbURI.xmldbUriFor(path);
            return uri.getRawCollectionPath();
        } catch (URISyntaxException e) {
            // not a valid URI as given: fall back to encoding each segment
            return URIUtils.urlEncodePartsUtf8(path);
        }
    }

    /**
     * This method creates an <code>XmldbURI</code> by encoding the provided
     * string, then calling XmldbURI.xmldbUriFor(String) with the result of that
     * encoding
     *
     * @param path The path to encode and create an XmldbURI from
     * @return The XmldbURI created from the encoded path
     * @throws URISyntaxException A URISyntaxException is thrown if the path
     * cannot be parsed by XmldbURI, after being encoded by
     * <code>urlEncodePartsUtf8</code>
     */
    public static XmldbURI encodeXmldbUriFor(String path) throws URISyntaxException {
        return XmldbURI.xmldbUriFor(URIUtils.urlEncodePartsUtf8(path));
    }

    /**
     * Scans a percent-encoded string once and replaces every escape sequence
     * <code>%XX</code> whose octet is one of the characters in
     * <code>literals</code> by that character. All other escape sequences
     * (in particular the octets of multi-byte UTF-8 sequences, which are
     * always above 127) are copied unchanged.
     *
     * @param encoded A string as produced by urlEncodeUtf8()
     * @param literals The ASCII characters that must appear unescaped
     * @return The string with the selected escape sequences resolved
     */
    private static String unescapeOctets(String encoded, String literals) {
        final int length = encoded.length();
        final StringBuilder out = new StringBuilder(length);
        int i = 0;
        while (i < length) {
            final char c = encoded.charAt(i);
            if (c == '%' && i + 2 < length) {
                final int high = Character.digit(encoded.charAt(i + 1), 16);
                final int low = Character.digit(encoded.charAt(i + 2), 16);
                if (high >= 0 && low >= 0) {
                    final char octet = (char) ((high << 4) | low);
                    if (literals.indexOf(octet) >= 0) {
                        out.append(octet);
                    } else {
                        out.append(encoded, i, i + 3);
                    }
                    // consume the whole triple so that a restored '%' can
                    // never combine with the following characters
                    i += 3;
                    continue;
                }
            }
            out.append(c);
            i++;
        }
        return out.toString();
    }

    /** Builds the string of all characters from 0x20 (space) to 0x7E (tilde). */
    private static String buildPrintableAscii() {
        final StringBuilder chars = new StringBuilder(0x7F - 0x20);
        for (char c = 0x20; c <= 0x7E; c++) {
            chars.append(c);
        }
        return chars.toString();
    }
}