/*
* Copyright 2000-2001,2004 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jetspeed.util;
/**
 * Encodes a URI into a string that can be stored on all filesystems and used
 * in HTTP values, and decodes such a string back into a URI.
 *
 * <p>
 * The scheme is intentionally simple and is not a strict bijection: both
 * "://" and "/" are written as a single "_", so {@link #decode(String)}
 * treats the first single "_" it finds as the protocol separator.
 * </p>
 *
 * @author <a href="mailto:burton@apache.org">Kevin A. Burton</a>
 * @author <a href="mailto:sgala@hisitech.com">Santiago Gala</a>
 * @version $Id: URIEncoder.java,v 1.11 2004/02/23 03:23:42 jford Exp $
 */
public class URIEncoder {

    /**
     * A list of invalid characters that can't exist within filenames. If they
     * appear then the DiskCache will escape them. The current list is in part
     * based on Microsoft Knowledge Base article Q177506 (because DOS
     * filesystems are more generally limited than UNIX filesystems).
     *
     * <p>
     * SGP: Windows NT refuses to take "?", so I add it to the list.
     * Additionally, if we encode "?", the jdk runtime logic decodes it twice
     * for "file:" urls, giving a filename with a space in it. I have fixed
     * it in JetspeedDiskCacheEntry.java, avoiding the creation of a new URL
     * when getFile() is not null.
     * </p>
     *
     * <p>
     * Entry <code>i</code> of this array is escaped as entry <code>i</code>
     * of {@link #CODED_CHARACTERS}; the two arrays must stay the same length
     * and in the same order.
     * </p>
     */
    public static final String[] INVALID_CHARACTERS = {
        "\\",
        "/",
        ":",
        "*",
        "\"",
        "<",
        ">",
        "|",
        "+",
        "?"
    };

    /**
     * The escaped form of each entry of {@link #INVALID_CHARACTERS}, at the
     * same index. Each escape has the form "#" + decimal character code + ";"
     * (for example "?" becomes "#63;").
     */
    public static final String[] CODED_CHARACTERS = {
        "#" + (int)'\\' + ";",
        "#" + (int)'/' + ";",
        "#" + (int)':' + ";",
        "#" + (int)'*' + ";",
        "#" + (int)'"' + ";",
        "#" + (int)'<' + ";",
        "#" + (int)'>' + ";",
        "#" + (int)'|' + ";",
        "#" + (int)'+' + ";",
        "#" + (int)'?' + ";"
    };

    /**
     * Encode the given URI.
     *
     * @param uri the URI to encode; must not be null
     * @return the encoded form of <code>uri</code>
     * @throws IllegalArgumentException if <code>uri</code> is null
     */
    public static String encode( String uri ) {

        if ( uri == null ) {
            throw new IllegalArgumentException( "URI may not be null. " );
        }

        /*
          Very basic encoding... should work for most circumstances.

          files like http://www.apache.org:80/index.html

          will be changed to:

          http_www.apache.org___80_index.html

          - a "_"   -> "__"
          - a "://" -> "_"
          - a "/"   -> "_"
          - a ":"   -> "___"

          The order matters: existing underscores are doubled first so they
          stay distinguishable from the underscores introduced afterwards, and
          "://" is handled before its component characters "/" and ":".
        */
        StringBuffer buffer = new StringBuffer( uri );
        StringUtils.replaceAll( buffer, "_", "__" );
        StringUtils.replaceAll( buffer, "://", "_" );
        StringUtils.replaceAll( buffer, "/", "_" );
        StringUtils.replaceAll( buffer, ":", "___" );

        //if there are any characters that can't be stored in a filesystem
        //encode them now
        encodeQueryData( buffer );

        return buffer.toString();
    }

    /**
     * Decode the given URI.
     *
     * <p>
     * The text before the first "_" is taken to be the protocol, unless that
     * "_" is immediately followed by another "_" (an escaped underscore), in
     * which case the input is treated as having no protocol.
     * </p>
     *
     * @param uri the encoded URI; must not be null
     * @return the decoded form of <code>uri</code>
     * @throws IllegalArgumentException if <code>uri</code> is null
     */
    public static String decode( String uri ) {

        if ( uri == null ) {
            throw new IllegalArgumentException( "URI may not be null. " );
        }

        int start = uri.indexOf( "_" );

        //SGP: needed if uri does not contain protocol but contains "_"
        //The look-ahead is only done when a "_" was found and a following
        //character exists; an empty string, or a "_" in the last position
        //(e.g. "foo_"), would otherwise index past the end of the string.
        if ( start > -1
             && start + 1 < uri.length()
             && uri.charAt( start + 1 ) == '_' ) {
            start = -1;
        }

        String protocol = null;
        if ( start > -1 ) {
            protocol = uri.substring( 0, start );
        }

        //everything after the protocol separator, or the whole string when
        //no protocol was detected (start == -1)
        StringBuffer buffer = new StringBuffer( uri.substring( start + 1 ) );

        StringUtils.replaceAll( buffer, "___", ":" );

        StringUtils.replaceAll( buffer, "_", "/" );
        //NOTE(review): this repeated call looks redundant if replaceAll
        //already replaces every occurrence -- confirm against StringUtils
        //before removing it.
        StringUtils.replaceAll( buffer, "_", "/" );

        //now the original "__" should be slashes so replace them with a
        //single "_"
        StringUtils.replaceAll( buffer, "//", "_" );

        if ( protocol != null ) {
            //prepend protocol and separator
            buffer.insert( 0, protocol + "://" );
        }

        decodeQueryData( buffer );

        return buffer.toString();
    }

    /**
     * <p>
     * If this data contains any INVALID_CHARACTERS encode the data into a
     * target String.
     * </p>
     *
     * <p>
     * NOTE: the algorithm between encode and decode is shared, if you modify
     * one you should modify the other.
     * </p>
     *
     * @param data the buffer to escape
     * @return the buffer returned by the last replacement
     * @see #decodeQueryData(StringBuffer)
     */
    private static StringBuffer encodeQueryData( StringBuffer data ) {

        for ( int i = 0; i < INVALID_CHARACTERS.length; ++i ) {
            String source = INVALID_CHARACTERS[i];
            String coded = CODED_CHARACTERS[i];

            data = StringUtils.replaceAll( data, source, coded );
        }

        return data;
    }

    /**
     * <p>
     * If this data contains any encoded INVALID_CHARACTERS, decode the data
     * back into the source string.
     * </p>
     *
     * <p>
     * NOTE: the algorithm between encode and decode is shared, if you modify
     * one you should modify the other.
     * </p>
     *
     * @param data the buffer to unescape
     * @return the buffer returned by the last replacement
     * @see #encodeQueryData(StringBuffer)
     */
    private static StringBuffer decodeQueryData( StringBuffer data ) {

        for ( int i = 0; i < INVALID_CHARACTERS.length; ++i ) {
            String source = INVALID_CHARACTERS[i];
            String coded = CODED_CHARACTERS[i];

            data = StringUtils.replaceAll( data, coded, source );
        }

        return data;
    }
}