/*
* Copyright 2000-2013 Enonic AS
* http://www.enonic.com/license
*/
package com.enonic.esl.net;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;
/**
 * Static helpers for URL (form) encoding and decoding of strings using UTF-8, plus re-decoding of
 * request parameter maps whose strings were read with the wrong character encoding.
 * <p>
 * The class cannot be instantiated; all methods are static.
 */
public class URLUtil
{
    /**
     * Name of the character encoding used for the percent-escaped bytes.
     */
    private static final String UTF_8 = "UTF-8";

    /**
     * Upper-case hexadecimal digits, indexed by nibble value.
     */
    private static final char[] UPPER_HEX = "0123456789ABCDEF".toCharArray();

    /**
     * Characters that {@link #encode(String)} copies to the output without percent-escaping.
     * The space character is a member of this set only so that it is routed through the
     * "unescaped" branch, where it is replaced by a plus sign.
     */
    private static final BitSet dontNeedEncoding = new BitSet( 256 );

    static
    {
        for ( int ch = 'a'; ch <= 'z'; ch++ )
        {
            dontNeedEncoding.set( ch );
        }
        for ( int ch = 'A'; ch <= 'Z'; ch++ )
        {
            dontNeedEncoding.set( ch );
        }
        for ( int ch = '0'; ch <= '9'; ch++ )
        {
            dontNeedEncoding.set( ch );
        }
        // encoding a space to a + is done in the encode() method
        dontNeedEncoding.set( ' ' );
        dontNeedEncoding.set( '-' );
        dontNeedEncoding.set( '_' );
        dontNeedEncoding.set( '.' );
        dontNeedEncoding.set( '*' );
    }

    /**
     * Private constructor. No instantiation allowed.
     */
    private URLUtil()
    {
    }

    /**
     * URL-encodes a string using UTF-8.
     * <p>
     * Letters {@code a-z} and {@code A-Z}, digits {@code 0-9} and the characters {@code - _ . *} are
     * copied unchanged, a space becomes {@code +}, and every other character is written as the
     * {@code %XY} escapes (upper-case hex) of its UTF-8 bytes. A high surrogate that is immediately
     * followed by a low surrogate is encoded together with it as one code point.
     *
     * @param s the string to encode; must not be {@code null}
     * @return the encoded string, or {@code s} itself when no character had to be changed
     * @throws NullPointerException  if {@code s} is {@code null}
     * @throws IllegalStateException if the runtime does not provide the UTF-8 encoding
     */
    public static String encode( String s )
    {
        boolean needToChange = false;
        boolean wroteUnencodedChar = false;
        final int maxBytesPerChar = 10; // rather arbitrary limit, but safe for now
        final int length = s.length();
        StringBuilder out = new StringBuilder( length );
        ByteArrayOutputStream buf = new ByteArrayOutputStream( maxBytesPerChar );
        OutputStreamWriter writer = newUtf8Writer( buf );

        for ( int i = 0; i < length; i++ )
        {
            int c = s.charAt( i );
            if ( dontNeedEncoding.get( c ) )
            {
                if ( c == ' ' )
                {
                    c = '+';
                    needToChange = true;
                }
                out.append( (char) c );
                wroteUnencodedChar = true;
                continue;
            }

            // convert to external encoding before hex conversion
            try
            {
                if ( wroteUnencodedChar )
                {
                    // Fix for 4407610: start with a fresh writer after a run of unescaped characters.
                    writer = newUtf8Writer( buf );
                    wroteUnencodedChar = false;
                }
                writer.write( c );
                /*
                 * If this character represents the start of a Unicode surrogate pair, then pass
                 * in two characters. It's not clear what should be done if a value reserved in
                 * the surrogate pairs range occurs outside of a legal surrogate pair. For now,
                 * just treat it as if it were any other character.
                 */
                if ( c >= 0xD800 && c <= 0xDBFF && ( i + 1 ) < length )
                {
                    int d = s.charAt( i + 1 );
                    if ( d >= 0xDC00 && d <= 0xDFFF )
                    {
                        writer.write( d );
                        i++;
                    }
                }
                writer.flush();
            }
            catch ( IOException ignored )
            {
                // Deliberate best effort, as before: drop whatever was buffered for this
                // character and carry on with the next one.
                buf.reset();
                continue;
            }

            for ( byte b : buf.toByteArray() )
            {
                out.append( '%' );
                out.append( UPPER_HEX[( b >> 4 ) & 0xF] );
                out.append( UPPER_HEX[b & 0xF] );
            }
            buf.reset();
            needToChange = true;
        }
        return needToChange ? out.toString() : s;
    }

    /**
     * Decodes a URL-encoded string using UTF-8.
     * <p>
     * A {@code +} becomes a space. Each run of consecutive {@code %xy} escapes is converted to
     * bytes and those bytes are decoded as UTF-8. All other characters are copied unchanged.
     *
     * @param s the string to decode; must not be {@code null}
     * @return the decoded string, or {@code s} itself when it contains neither {@code +} nor {@code %}
     * @throws NullPointerException     if {@code s} is {@code null}
     * @throws IllegalArgumentException if an escape is incomplete, or its two characters are not
     *                                  plain hexadecimal digits (a leading sign is rejected too)
     * @throws IllegalStateException    if the runtime does not provide the UTF-8 encoding
     */
    public static String decode( String s )
    {
        boolean needToChange = false;
        final int numChars = s.length();
        StringBuilder sb = new StringBuilder( numChars );
        int i = 0;
        while ( i < numChars )
        {
            char c = s.charAt( i );
            switch ( c )
            {
                case '+':
                    sb.append( ' ' );
                    i++;
                    needToChange = true;
                    break;
                case '%':
                {
                    /*
                     * Starting with this instance of %, process all consecutive substrings of
                     * the form %xy. Each substring %xy will yield a byte. Convert all
                     * consecutive bytes obtained this way to the character(s) they represent
                     * in UTF-8.
                     */
                    // (numChars - i) / 3 is an upper bound for the number of remaining bytes
                    byte[] bytes = new byte[( numChars - i ) / 3];
                    int pos = 0;
                    while ( ( i + 2 ) < numChars && c == '%' )
                    {
                        bytes[pos++] = parseEscapedByte( s, i + 1 );
                        i += 3;
                        if ( i < numChars )
                        {
                            c = s.charAt( i );
                        }
                    }
                    // A trailing, incomplete byte encoding such as "%x" is an error
                    if ( i < numChars && c == '%' )
                    {
                        throw new IllegalArgumentException( "URLDecoder: Incomplete trailing escape (%) pattern" );
                    }
                    sb.append( utf8String( bytes, pos ) );
                    needToChange = true;
                    break;
                }
                default:
                    sb.append( c );
                    i++;
                    break;
            }
        }
        return needToChange ? sb.toString() : s;
    }

    /**
     * Re-decodes a parameter map as UTF-8, assuming its strings were originally read as ISO-8859-1.
     *
     * @param parameterMap the map to convert; must not be {@code null}
     * @return a new map holding the converted keys and values
     * @see #decodeParameterMap(Map, String)
     */
    public static Map<String, String[]> decodeParameterMap( Map<String, String[]> parameterMap )
    {
        return decodeParameterMap( parameterMap, "ISO-8859-1" );
    }

    /**
     * Re-decodes every key and value of a parameter map: each string is turned back into bytes
     * using {@code inputEncoding} and those bytes are then interpreted as UTF-8.
     * <p>
     * Neither the given map nor its value arrays are modified; the result contains newly
     * allocated arrays. {@code null} keys, {@code null} value arrays and {@code null} array
     * elements are passed through unchanged.
     *
     * @param parameterMap  the map to convert; must not be {@code null}
     * @param inputEncoding name of the encoding the strings were (wrongly) decoded with
     * @return a new map holding the converted keys and values
     * @throws NullPointerException  if {@code parameterMap} is {@code null}
     * @throws IllegalStateException if {@code inputEncoding} or UTF-8 is not supported
     */
    public static Map<String, String[]> decodeParameterMap( Map<String, String[]> parameterMap, String inputEncoding )
    {
        Map<String, String[]> decodedParameterMap = new HashMap<String, String[]>( parameterMap.size() );
        try
        {
            for ( Map.Entry<String, String[]> entry : parameterMap.entrySet() )
            {
                String key = recode( entry.getKey(), inputEncoding );

                String[] values = entry.getValue();
                String[] decodedValues = null;
                if ( values != null )
                {
                    // Write into a fresh array so the caller's data is left untouched.
                    decodedValues = new String[values.length];
                    for ( int i = 0; i < values.length; i++ )
                    {
                        decodedValues[i] = recode( values[i], inputEncoding );
                    }
                }
                decodedParameterMap.put( key, decodedValues );
            }
        }
        catch ( UnsupportedEncodingException uee )
        {
            throw new IllegalStateException( uee.getMessage(), uee );
        }
        return decodedParameterMap;
    }

    /**
     * Creates a writer that encodes characters as UTF-8 into the given buffer.
     */
    private static OutputStreamWriter newUtf8Writer( ByteArrayOutputStream buf )
    {
        try
        {
            return new OutputStreamWriter( buf, UTF_8 );
        }
        catch ( UnsupportedEncodingException uee )
        {
            throw new IllegalStateException( uee.getMessage(), uee );
        }
    }

    /**
     * Decodes the first {@code length} bytes of the array as UTF-8.
     */
    private static String utf8String( byte[] bytes, int length )
    {
        try
        {
            return new String( bytes, 0, length, UTF_8 );
        }
        catch ( UnsupportedEncodingException uee )
        {
            throw new IllegalStateException( uee.getMessage(), uee );
        }
    }

    /**
     * Parses the two characters starting at {@code start} as one hexadecimal byte value.
     */
    private static byte parseEscapedByte( String s, int start )
    {
        String hex = s.substring( start, start + 2 );
        char first = hex.charAt( 0 );
        // Integer.parseInt accepts a leading sign, which is not valid inside a %xy escape.
        if ( first == '-' || first == '+' )
        {
            throw new IllegalArgumentException(
                "URLDecoder: Illegal hex characters in escape (%) pattern - sign character in \"" + hex + "\"" );
        }
        try
        {
            return (byte) Integer.parseInt( hex, 16 );
        }
        catch ( NumberFormatException e )
        {
            throw new IllegalArgumentException(
                "URLDecoder: Illegal hex characters in escape (%) pattern - " + e.getMessage(), e );
        }
    }

    /**
     * Converts a string to bytes using {@code inputEncoding} and reads those bytes back as UTF-8.
     * Returns {@code null} for a {@code null} input.
     */
    private static String recode( String value, String inputEncoding )
        throws UnsupportedEncodingException
    {
        if ( value == null )
        {
            return null;
        }
        return new String( value.getBytes( inputEncoding ), UTF_8 );
    }
}