// URLUtil.java example
//
// Explorer
// cms-ce-master
// - modules
/*
 * Copyright 2000-2013 Enonic AS
 * http://www.enonic.com/license
 */
package com.enonic.esl.net;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.util.BitSet;
import java.util.HashMap;
import java.util.Map;

/**
 * Static helpers for percent-encoding and decoding strings as UTF-8, and for re-decoding parameter maps that
 * were read with the wrong character encoding.
 * <p>
 * Letters, digits and the characters <code>- _ . *</code> are passed through unchanged by {@link #encode(String)};
 * a space becomes <code>+</code>; everything else is written as <code>%XY</code> escapes of its UTF-8 bytes.
 */
public class URLUtil
{

    /**
     * Name of the charset used for all percent-encoding and decoding.
     */
    private static final String UTF_8 = "UTF-8";

    /**
     * Upper-case hexadecimal digits, indexed by nibble value.
     */
    private static final char[] HEX_DIGITS = "0123456789ABCDEF".toCharArray();

    /**
     * Initial capacity of the scratch buffer that receives the UTF-8 bytes of one encoded character. The buffer
     * grows on demand, so this is only a sizing hint.
     */
    private static final int MAX_BYTES_PER_CHAR = 10;

    /**
     * Characters (by char code) that {@link #encode(String)} copies to the output without a percent escape.
     */
    private static final BitSet dontNeedEncoding = new BitSet( 256 );

    static
    {
        int i;
        for ( i = 'a'; i <= 'z'; i++ )
        {
            dontNeedEncoding.set( i );
        }
        for ( i = 'A'; i <= 'Z'; i++ )
        {
            dontNeedEncoding.set( i );
        }
        for ( i = '0'; i <= '9'; i++ )
        {
            dontNeedEncoding.set( i );
        }
        // The space is listed here only so that it is not percent-escaped;
        // encode() turns it into a '+'.
        dontNeedEncoding.set( ' ' );
        dontNeedEncoding.set( '-' );
        dontNeedEncoding.set( '_' );
        dontNeedEncoding.set( '.' );
        dontNeedEncoding.set( '*' );
    }

    /**
     * Private constructor. No instantiation allowed.
     */
    private URLUtil()
    {
    }

    /**
     * Percent-encodes a string using UTF-8.
     *
     * @param s the string to encode; must not be <code>null</code>
     * @return the encoded string, or <code>s</code> itself when no character had to be changed
     */
    public static String encode( String s )
    {
        boolean needToChange = false;
        boolean wroteUnencodedChar = false;
        StringBuilder out = new StringBuilder( s.length() );
        ByteArrayOutputStream buf = new ByteArrayOutputStream( MAX_BYTES_PER_CHAR );
        OutputStreamWriter writer = newUtf8Writer( buf );

        for ( int i = 0; i < s.length(); i++ )
        {
            int c = s.charAt( i );
            if ( dontNeedEncoding.get( c ) )
            {
                if ( c == ' ' )
                {
                    c = '+';
                    needToChange = true;
                }
                out.append( (char) c );
                wroteUnencodedChar = true;
                continue;
            }

            // Convert the character to its UTF-8 bytes before hex conversion.
            try
            {
                if ( wroteUnencodedChar )
                {
                    // Start with a fresh writer after unencoded output so that no encoder state
                    // (e.g. a pending surrogate) leaks across the unencoded characters.
                    writer = newUtf8Writer( buf );
                    wroteUnencodedChar = false;
                }
                writer.write( c );

                // A high surrogate directly followed by a low surrogate is one code point: hand both
                // chars to the writer together and skip the second one in the outer loop.
                if ( Character.isHighSurrogate( (char) c ) && ( i + 1 ) < s.length() )
                {
                    char next = s.charAt( i + 1 );
                    if ( Character.isLowSurrogate( next ) )
                    {
                        writer.write( next );
                        i++;
                    }
                }
                writer.flush();
            }
            catch ( IOException e )
            {
                // Writing to an in-memory buffer is not expected to fail; if it does, this character is
                // skipped and encoding continues with the next one.
                buf.reset();
                continue;
            }

            for ( byte b : buf.toByteArray() )
            {
                out.append( '%' );
                out.append( HEX_DIGITS[( b >> 4 ) & 0xF] );
                out.append( HEX_DIGITS[b & 0xF] );
            }
            buf.reset();
            needToChange = true;
        }

        return needToChange ? out.toString() : s;
    }

    /**
     * Decodes a percent-encoded string, interpreting the escaped bytes as UTF-8. A <code>+</code> is decoded to a
     * space and each run of consecutive <code>%XY</code> escapes is decoded as one UTF-8 byte sequence.
     *
     * @param s the string to decode; must not be <code>null</code>
     * @return the decoded string, or <code>s</code> itself when it contained neither <code>+</code> nor <code>%</code>
     * @throws IllegalArgumentException if an escape is incomplete or contains characters that are not
     *                                  hexadecimal digits (sign characters such as <code>%-1</code> are rejected)
     */
    public static String decode( String s )
    {
        boolean needToChange = false;
        int numChars = s.length();
        StringBuilder sb = new StringBuilder( numChars );
        int i = 0;

        while ( i < numChars )
        {
            char c = s.charAt( i );
            switch ( c )
            {
                case '+':
                    sb.append( ' ' );
                    i++;
                    needToChange = true;
                    break;
                case '%':
                {
                    // (numChars - i) / 3 is an upper bound for the number of remaining escaped bytes.
                    byte[] bytes = new byte[( numChars - i ) / 3];
                    int pos = 0;

                    // Collect every consecutive %XY escape starting here into one byte sequence.
                    while ( ( i + 2 ) < numChars && c == '%' )
                    {
                        bytes[pos++] = (byte) parseHexByte( s, i + 1 );
                        i += 3;
                        if ( i < numChars )
                        {
                            c = s.charAt( i );
                        }
                    }

                    // A '%' that is still pending here has fewer than two characters after it.
                    if ( i < numChars && c == '%' )
                    {
                        throw new IllegalArgumentException( "URLDecoder: Incomplete trailing escape (%) pattern" );
                    }

                    try
                    {
                        sb.append( new String( bytes, 0, pos, UTF_8 ) );
                    }
                    catch ( UnsupportedEncodingException uee )
                    {
                        throw new IllegalStateException( "UTF-8 encoding is not supported", uee );
                    }
                    needToChange = true;
                    break;
                }
                default:
                    sb.append( c );
                    i++;
                    break;
            }
        }

        return needToChange ? sb.toString() : s;
    }

    /**
     * Re-decodes a parameter map as described in {@link #decodeParameterMap(Map, String)}, using
     * <code>ISO-8859-1</code> as the input encoding.
     *
     * @param parameterMap the parameters to re-decode; must not be <code>null</code>
     * @return a new map with re-decoded keys and values
     */
    public static Map<String, String[]> decodeParameterMap( Map<String, String[]> parameterMap )
    {
        return decodeParameterMap( parameterMap, "ISO-8859-1" );
    }

    /**
     * Re-decodes every key and value of a parameter map: each string is converted back to bytes using
     * <code>inputEncoding</code> and those bytes are then interpreted as UTF-8.
     * <p>
     * The given map and its value arrays are left untouched; the result holds newly allocated arrays.
     * <code>null</code> keys, value arrays and array elements are passed through as <code>null</code>.
     *
     * @param parameterMap  the parameters to re-decode; must not be <code>null</code>
     * @param inputEncoding name of the charset the strings were originally decoded with
     * @return a new map with re-decoded keys and values
     * @throws IllegalStateException if <code>inputEncoding</code> is not a supported charset
     */
    public static Map<String, String[]> decodeParameterMap( Map<String, String[]> parameterMap, String inputEncoding )
    {
        Map<String, String[]> decodedParameterMap = new HashMap<String, String[]>( parameterMap.size() );
        try
        {
            for ( Map.Entry<String, String[]> entry : parameterMap.entrySet() )
            {
                String key = recodeAsUtf8( entry.getKey(), inputEncoding );

                // Decode into a fresh array so the caller's map is never modified.
                String[] values = entry.getValue();
                String[] decodedValues = null;
                if ( values != null )
                {
                    decodedValues = new String[values.length];
                    for ( int i = 0; i < values.length; i++ )
                    {
                        decodedValues[i] = recodeAsUtf8( values[i], inputEncoding );
                    }
                }

                decodedParameterMap.put( key, decodedValues );
            }
        }
        catch ( UnsupportedEncodingException uee )
        {
            throw new IllegalStateException( uee.getMessage(), uee );
        }

        return decodedParameterMap;
    }

    /**
     * Creates a writer that encodes characters written to it as UTF-8 into the given buffer.
     */
    private static OutputStreamWriter newUtf8Writer( ByteArrayOutputStream buf )
    {
        try
        {
            return new OutputStreamWriter( buf, UTF_8 );
        }
        catch ( UnsupportedEncodingException uee )
        {
            throw new IllegalStateException( "UTF-8 encoding is not supported", uee );
        }
    }

    /**
     * Parses the two hexadecimal digits at <code>offset</code> and <code>offset + 1</code> into a value 0-255.
     *
     * @throws IllegalArgumentException if either character is not a hexadecimal digit
     */
    private static int parseHexByte( String s, int offset )
    {
        int high = Character.digit( s.charAt( offset ), 16 );
        int low = Character.digit( s.charAt( offset + 1 ), 16 );
        if ( high < 0 || low < 0 )
        {
            throw new IllegalArgumentException(
                "URLDecoder: Illegal hex characters in escape (%) pattern - For input string: \"" +
                    s.substring( offset, offset + 2 ) + "\"" );
        }
        return ( high << 4 ) | low;
    }

    /**
     * Converts <code>value</code> to bytes using <code>inputEncoding</code> and reads those bytes back as UTF-8.
     * Returns <code>null</code> for a <code>null</code> value.
     */
    private static String recodeAsUtf8( String value, String inputEncoding )
        throws UnsupportedEncodingException
    {
        if ( value == null )
        {
            return null;
        }
        return new String( value.getBytes( inputEncoding ), UTF_8 );
    }
}