StringUtil.java example

Explorer
cms-ce-master
- modules
/*
 * Copyright 2000-2013 Enonic AS
 * http://www.enonic.com/license
 */
package com.enonic.esl.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StreamTokenizer;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.StringTokenizer;

/**
 * Static helpers for validating, splitting, replacing, hex-encoding and sanitising strings.
 * <p/>
 * The class holds no mutable state; the lookup tables are only read after class initialisation.
 */
public final class StringUtil
{
    /**
     * Upper-case hexadecimal digits, indexed by nibble value (0-15).
     */
    private static final char[] HEX_CHARS = "0123456789ABCDEF".toCharArray();

    // Extended Control Characters (used by Windows for special characters)

    private static final int ECC_START = 0x80; // 128

    private static final int ECC_END = 0x9f; // 159

    /**
     * Replacement text for every code point in [ECC_START, ECC_END], indexed by (code point - ECC_START).
     * A null slot means the character has no replacement and is dropped by {@link #replaceECC(String)}.
     */
    private static final String[] ECC_MAP;

    static
    {
        // Replacements are written as Unicode escapes so that compiling this file does not depend on the
        // source encoding.
        // NOTE(review): Windows-1252 also assigns 0x80, 0x88, 0x8E and 0x9E; they have no entry here and
        // are therefore removed by replaceECC - confirm whether that is intended.
        ECC_MAP = new String[ECC_END - ECC_START + 1]; // hex  (int)
        ECC_MAP[2] = "\u201A"; // 0x82 (130) low left rising single quote
        ECC_MAP[3] = "\u0192"; // 0x83 (131) small italic f, florin
        ECC_MAP[4] = "\u201E"; // 0x84 (132) low left rising double quote
        // Was "&ldots;", which is not a defined HTML named character reference; "&hellip;" is.
        ECC_MAP[5] = "&hellip;"; // 0x85 (133) horizontal ellipsis
        ECC_MAP[6] = "\u2020"; // 0x86 (134) dagger
        ECC_MAP[7] = "\u2021"; // 0x87 (135) double dagger
        ECC_MAP[9] = "\u2030"; // 0x89 (137) per mille sign
        ECC_MAP[10] = "\u0160"; // 0x8A (138) capital S caron
        ECC_MAP[11] = "\u2039"; // 0x8B (139) left single angle quote
        ECC_MAP[12] = "\u0152"; // 0x8C (140) capital OE ligature
        ECC_MAP[17] = "\u2018"; // 0x91 (145) left single quotation mark
        ECC_MAP[18] = "\u2019"; // 0x92 (146) right single quotation mark
        ECC_MAP[19] = "\u201C"; // 0x93 (147) left double quotation mark
        ECC_MAP[20] = "\u201D"; // 0x94 (148) right double quotation mark
        ECC_MAP[21] = "\u2022"; // 0x95 (149) bullet
        ECC_MAP[22] = "\u2013"; // 0x96 (150) en dash
        ECC_MAP[23] = "\u2014"; // 0x97 (151) em dash
        ECC_MAP[24] = "\u02DC"; // 0x98 (152) small tilde
        ECC_MAP[25] = "\u2122"; // 0x99 (153) trademark sign
        ECC_MAP[26] = "\u0161"; // 0x9A (154) small s caron
        ECC_MAP[27] = "\u203A"; // 0x9B (155) right single angle quote
        ECC_MAP[28] = "\u0153"; // 0x9C (156) small oe ligature
        ECC_MAP[31] = "\u0178"; // 0x9F (159) capital Y dieresis
    }

    /**
     * Not instantiable: this class only exposes static helpers.
     */
    private StringUtil()
    {
        super();
    }

    /**
     * Checks whether a string is a canonical decimal integer literal: an optional leading '-', followed by
     * one or more ASCII digits with no leading zero (a single "0", also as "-0", is accepted).
     * <p/>
     * Only the syntax is checked; the value is not range-checked against int or long.
     *
     * @param str the string to test, may be null
     * @return true if the string matches the format above; false for null, empty, "-" and anything else
     */
    public static boolean isIntegerString( String str )
    {
        if ( str == null || str.length() == 0 )
        {
            return false;
        }

        int firstDigit = ( str.charAt( 0 ) == '-' ) ? 1 : 0;
        int digitCount = str.length() - firstDigit;

        // a sign without any digits is not a number
        if ( digitCount == 0 )
        {
            return false;
        }

        // check for leading zeros (applies after the sign as well)
        if ( digitCount > 1 && str.charAt( firstDigit ) == '0' )
        {
            return false;
        }

        for ( int i = firstDigit; i < str.length(); i++ )
        {
            char c = str.charAt( i );
            if ( c < '0' || c > '9' )
            {
                return false;
            }
        }

        return true;
    }

    /**
     * Simple format of XML text. Removes unnecessary white space and optionally indents nested elements.
     * <p/>
     * The input is tokenized on the characters {@code < > ! ? / [}, newline and space; it is not parsed by
     * an XML parser. Every opening element is emitted on a new line, so the result normally starts with a
     * newline. If anything goes wrong while formatting (for instance unbalanced tags, or a null input),
     * the input is returned unchanged.
     *
     * @param xml    the XML text to format
     * @param indent number of spaces added per nesting level; values &lt;= 0 disable indenting
     * @return the formatted text, or {@code xml} itself when formatting fails
     */
    public static String formatXML( String xml, int indent )
    {
        try
        {
            int currentIndent = 0;
            int indentIncr = ( indent > 0 ? indent : 0 );
            StringTokenizer st = new StringTokenizer( xml, "<>!?/[\n ", true );
            StringBuilder xmlString = new StringBuilder( 1024 );
            // Stack of currently open elements. It is only used as a balance check: removeLast() on an
            // empty list throws, which makes the method fall back to returning the input unformatted.
            LinkedList<String> list = new LinkedList<String>();

            while ( st.hasMoreTokens() )
            {
                String t = st.nextToken();
                if ( "<".equals( t ) )
                {
                    t = st.nextToken();
                    while ( "\n".equals( t ) )
                    {
                        t = st.nextToken();
                    }

                    if ( "?".equals( t ) || "!".equals( t ) )
                    {
                        // processing instruction / declaration / comment: copied up to the next '>'
                        for ( int i = 0; i < currentIndent; i++ )
                        {
                            xmlString.append( ' ' );
                        }
                        xmlString.append( "<" );
                        xmlString.append( t );
                        while ( st.hasMoreTokens() && !( t = st.nextToken() ).equals( ">" ) )
                        {
                            if ( !"\n".equals( t ) )
                            {
                                xmlString.append( t );
                            }
                        }
                        xmlString.append( ">" );
                    }
                    else if ( "/".equals( t ) )
                    {
                        // closing tag
                        if ( currentIndent > 0 )
                        {
                            currentIndent -= indentIncr;
                        }
                        // Only break the line when the closing tag directly follows another tag; after
                        // text content it stays on the same line as the text.
                        char prevChar = xmlString.charAt( xmlString.length() - 1 );
                        if ( prevChar == '>' )
                        {
                            xmlString.append( "\n" );
                            for ( int i = 0; i < currentIndent; i++ )
                            {
                                xmlString.append( ' ' );
                            }
                        }
                        xmlString.append( "<" );
                        xmlString.append( t );
                        while ( st.hasMoreTokens() && !( t = st.nextToken() ).equals( ">" ) )
                        {
                            if ( !"\n".equals( t ) )
                            {
                                xmlString.append( t );
                            }
                        }
                        // Always terminate with '>' like the other branches do. Appending the last token
                        // read instead would duplicate it when the input ends before the '>'.
                        xmlString.append( ">" );

                        list.removeLast();
                    }
                    else if ( "[".equals( t ) )
                    {
                        // copied verbatim (newlines included) up to the next '>'
                        xmlString.append( "<" );
                        xmlString.append( t );
                        while ( st.hasMoreTokens() && !( t = st.nextToken() ).equals( ">" ) )
                        {
                            xmlString.append( t );
                        }
                        xmlString.append( ">" );
                    }
                    else
                    {
                        // opening (or self-closing) element: always starts on a new line
                        xmlString.append( "\n" );
                        for ( int i = 0; i < currentIndent; i++ )
                        {
                            xmlString.append( ' ' );
                        }
                        xmlString.append( "<" );
                        xmlString.append( t );
                        String element = t;
                        String lastToken = t;
                        while ( st.hasMoreTokens() && !( t = st.nextToken() ).equals( ">" ) )
                        {
                            if ( !"\n".equals( t ) )
                            {
                                lastToken = t;
                                xmlString.append( t );
                            }
                        }
                        xmlString.append( ">" );
                        // a '/' right before the '>' marks a self-closing element, which opens no level
                        if ( !"/".equals( lastToken ) )
                        {
                            currentIndent += indentIncr;
                            list.add( element );
                        }
                    }
                }
                else if ( " ".equals( t ) && xmlString.charAt( xmlString.length() - 1 ) != '>' )
                {
                    // keep spaces inside text content, drop those that directly follow a tag
                    xmlString.append( t );
                }
                else if ( !"\n".equals( t ) && !" ".equals( t ) )
                {
                    xmlString.append( t );
                }
            }

            return xmlString.toString();
        }
        catch ( RuntimeException e )
        {
            // Deliberate best effort: if xml formatting fails (for instance with comments or unbalanced
            // tags), return it unformatted. No checked exception can be thrown by the code above.
            return xml;
        }
    }

    /**
     * Joins the decimal representations of the given ints, separated by the delimiter.
     *
     * @param values    the values to join
     * @param delimiter the text placed between two consecutive values
     * @return the joined string, or null when {@code values} is null or empty
     */
    public static String mergeInts( int[] values, String delimiter )
    {
        if ( values == null || values.length == 0 )
        {
            return null;
        }

        StringBuilder merged = new StringBuilder();
        merged.append( values[0] );
        for ( int i = 1; i < values.length; i++ )
        {
            merged.append( delimiter );
            merged.append( values[i] );
        }
        return merged.toString();
    }

    /**
     * Splits a string on a single delimiter character; a trailing empty part is dropped.
     *
     * @param str   the string to split, may be null
     * @param delim the delimiter character
     * @return the parts; an empty array when {@code str} is null or empty
     * @see #splitString(String, String, boolean)
     */
    public static String[] splitString( String str, char delim )
    {
        return splitString( str, String.valueOf( delim ) );
    }

    /**
     * Splits a string on a delimiter string; a trailing empty part is dropped.
     *
     * @param str   the string to split, may be null
     * @param delim the delimiter, must not be empty
     * @return the parts; an empty array when {@code str} is null or empty
     * @see #splitString(String, String, boolean)
     */
    public static String[] splitString( String str, String delim )
    {
        return splitString( str, delim, false );
    }

    /**
     * Splits a string on every occurrence of a literal (non-regex) delimiter.
     * <p/>
     * Empty parts at the start and in the middle are kept ("a,,b" gives "a", "", "b"). An empty part after
     * the last delimiter is only kept when {@code includeLastIfEmpty} is true.
     *
     * @param str                the string to split, may be null
     * @param delim              the delimiter; may be longer than one character
     * @param includeLastIfEmpty whether an empty part after the last delimiter is part of the result
     * @return the parts; an empty array when {@code str} is null or empty
     * @throws IllegalArgumentException if {@code delim} is empty and {@code str} is not
     */
    public static String[] splitString( String str, String delim, boolean includeLastIfEmpty )
    {
        ArrayList<String> result = new ArrayList<String>();

        if ( str != null && str.length() > 0 )
        {
            // An empty delimiter matches at every position and would never let the scan advance.
            if ( delim.length() == 0 )
            {
                throw new IllegalArgumentException( "delim must not be empty" );
            }

            int start = 0;
            int idx = str.indexOf( delim, start );
            while ( idx != -1 )
            {
                result.add( str.substring( start, idx ) );

                // skip the whole delimiter, not just its first character
                start = idx + delim.length();
                idx = str.indexOf( delim, start );
            }

            if ( start < str.length() || includeLastIfEmpty )
            {
                result.add( str.substring( start ) );
            }
        }

        return result.toArray( new String[result.size()] );
    }

    /**
     * Removes every character below 0x20 from the text. Note that this includes tab, line feed and
     * carriage return.
     *
     * @param text the text to clean, may be null
     * @return the text without the removed characters, or null when {@code text} is null
     */
    public static String stripControlChars( String text )
    {
        if ( text == null )
        {
            return null;
        }

        StringBuilder newString = new StringBuilder( text.length() );
        for ( int i = 0; i < text.length(); i++ )
        {
            char chr = text.charAt( i );
            if ( chr >= 0x20 )
            {
                newString.append( chr );
            }
        }
        return newString.toString();
    }

    /**
     * Replaces the extended control characters (ECCs, code points 0x80-0x9F) that Windows uses for
     * special characters. Characters in that range without a mapping are removed; all other characters
     * are copied unchanged.
     * <p/>
     * Mapping (hex value: replacement):
     * 82: U+201A low single quote, 83: U+0192 florin, 84: U+201E low double quote,
     * 85: the text {@code &hellip;} (horizontal ellipsis), 86: U+2020 dagger, 87: U+2021 double dagger,
     * 89: U+2030 per mille, 8A: U+0160 capital S caron, 8B: U+2039 left single angle quote,
     * 8C: U+0152 capital OE ligature, 91: U+2018 left single quote, 92: U+2019 right single quote,
     * 93: U+201C left double quote, 94: U+201D right double quote, 95: U+2022 bullet, 96: U+2013 en dash,
     * 97: U+2014 em dash, 98: U+02DC small tilde, 99: U+2122 trademark, 9A: U+0161 small s caron,
     * 9B: U+203A right single angle quote, 9C: U+0153 small oe ligature, 9F: U+0178 capital Y dieresis.
     *
     * @param str the string to do the replacing on, may be null
     * @return the resulting string, or null when {@code str} is null
     */
    public static String replaceECC( String str )
    {
        if ( str == null )
        {
            return null;
        }

        StringBuilder sb = new StringBuilder( str.length() );
        for ( int i = 0; i < str.length(); i++ )
        {
            char c = str.charAt( i );
            if ( c < ECC_START || c > ECC_END )
            {
                sb.append( c );
                continue;
            }

            String replacement = ECC_MAP[c - ECC_START];
            if ( replacement != null )
            {
                sb.append( replacement );
            }
            // else: unmapped ECC, dropped
        }

        return sb.toString();
    }

    /**
     * Replaces every occurrence of a literal (non-regex) substring. Matches are searched left to right
     * and do not overlap; the replacement text is not searched again.
     *
     * @param text the text to search
     * @param what the substring to replace; when empty, {@code text} is returned unchanged
     * @param with the replacement
     * @return the text with all occurrences replaced; {@code text} itself when there is no occurrence
     */
    public static String replaceAll( String text, String what, String with )
    {
        // An empty search string matches at every index without advancing, which would loop forever.
        if ( what.length() == 0 )
        {
            return text;
        }

        int startPos = text.indexOf( what );
        if ( startPos < 0 )
        {
            return text;
        }

        int currentPos = 0;
        StringBuilder result = new StringBuilder( text.length() );
        do
        {
            result.append( text, currentPos, startPos );
            result.append( with );
            currentPos = startPos + what.length();
            startPos = text.indexOf( what, currentPos );
        }
        while ( startPos >= 0 );

        result.append( text, currentPos, text.length() );

        return result.toString();
    }

    /**
     * Replaces the first occurrence of a substring in the buffer, in place. Later occurrences are left
     * untouched; the buffer is not modified when the substring does not occur.
     *
     * @param text the buffer to modify
     * @param what the substring to look for
     * @param with the replacement
     */
    public static void replaceString( StringBuffer text, String what, String with )
    {
        int startPos = text.indexOf( what );
        if ( startPos == -1 )
        {
            return;
        }

        text.replace( startPos, startPos + what.length(), with );
    }

    /**
     * Formats the lowest {@code byteCount} bytes of the value as upper-case hex, most significant nibble
     * first, two digits per byte.
     */
    private static String toHex( long value, int byteCount )
    {
        char[] chars = new char[byteCount * 2];

        // fill from the right: the lowest nibble is the last digit
        for ( int i = chars.length - 1; i >= 0; i-- )
        {
            chars[i] = HEX_CHARS[(int) ( value & 0x0F )];
            value >>>= 4;
        }

        return new String( chars );
    }

    /**
     * Return value as hex.
     *
     * @param value the value to format
     * @return exactly 4 upper-case hex digits (two's complement for negative values)
     */
    public static String toHex( short value )
    {
        return toHex( value, 2 );
    }

    /**
     * Return value as hex.
     *
     * @param value the value to format
     * @return exactly 8 upper-case hex digits (two's complement for negative values)
     */
    public static String toHex( int value )
    {
        return toHex( value, 4 );
    }

    /**
     * Return value as hex.
     *
     * @param value the value to format
     * @return exactly 16 upper-case hex digits (two's complement for negative values)
     */
    public static String toHex( long value )
    {
        return toHex( value, 8 );
    }

    /**
     * Replaces characters that commonly break XML parsing: character 26 becomes an apostrophe, and every
     * other character below 33 except tab, line feed and carriage return becomes a space. The length of
     * the string does not change.
     *
     * @param input the text to clean, may be null
     * @return the cleaned text, or null when {@code input} is null
     */
    public static String getXMLSafeString( String input )
    {
        // This ancient stuff was found in XMLTool.createElement(Document doc, Element root, String name, String text, String sortAttribute, String sortValue)
        // Origin unknown, but it gets around a well-known problem with xml parsing: a character reference
        // to character 26 is rejected as an invalid XML character.
        // It is basically a workaround that replaces that character with an apostrophe (')
        if ( input == null )
        {
            return null;
        }

        StringBuilder sb = new StringBuilder( input );
        for ( int i = 0; i < sb.length(); i++ )
        {
            char c = sb.charAt( i );
            if ( c == 26 ) // illegal character (special single quote)
            {
                sb.setCharAt( i, '\'' );
            }
            else if ( c < 33 && c != '\t' && c != '\n' && c != '\r' )
            {
                sb.setCharAt( i, ' ' );
            }
        }
        return sb.toString();
    }

}