/* * Copyright 2000-2013 Enonic AS * http://www.enonic.com/license */ package com.enonic.esl.util; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StreamTokenizer; import java.io.StringReader; import java.util.ArrayList; import java.util.LinkedList; import java.util.StringTokenizer; public final class StringUtil { /** * Hex characters. */ private final static char[] HEX_CHARS = "0123456789ABCDEF".toCharArray(); // Extended Control Characters (used by Windows for special characters) private static final int ECC_START = 0x80; // 128 private static final int ECC_END = 0x9f; // 159 private static final String[] ECC_MAP; static { ECC_MAP = new String[32]; // hex (int) ECC_MAP[2] = "‚"; // 0x82 (130) ECC_MAP[3] = "ƒ"; // 0x83 (131) ECC_MAP[4] = "„"; // 0x84 (132) ECC_MAP[5] = "&ldots;"; // 0x85 (133) ECC_MAP[6] = "†"; // 0x86 (134) ECC_MAP[7] = "‡"; // 0x87 (135) ECC_MAP[9] = "‰"; // 0x89 (137) ECC_MAP[10] = "Š"; // 0x8A (138) ECC_MAP[11] = "‹"; // 0x8B (139) ECC_MAP[12] = "Œ"; // 0x8C (140) ECC_MAP[17] = "‘"; // 0x91 (145) ECC_MAP[18] = "’"; // 0x92 (146) ECC_MAP[19] = "“"; // 0x93 (147) ECC_MAP[20] = "”"; // 0x94 (148) ECC_MAP[21] = "•"; // 0x95 (149) ECC_MAP[22] = "–"; // 0x96 (150) ECC_MAP[23] = "—"; // 0x97 (151) ECC_MAP[24] = "˜"; // 0x98 (152) ECC_MAP[25] = "™"; // 0x99 (153) ECC_MAP[26] = "š"; // 0x9A (154) ECC_MAP[27] = "›"; // 0x9B (155) ECC_MAP[28] = "œ"; // 0x9C (156) ECC_MAP[31] = "Ÿ"; // 0x9F (159) } /** * StringUtil constructor comment. */ private StringUtil() { super(); } public static boolean isIntegerString( String str ) { if ( str == null || str.length() == 0 ) { return false; } for ( int i = 0; i < str.length(); i++ ) { if ( ( str.charAt( i ) < '0' || str.charAt( i ) > '9' ) && !( i == 0 && str.charAt( i ) == '-' ) ) { return false; } } // check for leading zeros if ( str.charAt( 0 ) == '0' && str.length() > 1 ) { return false; } return true; } /** * Simple format of XML files. Removes unecesserary white space and indents (optional). * <p/> * String xml xml file to format int indent if > 0, applies indenting */ public static String formatXML( String xml, int indent ) { try { int currentIndent = 0; int indentIncr = ( indent > 0 ? indent : 0 ); StringTokenizer st = new StringTokenizer( xml, "<>!?/[\n ", true ); StringBuffer xmlString = new StringBuffer( 1024 ); LinkedList<String> list = new LinkedList<String>(); while ( st.hasMoreTokens() ) { String t = st.nextToken(); if ( "<".equals( t ) ) { t = st.nextToken(); while ( "\n".equals( t ) ) { t = st.nextToken(); } if ( "?".equals( t ) || "!".equals( t ) ) { for ( int i = 0; i < currentIndent; i++ ) { xmlString.append( ' ' ); } xmlString.append( "<" ); xmlString.append( t ); while ( st.hasMoreTokens() && !( t = st.nextToken() ).equals( ">" ) ) { if ( !"\n".equals( t ) ) { xmlString.append( t ); } } xmlString.append( ">" ); } else if ( "/".equals( t ) ) { if ( currentIndent > 0 ) { currentIndent -= indentIncr; } char prevChar = xmlString.charAt( xmlString.length() - 1 ); if ( prevChar == '>' ) { xmlString.append( "\n" ); for ( int i = 0; i < currentIndent; i++ ) { xmlString.append( ' ' ); } } xmlString.append( "<" ); xmlString.append( t ); while ( st.hasMoreTokens() && !( t = st.nextToken() ).equals( ">" ) ) { if ( !"\n".equals( t ) ) { xmlString.append( t ); } } xmlString.append( t ); list.removeLast(); } else if ( "[".equals( t ) ) { xmlString.append( "<" ); xmlString.append( t ); while ( st.hasMoreTokens() && !( t = st.nextToken() ).equals( ">" ) ) { xmlString.append( t ); } xmlString.append( ">" ); } else { xmlString.append( "\n" ); for ( int i = 0; i < currentIndent; i++ ) { xmlString.append( ' ' ); } xmlString.append( "<" ); xmlString.append( t ); String element = t; String lastToken = t; while ( st.hasMoreTokens() && !( t = st.nextToken() ).equals( ">" ) ) { if ( !"\n".equals( t ) ) { lastToken = t; xmlString.append( t ); } } xmlString.append( ">" ); if ( !"/".equals( lastToken ) ) { currentIndent += indentIncr; list.add( element ); } } } else if ( " ".equals( t ) && xmlString.charAt( xmlString.length() - 1 ) != '>' ) { xmlString.append( t ); } else if ( !"\n".equals( t ) && !" ".equals( t ) ) { xmlString.append( t ); } } return xmlString.toString(); } catch ( Exception e ) { // If xml formatting fails (for instance with comments), return it unformatted return xml; } } static public String mergeInts( int[] values, String delimiter ) { if ( values == null || values.length == 0 ) { return null; } StringBuffer merged = new StringBuffer( String.valueOf( values[0] ) ); for ( int i = 1; i < values.length; i++ ) { merged.append( delimiter ); merged.append( values[i] ); } return merged.toString(); } static public String[] splitString( String str, char delim ) { return splitString( str, String.valueOf( delim ) ); } static public String[] splitString( String str, String delim ) { return splitString( str, delim, false ); } static public String[] splitString( String str, String delim, boolean includeLastIfEmpty ) { ArrayList<String> result = new ArrayList<String>(); if ( str != null && str.length() > 0 ) { int idx = str.indexOf( delim ); String substr; while ( idx != -1 ) { substr = str.substring( 0, idx ); result.add( substr ); str = str.substring( idx + 1 ); idx = str.indexOf( delim ); } if ( str.length() > 0 || includeLastIfEmpty ) { result.add( str ); } } return result.toArray( new String[0] ); } static public String stripControlChars( String text ) { StringBuffer newString = new StringBuffer(); for ( int i = 0; i < text.length(); i++ ) { char chr = text.charAt( i ); if ( (int) chr >= 0x20 ) { newString.append( chr ); } } return newString.toString(); } /** * Replace Unicode extended control characters (ECCs) which Windows uses for special characters with correct HTML entity. See mapping in * table below. ECCs in string not used by Windows are removed. * <p/> * <table border="0" cellspacing="0" cellpadding="2"> <tr> <th>Description</th> <th>Hex Value</th> <th>HTML Entity</th> * <th><pre> </pre></th> * </tr> <tr> <td>low left rising single quote</td> <td>82</td> <td>&lsquor;</td> <td>‚</td> </tr> <tr> <td>small italic f, * function of, f florin</td> <td>83</td> <td>&fnof;</td> <td>ƒ</td> </tr> <tr> <td>low left rising double quote</td> * <td>84</td> <td>&ldquor;</td> <td>„</td> </tr> <tr> <td>low horizontal ellipsis</td> <td>85</td> <td>&ldots;</td> * <td>&ldots;</td> </tr> <tr> <td>dagger mark</td> <td>86</td> <td>&dagger;</td> <td>†</td> </tr> <tr> <td>double dagger * mark</td> <td>87</td> <td>&Dagger;</td> <td>‡</td> </tr> <tr> <td>per thousand (mille) sign</td> <td>89</td> * <td>&permil;</td> <td>‰</td> </tr> <tr> <td>capital S caron or hacek</td> <td>8A</td> <td>&Scaron;</td> * <td>Š</td> </tr> <tr> <td>left single angle quote mark (guillemet)</td> <td>8B</td> <td>&lsaquo;</td> <td>‹</td> * </tr> <tr> <td>capital OE ligature</td> <td>8C</td> <td>&OElig;</td> <td>Œ</td> </tr> <tr> <td>left single quotation mark, * high right rising single quote</td> <td>91</td> <td>&lsquo;</td> <td>‘</td> </tr> <tr> <td>right single quote mark</td> * <td>92</td> <td>&rsquo;</td> <td>’</td> </tr> <tr> <td>left double quotation mark, high right rising double quote</td> * <td>93</td> <td>&ldquo;</td> <td>“</td> </tr> <tr> <td>right double quote mark</td> <td>94</td> <td>&rdquo;</td> * <td>”</td> </tr> <tr> <td>round filled bullet</td> <td>95</td> <td>&bull;</td> <td>•</td> </tr> <tr> <td>en dash</td> * <td>96</td> <td>&ndash;</td> <td>–</td> </tr> <tr> <td>em dash</td> <td>97</td> <td>&mdash;</td> <td>—</td> </tr> * <tr> <td>small spacing tilde accent</td> <td>98</td> <td>&tilde;</td> <td>˜</td> </tr> <tr> <td>trademark sign</td> * <td>99</td> <td>&trade;</td> <td>™</td> </tr> <tr> <td>small s caron or hacek</td> <td>9A</td> <td>&scaron;</td> * <td>š</td> </tr> <tr> <td>right single angle quote mark (guillemet)</td> <td>9B</td> <td>&rsaquo;</td> <td>›</td> * </tr> <tr> <td>small oe ligature</td> <td>9C</td> <td>&oelig;</td> <td>œ</td> </tr> <tr> <td>capital Y dieresis or * umlaut</td> <td>9F</td> <td>&Yuml;</td> <td>Ÿ</td> </tr> </table> * * @param str the string to do the replacing on * @return the resulting string */ public static String replaceECC( String str ) { StringBuffer sb = new StringBuffer( str ); for ( int i = 0; i < sb.length(); i++ ) { char c = sb.charAt( i ); if ( c >= ECC_START && c <= ECC_END ) { String entity = ECC_MAP[c - ECC_START]; if ( entity != null ) { sb.replace( i, i + 1, entity ); i += entity.length() - 1; } else { sb.deleteCharAt( i ); i--; } } } return sb.toString(); } public static String replaceAll( String text, String what, String with ) { int startPos = text.indexOf( what ); if ( startPos < 0 ) { return text; } int currentPos = 0; StringBuffer result = new StringBuffer(); //char[] chars = text.toCharArray(); do { if ( currentPos < startPos ) { result.append( text.substring( currentPos, startPos ) ); } result.append( with ); currentPos = startPos + what.length(); startPos = text.indexOf( what, currentPos ); } while ( startPos >= 0 ); if ( currentPos < text.length() ) { result.append( text.substring( currentPos ) ); } return result.toString(); } public static void replaceString( StringBuffer text, String what, String with ) { String s = text.toString(); int startPos = s.indexOf( what ); if ( startPos == -1 ) { return; } replaceString( text, what, with, startPos ); } private static void replaceString( StringBuffer text, String what, String with, int startPos ) { text.replace( startPos, ( startPos + what.length() ), with ); } /** * Return value as hex. */ private static String toHex( byte[] value ) { char[] chars = new char[value.length * 2]; for ( int i = 0; i < value.length; i++ ) { int a = ( value[i] >> 4 ) & 0x0F; int b = value[i] & 0x0F; chars[i * 2] = HEX_CHARS[a]; chars[i * 2 + 1] = HEX_CHARS[b]; } return new String( chars ); } /** * Return value as hex. */ public static String toHex( short value ) { byte[] bytes = new byte[2]; bytes[0] = (byte) ( ( value >> 8 ) & 0xFF ); bytes[1] = (byte) ( value & 0xFF ); return toHex( bytes ); } /** * Return value as hex. */ public static String toHex( int value ) { byte[] bytes = new byte[4]; bytes[0] = (byte) ( ( value >> 24 ) & 0xFF ); bytes[1] = (byte) ( ( value >> 16 ) & 0xFF ); bytes[2] = (byte) ( ( value >> 8 ) & 0xFF ); bytes[3] = (byte) ( value & 0xFF ); return toHex( bytes ); } /** * Return value as hex. */ public static String toHex( long value ) { byte[] bytes = new byte[8]; bytes[0] = (byte) ( ( value >> 56 ) & 0xFF ); bytes[1] = (byte) ( ( value >> 48 ) & 0xFF ); bytes[2] = (byte) ( ( value >> 40 ) & 0xFF ); bytes[3] = (byte) ( ( value >> 32 ) & 0xFF ); bytes[4] = (byte) ( ( value >> 24 ) & 0xFF ); bytes[5] = (byte) ( ( value >> 16 ) & 0xFF ); bytes[6] = (byte) ( ( value >> 8 ) & 0xFF ); bytes[7] = (byte) ( value & 0xFF ); return toHex( bytes ); } public static String getXMLSafeString( String input ) { // This ancient stuff was found in XMLTool.createElement(Document doc, Element root, String name, String text, String sortAttribute, String sortValue) // Origin unknown, but it gets around a famous problem with xml parsing, Character reference "" is an invalid XML character. // It is basically a workaround that replaces a crazy character with ' (') StringBuffer sb = new StringBuffer( input ); for ( int i = 0; i < sb.length(); i++ ) { int c = sb.charAt( i ); if ( c == 26 ) // illegal character (special single quote) { sb.replace( i, i + 1, "'" ); } else if ( c < 33 && c != '\t' && c != '\n' && c != '\r' ) { sb.replace( i, i + 1, " " ); } } return sb.toString(); } }