/** * Copyright 2010 JBoss Inc * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.drools.core.util; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringWriter; import java.io.Writer; import java.net.URI; import java.net.URISyntaxException; import java.util.ArrayList; import java.util.Collection; import java.util.Iterator; import java.util.LinkedList; import java.util.List; /** * Ripped form commons StringUtil, unless specified: * * <p>Operations on {@link java.lang.String} that are * <code>null</code> safe.</p> * * <ul> * <li><b>IsEmpty/IsBlank</b> * - checks if a String contains text</li> * <li><b>Trim/Strip</b> * - removes leading and trailing whitespace</li> * <li><b>Equals</b> * - compares two strings null-safe</li> * <li><b>IndexOf/LastIndexOf/Contains</b> * - null-safe index-of checks * <li><b>IndexOfAny/LastIndexOfAny/IndexOfAnyBut/LastIndexOfAnyBut</b> * - index-of any of a set of Strings</li> * <li><b>ContainsOnly/ContainsNone</b> * - does String contains only/none of these characters</li> * <li><b>Substring/Left/Right/Mid</b> * - null-safe substring extractions</li> * <li><b>SubstringBefore/SubstringAfter/SubstringBetween</b> * - substring extraction relative to other strings</li> * <li><b>Split/Join</b> * - splits a String into an array of substrings and vice versa</li> * <li><b>Remove/Delete</b> * - removes part of a String</li> * <li><b>Replace/Overlay</b> * - Searches a String and replaces one String with another</li> * <li><b>Chomp/Chop</b> * - removes the last part of a String</li> * <li><b>LeftPad/RightPad/Center/Repeat</b> * - pads a String</li> * <li><b>UpperCase/LowerCase/SwapCase/Capitalize/Uncapitalize</b> * - changes the case of a String</li> * <li><b>CountMatches</b> * - counts the number of occurrences of one String in another</li> * <li><b>IsAlpha/IsNumeric/IsWhitespace/IsAsciiPrintable</b> * - checks the characters in a String</li> * <li><b>DefaultString</b> * - protects against a null input String</li> * <li><b>Reverse/ReverseDelimited</b> * - reverses a String</li> * <li><b>Abbreviate</b> * - abbreviates a string using ellipsis</li> * <li><b>Difference</b> * - compares two Strings and reports on their differences</li> * <li><b>LevensteinDistance</b> * - the number of changes needed to change one String into another</li> * </ul> * * <p>The <code>StringUtils</code> class defines certain words related to * String handling.</p> * * <ul> * <li>null - <code>null</code></li> * <li>empty - a zero-length string (<code>""</code>)</li> * <li>space - the space character (<code>' '</code>, char 32)</li> * <li>whitespace - the characters defined by {@link Character#isWhitespace(char)}</li> * <li>trim - the characters <= 32 as in {@link String#trim()}</li> * </ul> * * <p><code>StringUtils</code> handles <code>null</code> input Strings quietly. * That is to say that a <code>null</code> input will return <code>null</code>. * Where a <code>boolean</code> or <code>int</code> is being returned * details vary by method.</p> * * <p>A side effect of the <code>null</code> handling is that a * <code>NullPointerException</code> should be considered a bug in * <code>StringUtils</code> (except for deprecated methods).</p> * * <p>Methods in this class give sample code to explain their operation. * The symbol <code>*</code> is used to indicate any input including <code>null</code>.</p> * * @see java.lang.String * @author <a href="http://jakarta.apache.org/turbine/">Apache Jakarta Turbine</a> * @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a> * @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a> * @author <a href="mailto:gcoladonato@yahoo.com">Greg Coladonato</a> * @author <a href="mailto:ed@apache.org">Ed Korthof</a> * @author <a href="mailto:rand_mcneely@yahoo.com">Rand McNeely</a> * @author Stephen Colebourne * @author <a href="mailto:fredrik@westermarck.com">Fredrik Westermarck</a> * @author Holger Krauth * @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a> * @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a> * @author Arun Mammen Thomas * @author Gary Gregory * @author Phil Steitz * @author Al Chou * @author Michael Davey * @author Reuben Sivan * @author Chris Hyzer * @since 1.0 * @version $Id$ */ public class StringUtils { /** * An empty immutable <code>String</code> array. */ public static final String[] EMPTY_STRING_ARRAY = new String[0]; // Performance testing notes (JDK 1.4, Jul03, scolebourne) // Whitespace: // Character.isWhitespace() is faster than WHITESPACE.indexOf() // where WHITESPACE is a string of all whitespace characters // // Character access: // String.charAt(n) versus toCharArray(), then array[n] // String.charAt(n) is about 15% worse for a 10K string // They are about equal for a length 50 string // String.charAt(n) is about 4 times better for a length 3 string // String.charAt(n) is best bet overall // // Append: // String.concat about twice as fast as StringBuilder.append // (not sure who tested this) /** * The empty String <code>""</code>. * @since 2.0 */ public static final String EMPTY = ""; /** * Represents a failed index search. * @since 2.1 */ public static final int INDEX_NOT_FOUND = -1; /** * <p>The maximum size to which the padding constant(s) can expand.</p> */ private static final int PAD_LIMIT = 8192; /** * <p><code>StringUtils</code> instances should NOT be constructed in * standard programming. Instead, the class should be used as * <code>StringUtils.trim(" foo ");</code>.</p> * * <p>This constructor is public to permit tools that require a JavaBean * instance to operate.</p> */ public StringUtils() { super(); } public static String ucFirst(final String name) { return name.toUpperCase().charAt( 0 ) + name.substring( 1 ); } // Empty checks //----------------------------------------------------------------------- /** * <p>Checks if a String is empty ("") or null.</p> * * <pre> * StringUtils.isEmpty(null) = true * StringUtils.isEmpty("") = true * StringUtils.isEmpty(" ") = false * StringUtils.isEmpty("bob") = false * StringUtils.isEmpty(" bob ") = false * </pre> * * <p>NOTE: This method changed in Lang version 2.0. * It no longer trims the String. * That functionality is available in isBlank().</p> * * @param str the String to check, may be null * @return <code>true</code> if the String is empty or null */ public static boolean isEmpty(final CharSequence str) { if ( str == null || str.length() == 0 ) { return true; } for ( int i = 0, length = str.length(); i < length; i++ ){ if ( str.charAt( i ) != ' ' ) { return false; } } return true; } // Padding //----------------------------------------------------------------------- /** * <p>Repeat a String <code>repeat</code> times to form a * new String.</p> * * <pre> * StringUtils.repeat(null, 2) = null * StringUtils.repeat("", 0) = "" * StringUtils.repeat("", 2) = "" * StringUtils.repeat("a", 3) = "aaa" * StringUtils.repeat("ab", 2) = "abab" * StringUtils.repeat("a", -2) = "" * </pre> * * @param str the String to repeat, may be null * @param repeat number of times to repeat str, negative treated as zero * @return a new String consisting of the original String repeated, * <code>null</code> if null String input */ public static String repeat(final String str, final int repeat) { // Performance tuned for 2.0 (JDK1.4) if ( str == null ) { return null; } if ( repeat <= 0 ) { return EMPTY; } final int inputLength = str.length(); if ( repeat == 1 || inputLength == 0 ) { return str; } if ( inputLength == 1 && repeat <= PAD_LIMIT ) { return padding( repeat, str.charAt( 0 ) ); } final int outputLength = inputLength * repeat; switch ( inputLength ) { case 1 : final char ch = str.charAt( 0 ); final char[] output1 = new char[outputLength]; for ( int i = repeat - 1; i >= 0; i-- ) { output1[i] = ch; } return new String( output1 ); case 2 : final char ch0 = str.charAt( 0 ); final char ch1 = str.charAt( 1 ); final char[] output2 = new char[outputLength]; for ( int i = repeat * 2 - 2; i >= 0; i--, i-- ) { output2[i] = ch0; output2[i + 1] = ch1; } return new String( output2 ); default : final StringBuilder buf = new StringBuilder( outputLength ); for ( int i = 0; i < repeat; i++ ) { buf.append( str ); } return buf.toString(); } } // Splitting //----------------------------------------------------------------------- /** * <p>Splits the provided text into an array, using whitespace as the * separator. * Whitespace is defined by {@link Character#isWhitespace(char)}.</p> * * <p>The separator is not included in the returned String array. * Adjacent separators are treated as one separator. * For more control over the split use the StrTokenizer class.</p> * * <p>A <code>null</code> input String returns <code>null</code>.</p> * * <pre> * StringUtils.split(null) = null * StringUtils.split("") = [] * StringUtils.split("abc def") = ["abc", "def"] * StringUtils.split("abc def") = ["abc", "def"] * StringUtils.split(" abc ") = ["abc"] * </pre> * * @param str the String to parse, may be null * @return an array of parsed Strings, <code>null</code> if null String input */ public static String[] split(String str) { return split( str, null, -1 ); } /** * <p>Splits the provided text into an array, separator specified. * This is an alternative to using StringTokenizer.</p> * * <p>The separator is not included in the returned String array. * Adjacent separators are treated as one separator. * For more control over the split use the StrTokenizer class.</p> * * <p>A <code>null</code> input String returns <code>null</code>.</p> * * <pre> * StringUtils.split(null, *) = null * StringUtils.split("", *) = [] * StringUtils.split("a.b.c", '.') = ["a", "b", "c"] * StringUtils.split("a..b.c", '.') = ["a", "b", "c"] * StringUtils.split("a:b:c", '.') = ["a:b:c"] * StringUtils.split("a\tb\nc", null) = ["a", "b", "c"] * StringUtils.split("a b c", ' ') = ["a", "b", "c"] * </pre> * * @param str the String to parse, may be null * @param separatorChar the character used as the delimiter, * <code>null</code> splits on whitespace * @return an array of parsed Strings, <code>null</code> if null String input * @since 2.0 */ public static String[] split(String str, char separatorChar) { return splitWorker( str, separatorChar, false ); } /** * <p>Splits the provided text into an array, separators specified. * This is an alternative to using StringTokenizer.</p> * * <p>The separator is not included in the returned String array. * Adjacent separators are treated as one separator. * For more control over the split use the StrTokenizer class.</p> * * <p>A <code>null</code> input String returns <code>null</code>. * A <code>null</code> separatorChars splits on whitespace.</p> * * <pre> * StringUtils.split(null, *) = null * StringUtils.split("", *) = [] * StringUtils.split("abc def", null) = ["abc", "def"] * StringUtils.split("abc def", " ") = ["abc", "def"] * StringUtils.split("abc def", " ") = ["abc", "def"] * StringUtils.split("ab:cd:ef", ":") = ["ab", "cd", "ef"] * </pre> * * @param str the String to parse, may be null * @param separatorChars the characters used as the delimiters, * <code>null</code> splits on whitespace * @return an array of parsed Strings, <code>null</code> if null String input */ public static String[] split(String str, String separatorChars) { return splitWorker( str, separatorChars, -1, false ); } /** * <p>Splits the provided text into an array with a maximum length, * separators specified.</p> * * <p>The separator is not included in the returned String array. * Adjacent separators are treated as one separator.</p> * * <p>A <code>null</code> input String returns <code>null</code>. * A <code>null</code> separatorChars splits on whitespace.</p> * * <p>If more than <code>min</code> delimited substrings are found, the last * returned string includes all characters after the first <code>min - 1</code> * returned strings (including separator characters).</p> * * <pre> * StringUtils.split(null, *, *) = null * StringUtils.split("", *, *) = [] * StringUtils.split("ab de fg", null, 0) = ["ab", "cd", "ef"] * StringUtils.split("ab de fg", null, 0) = ["ab", "cd", "ef"] * StringUtils.split("ab:cd:ef", ":", 0) = ["ab", "cd", "ef"] * StringUtils.split("ab:cd:ef", ":", 2) = ["ab", "cd:ef"] * </pre> * * @param str the String to parse, may be null * @param separatorChars the characters used as the delimiters, * <code>null</code> splits on whitespace * @param min the maximum number of elements to include in the * array. A zero or negative value implies no limit * @return an array of parsed Strings, <code>null</code> if null String input */ public static String[] split(String str, String separatorChars, int max) { return splitWorker( str, separatorChars, max, false ); } //----------------------------------------------------------------------- /** * <p>Splits the provided text into an array, using whitespace as the * separator, preserving all tokens, including empty tokens created by * adjacent separators. This is an alternative to using StringTokenizer. * Whitespace is defined by {@link Character#isWhitespace(char)}.</p> * * <p>The separator is not included in the returned String array. * Adjacent separators are treated as separators for empty tokens. * For more control over the split use the StrTokenizer class.</p> * * <p>A <code>null</code> input String returns <code>null</code>.</p> * * <pre> * StringUtils.splitPreserveAllTokens(null) = null * StringUtils.splitPreserveAllTokens("") = [] * StringUtils.splitPreserveAllTokens("abc def") = ["abc", "def"] * StringUtils.splitPreserveAllTokens("abc def") = ["abc", "", "def"] * StringUtils.splitPreserveAllTokens(" abc ") = ["", "abc", ""] * </pre> * * @param str the String to parse, may be <code>null</code> * @return an array of parsed Strings, <code>null</code> if null String input * @since 2.1 */ public static String[] splitPreserveAllTokens(final String str) { return splitWorker( str, null, -1, true ); } /** * <p>Splits the provided text into an array, separator specified, * preserving all tokens, including empty tokens created by adjacent * separators. This is an alternative to using StringTokenizer.</p> * * <p>The separator is not included in the returned String array. * Adjacent separators are treated as separators for empty tokens. * For more control over the split use the StrTokenizer class.</p> * * <p>A <code>null</code> input String returns <code>null</code>.</p> * * <pre> * StringUtils.splitPreserveAllTokens(null, *) = null * StringUtils.splitPreserveAllTokens("", *) = [] * StringUtils.splitPreserveAllTokens("a.b.c", '.') = ["a", "b", "c"] * StringUtils.splitPreserveAllTokens("a..b.c", '.') = ["a", "", "b", "c"] * StringUtils.splitPreserveAllTokens("a:b:c", '.') = ["a:b:c"] * StringUtils.splitPreserveAllTokens("a\tb\nc", null) = ["a", "b", "c"] * StringUtils.splitPreserveAllTokens("a b c", ' ') = ["a", "b", "c"] * StringUtils.splitPreserveAllTokens("a b c ", ' ') = ["a", "b", "c", ""] * StringUtils.splitPreserveAllTokens("a b c ", ' ') = ["a", "b", "c", "", ""] * StringUtils.splitPreserveAllTokens(" a b c", ' ') = ["", a", "b", "c"] * StringUtils.splitPreserveAllTokens(" a b c", ' ') = ["", "", a", "b", "c"] * StringUtils.splitPreserveAllTokens(" a b c ", ' ') = ["", a", "b", "c", ""] * </pre> * * @param str the String to parse, may be <code>null</code> * @param separatorChar the character used as the delimiter, * <code>null</code> splits on whitespace * @return an array of parsed Strings, <code>null</code> if null String input * @since 2.1 */ public static String[] splitPreserveAllTokens(final String str, final char separatorChar) { return splitWorker( str, separatorChar, true ); } /** * Performs the logic for the <code>split</code> and * <code>splitPreserveAllTokens</code> methods that do not return a * maximum array length. * * @param str the String to parse, may be <code>null</code> * @param separatorChar the separate character * @param preserveAllTokens if <code>true</code>, adjacent separators are * treated as empty token separators; if <code>false</code>, adjacent * separators are treated as one separator. * @return an array of parsed Strings, <code>null</code> if null String input */ private static String[] splitWorker(final String str, final char separatorChar, final boolean preserveAllTokens) { // Performance tuned for 2.0 (JDK1.4) if ( str == null ) { return null; } final int len = str.length(); if ( len == 0 ) { return EMPTY_STRING_ARRAY; } final List list = new ArrayList(); int i = 0, start = 0; boolean match = false; boolean lastMatch = false; while ( i < len ) { if ( str.charAt( i ) == separatorChar ) { if ( match || preserveAllTokens ) { list.add( str.substring( start, i ) ); match = false; lastMatch = true; } start = ++i; continue; } else { lastMatch = false; } match = true; i++; } if ( match || (preserveAllTokens && lastMatch) ) { list.add( str.substring( start, i ) ); } return (String[]) list.toArray( new String[list.size()] ); } /** * <p>Splits the provided text into an array, separators specified, * preserving all tokens, including empty tokens created by adjacent * separators. This is an alternative to using StringTokenizer.</p> * * <p>The separator is not included in the returned String array. * Adjacent separators are treated as separators for empty tokens. * For more control over the split use the StrTokenizer class.</p> * * <p>A <code>null</code> input String returns <code>null</code>. * A <code>null</code> separatorChars splits on whitespace.</p> * * <pre> * StringUtils.splitPreserveAllTokens(null, *) = null * StringUtils.splitPreserveAllTokens("", *) = [] * StringUtils.splitPreserveAllTokens("abc def", null) = ["abc", "def"] * StringUtils.splitPreserveAllTokens("abc def", " ") = ["abc", "def"] * StringUtils.splitPreserveAllTokens("abc def", " ") = ["abc", "", def"] * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":") = ["ab", "cd", "ef"] * StringUtils.splitPreserveAllTokens("ab:cd:ef:", ":") = ["ab", "cd", "ef", ""] * StringUtils.splitPreserveAllTokens("ab:cd:ef::", ":") = ["ab", "cd", "ef", "", ""] * StringUtils.splitPreserveAllTokens("ab::cd:ef", ":") = ["ab", "", cd", "ef"] * StringUtils.splitPreserveAllTokens(":cd:ef", ":") = ["", cd", "ef"] * StringUtils.splitPreserveAllTokens("::cd:ef", ":") = ["", "", cd", "ef"] * StringUtils.splitPreserveAllTokens(":cd:ef:", ":") = ["", cd", "ef", ""] * </pre> * * @param str the String to parse, may be <code>null</code> * @param separatorChars the characters used as the delimiters, * <code>null</code> splits on whitespace * @return an array of parsed Strings, <code>null</code> if null String input * @since 2.1 */ public static String[] splitPreserveAllTokens(final String str, final String separatorChars) { return splitWorker( str, separatorChars, -1, true ); } /** * <p>Splits the provided text into an array with a maximum length, * separators specified, preserving all tokens, including empty tokens * created by adjacent separators.</p> * * <p>The separator is not included in the returned String array. * Adjacent separators are treated as separators for empty tokens. * Adjacent separators are treated as one separator.</p> * * <p>A <code>null</code> input String returns <code>null</code>. * A <code>null</code> separatorChars splits on whitespace.</p> * * <p>If more than <code>min</code> delimited substrings are found, the last * returned string includes all characters after the first <code>min - 1</code> * returned strings (including separator characters).</p> * * <pre> * StringUtils.splitPreserveAllTokens(null, *, *) = null * StringUtils.splitPreserveAllTokens("", *, *) = [] * StringUtils.splitPreserveAllTokens("ab de fg", null, 0) = ["ab", "cd", "ef"] * StringUtils.splitPreserveAllTokens("ab de fg", null, 0) = ["ab", "cd", "ef"] * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 0) = ["ab", "cd", "ef"] * StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 2) = ["ab", "cd:ef"] * StringUtils.splitPreserveAllTokens("ab de fg", null, 2) = ["ab", " de fg"] * StringUtils.splitPreserveAllTokens("ab de fg", null, 3) = ["ab", "", " de fg"] * StringUtils.splitPreserveAllTokens("ab de fg", null, 4) = ["ab", "", "", "de fg"] * </pre> * * @param str the String to parse, may be <code>null</code> * @param separatorChars the characters used as the delimiters, * <code>null</code> splits on whitespace * @param min the maximum number of elements to include in the * array. A zero or negative value implies no limit * @return an array of parsed Strings, <code>null</code> if null String input * @since 2.1 */ public static String[] splitPreserveAllTokens(final String str, final String separatorChars, final int max) { return splitWorker( str, separatorChars, max, true ); } /** * Performs the logic for the <code>split</code> and * <code>splitPreserveAllTokens</code> methods that return a maximum array * length. * * @param str the String to parse, may be <code>null</code> * @param separatorChars the separate character * @param min the maximum number of elements to include in the * array. A zero or negative value implies no limit. * @param preserveAllTokens if <code>true</code>, adjacent separators are * treated as empty token separators; if <code>false</code>, adjacent * separators are treated as one separator. * @return an array of parsed Strings, <code>null</code> if null String input */ private static String[] splitWorker(final String str, final String separatorChars, final int max, final boolean preserveAllTokens) { // Performance tuned for 2.0 (JDK1.4) // Direct code is quicker than StringTokenizer. // Also, StringTokenizer uses isSpace() not isWhitespace() if ( str == null ) { return null; } final int len = str.length(); if ( len == 0 ) { return EMPTY_STRING_ARRAY; } final List list = new ArrayList(); int sizePlus1 = 1; int i = 0, start = 0; boolean match = false; boolean lastMatch = false; if ( separatorChars == null ) { // Null separator means use whitespace while ( i < len ) { if ( Character.isWhitespace( str.charAt( i ) ) ) { if ( match || preserveAllTokens ) { lastMatch = true; if ( sizePlus1++ == max ) { i = len; lastMatch = false; } list.add( str.substring( start, i ) ); match = false; } start = ++i; continue; } else { lastMatch = false; } match = true; i++; } } else if ( separatorChars.length() == 1 ) { // Optimise 1 character case final char sep = separatorChars.charAt( 0 ); while ( i < len ) { if ( str.charAt( i ) == sep ) { if ( match || preserveAllTokens ) { lastMatch = true; if ( sizePlus1++ == max ) { i = len; lastMatch = false; } list.add( str.substring( start, i ) ); match = false; } start = ++i; continue; } else { lastMatch = false; } match = true; i++; } } else { // standard case while ( i < len ) { if ( separatorChars.indexOf( str.charAt( i ) ) >= 0 ) { if ( match || preserveAllTokens ) { lastMatch = true; if ( sizePlus1++ == max ) { i = len; lastMatch = false; } list.add( str.substring( start, i ) ); match = false; } start = ++i; continue; } else { lastMatch = false; } match = true; i++; } } if ( match || (preserveAllTokens && lastMatch) ) { list.add( str.substring( start, i ) ); } return (String[]) list.toArray( new String[list.size()] ); } /** * <p>Returns padding using the specified delimiter repeated * to a given length.</p> * * <pre> * StringUtils.padding(0, 'e') = "" * StringUtils.padding(3, 'e') = "eee" * StringUtils.padding(-2, 'e') = IndexOutOfBoundsException * </pre> * * <p>Note: this method doesn't not support padding with * <a href="http://www.unicode.org/glossary/#supplementary_character">Unicode Supplementary Characters</a> * as they require a pair of <code>char</code>s to be represented. * If you are needing to support full I18N of your applications * consider using {@link #repeat(String, int)} instead. * </p> * * @param repeat number of times to repeat delim * @param padChar character to repeat * @return String with repeated character * @throws IndexOutOfBoundsException if <code>repeat < 0</code> * @see #repeat(String, int) */ public static String padding(final int repeat, final char padChar) throws IndexOutOfBoundsException { if ( repeat < 0 ) { throw new IndexOutOfBoundsException( "Cannot pad a negative amount: " + repeat ); } final char[] buf = new char[repeat]; for ( int i = 0; i < buf.length; i++ ) { buf[i] = padChar; } return new String( buf ); } /** @param filePath the name of the file to open. Not sure if it can accept URLs or just filenames. Path handling could be better, and buffer sizes are hardcoded */ public static String readFileAsString(Reader reader) { try { StringBuilder fileData = new StringBuilder( 1000 ); char[] buf = new char[1024]; int numRead = 0; while ( (numRead = reader.read( buf )) != -1 ) { String readData = String.valueOf( buf, 0, numRead ); fileData.append( readData ); buf = new char[1024]; } reader.close(); return fileData.toString(); } catch ( IOException e ) { throw new RuntimeException( e ); } } /** * <p>Unescapes any Java literals found in the <code>String</code>. * For example, it will turn a sequence of <code>'\'</code> and * <code>'n'</code> into a newline character, unless the <code>'\'</code> * is preceded by another <code>'\'</code>.</p> * * @param str the <code>String</code> to unescape, may be null * @return a new unescaped <code>String</code>, <code>null</code> if null string input */ public static String unescapeJava(String str) { if (str == null) { return null; } try { StringWriter writer = new StringWriter(str.length()); unescapeJava(writer, str); return writer.toString(); } catch (IOException ioe) { // this should never ever happen while writing to a StringWriter throw new RuntimeException(ioe); } } /** * <p>Unescapes any Java literals found in the <code>String</code> to a * <code>Writer</code>.</p> * * <p>For example, it will turn a sequence of <code>'\'</code> and * <code>'n'</code> into a newline character, unless the <code>'\'</code> * is preceded by another <code>'\'</code>.</p> * * <p>A <code>null</code> string input has no effect.</p> * * @param out the <code>Writer</code> used to output unescaped characters * @param str the <code>String</code> to unescape, may be null * @throws IllegalArgumentException if the Writer is <code>null</code> * @throws IOException if error occurs on underlying Writer */ public static void unescapeJava(Writer out, String str) throws IOException { if (out == null) { throw new IllegalArgumentException("The Writer must not be null"); } if (str == null) { return; } int sz = str.length(); StringBuilder unicode = new StringBuilder(4); boolean hadSlash = false; boolean inUnicode = false; for (int i = 0; i < sz; i++) { char ch = str.charAt(i); if (inUnicode) { // if in unicode, then we're reading unicode // values in somehow unicode.append(ch); if (unicode.length() == 4) { // unicode now contains the four hex digits // which represents our unicode character try { int value = Integer.parseInt(unicode.toString(), 16); out.write((char) value); unicode.setLength(0); inUnicode = false; hadSlash = false; } catch (NumberFormatException nfe) { throw new RuntimeException("Unable to parse unicode value: " + unicode, nfe); } } continue; } if (hadSlash) { // handle an escaped value hadSlash = false; switch (ch) { case '\\': out.write('\\'); break; case '\'': out.write('\''); break; case '\"': out.write('"'); break; case 'r': out.write('\r'); break; case 'f': out.write('\f'); break; case 't': out.write('\t'); break; case 'n': out.write('\n'); break; case 'b': out.write('\b'); break; case 'u': { // uh-oh, we're in unicode country.... inUnicode = true; break; } default : out.write(ch); break; } continue; } else if (ch == '\\') { hadSlash = true; continue; } out.write(ch); } if (hadSlash) { // then we're in the weird case of a \ at the end of the // string, let's output it anyway. out.write('\\'); } } /** * Apply the given relative path to the given path, * assuming standard Java folder separation (i.e. "/" separators); * @param path the path to start from (usually a full file path) * @param relativePath the relative path to apply * (relative to the full file path above) * @return the full file path that results from applying the relative path */ public static String applyRelativePath(String path, String relativePath) { int separatorIndex = path.lastIndexOf(FOLDER_SEPARATOR); if (separatorIndex != -1) { String newPath = path.substring(0, separatorIndex); if (!relativePath.startsWith(FOLDER_SEPARATOR)) { newPath += FOLDER_SEPARATOR; } return newPath + relativePath; } else { return relativePath; } } private static final String FOLDER_SEPARATOR = "/"; private static final String WINDOWS_FOLDER_SEPARATOR = "\\"; private static final String TOP_PATH = ".."; private static final String CURRENT_PATH = "."; private static final char EXTENSION_SEPARATOR = '.'; /** * Normalize the path by suppressing sequences like "path/.." and * inner simple dots. * <p>The result is convenient for path comparison. For other uses, * notice that Windows separators ("\") are replaced by simple slashes. * @param path the original path * @return the normalized path * * Borrowed from Spring, under the ASL2.0 license. */ public static String cleanPath(String path) { if (path == null) { return null; } String pathToUse = replace(path, WINDOWS_FOLDER_SEPARATOR, FOLDER_SEPARATOR); // Strip prefix from path to analyze, to not treat it as part of the // first path element. This is necessary to correctly parse paths like // "file:core/../core/io/Resource.class", where the ".." should just // strip the first "core" directory while keeping the "file:" prefix. int prefixIndex = pathToUse.indexOf(":"); String prefix = ""; if (prefixIndex != -1) { prefix = pathToUse.substring(0, prefixIndex + 1); pathToUse = pathToUse.substring(prefixIndex + 1); } if (pathToUse.startsWith(FOLDER_SEPARATOR)) { prefix = prefix + FOLDER_SEPARATOR; pathToUse = pathToUse.substring(1); } String[] pathArray = delimitedListToStringArray(pathToUse, FOLDER_SEPARATOR); List pathElements = new LinkedList(); int tops = 0; for (int i = pathArray.length - 1; i >= 0; i--) { String element = pathArray[i]; if (CURRENT_PATH.equals(element)) { // Points to current directory - drop it. } else if (TOP_PATH.equals(element)) { // Registering top path found. tops++; } else { if (tops > 0) { // Merging path element with element corresponding to top path. tops--; } else { // Normal path element found. pathElements.add(0, element); } } } // Remaining top paths need to be retained. for (int i = 0; i < tops; i++) { pathElements.add(0, TOP_PATH); } return prefix + collectionToDelimitedString(pathElements, FOLDER_SEPARATOR); } /** * Convenience method to return a Collection as a delimited (e.g. CSV) * String. E.g. useful for <code>toString()</code> implementations. * @param coll the Collection to display * @param delim the delimiter to use (probably a ",") * @param prefix the String to start each element with * @param suffix the String to end each element with * @return the delimited String * * Borrowed from Spring, under the ASL2.0 license. */ public static String collectionToDelimitedString(Collection coll, String delim, String prefix, String suffix) { if (coll == null || coll.isEmpty()) { return ""; } StringBuilder sb = new StringBuilder(); Iterator it = coll.iterator(); while (it.hasNext()) { sb.append(prefix).append(it.next()).append(suffix); if (it.hasNext()) { sb.append(delim); } } return sb.toString(); } /** * Convenience method to return a Collection as a delimited (e.g. CSV) * String. E.g. useful for <code>toString()</code> implementations. * @param coll the Collection to display * @param delim the delimiter to use (probably a ",") * @return the delimited String * * Borrowed from Spring, under the ASL2.0 license. */ public static String collectionToDelimitedString(Collection coll, String delim) { return collectionToDelimitedString(coll, delim, "", ""); } /** * Replace all occurences of a substring within a string with * another string. * @param inString String to examine * @param oldPattern String to replace * @param newPattern String to insert * @return a String with the replacements * * Borrowed from Spring, under the ASL2.0 license. */ public static String replace(String inString, String oldPattern, String newPattern) { if (isEmpty(inString) || isEmpty(oldPattern) || newPattern == null) { return inString; } StringBuilder sbuf = new StringBuilder(); // output StringBuilder we'll build up int pos = 0; // our position in the old string int index = inString.indexOf(oldPattern); // the index of an occurrence we've found, or -1 int patLen = oldPattern.length(); while (index >= 0) { sbuf.append(inString.substring(pos, index)); sbuf.append(newPattern); pos = index + patLen; index = inString.indexOf(oldPattern, pos); } sbuf.append(inString.substring(pos)); // remember to append any characters to the right of a match return sbuf.toString(); } public static URI toURI(String location) throws URISyntaxException { return new URI( StringUtils.replace( location, " ", "%20" ) ); } /** * Take a String which is a delimited list and convert it to a String array. * <p>A single delimiter can consists of more than one character: It will still * be considered as single delimiter string, rather than as bunch of potential * delimiter characters - in contrast to <code>tokenizeToStringArray</code>. * @param str the input String * @param delimiter the delimiter between elements (this is a single delimiter, * rather than a bunch individual delimiter characters) * @return an array of the tokens in the list * @see #tokenizeToStringArray * * Borrowed from Spring, under the ASL2.0 license. */ public static String[] delimitedListToStringArray(String str, String delimiter) { return delimitedListToStringArray(str, delimiter, null); } /** * Take a String which is a delimited list and convert it to a String array. * <p>A single delimiter can consists of more than one character: It will still * be considered as single delimiter string, rather than as bunch of potential * delimiter characters - in contrast to <code>tokenizeToStringArray</code>. * @param str the input String * @param delimiter the delimiter between elements (this is a single delimiter, * rather than a bunch individual delimiter characters) * @param charsToDelete a set of characters to delete. Useful for deleting unwanted * line breaks: e.g. "\r\n\f" will delete all new lines and line feeds in a String. * @return an array of the tokens in the list * @see #tokenizeToStringArray * * Borrowed from Spring, under the ASL2.0 license. */ public static String[] delimitedListToStringArray(String str, String delimiter, String charsToDelete) { if (str == null) { return new String[0]; } if (delimiter == null) { return new String[] {str}; } List result = new ArrayList(); if ("".equals(delimiter)) { for (int i = 0; i < str.length(); i++) { result.add(deleteAny(str.substring(i, i + 1), charsToDelete)); } } else { int pos = 0; int delPos = 0; while ((delPos = str.indexOf(delimiter, pos)) != -1) { result.add(deleteAny(str.substring(pos, delPos), charsToDelete)); pos = delPos + delimiter.length(); } if (str.length() > 0 && pos <= str.length()) { // Add rest of String, but not in case of empty input. result.add(deleteAny(str.substring(pos), charsToDelete)); } } return toStringArray(result); } /** * Copy the given Collection into a String array. * The Collection must contain String elements only. * @param collection the Collection to copy * @return the String array (<code>null</code> if the passed-in * Collection was <code>null</code>) * * Borrowed from Spring, under the ASL2.0 license. */ public static String[] toStringArray(Collection collection) { if (collection == null) { return null; } return (String[]) collection.toArray(new String[collection.size()]); } /** * Delete any character in a given String. * @param inString the original String * @param charsToDelete a set of characters to delete. * E.g. "az\n" will delete 'a's, 'z's and new lines. * @return the resulting String * * Borrowed from Spring, under the ASL2.0 license. */ public static String deleteAny(String inString, String charsToDelete) { if (isEmpty(inString) || isEmpty(charsToDelete)) { return inString; } StringBuilder out = new StringBuilder(); for (int i = 0; i < inString.length(); i++) { char c = inString.charAt(i); if (charsToDelete.indexOf(c) == -1) { out.append(c); } } return out.toString(); } public static String toString(Reader reader) throws IOException { if ( reader instanceof BufferedReader ) { return toString( (BufferedReader) reader ); } else { return toString( new BufferedReader( reader ) ); } } public static String toString(InputStream is) throws IOException { return toString( new BufferedReader(new InputStreamReader(is, "UTF-8") ) ); } public static String toString(BufferedReader reader) throws IOException { StringBuilder sb = new StringBuilder(); try { String line; boolean previousLine = false; while ((line = reader.readLine()) != null) { if ( previousLine ) { sb.append("\n"); } sb.append(line); previousLine = true; } } finally { reader.close(); } return sb.toString(); } }