/**
* Copyright 2010 JBoss Inc
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.drools.core.util;
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringWriter;
import java.io.Writer;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
/**
* Ripped form commons StringUtil, unless specified:
*
* <p>Operations on {@link java.lang.String} that are
* <code>null</code> safe.</p>
*
* <ul>
* <li><b>IsEmpty/IsBlank</b>
* - checks if a String contains text</li>
* <li><b>Trim/Strip</b>
* - removes leading and trailing whitespace</li>
* <li><b>Equals</b>
* - compares two strings null-safe</li>
* <li><b>IndexOf/LastIndexOf/Contains</b>
* - null-safe index-of checks
* <li><b>IndexOfAny/LastIndexOfAny/IndexOfAnyBut/LastIndexOfAnyBut</b>
* - index-of any of a set of Strings</li>
* <li><b>ContainsOnly/ContainsNone</b>
* - does String contains only/none of these characters</li>
* <li><b>Substring/Left/Right/Mid</b>
* - null-safe substring extractions</li>
* <li><b>SubstringBefore/SubstringAfter/SubstringBetween</b>
* - substring extraction relative to other strings</li>
* <li><b>Split/Join</b>
* - splits a String into an array of substrings and vice versa</li>
* <li><b>Remove/Delete</b>
* - removes part of a String</li>
* <li><b>Replace/Overlay</b>
* - Searches a String and replaces one String with another</li>
* <li><b>Chomp/Chop</b>
* - removes the last part of a String</li>
* <li><b>LeftPad/RightPad/Center/Repeat</b>
* - pads a String</li>
* <li><b>UpperCase/LowerCase/SwapCase/Capitalize/Uncapitalize</b>
* - changes the case of a String</li>
* <li><b>CountMatches</b>
* - counts the number of occurrences of one String in another</li>
* <li><b>IsAlpha/IsNumeric/IsWhitespace/IsAsciiPrintable</b>
* - checks the characters in a String</li>
* <li><b>DefaultString</b>
* - protects against a null input String</li>
* <li><b>Reverse/ReverseDelimited</b>
* - reverses a String</li>
* <li><b>Abbreviate</b>
* - abbreviates a string using ellipsis</li>
* <li><b>Difference</b>
* - compares two Strings and reports on their differences</li>
* <li><b>LevensteinDistance</b>
* - the number of changes needed to change one String into another</li>
* </ul>
*
* <p>The <code>StringUtils</code> class defines certain words related to
* String handling.</p>
*
* <ul>
* <li>null - <code>null</code></li>
* <li>empty - a zero-length string (<code>""</code>)</li>
* <li>space - the space character (<code>' '</code>, char 32)</li>
* <li>whitespace - the characters defined by {@link Character#isWhitespace(char)}</li>
* <li>trim - the characters <= 32 as in {@link String#trim()}</li>
* </ul>
*
* <p><code>StringUtils</code> handles <code>null</code> input Strings quietly.
* That is to say that a <code>null</code> input will return <code>null</code>.
* Where a <code>boolean</code> or <code>int</code> is being returned
* details vary by method.</p>
*
* <p>A side effect of the <code>null</code> handling is that a
* <code>NullPointerException</code> should be considered a bug in
* <code>StringUtils</code> (except for deprecated methods).</p>
*
* <p>Methods in this class give sample code to explain their operation.
* The symbol <code>*</code> is used to indicate any input including <code>null</code>.</p>
*
* @see java.lang.String
* @author <a href="http://jakarta.apache.org/turbine/">Apache Jakarta Turbine</a>
* @author <a href="mailto:jon@latchkey.com">Jon S. Stevens</a>
* @author <a href="mailto:dlr@finemaltcoding.com">Daniel Rall</a>
* @author <a href="mailto:gcoladonato@yahoo.com">Greg Coladonato</a>
* @author <a href="mailto:ed@apache.org">Ed Korthof</a>
* @author <a href="mailto:rand_mcneely@yahoo.com">Rand McNeely</a>
* @author Stephen Colebourne
* @author <a href="mailto:fredrik@westermarck.com">Fredrik Westermarck</a>
* @author Holger Krauth
* @author <a href="mailto:alex@purpletech.com">Alexander Day Chaffee</a>
* @author <a href="mailto:hps@intermeta.de">Henning P. Schmiedehausen</a>
* @author Arun Mammen Thomas
* @author Gary Gregory
* @author Phil Steitz
* @author Al Chou
* @author Michael Davey
* @author Reuben Sivan
* @author Chris Hyzer
* @since 1.0
* @version $Id$
*/
public class StringUtils {
/**
* An empty immutable <code>String</code> array.
*/
public static final String[] EMPTY_STRING_ARRAY = new String[0];
// Performance testing notes (JDK 1.4, Jul03, scolebourne)
// Whitespace:
// Character.isWhitespace() is faster than WHITESPACE.indexOf()
// where WHITESPACE is a string of all whitespace characters
//
// Character access:
// String.charAt(n) versus toCharArray(), then array[n]
// String.charAt(n) is about 15% worse for a 10K string
// They are about equal for a length 50 string
// String.charAt(n) is about 4 times better for a length 3 string
// String.charAt(n) is best bet overall
//
// Append:
// String.concat about twice as fast as StringBuilder.append
// (not sure who tested this)
/**
* The empty String <code>""</code>.
* @since 2.0
*/
public static final String EMPTY = "";
/**
* Represents a failed index search.
* @since 2.1
*/
public static final int INDEX_NOT_FOUND = -1;
/**
* <p>The maximum size to which the padding constant(s) can expand.</p>
*/
private static final int PAD_LIMIT = 8192;
/**
* <p><code>StringUtils</code> instances should NOT be constructed in
* standard programming. Instead, the class should be used as
* <code>StringUtils.trim(" foo ");</code>.</p>
*
* <p>This constructor is public to permit tools that require a JavaBean
* instance to operate.</p>
*/
public StringUtils() {
super();
}
public static String ucFirst(final String name) {
return name.toUpperCase().charAt( 0 ) + name.substring( 1 );
}
// Empty checks
//-----------------------------------------------------------------------
/**
* <p>Checks if a String is empty ("") or null.</p>
*
* <pre>
* StringUtils.isEmpty(null) = true
* StringUtils.isEmpty("") = true
* StringUtils.isEmpty(" ") = false
* StringUtils.isEmpty("bob") = false
* StringUtils.isEmpty(" bob ") = false
* </pre>
*
* <p>NOTE: This method changed in Lang version 2.0.
* It no longer trims the String.
* That functionality is available in isBlank().</p>
*
* @param str the String to check, may be null
* @return <code>true</code> if the String is empty or null
*/
public static boolean isEmpty(final CharSequence str) {
if ( str == null || str.length() == 0 ) {
return true;
}
for ( int i = 0, length = str.length(); i < length; i++ ){
if ( str.charAt( i ) != ' ' ) {
return false;
}
}
return true;
}
// Padding
//-----------------------------------------------------------------------
/**
* <p>Repeat a String <code>repeat</code> times to form a
* new String.</p>
*
* <pre>
* StringUtils.repeat(null, 2) = null
* StringUtils.repeat("", 0) = ""
* StringUtils.repeat("", 2) = ""
* StringUtils.repeat("a", 3) = "aaa"
* StringUtils.repeat("ab", 2) = "abab"
* StringUtils.repeat("a", -2) = ""
* </pre>
*
* @param str the String to repeat, may be null
* @param repeat number of times to repeat str, negative treated as zero
* @return a new String consisting of the original String repeated,
* <code>null</code> if null String input
*/
public static String repeat(final String str,
final int repeat) {
// Performance tuned for 2.0 (JDK1.4)
if ( str == null ) {
return null;
}
if ( repeat <= 0 ) {
return EMPTY;
}
final int inputLength = str.length();
if ( repeat == 1 || inputLength == 0 ) {
return str;
}
if ( inputLength == 1 && repeat <= PAD_LIMIT ) {
return padding( repeat,
str.charAt( 0 ) );
}
final int outputLength = inputLength * repeat;
switch ( inputLength ) {
case 1 :
final char ch = str.charAt( 0 );
final char[] output1 = new char[outputLength];
for ( int i = repeat - 1; i >= 0; i-- ) {
output1[i] = ch;
}
return new String( output1 );
case 2 :
final char ch0 = str.charAt( 0 );
final char ch1 = str.charAt( 1 );
final char[] output2 = new char[outputLength];
for ( int i = repeat * 2 - 2; i >= 0; i--, i-- ) {
output2[i] = ch0;
output2[i + 1] = ch1;
}
return new String( output2 );
default :
final StringBuilder buf = new StringBuilder( outputLength );
for ( int i = 0; i < repeat; i++ ) {
buf.append( str );
}
return buf.toString();
}
}
// Splitting
//-----------------------------------------------------------------------
/**
* <p>Splits the provided text into an array, using whitespace as the
* separator.
* Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
*
* <p>The separator is not included in the returned String array.
* Adjacent separators are treated as one separator.
* For more control over the split use the StrTokenizer class.</p>
*
* <p>A <code>null</code> input String returns <code>null</code>.</p>
*
* <pre>
* StringUtils.split(null) = null
* StringUtils.split("") = []
* StringUtils.split("abc def") = ["abc", "def"]
* StringUtils.split("abc def") = ["abc", "def"]
* StringUtils.split(" abc ") = ["abc"]
* </pre>
*
* @param str the String to parse, may be null
* @return an array of parsed Strings, <code>null</code> if null String input
*/
public static String[] split(String str) {
return split( str,
null,
-1 );
}
/**
* <p>Splits the provided text into an array, separator specified.
* This is an alternative to using StringTokenizer.</p>
*
* <p>The separator is not included in the returned String array.
* Adjacent separators are treated as one separator.
* For more control over the split use the StrTokenizer class.</p>
*
* <p>A <code>null</code> input String returns <code>null</code>.</p>
*
* <pre>
* StringUtils.split(null, *) = null
* StringUtils.split("", *) = []
* StringUtils.split("a.b.c", '.') = ["a", "b", "c"]
* StringUtils.split("a..b.c", '.') = ["a", "b", "c"]
* StringUtils.split("a:b:c", '.') = ["a:b:c"]
* StringUtils.split("a\tb\nc", null) = ["a", "b", "c"]
* StringUtils.split("a b c", ' ') = ["a", "b", "c"]
* </pre>
*
* @param str the String to parse, may be null
* @param separatorChar the character used as the delimiter,
* <code>null</code> splits on whitespace
* @return an array of parsed Strings, <code>null</code> if null String input
* @since 2.0
*/
public static String[] split(String str,
char separatorChar) {
return splitWorker( str,
separatorChar,
false );
}
/**
* <p>Splits the provided text into an array, separators specified.
* This is an alternative to using StringTokenizer.</p>
*
* <p>The separator is not included in the returned String array.
* Adjacent separators are treated as one separator.
* For more control over the split use the StrTokenizer class.</p>
*
* <p>A <code>null</code> input String returns <code>null</code>.
* A <code>null</code> separatorChars splits on whitespace.</p>
*
* <pre>
* StringUtils.split(null, *) = null
* StringUtils.split("", *) = []
* StringUtils.split("abc def", null) = ["abc", "def"]
* StringUtils.split("abc def", " ") = ["abc", "def"]
* StringUtils.split("abc def", " ") = ["abc", "def"]
* StringUtils.split("ab:cd:ef", ":") = ["ab", "cd", "ef"]
* </pre>
*
* @param str the String to parse, may be null
* @param separatorChars the characters used as the delimiters,
* <code>null</code> splits on whitespace
* @return an array of parsed Strings, <code>null</code> if null String input
*/
public static String[] split(String str,
String separatorChars) {
return splitWorker( str,
separatorChars,
-1,
false );
}
/**
* <p>Splits the provided text into an array with a maximum length,
* separators specified.</p>
*
* <p>The separator is not included in the returned String array.
* Adjacent separators are treated as one separator.</p>
*
* <p>A <code>null</code> input String returns <code>null</code>.
* A <code>null</code> separatorChars splits on whitespace.</p>
*
* <p>If more than <code>min</code> delimited substrings are found, the last
* returned string includes all characters after the first <code>min - 1</code>
* returned strings (including separator characters).</p>
*
* <pre>
* StringUtils.split(null, *, *) = null
* StringUtils.split("", *, *) = []
* StringUtils.split("ab de fg", null, 0) = ["ab", "cd", "ef"]
* StringUtils.split("ab de fg", null, 0) = ["ab", "cd", "ef"]
* StringUtils.split("ab:cd:ef", ":", 0) = ["ab", "cd", "ef"]
* StringUtils.split("ab:cd:ef", ":", 2) = ["ab", "cd:ef"]
* </pre>
*
* @param str the String to parse, may be null
* @param separatorChars the characters used as the delimiters,
* <code>null</code> splits on whitespace
* @param min the maximum number of elements to include in the
* array. A zero or negative value implies no limit
* @return an array of parsed Strings, <code>null</code> if null String input
*/
public static String[] split(String str,
String separatorChars,
int max) {
return splitWorker( str,
separatorChars,
max,
false );
}
//-----------------------------------------------------------------------
/**
* <p>Splits the provided text into an array, using whitespace as the
* separator, preserving all tokens, including empty tokens created by
* adjacent separators. This is an alternative to using StringTokenizer.
* Whitespace is defined by {@link Character#isWhitespace(char)}.</p>
*
* <p>The separator is not included in the returned String array.
* Adjacent separators are treated as separators for empty tokens.
* For more control over the split use the StrTokenizer class.</p>
*
* <p>A <code>null</code> input String returns <code>null</code>.</p>
*
* <pre>
* StringUtils.splitPreserveAllTokens(null) = null
* StringUtils.splitPreserveAllTokens("") = []
* StringUtils.splitPreserveAllTokens("abc def") = ["abc", "def"]
* StringUtils.splitPreserveAllTokens("abc def") = ["abc", "", "def"]
* StringUtils.splitPreserveAllTokens(" abc ") = ["", "abc", ""]
* </pre>
*
* @param str the String to parse, may be <code>null</code>
* @return an array of parsed Strings, <code>null</code> if null String input
* @since 2.1
*/
public static String[] splitPreserveAllTokens(final String str) {
return splitWorker( str,
null,
-1,
true );
}
/**
* <p>Splits the provided text into an array, separator specified,
* preserving all tokens, including empty tokens created by adjacent
* separators. This is an alternative to using StringTokenizer.</p>
*
* <p>The separator is not included in the returned String array.
* Adjacent separators are treated as separators for empty tokens.
* For more control over the split use the StrTokenizer class.</p>
*
* <p>A <code>null</code> input String returns <code>null</code>.</p>
*
* <pre>
* StringUtils.splitPreserveAllTokens(null, *) = null
* StringUtils.splitPreserveAllTokens("", *) = []
* StringUtils.splitPreserveAllTokens("a.b.c", '.') = ["a", "b", "c"]
* StringUtils.splitPreserveAllTokens("a..b.c", '.') = ["a", "", "b", "c"]
* StringUtils.splitPreserveAllTokens("a:b:c", '.') = ["a:b:c"]
* StringUtils.splitPreserveAllTokens("a\tb\nc", null) = ["a", "b", "c"]
* StringUtils.splitPreserveAllTokens("a b c", ' ') = ["a", "b", "c"]
* StringUtils.splitPreserveAllTokens("a b c ", ' ') = ["a", "b", "c", ""]
* StringUtils.splitPreserveAllTokens("a b c ", ' ') = ["a", "b", "c", "", ""]
* StringUtils.splitPreserveAllTokens(" a b c", ' ') = ["", a", "b", "c"]
* StringUtils.splitPreserveAllTokens(" a b c", ' ') = ["", "", a", "b", "c"]
* StringUtils.splitPreserveAllTokens(" a b c ", ' ') = ["", a", "b", "c", ""]
* </pre>
*
* @param str the String to parse, may be <code>null</code>
* @param separatorChar the character used as the delimiter,
* <code>null</code> splits on whitespace
* @return an array of parsed Strings, <code>null</code> if null String input
* @since 2.1
*/
public static String[] splitPreserveAllTokens(final String str,
final char separatorChar) {
return splitWorker( str,
separatorChar,
true );
}
/**
* Performs the logic for the <code>split</code> and
* <code>splitPreserveAllTokens</code> methods that do not return a
* maximum array length.
*
* @param str the String to parse, may be <code>null</code>
* @param separatorChar the separate character
* @param preserveAllTokens if <code>true</code>, adjacent separators are
* treated as empty token separators; if <code>false</code>, adjacent
* separators are treated as one separator.
* @return an array of parsed Strings, <code>null</code> if null String input
*/
private static String[] splitWorker(final String str,
final char separatorChar,
final boolean preserveAllTokens) {
// Performance tuned for 2.0 (JDK1.4)
if ( str == null ) {
return null;
}
final int len = str.length();
if ( len == 0 ) {
return EMPTY_STRING_ARRAY;
}
final List list = new ArrayList();
int i = 0, start = 0;
boolean match = false;
boolean lastMatch = false;
while ( i < len ) {
if ( str.charAt( i ) == separatorChar ) {
if ( match || preserveAllTokens ) {
list.add( str.substring( start,
i ) );
match = false;
lastMatch = true;
}
start = ++i;
continue;
} else {
lastMatch = false;
}
match = true;
i++;
}
if ( match || (preserveAllTokens && lastMatch) ) {
list.add( str.substring( start,
i ) );
}
return (String[]) list.toArray( new String[list.size()] );
}
/**
* <p>Splits the provided text into an array, separators specified,
* preserving all tokens, including empty tokens created by adjacent
* separators. This is an alternative to using StringTokenizer.</p>
*
* <p>The separator is not included in the returned String array.
* Adjacent separators are treated as separators for empty tokens.
* For more control over the split use the StrTokenizer class.</p>
*
* <p>A <code>null</code> input String returns <code>null</code>.
* A <code>null</code> separatorChars splits on whitespace.</p>
*
* <pre>
* StringUtils.splitPreserveAllTokens(null, *) = null
* StringUtils.splitPreserveAllTokens("", *) = []
* StringUtils.splitPreserveAllTokens("abc def", null) = ["abc", "def"]
* StringUtils.splitPreserveAllTokens("abc def", " ") = ["abc", "def"]
* StringUtils.splitPreserveAllTokens("abc def", " ") = ["abc", "", def"]
* StringUtils.splitPreserveAllTokens("ab:cd:ef", ":") = ["ab", "cd", "ef"]
* StringUtils.splitPreserveAllTokens("ab:cd:ef:", ":") = ["ab", "cd", "ef", ""]
* StringUtils.splitPreserveAllTokens("ab:cd:ef::", ":") = ["ab", "cd", "ef", "", ""]
* StringUtils.splitPreserveAllTokens("ab::cd:ef", ":") = ["ab", "", cd", "ef"]
* StringUtils.splitPreserveAllTokens(":cd:ef", ":") = ["", cd", "ef"]
* StringUtils.splitPreserveAllTokens("::cd:ef", ":") = ["", "", cd", "ef"]
* StringUtils.splitPreserveAllTokens(":cd:ef:", ":") = ["", cd", "ef", ""]
* </pre>
*
* @param str the String to parse, may be <code>null</code>
* @param separatorChars the characters used as the delimiters,
* <code>null</code> splits on whitespace
* @return an array of parsed Strings, <code>null</code> if null String input
* @since 2.1
*/
public static String[] splitPreserveAllTokens(final String str,
final String separatorChars) {
return splitWorker( str,
separatorChars,
-1,
true );
}
/**
* <p>Splits the provided text into an array with a maximum length,
* separators specified, preserving all tokens, including empty tokens
* created by adjacent separators.</p>
*
* <p>The separator is not included in the returned String array.
* Adjacent separators are treated as separators for empty tokens.
* Adjacent separators are treated as one separator.</p>
*
* <p>A <code>null</code> input String returns <code>null</code>.
* A <code>null</code> separatorChars splits on whitespace.</p>
*
* <p>If more than <code>min</code> delimited substrings are found, the last
* returned string includes all characters after the first <code>min - 1</code>
* returned strings (including separator characters).</p>
*
* <pre>
* StringUtils.splitPreserveAllTokens(null, *, *) = null
* StringUtils.splitPreserveAllTokens("", *, *) = []
* StringUtils.splitPreserveAllTokens("ab de fg", null, 0) = ["ab", "cd", "ef"]
* StringUtils.splitPreserveAllTokens("ab de fg", null, 0) = ["ab", "cd", "ef"]
* StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 0) = ["ab", "cd", "ef"]
* StringUtils.splitPreserveAllTokens("ab:cd:ef", ":", 2) = ["ab", "cd:ef"]
* StringUtils.splitPreserveAllTokens("ab de fg", null, 2) = ["ab", " de fg"]
* StringUtils.splitPreserveAllTokens("ab de fg", null, 3) = ["ab", "", " de fg"]
* StringUtils.splitPreserveAllTokens("ab de fg", null, 4) = ["ab", "", "", "de fg"]
* </pre>
*
* @param str the String to parse, may be <code>null</code>
* @param separatorChars the characters used as the delimiters,
* <code>null</code> splits on whitespace
* @param min the maximum number of elements to include in the
* array. A zero or negative value implies no limit
* @return an array of parsed Strings, <code>null</code> if null String input
* @since 2.1
*/
public static String[] splitPreserveAllTokens(final String str,
final String separatorChars,
final int max) {
return splitWorker( str,
separatorChars,
max,
true );
}
/**
* Performs the logic for the <code>split</code> and
* <code>splitPreserveAllTokens</code> methods that return a maximum array
* length.
*
* @param str the String to parse, may be <code>null</code>
* @param separatorChars the separate character
* @param min the maximum number of elements to include in the
* array. A zero or negative value implies no limit.
* @param preserveAllTokens if <code>true</code>, adjacent separators are
* treated as empty token separators; if <code>false</code>, adjacent
* separators are treated as one separator.
* @return an array of parsed Strings, <code>null</code> if null String input
*/
private static String[] splitWorker(final String str,
final String separatorChars,
final int max,
final boolean preserveAllTokens) {
// Performance tuned for 2.0 (JDK1.4)
// Direct code is quicker than StringTokenizer.
// Also, StringTokenizer uses isSpace() not isWhitespace()
if ( str == null ) {
return null;
}
final int len = str.length();
if ( len == 0 ) {
return EMPTY_STRING_ARRAY;
}
final List list = new ArrayList();
int sizePlus1 = 1;
int i = 0, start = 0;
boolean match = false;
boolean lastMatch = false;
if ( separatorChars == null ) {
// Null separator means use whitespace
while ( i < len ) {
if ( Character.isWhitespace( str.charAt( i ) ) ) {
if ( match || preserveAllTokens ) {
lastMatch = true;
if ( sizePlus1++ == max ) {
i = len;
lastMatch = false;
}
list.add( str.substring( start,
i ) );
match = false;
}
start = ++i;
continue;
} else {
lastMatch = false;
}
match = true;
i++;
}
} else if ( separatorChars.length() == 1 ) {
// Optimise 1 character case
final char sep = separatorChars.charAt( 0 );
while ( i < len ) {
if ( str.charAt( i ) == sep ) {
if ( match || preserveAllTokens ) {
lastMatch = true;
if ( sizePlus1++ == max ) {
i = len;
lastMatch = false;
}
list.add( str.substring( start,
i ) );
match = false;
}
start = ++i;
continue;
} else {
lastMatch = false;
}
match = true;
i++;
}
} else {
// standard case
while ( i < len ) {
if ( separatorChars.indexOf( str.charAt( i ) ) >= 0 ) {
if ( match || preserveAllTokens ) {
lastMatch = true;
if ( sizePlus1++ == max ) {
i = len;
lastMatch = false;
}
list.add( str.substring( start,
i ) );
match = false;
}
start = ++i;
continue;
} else {
lastMatch = false;
}
match = true;
i++;
}
}
if ( match || (preserveAllTokens && lastMatch) ) {
list.add( str.substring( start,
i ) );
}
return (String[]) list.toArray( new String[list.size()] );
}
/**
* <p>Returns padding using the specified delimiter repeated
* to a given length.</p>
*
* <pre>
* StringUtils.padding(0, 'e') = ""
* StringUtils.padding(3, 'e') = "eee"
* StringUtils.padding(-2, 'e') = IndexOutOfBoundsException
* </pre>
*
* <p>Note: this method doesn't not support padding with
* <a href="http://www.unicode.org/glossary/#supplementary_character">Unicode Supplementary Characters</a>
* as they require a pair of <code>char</code>s to be represented.
* If you are needing to support full I18N of your applications
* consider using {@link #repeat(String, int)} instead.
* </p>
*
* @param repeat number of times to repeat delim
* @param padChar character to repeat
* @return String with repeated character
* @throws IndexOutOfBoundsException if <code>repeat < 0</code>
* @see #repeat(String, int)
*/
public static String padding(final int repeat,
final char padChar) throws IndexOutOfBoundsException {
if ( repeat < 0 ) {
throw new IndexOutOfBoundsException( "Cannot pad a negative amount: " + repeat );
}
final char[] buf = new char[repeat];
for ( int i = 0; i < buf.length; i++ ) {
buf[i] = padChar;
}
return new String( buf );
}
/** @param filePath the name of the file to open. Not sure if it can accept URLs or just filenames. Path handling could be better, and buffer sizes are hardcoded
*/
public static String readFileAsString(Reader reader) {
try {
StringBuilder fileData = new StringBuilder( 1000 );
char[] buf = new char[1024];
int numRead = 0;
while ( (numRead = reader.read( buf )) != -1 ) {
String readData = String.valueOf( buf,
0,
numRead );
fileData.append( readData );
buf = new char[1024];
}
reader.close();
return fileData.toString();
} catch ( IOException e ) {
throw new RuntimeException( e );
}
}
/**
* <p>Unescapes any Java literals found in the <code>String</code>.
* For example, it will turn a sequence of <code>'\'</code> and
* <code>'n'</code> into a newline character, unless the <code>'\'</code>
* is preceded by another <code>'\'</code>.</p>
*
* @param str the <code>String</code> to unescape, may be null
* @return a new unescaped <code>String</code>, <code>null</code> if null string input
*/
public static String unescapeJava(String str) {
if (str == null) {
return null;
}
try {
StringWriter writer = new StringWriter(str.length());
unescapeJava(writer, str);
return writer.toString();
} catch (IOException ioe) {
// this should never ever happen while writing to a StringWriter
throw new RuntimeException(ioe);
}
}
/**
* <p>Unescapes any Java literals found in the <code>String</code> to a
* <code>Writer</code>.</p>
*
* <p>For example, it will turn a sequence of <code>'\'</code> and
* <code>'n'</code> into a newline character, unless the <code>'\'</code>
* is preceded by another <code>'\'</code>.</p>
*
* <p>A <code>null</code> string input has no effect.</p>
*
* @param out the <code>Writer</code> used to output unescaped characters
* @param str the <code>String</code> to unescape, may be null
* @throws IllegalArgumentException if the Writer is <code>null</code>
* @throws IOException if error occurs on underlying Writer
*/
public static void unescapeJava(Writer out, String str) throws IOException {
if (out == null) {
throw new IllegalArgumentException("The Writer must not be null");
}
if (str == null) {
return;
}
int sz = str.length();
StringBuilder unicode = new StringBuilder(4);
boolean hadSlash = false;
boolean inUnicode = false;
for (int i = 0; i < sz; i++) {
char ch = str.charAt(i);
if (inUnicode) {
// if in unicode, then we're reading unicode
// values in somehow
unicode.append(ch);
if (unicode.length() == 4) {
// unicode now contains the four hex digits
// which represents our unicode character
try {
int value = Integer.parseInt(unicode.toString(), 16);
out.write((char) value);
unicode.setLength(0);
inUnicode = false;
hadSlash = false;
} catch (NumberFormatException nfe) {
throw new RuntimeException("Unable to parse unicode value: " + unicode, nfe);
}
}
continue;
}
if (hadSlash) {
// handle an escaped value
hadSlash = false;
switch (ch) {
case '\\':
out.write('\\');
break;
case '\'':
out.write('\'');
break;
case '\"':
out.write('"');
break;
case 'r':
out.write('\r');
break;
case 'f':
out.write('\f');
break;
case 't':
out.write('\t');
break;
case 'n':
out.write('\n');
break;
case 'b':
out.write('\b');
break;
case 'u':
{
// uh-oh, we're in unicode country....
inUnicode = true;
break;
}
default :
out.write(ch);
break;
}
continue;
} else if (ch == '\\') {
hadSlash = true;
continue;
}
out.write(ch);
}
if (hadSlash) {
// then we're in the weird case of a \ at the end of the
// string, let's output it anyway.
out.write('\\');
}
}
/**
* Apply the given relative path to the given path,
* assuming standard Java folder separation (i.e. "/" separators);
* @param path the path to start from (usually a full file path)
* @param relativePath the relative path to apply
* (relative to the full file path above)
* @return the full file path that results from applying the relative path
*/
public static String applyRelativePath(String path, String relativePath) {
int separatorIndex = path.lastIndexOf(FOLDER_SEPARATOR);
if (separatorIndex != -1) {
String newPath = path.substring(0, separatorIndex);
if (!relativePath.startsWith(FOLDER_SEPARATOR)) {
newPath += FOLDER_SEPARATOR;
}
return newPath + relativePath;
}
else {
return relativePath;
}
}
private static final String FOLDER_SEPARATOR = "/";
private static final String WINDOWS_FOLDER_SEPARATOR = "\\";
private static final String TOP_PATH = "..";
private static final String CURRENT_PATH = ".";
private static final char EXTENSION_SEPARATOR = '.';
/**
* Normalize the path by suppressing sequences like "path/.." and
* inner simple dots.
* <p>The result is convenient for path comparison. For other uses,
* notice that Windows separators ("\") are replaced by simple slashes.
* @param path the original path
* @return the normalized path
*
* Borrowed from Spring, under the ASL2.0 license.
*/
public static String cleanPath(String path) {
if (path == null) {
return null;
}
String pathToUse = replace(path, WINDOWS_FOLDER_SEPARATOR, FOLDER_SEPARATOR);
// Strip prefix from path to analyze, to not treat it as part of the
// first path element. This is necessary to correctly parse paths like
// "file:core/../core/io/Resource.class", where the ".." should just
// strip the first "core" directory while keeping the "file:" prefix.
int prefixIndex = pathToUse.indexOf(":");
String prefix = "";
if (prefixIndex != -1) {
prefix = pathToUse.substring(0, prefixIndex + 1);
pathToUse = pathToUse.substring(prefixIndex + 1);
}
if (pathToUse.startsWith(FOLDER_SEPARATOR)) {
prefix = prefix + FOLDER_SEPARATOR;
pathToUse = pathToUse.substring(1);
}
String[] pathArray = delimitedListToStringArray(pathToUse, FOLDER_SEPARATOR);
List pathElements = new LinkedList();
int tops = 0;
for (int i = pathArray.length - 1; i >= 0; i--) {
String element = pathArray[i];
if (CURRENT_PATH.equals(element)) {
// Points to current directory - drop it.
}
else if (TOP_PATH.equals(element)) {
// Registering top path found.
tops++;
}
else {
if (tops > 0) {
// Merging path element with element corresponding to top path.
tops--;
}
else {
// Normal path element found.
pathElements.add(0, element);
}
}
}
// Remaining top paths need to be retained.
for (int i = 0; i < tops; i++) {
pathElements.add(0, TOP_PATH);
}
return prefix + collectionToDelimitedString(pathElements, FOLDER_SEPARATOR);
}
/**
* Convenience method to return a Collection as a delimited (e.g. CSV)
* String. E.g. useful for <code>toString()</code> implementations.
* @param coll the Collection to display
* @param delim the delimiter to use (probably a ",")
* @param prefix the String to start each element with
* @param suffix the String to end each element with
* @return the delimited String
*
* Borrowed from Spring, under the ASL2.0 license.
*/
public static String collectionToDelimitedString(Collection coll, String delim, String prefix, String suffix) {
if (coll == null || coll.isEmpty()) {
return "";
}
StringBuilder sb = new StringBuilder();
Iterator it = coll.iterator();
while (it.hasNext()) {
sb.append(prefix).append(it.next()).append(suffix);
if (it.hasNext()) {
sb.append(delim);
}
}
return sb.toString();
}
/**
* Convenience method to return a Collection as a delimited (e.g. CSV)
* String. E.g. useful for <code>toString()</code> implementations.
* @param coll the Collection to display
* @param delim the delimiter to use (probably a ",")
* @return the delimited String
*
* Borrowed from Spring, under the ASL2.0 license.
*/
public static String collectionToDelimitedString(Collection coll, String delim) {
return collectionToDelimitedString(coll, delim, "", "");
}
/**
* Replace all occurences of a substring within a string with
* another string.
* @param inString String to examine
* @param oldPattern String to replace
* @param newPattern String to insert
* @return a String with the replacements
*
* Borrowed from Spring, under the ASL2.0 license.
*/
public static String replace(String inString, String oldPattern, String newPattern) {
if (isEmpty(inString) || isEmpty(oldPattern) || newPattern == null) {
return inString;
}
StringBuilder sbuf = new StringBuilder();
// output StringBuilder we'll build up
int pos = 0; // our position in the old string
int index = inString.indexOf(oldPattern);
// the index of an occurrence we've found, or -1
int patLen = oldPattern.length();
while (index >= 0) {
sbuf.append(inString.substring(pos, index));
sbuf.append(newPattern);
pos = index + patLen;
index = inString.indexOf(oldPattern, pos);
}
sbuf.append(inString.substring(pos));
// remember to append any characters to the right of a match
return sbuf.toString();
}
public static URI toURI(String location) throws URISyntaxException {
return new URI( StringUtils.replace( location,
" ",
"%20" ) );
}
/**
* Take a String which is a delimited list and convert it to a String array.
* <p>A single delimiter can consists of more than one character: It will still
* be considered as single delimiter string, rather than as bunch of potential
* delimiter characters - in contrast to <code>tokenizeToStringArray</code>.
* @param str the input String
* @param delimiter the delimiter between elements (this is a single delimiter,
* rather than a bunch individual delimiter characters)
* @return an array of the tokens in the list
* @see #tokenizeToStringArray
*
* Borrowed from Spring, under the ASL2.0 license.
*/
public static String[] delimitedListToStringArray(String str, String delimiter) {
return delimitedListToStringArray(str, delimiter, null);
}
/**
* Take a String which is a delimited list and convert it to a String array.
* <p>A single delimiter can consists of more than one character: It will still
* be considered as single delimiter string, rather than as bunch of potential
* delimiter characters - in contrast to <code>tokenizeToStringArray</code>.
* @param str the input String
* @param delimiter the delimiter between elements (this is a single delimiter,
* rather than a bunch individual delimiter characters)
* @param charsToDelete a set of characters to delete. Useful for deleting unwanted
* line breaks: e.g. "\r\n\f" will delete all new lines and line feeds in a String.
* @return an array of the tokens in the list
* @see #tokenizeToStringArray
*
* Borrowed from Spring, under the ASL2.0 license.
*/
public static String[] delimitedListToStringArray(String str, String delimiter, String charsToDelete) {
if (str == null) {
return new String[0];
}
if (delimiter == null) {
return new String[] {str};
}
List result = new ArrayList();
if ("".equals(delimiter)) {
for (int i = 0; i < str.length(); i++) {
result.add(deleteAny(str.substring(i, i + 1), charsToDelete));
}
}
else {
int pos = 0;
int delPos = 0;
while ((delPos = str.indexOf(delimiter, pos)) != -1) {
result.add(deleteAny(str.substring(pos, delPos), charsToDelete));
pos = delPos + delimiter.length();
}
if (str.length() > 0 && pos <= str.length()) {
// Add rest of String, but not in case of empty input.
result.add(deleteAny(str.substring(pos), charsToDelete));
}
}
return toStringArray(result);
}
/**
* Copy the given Collection into a String array.
* The Collection must contain String elements only.
* @param collection the Collection to copy
* @return the String array (<code>null</code> if the passed-in
* Collection was <code>null</code>)
*
* Borrowed from Spring, under the ASL2.0 license.
*/
public static String[] toStringArray(Collection collection) {
if (collection == null) {
return null;
}
return (String[]) collection.toArray(new String[collection.size()]);
}
/**
* Delete any character in a given String.
* @param inString the original String
* @param charsToDelete a set of characters to delete.
* E.g. "az\n" will delete 'a's, 'z's and new lines.
* @return the resulting String
*
* Borrowed from Spring, under the ASL2.0 license.
*/
public static String deleteAny(String inString, String charsToDelete) {
if (isEmpty(inString) || isEmpty(charsToDelete)) {
return inString;
}
StringBuilder out = new StringBuilder();
for (int i = 0; i < inString.length(); i++) {
char c = inString.charAt(i);
if (charsToDelete.indexOf(c) == -1) {
out.append(c);
}
}
return out.toString();
}
public static String toString(Reader reader) throws IOException {
if ( reader instanceof BufferedReader ) {
return toString( (BufferedReader) reader );
} else {
return toString( new BufferedReader( reader ) );
}
}
public static String toString(InputStream is) throws IOException {
return toString( new BufferedReader(new InputStreamReader(is, "UTF-8") ) );
}
public static String toString(BufferedReader reader) throws IOException {
StringBuilder sb = new StringBuilder();
try {
String line;
boolean previousLine = false;
while ((line = reader.readLine()) != null) {
if ( previousLine ) {
sb.append("\n");
}
sb.append(line);
previousLine = true;
}
} finally {
reader.close();
}
return sb.toString();
}
}