/*
* Rapid Beans Framework: StringHelper.java
*
* Copyright (C) 2009 Martin Bluemel
*
* Creation Date: 11/09/2005
*
* This program is free software; you can redistribute it and/or modify it under the terms of the
* GNU Lesser General Public License as published by the Free Software Foundation;
* either version 3 of the License, or (at your option) any later version.
* This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
* See the GNU Lesser General Public License for more details.
* You should have received a copies of the GNU Lesser General Public License and the
* GNU General Public License along with this program; if not, see <http://www.gnu.org/licenses/>.
*/
package org.rapidbeans.core.util;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import org.rapidbeans.core.exception.RapidBeansRuntimeException;
import org.rapidbeans.core.exception.UtilException;
/**
* Helper class for string processing.
*
* @author Martin Bluemel
*/
public final class StringHelper {
/**
 * Converts the first character of the given string to upper case.
 *
 * @param string
 *            the string to convert; may be null or empty
 *
 * @return the string with its first character upper-cased; a null or
 *         empty argument is returned unchanged
 */
public static String upperFirstCharacter(final String string) {
    // Guard first: there is no character at index 0 to convert for ""
    // (and no string at all for null).
    if (string == null || string.length() == 0) {
        return string;
    }
    final char[] ca = string.toCharArray();
    ca[0] = Character.toUpperCase(ca[0]);
    return new String(ca);
}
/**
* Selects at which end(s) of a string the strip methods of this class
* remove characters.
*/
public enum StripMode {
/**
* strip leading and trailing characters.
*/
both,
/**
* strip only leading characters.
*/
leading,
/**
* strip only trailing characters.
*/
trailing,
}
/**
* The characters this class treats as whitespace: blank, line feed and tab.
*/
private static final char[] WHITESPACE_CHARACTERS = { ' ', '\n', '\t' };
/**
* The same whitespace characters as a string, used as delimiter set by the
* whitespace variants of the split methods.
*/
private static final String WHITESPACE_CHARACTER_STRING = new String(WHITESPACE_CHARACTERS);
/**
 * Tests whether every character of the given string is one of the ASCII
 * digits '0' to '9'.
 *
 * @param string
 *            the string to examine
 *
 * @return false as soon as a character outside '0'..'9' is found,<br/>
 *         otherwise true (which includes the empty string).
 */
public static boolean isDigitsOnly(final String string) {
    for (final char current : string.toCharArray()) {
        final boolean isDigit = current >= '0' && current <= '9';
        if (!isDigit) {
            return false;
        }
    }
    return true;
}
/**
* Strips one freely chosen character from both ends of a string.<br/>
* This is more flexible than String.trim() which only removes
* whitespace.<br/>
* Delegates to the variant taking a character array and a strip mode,
* using a one-element array and StripMode.both.
*
* @param string
* the string to strip
* @param stripCharacter
* the character to remove from the beginning and the end of the
* string
*
* @return the stripped string
*/
public static String strip(final String string, final char stripCharacter) {
return strip(string, new char[] { stripCharacter }, StripMode.both);
}
/**
* Strips any of the given characters from both ends of a string.<br/>
* This is more flexible than String.trim() which only removes
* whitespace.<br/>
* Delegates to the variant taking a character array and a strip mode,
* using StripMode.both.
*
* @param string
* the string to strip
* @param stripCharacters
* the characters to remove from the beginning and the end of the
* string
*
* @return the stripped string
*/
public static String strip(final String string, final char[] stripCharacters) {
return strip(string, stripCharacters, StripMode.both);
}
/**
* Strips whitespace (blank, line feed, tab) from a string.<br/>
* In contrast to String.trim() the caller chooses whether the leading
* end, the trailing end or both ends are stripped.<br/>
* Delegates to the variant taking a character array and a strip mode,
* passing WHITESPACE_CHARACTERS.
*
* @param string
* the string to strip
* @param mode
* specifies if only leading or trailing characters are removed
* or if removal happens on both sides.
*
* @return the stripped string
*/
public static String strip(final String string, final StripMode mode) {
return strip(string, WHITESPACE_CHARACTERS, mode);
}
/**
* Strips one freely chosen character from a string.<br/>
* In contrast to String.trim() the caller chooses both the character to
* remove and whether the leading end, the trailing end or both ends are
* stripped.<br/>
* Delegates to the variant taking a character array and a strip mode,
* using a one-element array.
*
* @param string
* the string to strip
* @param trimChar
* the character to remove
* @param mode
* specifies if only leading or trailing characters are removed
* or if removal happens on both sides.
*
* @return the stripped string
*/
public static String strip(final String string, final char trimChar, final StripMode mode) {
return strip(string, new char[] { trimChar }, mode);
}
/**
 * Strips any of the given characters from a string.<br/>
 * In contrast to String.trim() the caller chooses both the characters to
 * remove and whether the leading end, the trailing end or both ends are
 * stripped.<br/>
 * An empty string, or a string consisting only of strip characters, is
 * handled without running past the string bounds; the result is then the
 * empty string (or the unchanged string if nothing had to be removed).
 *
 * @param string
 *            the string to strip
 * @param trimChars
 *            the characters to remove
 * @param mode
 *            specifies if only leading or trailing characters are removed
 *            or if removal happens on both sides. Any other value
 *            (including null) leaves the string untouched.
 *
 * @return the stripped string; the very same instance if nothing was
 *         stripped
 */
public static String strip(final String string, final char[] trimChars, final StripMode mode) {
    final int len = string.length();
    // [start, end) is the range of characters that survive stripping.
    int start = 0;
    int end = len;
    if (mode == StripMode.both || mode == StripMode.leading) {
        // Test the bound BEFORE reading the character so that we never
        // call charAt(len).
        while (start < len && charMatches(trimChars, string.charAt(start))) {
            start++;
        }
    }
    if (mode == StripMode.both || mode == StripMode.trailing) {
        // "end > start" keeps the index non-negative and prevents the
        // trailing scan from crossing the leading one.
        while (end > start && charMatches(trimChars, string.charAt(end - 1))) {
            end--;
        }
    }
    if (start == 0 && end == len) {
        return string;
    }
    return string.substring(start, end);
}
/**
* Selects on which side fillUp() adds the fill characters.
*/
public enum FillMode {
/**
* fill up the left side.
*/
left,
/**
* fill up the right side.
*/
right,
}
/**
 * Pads a string with a fill character on the left or on the right side
 * until it has the requested length.
 *
 * @param string
 *            the string to pad
 * @param length
 *            the length to fill the string up to
 * @param fillChar
 *            the fill character
 * @param mode
 *            FillMode.left to put the fill characters in front of the
 *            string, FillMode.right to put them behind it
 *
 * @return the given string itself if it already has at least the
 *         requested length, otherwise the padded string. For a mode that
 *         is neither left nor right no fill characters are added.
 */
public static String fillUp(final String string, final int length, final char fillChar, final FillMode mode) {
    final int missing = length - string.length();
    if (missing <= 0) {
        return string;
    }
    final StringBuilder padded = new StringBuilder(length);
    switchSide: {
        if (mode == FillMode.left) {
            padded.append(newCharArray(missing, fillChar)).append(string);
            break switchSide;
        }
        padded.append(string);
        if (mode == FillMode.right) {
            padded.append(newCharArray(missing, fillChar));
        }
    }
    return padded.toString();
}
/**
 * Creates a character array of the given size in which every element is
 * the given fill character.
 *
 * @param charsToFillCount
 *            the size of the array to create
 * @param fillChar
 *            the character stored in every element
 *
 * @return the new, completely filled array
 */
private static char[] newCharArray(final int charsToFillCount, final char fillChar) {
    final char[] filled = new char[charsToFillCount];
    Arrays.fill(filled, 0, filled.length, fillChar);
    return filled;
}
/**
* Zero-length array passed to List.toArray() as the pattern that makes the
* result a String array.
*/
private static final String[] EMPTY_STRING_ARRAY = {};
/**
* Splits a string into tokens using any whitespace character (blank, line
* feed, tab) as delimiter.<br/>
* Delegates to the variant taking a delimiter string, passing
* WHITESPACE_CHARACTER_STRING.
*
* @param string
* the string to split.
*
* @return a list containing all tokens
*/
public static List<String> split(final String string) {
return split(string, WHITESPACE_CHARACTER_STRING);
}
/**
 * Splits a string into tokens using one or multiple delimiter characters.
 * Tokenizing is done by a java.util.StringTokenizer, so consecutive
 * delimiters do not produce empty tokens.
 *
 * @param string
 *            the string to split.
 * @param delimChars
 *            a string containing all delimiter characters
 *
 * @return a list containing all tokens in the order they occur; empty if
 *         the string contains no token
 */
public static List<String> split(final String string, final String delimChars) {
    final List<String> tokens = new ArrayList<String>();
    for (final StringTokenizer st = new StringTokenizer(string, delimChars); st.hasMoreTokens();) {
        tokens.add(st.nextToken());
    }
    return tokens;
}
/**
* Splits the first token from a given string using any whitespace character
* (blank, line feed, tab) as delimiter.<br/>
* Delegates to the variant taking a delimiter string, passing
* WHITESPACE_CHARACTER_STRING.
*
* @param string
* the string to split
*
* @return the first token or null if no token has been found
*/
public static String splitFirst(final String string) {
return splitFirst(string, WHITESPACE_CHARACTER_STRING);
}
/**
 * Splits the first token from a given string using one or multiple
 * delimiter characters given in a string.
 *
 * @param string
 *            the string to split
 * @param delimChars
 *            a string containing all delimiter characters
 *
 * @return the first token or null if no token has been found
 */
public static String splitFirst(final String string, final String delimChars) {
    final StringTokenizer st = new StringTokenizer(string, delimChars);
    return st.hasMoreTokens() ? st.nextToken() : null;
}
/**
* Splits the last token from a given string using any whitespace character
* (blank, line feed, tab) as delimiter.<br/>
* Delegates to the variant taking a delimiter string, passing
* WHITESPACE_CHARACTER_STRING.
*
* @param string
* the string to split
*
* @return the last token or null if no token has been found
*/
public static String splitLast(final String string) {
return splitLast(string, WHITESPACE_CHARACTER_STRING);
}
/**
 * Splits the last token from a given string using one or multiple
 * delimiter characters given in a string.
 *
 * @param string
 *            the string to split
 * @param delimChars
 *            a string containing all delimiter characters
 *
 * @return the last token or null if no token has been found
 */
public static String splitLast(final String string, final String delimChars) {
    String last = null;
    for (final StringTokenizer st = new StringTokenizer(string, delimChars); st.hasMoreTokens();) {
        // every token overwrites its predecessor, so the final one wins
        last = st.nextToken();
    }
    return last;
}
/**
 * Returns the part of a string that precedes its last token, where tokens
 * are separated by any of the given delimiter characters. Delimiters
 * between the remaining tokens are returned as they are.
 * <p>
 * The string is scanned backwards:
 * <ul>
 * <li>no token at all (empty string or delimiters only): the empty string
 * is returned</li>
 * <li>exactly one token with nothing in front of it: the string up to the
 * end of that token is returned (trailing delimiters are cut off)</li>
 * <li>exactly one token preceded only by delimiters: that token is
 * returned</li>
 * <li>otherwise: everything from the start of the string up to the end of
 * the token before the last one is returned</li>
 * </ul>
 *
 * @param string
 *            the string to split
 * @param delimChars
 *            a string containing all delimiter characters
 *
 * @return the part before the last token as described above
 */
public static String splitBeforeLast(final String string, final String delimChars) {
    final char[] delims = delimChars.toCharArray();

    // 1. skip trailing delimiters -> exclusive end of the last token
    int lastTokenEnd = string.length();
    while (lastTokenEnd > 0 && charMatches(delims, string.charAt(lastTokenEnd - 1))) {
        lastTokenEnd--;
    }
    if (lastTokenEnd == 0) {
        return "";
    }

    // 2. walk over the last token -> its start index
    int lastTokenStart = lastTokenEnd;
    while (lastTokenStart > 0 && !charMatches(delims, string.charAt(lastTokenStart - 1))) {
        lastTokenStart--;
    }
    if (lastTokenStart == 0) {
        return string.substring(0, lastTokenEnd);
    }

    // 3. skip the delimiters in front of the last token -> exclusive end
    // of whatever precedes it
    int headEnd = lastTokenStart;
    while (headEnd > 0 && charMatches(delims, string.charAt(headEnd - 1))) {
        headEnd--;
    }
    if (headEnd > 0) {
        return string.substring(0, headEnd);
    }
    return string.substring(lastTokenStart, lastTokenEnd);
}
/**
 * Splits a string at whitespace characters (blank, tab, line feed) but
 * keeps substrings enclosed in double quotes together.
 * <p>
 * Inside a quoted substring the backslash is an escape character: the
 * character following it is taken literally (so <code>\"</code> yields a
 * double quote and <code>\\</code> a backslash) and the backslash itself
 * is dropped. A double quote directly following an unquoted token ends
 * that token and starts a quoted one.
 *
 * @param string
 *            the string that will be split
 *
 * @return an array with the split substrings, without the enclosing quotes
 *
 * @throws UtilException
 *             if a quoted substring is not closed before the end of the
 *             string, including the case that the string ends right after
 *             an escaping backslash
 */
public static String[] splitQuoted(final String string) {
    final List<String> list = new ArrayList<String>();
    final StringBuilder buffer = new StringBuilder();
    // 0: between tokens, 1: within unquoted token,
    // 2: within quoted token, 3: within quoted token right after '\'
    int state = 0;
    final int len = string.length();
    for (int i = 0; i < len; i++) {
        final char c = string.charAt(i);
        switch (state) {
        // between tokens
        case 0:
            switch (c) {
            case ' ':
            case '\t':
            case '\n':
                // state stays 0
                break;
            case '"':
                state = 2;
                break;
            default:
                buffer.append(c);
                state = 1;
                break;
            }
            break;
        // within unquoted token
        case 1:
            switch (c) {
            case ' ':
            case '\t':
            case '\n':
                list.add(buffer.toString());
                buffer.setLength(0);
                state = 0;
                break;
            case '"':
                list.add(buffer.toString());
                buffer.setLength(0);
                state = 2;
                break;
            default:
                buffer.append(c);
                break;
            }
            break;
        // within quoted token
        case 2:
            switch (c) {
            case '\\':
                state = 3;
                break;
            case '"':
                list.add(buffer.toString());
                buffer.setLength(0);
                state = 0;
                break;
            default:
                buffer.append(c);
                break;
            }
            break;
        // within quoted token after '\': take the character literally
        case 3:
            buffer.append(c);
            state = 2;
            break;
        default:
            throw new UtilException("wrong state " + state);
        }
    }
    switch (state) {
    case 1:
        // the string ended within an unquoted token
        list.add(buffer.toString());
        break;
    case 2:
    case 3:
        // State 3 means the string ended right after a backslash inside
        // a quoted token: the quote is just as unterminated as in state
        // 2, so report it instead of silently dropping the token.
        throw new UtilException("Missing qouote at the end of string\"" + string + "\"");
    default:
        break;
    }
    return list.toArray(EMPTY_STRING_ARRAY);
}
/**
 * An immutable value holder that flags for a split token if it was quoted
 * or not.
 *
 * @author Martin Bluemel
 */
public static class SplitToken {
    /**
     * indicates if the token was quoted or not.
     */
    private final boolean quoted;

    /**
     * the split token string.
     */
    private final String token;

    /**
     * The constructor.
     *
     * @param quoted
     *            indicates if the token was quoted or not.
     * @param token
     *            the split token string.
     */
    public SplitToken(final boolean quoted, final String token) {
        this.quoted = quoted;
        this.token = token;
    }

    /**
     * @return true if the token was quoted, otherwise false
     */
    public boolean isQuoted() {
        return quoted;
    }

    /**
     * @return the split token string
     */
    public String getToken() {
        return token;
    }
}
/**
 * Splits a string at whitespace characters (blank, tab, line feed) but
 * keeps substrings enclosed in double quotes together, and records for
 * every token whether it was quoted.
 * <p>
 * Inside a quoted substring <code>\"</code> yields a double quote and
 * <code>\\</code> a backslash. A backslash followed by any other
 * character is kept together with that character. A double quote directly
 * following an unquoted token ends that token and starts a quoted one.
 *
 * @param string
 *            the string that will be split
 *
 * @return a list with one SplitToken per split substring, in order of
 *         occurrence
 *
 * @throws UtilException
 *             if a quoted substring is not closed before the end of the
 *             string, including the case that the string ends right after
 *             an escaping backslash
 */
public static List<SplitToken> splitQuotedIsQuoted(String string) {
    final List<SplitToken> list = new ArrayList<SplitToken>();
    final StringBuilder buffer = new StringBuilder();
    // 0: between tokens, 1: within unquoted token,
    // 2: within quoted token, 3: within quoted token right after '\'
    int state = 0;
    final int len = string.length();
    for (int i = 0; i < len; i++) {
        final char c = string.charAt(i);
        switch (state) {
        // between tokens
        case 0:
            switch (c) {
            case ' ':
            case '\t':
            case '\n':
                // state stays 0
                break;
            case '"':
                state = 2;
                break;
            default:
                buffer.append(c);
                state = 1;
                break;
            }
            break;
        // within unquoted token
        case 1:
            switch (c) {
            case ' ':
            case '\t':
            case '\n':
                list.add(new SplitToken(false, buffer.toString()));
                buffer.setLength(0);
                state = 0;
                break;
            case '"':
                list.add(new SplitToken(false, buffer.toString()));
                buffer.setLength(0);
                state = 2;
                break;
            default:
                buffer.append(c);
                break;
            }
            break;
        // within quoted token
        case 2:
            switch (c) {
            case '\\':
                state = 3;
                break;
            case '"':
                list.add(new SplitToken(true, buffer.toString()));
                buffer.setLength(0);
                state = 0;
                break;
            default:
                buffer.append(c);
                break;
            }
            break;
        // within quoted token after '\'
        case 3:
            if (c != '\\' && c != '"') {
                // not an escaped backslash or quote: keep the backslash
                buffer.append('\\');
            }
            buffer.append(c);
            state = 2;
            break;
        default:
            throw new UtilException("wrong state " + state);
        }
    }
    switch (state) {
    case 1:
        // the string ended within an unquoted token
        list.add(new SplitToken(false, buffer.toString()));
        break;
    case 2:
    case 3:
        // State 3 means the string ended right after a backslash inside
        // a quoted token: the quote is just as unterminated as in state
        // 2, so report it instead of silently dropping the token.
        throw new UtilException("Missing qouote at the end of string @" + string + "@");
    default:
        break;
    }
    return list;
}
/**
 * Splits a string at a separator character while honoring an escape
 * character: the character following the escape character is always taken
 * literally, so an escaped separator becomes part of the token.
 * Consecutive separators do not produce empty tokens. If a character is
 * both separator and escape character it acts as separator.
 *
 * @param string
 *            the string to split
 * @param sep
 *            the separator character
 * @param esc
 *            the escape character
 *
 * @return a list containing all tokens with the escape characters removed
 *
 * @throws UtilException
 *             if the string ends with an escape character
 */
public static List<String> splitEscaped(String string, char sep, char esc) {
    final List<String> tokens = new ArrayList<String>();
    final StringBuilder current = new StringBuilder();
    // true while at least one character belongs to the token being built
    boolean inToken = false;
    // true if the previous character was an unescaped escape character
    boolean escaped = false;
    for (final char c : string.toCharArray()) {
        if (escaped) {
            current.append(c);
            escaped = false;
            inToken = true;
        } else if (c == sep) {
            if (inToken) {
                tokens.add(current.toString());
                current.setLength(0);
                inToken = false;
            }
        } else if (c == esc) {
            escaped = true;
        } else {
            current.append(c);
            inToken = true;
        }
    }
    if (escaped) {
        throw new UtilException("Missing character after escape character '" + esc + "'");
    }
    if (inToken) {
        tokens.add(current.toString());
    }
    return tokens;
}
/**
 * Tells whether a character occurs in a character array.
 *
 * @param chars
 *            the character array to search
 * @param c
 *            the character to look for
 * @return true if at least one array element equals the character,
 *         otherwise false
 */
private static boolean charMatches(final char[] chars, final char c) {
    for (final char candidate : chars) {
        if (candidate == c) {
            return true;
        }
    }
    return false;
}
/**
* The default constructor may not be used: it is private because this
* class only provides static helper methods.
*/
private StringHelper() {
}
/**
 * Escapes one single character: every occurrence of the character to
 * escape and every occurrence of the escape character itself is prefixed
 * with the escape character.
 *
 * @param in
 *            the input string
 * @param escapeChar
 *            the escape character
 * @param charToEscape
 *            the character to escape
 *
 * @return the escaped string
 */
public static String escape(String in, char escapeChar, char charToEscape) {
    final StringBuilder escaped = new StringBuilder();
    for (final char current : in.toCharArray()) {
        if (current == escapeChar || current == charToEscape) {
            escaped.append(escapeChar);
        }
        escaped.append(current);
    }
    return escaped.toString();
}
/**
* Handle complex escape sequences: replaces every occurrence of a key of
* the map returned by escapeMap.getEscMap() in the input string by the
* value mapped to that key.
*
* @param in
* the input string to escape
* @param escapeMap
* defines the escape mapping
*
* @return the escaped string
*/
public static String escape(final String in, final EscapeMap escapeMap) {
final StringBuffer out = new StringBuffer();
escape(in, out, escapeMap.getEscMap());
return out.toString();
}
/**
* Handle complex escape sequences: replaces every occurrence of a key of
* the map returned by escapeMap.getUescMap() in the input string by the
* value mapped to that key.
* NOTE(review): getUescMap() presumably is the reverse of getEscMap() -
* EscapeMap is declared outside this file, please confirm.
*
* @param in
* the input string to unescape
* @param escapeMap
* defines the escape mapping
*
* @return the unescaped string
*/
public static String unescape(final String in, final EscapeMap escapeMap) {
final StringBuffer out = new StringBuffer();
escape(in, out, escapeMap.getUescMap());
return out.toString();
}
/**
 * Handle complex escape sequences: copies the input string to the output
 * buffer and replaces every occurrence of a key of the given map by the
 * value mapped to that key.
 * <p>
 * At each position the keys are tried in the iteration order of the map
 * and the first matching key wins. Empty keys are ignored because they
 * would match everywhere without consuming any input.
 *
 * @param in
 *            the input string
 * @param out
 *            the output string buffer the result is appended to
 * @param escMap
 *            maps each sequence to replace to its replacement
 */
private static void escape(final String in, final StringBuffer out, final Map<String, String> escMap) {
    final int len = in.length();
    int i = 0;
    while (i < len) {
        boolean replaced = false;
        for (final Map.Entry<String, String> entry : escMap.entrySet()) {
            final String sequenceToReplace = entry.getKey();
            // startsWith(prefix, offset) does the bounds check itself
            // and avoids creating a substring per key and position.
            if (sequenceToReplace.length() > 0 && in.startsWith(sequenceToReplace, i)) {
                out.append(entry.getValue());
                i += sequenceToReplace.length();
                replaced = true;
                break;
            }
        }
        if (!replaced) {
            out.append(in.charAt(i));
            i++;
        }
    }
}
/**
 * Renders the stack trace of the given Throwable into a string, exactly
 * as Throwable.printStackTrace() would print it.
 *
 * @param throwable
 *            the Throwable of which you want to get the stack trace.
 *
 * @return the stack trace as string
 */
public static String toStackTraceString(final Throwable throwable) {
    final StringWriter trace = new StringWriter();
    throwable.printStackTrace(new PrintWriter(trace));
    return trace.toString();
}
/**
* Removes blanks, tabs and line feeds from the beginning and the end of a
* string.<br/>
* Delegates to the variant taking the characters to trim and a trim mode,
* using TrimMode.both.
*
* @param sIn
* the string to trim
* @return the trimmed string
*/
public static String trim(final String sIn) {
char[] ca = { ' ', '\t', '\n' };
return trim(sIn, ca, TrimMode.both);
}
/**
* Removes the given characters from the beginning and the end of a
* string.<br/>
* Delegates to the variant taking the characters to trim and a trim mode,
* using TrimMode.both.
*
* @param sIn
* the string to trim
* @param trimChars
* the characters to trim
* @return the trimmed string
*/
public static String trim(final String sIn, final char[] trimChars) {
return trim(sIn, trimChars, TrimMode.both);
}
/**
 * Removes the given characters from the beginning and/or the end of a
 * string.
 *
 * @param sIn
 *            the string to trim
 * @param trimChars
 *            the characters to trim
 * @param mode
 *            TrimMode.both, TrimMode.leading or TrimMode.trailing; for
 *            any other value nothing is trimmed
 * @return the trimmed string; the given string itself if nothing had to
 *         be trimmed
 */
public static String trim(final String sIn, final char[] trimChars, final TrimMode mode) {
    final int len = sIn.length();
    final boolean trimLeading = mode == TrimMode.leading || mode == TrimMode.both;
    final boolean trimTrailing = mode == TrimMode.trailing || mode == TrimMode.both;

    // [begin, end) is the range of characters that is kept
    int begin = 0;
    if (trimLeading) {
        while (begin < len && charMatches(trimChars, sIn.charAt(begin))) {
            begin++;
        }
    }
    int end = len;
    if (trimTrailing) {
        while (end > begin && charMatches(trimChars, sIn.charAt(end - 1))) {
            end--;
        }
    }
    final boolean unchanged = begin == 0 && end == len;
    return unchanged ? sIn : sIn.substring(begin, end);
}
/**
 * Resolves backslash escape sequences in a string. The supported
 * sequences are <code>\n</code> (line feed), <code>\t</code> (tab),
 * <code>\b</code> (backspace) and <code>\\</code> (backslash). All other
 * characters are copied unchanged.
 *
 * @param string
 *            the string to unescape
 *
 * @return the unescaped string
 *
 * @throws RapidBeansRuntimeException
 *             if a backslash is followed by an unsupported character or
 *             if the string ends with an unfinished escape sequence (a
 *             single trailing backslash)
 */
public static String unescape(final String string) {
    final StringBuilder buf = new StringBuilder();
    final int len = string.length();
    // true if the previous character was a backslash starting a sequence
    boolean escaped = false;
    for (int i = 0; i < len; i++) {
        final char c = string.charAt(i);
        if (!escaped) {
            if (c == '\\') {
                escaped = true;
            } else {
                buf.append(c);
            }
            continue;
        }
        switch (c) {
        case 'n':
            buf.append('\n');
            break;
        case 't':
            buf.append('\t');
            break;
        case 'b':
            buf.append('\b');
            break;
        case '\\':
            buf.append('\\');
            break;
        default:
            throw new RapidBeansRuntimeException("Unknown escape squence '\\" + c + "'");
        }
        escaped = false;
    }
    if (escaped) {
        // A lone backslash at the end is an invalid sequence just like an
        // unknown one; report it instead of silently dropping it.
        throw new RapidBeansRuntimeException(
                "Incomplete escape sequence '\\' at the end of string \"" + string + "\"");
    }
    return buf.toString();
}
}