/* * Rapid Beans Framework: StringHelper.java * * Copyright (C) 2009 Martin Bluemel * * Creation Date: 11/09/2005 * * This program is free software; you can redistribute it and/or modify it under the terms of the * GNU Lesser General Public License as published by the Free Software Foundation; * either version 3 of the License, or (at your option) any later version. * This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * See the GNU Lesser General Public License for more details. * You should have received a copies of the GNU Lesser General Public License and the * GNU General Public License along with this program; if not, see <http://www.gnu.org/licenses/>. */ package org.rapidbeans.core.util; import java.io.PrintWriter; import java.io.StringWriter; import java.io.Writer; import java.util.ArrayList; import java.util.Arrays; import java.util.List; import java.util.Map; import java.util.StringTokenizer; import org.rapidbeans.core.exception.RapidBeansRuntimeException; import org.rapidbeans.core.exception.UtilException; /** * Helper class for string processing. * * @author Martin Bluemel */ public final class StringHelper { /** * Convert the first character of the given string to upper cases. * * @param string * the string to convert * * @return the converted string */ public static String upperFirstCharacter(final String string) { final char[] ca = string.toCharArray(); ca[0] = Character.toUpperCase(ca[0]); return new String(ca); } public enum StripMode { /** * strip leading and trailing characters. */ both, /** * strip only leading characters */ leading, /** * strip only trailing characters */ trailing, } private static final char[] WHITESPACE_CHARACTERS = { ' ', '\n', '\t' }; private static final String WHITESPACE_CHARACTER_STRING = new String(WHITESPACE_CHARACTERS); /** * Check if the given string contains only digits or not. * * @param string * the string that will be checked * * @return true if the given string contains only digits,<br/> * otherwise false. */ public static boolean isDigitsOnly(final String string) { final int length = string.length(); for (int i = 0; i < length; i++) { final char c = string.charAt(i); if (c < '0' || c > '9') { return false; } } return true; } /** * Implements a more powerful mechanism to remove or filter leading and * trailing characters than String.trim().<br/> * Instead of simply removing whitespace characters you can specify an * arbitrary character to be removed.<br/> * This variant removes the specified character from both sides. * * @param string * the string to strip * @param stripCharacter * specifies the character to remove or filter. * * @return the filtered string */ public static String strip(final String string, final char stripCharacter) { return strip(string, new char[] { stripCharacter }, StripMode.both); } /** * Implements a more powerful mechanism to remove or filter leading and * trailing characters than String.trim().<br/> * Instead of simply removing whitespace characters you can specify * arbitrary characters to be removed.<br/> * This variant removes the specified characters from both sides. * * @param string * the string to strip * @param stripCharacters * specifies a list of characters to remove or filter. * * @return the filtered string */ public static String strip(final String string, final char[] stripCharacters) { return strip(string, stripCharacters, StripMode.both); } /** * Implements a more powerful mechanism to remove or filter leading and * trailing whitespace characters than String.trim().<br/> * Additionally to simply removing whitespace on both string ends you can * specify if only leading or trailing characters are removed. * * @param string * the string to strip * @param stripMode * specifies if only leading or trailing characters are removed * or if removal happens on both sides. * * @return the filtered string */ public static String strip(final String string, final StripMode mode) { return strip(string, WHITESPACE_CHARACTERS, mode); } /** * Implements a more powerful mechanism to remove or filter leading and * trailing characters than String.trim().<br/> * Instead of simply removing whitespace can specify a character to be * removed.<br/> * Additionally you can specify the characters are removed from both sides * or if only leading or trailing characters are removed. * * @param string * the string to strip * @param stripCharacters * specifies a list of characters to remove or filter. * @param stripMode * specifies if only leading or trailing characters are removed * or if removal happens on both sides. * * @return the filtered string */ public static String strip(final String string, final char trimChar, final StripMode mode) { return strip(string, new char[] { trimChar }, mode); } /** * Implements a more powerful mechanism to remove or filter leading and * trailing characters than String.trim().<br/> * Instead of simply removing whitespace can specify the characters to be * removed.<br/> * Additionally you can specify the characters are removed from both sides * or if only leading or trailing characters are removed. * * @param string * the string to strip * @param stripCharacters * specifies a list of characters to remove or filter. * @param stripMode * specifies if only leading or trailing characters are removed * or if removal happens on both sides. * * @return the filtered string */ public static String strip(final String string, final char[] trimChars, final StripMode mode) { final int len = string.length(); int firstNonStrippedIndex = 0; int lastNonStrippedIndex = len; if (mode == StripMode.both || mode == StripMode.leading) { firstNonStrippedIndex = -1; while (firstNonStrippedIndex < len && charMatches(trimChars, string.charAt(++firstNonStrippedIndex))) ; } if (mode == StripMode.both || mode == StripMode.trailing) { lastNonStrippedIndex = len; while (lastNonStrippedIndex >= firstNonStrippedIndex && charMatches(trimChars, string.charAt(--lastNonStrippedIndex))) ; lastNonStrippedIndex++; } String s = string; if (firstNonStrippedIndex > 0 || lastNonStrippedIndex < len) { s = string.substring(firstNonStrippedIndex, lastNonStrippedIndex); } return s; } public enum FillMode { /** * fill up the left side. */ left, /** * fill up the right side */ right, } /** * A primitive helper for formatting strings by simply filling it up on the * left or on the right side. * * @param string * the string * @param length * the length to fill the string up to * @param fillChar * the fill character * @param mode * FILL_MODE_LEFT or FILL_MODE_RIGHT * * @return the string filled up to the specified length */ public static String fillUp(final String string, final int length, final char fillChar, final FillMode mode) { final int charsToFillCount = length - string.length(); if (charsToFillCount <= 0) { return string; } final StringBuffer buf = new StringBuffer(length); if (mode == FillMode.left) { buf.append(newCharArray(charsToFillCount, fillChar)); } buf.append(string); if (mode == FillMode.right) { buf.append(newCharArray(charsToFillCount, fillChar)); } return buf.toString(); } private static char[] newCharArray(final int charsToFillCount, final char fillChar) { final char[] ca = new char[charsToFillCount]; Arrays.fill(ca, fillChar); return ca; } /** * the pattern to build String arrays. */ private static final String[] EMPTY_STRING_ARRAY = {}; /** * Splits a string into tokens using any whitespace character a delimiter. * * @param string * the string to split. * * @return a list containing all tokens */ public static List<String> split(final String string) { return split(string, WHITESPACE_CHARACTER_STRING); } /** * Splits a string into tokens using one or multiple delimiter characters. * * @param string * the string to spit. * @param delimChars * a string containing all delimiter characters * * @return a list containing all tokens */ public static List<String> split(final String string, final String delimChars) { final ArrayList<String> list = new ArrayList<String>(); final StringTokenizer tokenizer = new StringTokenizer(string, delimChars); while (tokenizer.hasMoreTokens()) { list.add(tokenizer.nextToken()); } return list; } /** * Split the first token from a give string using any whitespace character * as delimiter. * * @param string * the string to split * * @return the first token or null if no token has been found */ public static String splitFirst(final String string) { return splitFirst(string, WHITESPACE_CHARACTER_STRING); } /** * Split the first token from a give string using one or multiple delimiter * characters given in a string. * * @param string * the string to split * @param delimiterChars * the delimiter characters * * @return the first token or null if no token has been found */ public static String splitFirst(final String string, final String delimChars) { final StringTokenizer tokenizer = new StringTokenizer(string, delimChars); if (tokenizer.hasMoreTokens()) { return tokenizer.nextToken(); } else { return null; } } /** * Split the last token from a give string using any whitespace character as * delimiter. * * @param string * the string to split * * @return the last token or null if no token has been found */ public static String splitLast(final String string) { return splitLast(string, WHITESPACE_CHARACTER_STRING); } /** * Split the last token from a give string using one or multiple delimiter * characters given in a string. * * @param string * the string to split * @param delimiterChars * the delimiter characters * * @return the last token or null if no token has been found */ public static String splitLast(final String string, final String delimChars) { String lastToken = null; final StringTokenizer tokenizer = new StringTokenizer(string, delimChars); while (tokenizer.hasMoreTokens()) { lastToken = tokenizer.nextToken(delimChars); } return lastToken; } /** * Split all before the last token from a given string using one delimiter * character in a string. The delimiter tokens in between will be simply * returned. * * @param string * the string to split * @param delimiterChars * the delimiter characters * * @return all tokens besides the last or null if no token has been found */ public static String splitBeforeLast(final String string, final String delimChars) { final char[] delimCa = delimChars.toCharArray(); final int len = string.length(); int state = 0; int pos1 = -1; int pos2 = -1; int pos3 = -1; for (int i = len - 1; pos3 == -1 && i >= 0; i--) { final char c = string.charAt(i); switch (state) { case 0: if (!charMatches(delimCa, c)) { pos1 = i + 1; state = 1; } break; case 1: if (charMatches(delimCa, c)) { pos2 = i + 1; state = 2; } break; case 2: if (!charMatches(delimCa, c)) { pos3 = i + 1; } break; } } switch (state) { case 0: return ""; case 1: return string.substring(0, pos1); default: if (pos3 > -1) { return string.substring(0, pos3); } else { return string.substring(pos2, pos1); } } } /** * A convenient method to split a string by any whitespace character but * leave quoted substring together. * * @param string * the string that will be split * * @return an array with substring split */ public static String[] splitQuoted(final String string) { final List<String> list = new ArrayList<String>(); final StringBuffer buffer = new StringBuffer(); int state = 0; final int len = string.length(); for (int i = 0; i < len; i++) { final char c = string.charAt(i); switch (state) { // between token case 0: switch (c) { case ' ': case '\t': case '\n': // state stays 0 break; case '"': state = 2; break; default: buffer.append(c); state = 1; } break; // within unquoted token case 1: switch (c) { case ' ': case '\t': case '\n': list.add(buffer.toString()); buffer.setLength(0); state = 0; break; case '"': list.add(buffer.toString()); buffer.setLength(0); state = 2; break; default: buffer.append(c); state = 1; } break; // within quoted token case 2: switch (c) { case '\\': state = 3; break; case '"': list.add(buffer.toString()); buffer.setLength(0); state = 0; break; default: buffer.append(c); break; } break; // within quoted token after \ case 3: switch (c) { case '\\': buffer.append('\\'); state = 2; break; case '"': buffer.append('"'); state = 2; break; default: buffer.append(c); state = 2; break; } break; default: throw new UtilException("wrong state " + state); } } switch (state) { case 0: break; case 1: list.add(buffer.toString()); break; case 2: throw new UtilException("Missing qouote at the end of string\"" + string + "\""); } return list.toArray(EMPTY_STRING_ARRAY); } /** * A helper class to flag for each split token if it was quoted or not. * * @author Martin Bluemel */ public static class SplitToken { /** * indicates if the token was quoted or not. */ private boolean quoted = false; /** * the split token string. */ private String token = null; /** * The constructor. * * @param quoted * indicates if the token was quoted or not. * @param token * the split token string. */ public SplitToken(final boolean quoted, final String token) { this.quoted = quoted; this.token = token; } /** * @return the quoted */ public boolean isQuoted() { return quoted; } /** * @return the token */ public String getToken() { return token; } } /** * A convenient method to split a string by any whitespace character but * leave quoted substring together. * * @param string * the string that will be split * * @return an array with substring split */ public static List<SplitToken> splitQuotedIsQuoted(String string) { final List<SplitToken> list = new ArrayList<SplitToken>(); final StringBuffer buffer = new StringBuffer(); int state = 0; final int len = string.length(); for (int i = 0; i < len; i++) { final char c = string.charAt(i); switch (state) { // between token case 0: switch (c) { case ' ': case '\t': case '\n': // state stays 0 break; case '"': state = 2; break; default: buffer.append(c); state = 1; } break; // within unquoted token case 1: switch (c) { case ' ': case '\t': case '\n': list.add(new SplitToken(false, buffer.toString())); buffer.setLength(0); state = 0; break; case '"': list.add(new SplitToken(false, buffer.toString())); buffer.setLength(0); state = 2; break; default: buffer.append(c); state = 1; } break; // within quoted token case 2: switch (c) { case '\\': state = 3; break; case '"': list.add(new SplitToken(true, buffer.toString())); buffer.setLength(0); state = 0; break; default: buffer.append(c); break; } break; // within quoted token after \ case 3: switch (c) { case '\\': buffer.append('\\'); state = 2; break; case '"': buffer.append('"'); state = 2; break; default: buffer.append('\\'); buffer.append(c); state = 2; break; } break; default: throw new UtilException("wrong state " + state); } } switch (state) { case 0: break; case 1: list.add(new SplitToken(false, buffer.toString())); break; case 2: throw new UtilException("Missing qouote at the end of string @" + string + "@"); } return list; } public static List<String> splitEscaped(String string, char sep, char esc) { final List<String> list = new ArrayList<String>(); final StringBuffer buffer = new StringBuffer(); int state = 0; final int len = string.length(); for (int i = 0; i < len; i++) { final char c = string.charAt(i); switch (state) { case 0: if (c == sep) { // state stays 0 } else if (c == esc) { state = 2; } else { buffer.append(c); state = 1; } break; case 1: if (c == sep) { list.add(buffer.toString()); buffer.setLength(0); state = 0; } else if (c == esc) { state = 2; } else { buffer.append(c); state = 1; } break; case 2: buffer.append(c); state = 1; break; default: throw new UtilException("wrong state " + state); } } switch (state) { case 0: break; case 1: list.add(buffer.toString()); break; case 2: throw new UtilException("Missing character after escape character '" + esc + "'"); } return list; } /** * determines if a character is in a character array. * * @param chars * the character array * @param c * the character to test * @return if the character matches or not */ private static boolean charMatches(final char[] chars, final char c) { boolean matches = false; for (int i = 0; !matches && i < chars.length; i++) { if (chars[i] == c) { matches = true; } } return matches; } /** * The default constructor may not be used. */ private StringHelper() { } /** * Simply escape one single character * * @param in * the input string * @param escapeChar * the escape character * @param charToEscape * the character to escape * * @return the escaped string */ public static String escape(String in, char escapeChar, char charToEscape) { final StringBuffer out = new StringBuffer(); final int len = in.length(); for (int i = 0; i < len; i++) { final char c = in.charAt(i); if (c == escapeChar) { out.append(escapeChar); out.append(escapeChar); } else if (c == charToEscape) { out.append(escapeChar); out.append(c); } else { out.append(c); } } return out.toString(); } /** * Handle complex escape sequences. * * @param in * the input string to escape * @param escapeMap * defines the escape mapping * * @return the escaped string */ public static String escape(final String in, final EscapeMap escapeMap) { final StringBuffer out = new StringBuffer(); escape(in, out, escapeMap.getEscMap()); return out.toString(); } /** * Handle complex escape sequences. * * @param in * the input string * @param escapeMap * defines the escape mapping * * @return the escaped string */ public static String unescape(final String in, final EscapeMap escapeMap) { final StringBuffer out = new StringBuffer(); escape(in, out, escapeMap.getUescMap()); return out.toString(); } /** * Handle complex escape sequences. * * @param in * the input string * @param out * the output string buffer * @param escapeMap * defines the escape mapping * * @return the escaped string */ private static void escape(final String in, final StringBuffer out, final Map<String, String> escMap) { final int len = in.length(); for (int i = 0; i < len; i++) { boolean replaced = false; for (final String sequenceToReplace : escMap.keySet()) { if ((i + sequenceToReplace.length()) <= len && in.substring(i, i + sequenceToReplace.length()).equals(sequenceToReplace)) { out.append(escMap.get(sequenceToReplace)); i += sequenceToReplace.length() - 1; replaced = true; break; } } if (!replaced) { out.append(in.charAt(i)); } } } /** * Print the stack trace of the given Throwable to a string. * * @param throwable * the Throwable of which you want to get the stack trace. * * @return the stack trace as string */ public static String toStackTraceString(final Throwable throwable) { final Writer result = new StringWriter(); final PrintWriter printWriter = new PrintWriter(result); throwable.printStackTrace(printWriter); return result.toString(); } /** * remove blanks, tabs and line feeds from beginning and end of a string. * * @param sIn * - string to trim * @return trimmed string */ public static String trim(final String sIn) { char[] ca = { ' ', '\t', '\n' }; return trim(sIn, ca, TrimMode.both); } /** * remove given characters from beginning and end of a string. * * @param sIn * - string to trim * @param trimChars * - characters to trim * @return - trimmed string */ public static String trim(final String sIn, final char[] trimChars) { return trim(sIn, trimChars, TrimMode.both); } /** * remove blanks, tabs and line feeds from beginning and/or end of a string. * * @param sIn * - string to trim * @param trimChars * - characters to trim * @param mode * - TRIM_MODE_BOTH, TRIM_MODE_LEADING or TRIM_MODE_TRAILING * @return - trimmed string */ public static String trim(final String sIn, final char[] trimChars, final TrimMode mode) { String s = sIn; int len = sIn.length(); int posStart = 0; int posEnd = len; int i = 0; if (mode == TrimMode.leading || mode == TrimMode.both) { while (i < len && charMatches(trimChars, sIn.charAt(i))) { posStart = ++i; } } if (mode == TrimMode.trailing || mode == TrimMode.both) { i = len - 1; while (i >= posStart && charMatches(trimChars, sIn.charAt(i))) { posEnd = --i; } if (posEnd < len) { posEnd++; } } if (posStart > 0 || posEnd < len) { s = sIn.substring(posStart, posEnd); } return s; } /** * unescape the '\' */ public static String unescape(final String string) { final StringBuffer buf = new StringBuffer(); final int len = string.length(); int state = 0; for (int i = 0; i < len; i++) { final char c = string.charAt(i); switch (state) { case 0: switch (c) { case '\\': state = 1; break; default: buf.append(c); break; } break; case 1: switch (c) { case 'n': buf.append('\n'); state = 0; break; case 't': buf.append('\t'); state = 0; break; case 'b': buf.append('\b'); state = 0; break; case '\\': buf.append('\\'); state = 0; break; default: throw new RapidBeansRuntimeException("Unknown escape squence '\\" + c + "'"); } break; } } return buf.toString(); } }