/* Strings.java Purpose: String utilities and constants Description: History: 2001/4/17, Tom M. Yeh: Created. Copyright (C) 2001 Potix Corporation. All Rights Reserved. {{IS_RIGHT This program is distributed under LGPL Version 2.1 in the hope that it will be useful, but WITHOUT ANY WARRANTY. }}IS_RIGHT */ package org.zkoss.lang; import java.io.UnsupportedEncodingException; import java.util.Arrays; import org.zkoss.mesg.MCommon; import org.zkoss.util.IllegalSyntaxException; /** * String utilities and constants * * @author tomyeh */ public class Strings { /** Used with {@link #escape} to escape a string in * JavaScript. It assumes the string will be enclosed with a single quote. */ public static final String ESCAPE_JAVASCRIPT = "'\n\r\t\f\\/!"; public static final String EMPTY = ""; /** * Returns true if the string is null or empty. */ public static final boolean isEmpty(String s) { return s == null || s.length() == 0; } /** * Returns true if the string is null or empty or pure blank. */ public static final boolean isBlank(String s) { return s == null || s.trim().length() == 0; } /** Trims the string buffer by removing the leading and trailing * whitespaces. * Note: unlike String.trim(), you can specify the index of * the first character to start trimming. * * <p>Like String.trim(), a whitespace is a character that has * a code not great than <code>'\u0020'</code> (the space character) * * @param buf the string buffer to trim the leading and trailing. * @param index the index of the first character to trim * (i.e., consider as the leading character). * If 0, it starts from the beginning. * @return the same string buffer * @since 3.0.4 */ public static final StringBuffer trim(StringBuffer buf, final int index) { for (int j = index, len = buf.length();; ++j) { if (j >= len) { buf.delete(index, len); break; //done } char cc = buf.charAt(j); if (cc > ' ') { //same as String.trim() buf.delete(index, j); for (len = j = buf.length(); --j >= index;) { cc = buf.charAt(j); if (cc > ' ') { //same as String.trim() buf.delete(j + 1, len); break; } } break; //done } } return buf; } /** Returns an encoded string buffer, faster and shorter than * Integer.toHexString. It uses numbers and lower-case letters only. * Thus it is a valid variable name if prefix with an alphabet. * At least one character is generated. * * <p>It works even in system that is case-insensitive, such as IE. * * <p>It is useful to generate a string to represent a number. */ public static final StringBuffer encode(StringBuffer sb, int val) { if (val < 0) { sb.append('z'); val = -val; } do { int v = val & 31; if (v < 10) { sb.append((char)('0' + v)); } else { sb.append((char)(v + ((int)'a' - 10))); } } while ((val >>>= 5) != 0); return sb; } /** Returns an encoded string buffer, faster and shorter than * Long.toHexString. It uses numbers and lower-case letters only. * Thus it is a valid variable name if prefix with an alphabet. * At least one character is generated. * * <p>It works even in system that is case-insensitive, such as IE. * * <p>It is useful to generate a string to represent a number. */ public static final StringBuffer encode(StringBuffer sb, long val) { if (val < 0) { sb.append('z'); val = -val; } do { int v = ((int)val) & 31; if (v < 10) { sb.append((char)('0' + v)); } else { sb.append((char)(v + ((int)'a' - 10))); } } while ((val >>>= 5) != 0); return sb; } /** Returns an encoded string, faster and shorter than * Long.toHexString. */ public static final String encode(int val) { return encode(new StringBuffer(12), val).toString(); } /** Returns an encoded string, faster and shorter than * Long.toHexString. */ public static final String encode(long val) { return encode(new StringBuffer(20), val).toString(); } /** Returns the index of the give character in the given string buffer, * or -1 if not found. * It is equivalent to <code>sb.indexOf(""+cc, j);</code>, but faster. * @since 5.0.3 */ public static final int indexOf(StringBuffer sb, char cc, int j) { for (int len = sb.length(); j < len; ++j) if (sb.charAt(j) == cc) return j; return -1; } /** Returns the last index of the give character in the given string buffer, * or -1 if not found. * It is equivalent to <code>sb.lastIndexOf(""+cc, j);</code>, but faster. * @since 5.0.3 */ public static final int lastIndexOf(StringBuffer sb, char cc, int j) { if (j >= sb.length()) j = sb.length() - 1; for (; j >= 0; --j) if (sb.charAt(j) == cc) return j; return -1; } /** * Returns the index that is one of delimiters, or the length if none * of delimiter is found. * * <p>Unlike String.indexOf(String, int), this method returns the first * occurrence of <i>any</i> character in the delimiters. * * <p>This method is optimized to use String.indexOf(char, int) * if it found the length of delimiter is 1. * * @param src the source string to search * @param from the index to start the search from * @param delimiters the set of characters to search for * * @return the index that is one of delimiters. * If return >= src.length(), it means no such delimiters * @see #lastAnyOf */ public static final int anyOf(String src, String delimiters, int from) { switch (delimiters.length()) { case 0: return src.length(); case 1: final int j = src.indexOf(delimiters.charAt(0), from); return j >= 0 ? j: src.length(); } for (int len = src.length(); from < len && delimiters.indexOf(src.charAt(from)) < 0; ++from) ; return from; } /** * The backward version of {@link #anyOf}. * * <p>This method is optimized to use String.indexOf(char, int) * if it found the length of delimiter is 1. * * @return the previous index that is one of delimiter. * If it is negative, it means no delimiter in front of * <code>from</code> * @see #anyOf */ public static final int lastAnyOf(String src, String delimiters, int from) { switch (delimiters.length()) { case 0: return -1; case 1: return src.lastIndexOf(delimiters.charAt(0), from); } int len = src.length(); if (from >= len) from = len - 1; for (; from >= 0 && delimiters.indexOf(src.charAt(from)) < 0; --from) ; return from; } /** * Returns the next index after skipping whitespaces. */ public static final int skipWhitespaces(CharSequence src, int from) { for (final int len = src.length(); from < len && Character.isWhitespace(src.charAt(from)); ++from) ; return from; } /** * The backward version of {@link #skipWhitespaces}. * * @return the next index that is not a whitespace. * If it is negative, it means no whitespace in front of it. */ public static final int skipWhitespacesBackward(CharSequence src, int from) { final int len = src.length(); if (from >= len) from = len - 1; for (; from >= 0 && Character.isWhitespace(src.charAt(from)); --from) ; return from; } /** Returns the next whitespace. */ public static final int nextWhitespace(CharSequence src, int from) { for (final int len = src.length(); from < len && !Character.isWhitespace(src.charAt(from)); ++from) ; return from; } /** Escapes (a.k.a, quote) the special characters with backslash. * It prefix a backslash to any characters specified in the specials * argument. * * <p>Note: specials usually contains '\\'. * * <p>For example, {@link org.zkoss.util.Maps#parse} will un-quote * backspace. Thus, if you want to preserve backslash, you have * invoke escape(s, "\\") before calling Maps.parse(). * * @param src the string to process. If null, null is returned. * @param specials a string of characters that shall be escaped/quoted * To escape a string in JavaScript code snippet, you can use {@link #ESCAPE_JAVASCRIPT}. * @see #unescape */ public static final String escape(String src, String specials) { if (src == null) return null; final char[] chars = src.toCharArray(); StringBuilder sb = null; int j = 0; l_out: for (int j2 = 0, len = chars.length;;) { String enc = null; char cc; int k = j2; for (;; ++k) { if (k >= len) break l_out; cc = chars[k]; if (shallEncodeUnicode(cc, specials)) { enc = encodeUnicode(cc); break; } if (specials.indexOf(cc) >= 0) break; } if (enc == null && (cc = escapeSpecial(src, cc, k, specials)) == (char)0) { j2 = k + 1; continue; } if (sb == null) sb = new StringBuilder(len + 4); sb.append(Arrays.copyOfRange(chars, j, k)).append('\\'); if (enc != null) sb.append(enc); else sb.append(cc); j2 = j = k + 1; } if (sb == null) return src; //nothing changed return sb.append(Arrays.copyOfRange(chars, j, chars.length)).toString(); } private static char escapeSpecial(CharSequence src, char cc, int k, String specials) { switch (cc) { case '\n': return 'n'; case '\t': return 't'; case '\r': return 'r'; case '\f': return 'f'; case '/': String key; //escape </script> if (ESCAPE_JAVASCRIPT.equals(specials) // handle it specially && (k <= 0 || src.charAt(k - 1) != '<' || k + 8 > src.length() || !("script>" .equalsIgnoreCase((key = src.subSequence(k + 1, k + 8).toString())) || "script " .equalsIgnoreCase(key)))) { return (char) 0; // don't escape } break; case '!': //escape <!-- (ZK-676: it causes problem if used with <script>) if (ESCAPE_JAVASCRIPT.equals(specials) //handle it specially && (k <= 0 || src.charAt(k - 1) != '<' || k + 3 > src.length() || !"--".equals(src.subSequence(k+1, k+3)))) { return (char)0; //don't escape } break; } return cc; } /** Escapes (a.k.a. quote) the special characters with backslash * and appends it the specified string buffer. * * @param dst the destination buffer to append to. * @param src the source to escape from. * @param specials a string of characters that shall be escaped/quoted * To escape a string in JavaScript code snippet, you can use {@link #ESCAPE_JAVASCRIPT}. * @since 5.0.0 */ public static final StringBuffer escape(StringBuffer dst, CharSequence src, String specials) { if (src == null) return dst; for (int j = 0, j2 = 0, len = src.length();;) { String enc = null; char cc; int k = j2; for (;; ++k) { if (k >= len) return dst.append((Object)src.subSequence(j, src.length())); cc = src.charAt(k); if (shallEncodeUnicode(cc, specials)) { enc = encodeUnicode(cc); break; } if (specials.indexOf(cc) >= 0) break; } if (enc == null && (cc = escapeSpecial(src, cc, k, specials)) == (char)0) { j2 = k + 1; continue; } dst.append((Object)src.subSequence(j, k)).append('\\'); if (enc != null) dst.append(enc); else dst.append(cc); j2 = j = k + 1; } } /** Escapes (a.k.a. quote) the special characters with backslash * and appends it the specified string buffer. * * @param dst the destination buffer to append to. * @param src the source to escape from. * @param specials a string of characters that shall be escaped/quoted * To escape a string in JavaScript code snippet, you can use {@link #ESCAPE_JAVASCRIPT}. * @since 8.0.0 */ public static final StringBuilder escape(StringBuilder dst, CharSequence src, String specials) { if (src == null) return dst; String str = src.toString(); char[] chars = str.toCharArray(); for (int j = 0, j2 = 0, len = chars.length;;) { String enc = null; char cc; int k = j2; for (;; ++k) { if (k >= len) { return dst.append(Arrays.copyOfRange(chars, j, len)); } cc = chars[k]; if (shallEncodeUnicode(cc, specials)) { enc = encodeUnicode(cc); break; } if (specials.indexOf(cc) >= 0) break; } if (enc == null && (cc = escapeSpecial(src, cc, k, specials)) == (char)0) { j2 = k + 1; continue; } dst.append(Arrays.copyOfRange(chars, j, k)).append('\\'); if (enc != null) dst.append(enc); else dst.append(cc); j2 = j = k + 1; } } /** Escapes (a.k.a. quote) the special characters with backslash. * <p>Note: this implementation is referred from <a href="https://github.com/unbescape/unbescape">unbescape</a></p> * @since 8.0.0 */ public static final String escapeJavaScript(String text) { // We utilize the unbescape project's implementation to do the escape for Javascript value // which license is under Apache License 2.0 - https://github.com/unbescape/unbescape return JavaScriptEscape.escapeJavaScript(text); } private static final boolean shallEncodeUnicode(char cc, String specials) { return ESCAPE_JAVASCRIPT.equals(specials) && cc > (char)255 && !Character.isLetterOrDigit(cc); //don't check isSpaceChar since \u2028 will return true and it //is not recognized by Firefox } /** Return "u????". */ private static final String encodeUnicode(int cc) { final StringBuilder sb = new StringBuilder(6) .append('u').append(Integer.toHexString(cc)); while (sb.length() < 5) sb.insert(1, '0'); return sb.toString(); } /** Un-escape the quoted string. * @see #escape */ public static final String unescape(String s) { if (s == null) return null; StringBuilder sb = null; int j = 0; for (int k; (k = s.indexOf('\\', j)) >= 0;) { if (sb == null) sb = new StringBuilder(s.length()); char cc = s.charAt(k + 1); switch (cc) { case 'n': cc = '\n'; break; case 't': cc = '\t'; break; case 'r': cc = '\r'; break; case 'f': cc = '\f'; break; } sb.append(s.substring(j, k)).append(cc); j = k + 2; } if (sb == null) return s; //nothing changed return sb.append(s.substring(j)).toString(); } /** * Returns the substring from the <code>from</code> index up to the * <code>until</code> character or end-of-string. * Unlike String.subsring, it converts \f, \n, \t and \r. It doesn't * handle u and x yet. * * @return the result (never null). Result.next is the position of * the <code>until</code> character if found, or * a number larger than length() if no such character. */ public static final Result substring(String src, int from, char until) { return substring(src, from, until, true); } /** * Returns the substring from the <code>from</code> index up to the * <code>until</code> character or end-of-string. * * @param escBackslash whether to treat '\\' specially (as escape char) * It doesn't handle u and x yet. * @return the result (never null). Result.next is the position of * the <code>until</code> character if found, or * a number larger than length() if no such character. * You can tell which case it is by examining {@link Result#separator}. */ public static final Result substring(String src, int from, char until, boolean escBackslash) { final int len = src.length(); final StringBuilder sb = new StringBuilder(len); for (boolean quoted = false; from < len; ++from) { char cc = src.charAt(from); if (quoted) { quoted = false; switch (cc) { case 'f': cc = '\f'; break; case 'n': cc = '\n'; break; case 'r': cc = '\r'; break; case 't': cc = '\t'; break; } } else if (cc == until) { break; } else if (escBackslash && cc == '\\') { quoted = true; continue; //skip it } sb.append(cc); } return new Result(from, sb.toString(), from < len ? until: (char)0); } /** Returns the next token with unescape. * <ul> * <li>It trims whitespaces before and after the token.</li> * <li>It handles both '\'' and '"'. All characters between them are * considered as a token.</li> * <li>If nothing found before end-of-string, null is returned</li> * </ul> * * If a separator is found, it is returned in * {@link Strings.Result#separator}. * * @exception IllegalSyntaxException if the quoted string is unclosed. */ public static final Result nextToken(String src, int from, char[] separators) throws IllegalSyntaxException { return nextToken(src, from, separators, true, true, false); } /** Returns the next token with additional options. * It is the same as nextToken(src, from, separators, escBackslash, quotAsToken, false); * Refer to {@link #nextToken(String, int, char[], boolean, boolean, boolean)} * for more information. * * @exception IllegalSyntaxException if the quoted string is unclosed. * @see #nextToken(String, int, char[], boolean, boolean, boolean) */ public static final Result nextToken(String src, int from, char[] separators, boolean escBackslash, boolean quotAsToken) throws IllegalSyntaxException { return nextToken(src, from, separators, escBackslash, quotAsToken, false); } /** Returns the next token with additional options. * * <ul> * <li>It trims whitespaces before and after the token.</li> * <li>If quotAsToken is true, all characters between quotations * ('\'' or '"') are considered as a token.</li> * <li>If parenthesis is true, the separators and quotes inside * a pair of parenthesis won't be treated specially. * It is useful if EL expressions might be contained.</li> * <li>Consider '\\' as the escape char if escBackslash is true.</li> * <li>If nothing found before end-of-string, null is returned</li> * </ul> * * If a separator is found, it is returned in * {@link Strings.Result#separator}. * * @param escBackslash whether to treat '\\' specially (as escape char) * It doesn't handle u and x yet. * @param quotAsToken whether to treat characters inside '\'' or '"' * as a token. Note: the quotes are excluded from the token. * @param parenthesis whether to ignore separators and quotes inside * a pair of parentheses. Recognized parentheses include * {}, [] or (). * @exception IllegalSyntaxException if the quoted string is unclosed. * @since 3.0.6 */ public static final Result nextToken(String src, int from, char[] separators, boolean escBackslash, boolean quotAsToken, boolean parenthesis) throws IllegalSyntaxException { final int len = src.length(); from = skipWhitespaces(src, from); if (from >= len) return null; //end-of-string //1. handle quoted final char cc = src.charAt(from); if (quotAsToken && (cc == '\'' || cc == '"')) { final Result res = substring(src, from + 1, cc, escBackslash); if (res.separator != cc) throw new IllegalSyntaxException(MCommon.QUOTE_UNMATCHED, src); res.next = skipWhitespaces(src, res.next + 1); if (res.next < len && isSeparator(src.charAt(res.next), separators)) ++res.next; return res; } //2. handle not-quoted final int j = nextSeparator(src, from, separators, escBackslash, false, quotAsToken, parenthesis); int next = j; if (j < len) { if (quotAsToken) { final char c = src.charAt(j); if (c != '\'' && c != '"') ++next; } else { ++next; } } if (j == from) //nothing but separator return new Result(next, "", src.charAt(j)); int k = 1 + skipWhitespacesBackward(src, j - 1); return new Result(next, k > from ? escBackslash ? unescape(src.substring(from, k)) : src.substring(from, k): "", j < len ? src.charAt(j): (char)0); //if the token is nothing but spaces, k < from } /** Returns the next separator index in the src string. * * @param escQuot whether to escape characters inside quotations * ('\'' or '"'). In other words, ignore separators inside quotations * @param quotAsSeparator whether to consider quotations as one of * the separators * @since 2.4.0 */ public static int nextSeparator(String src, int from, char[] separators, boolean escBackslash, boolean escQuot, boolean quotAsSeparator) { return nextSeparator(src, from, separators, escBackslash, escQuot, quotAsSeparator, false); } /** Returns the next separator index in the src string. * * @param escQuot whether to escape characters inside quotations * ('\'' or '"'). In other words, it specifies whether to ignore * separators inside quotations * @param quotAsSeparator whether to consider quotations as one of * the separators. * If escQuot is true, quotAsSeparator is ignored. * @param parenthesis whether to ignore separators and quotes in side * a pair of parentheses. Recognized parentheses include * {}, [] or (). * @since 3.0.6 */ public static int nextSeparator(String src, int from, char[] separators, boolean escBackslash, boolean escQuot, boolean quotAsSeparator, boolean parenthesis) { boolean esc = false; char quot = (char)0, endparen; for (final int len = src.length(); from < len; ++from) { if (esc) { esc = false; continue; } final char cc = src.charAt(from); if (escBackslash && cc == '\\') { esc = true; } else if (quot != (char)0) { if (cc == quot) quot = (char)0; } else if (escQuot && (cc == '\'' || cc == '"')) { quot = cc; } else if ((quotAsSeparator && (cc == '\'' || cc == '"')) || isSeparator(cc, separators)) { break; } else if (parenthesis && (endparen = getEndingParenthesis(cc)) != (char)0) { from = skipParenthesis(src, from, cc, endparen); if (from >= len) break; //don't increase } } return from; } /** * Joins the elements of the array into a single String separated by a comma. * @param array * @since 8.0.0 */ public static final <T> String join(T[] array) { return join(array, ','); } /** * Joins the elements of the array into a single String separated by the given separator. * @param array * @param separator * @since 8.0.0 */ public static final <T> String join(T[] array, char separator) { if (array == null) return null; return join(array, separator, 0, array.length); } /** * Joins the elements of the array into a single String separated by the given separator * from the startIndex to the endIndex. * @param array * @param separator * @param startIndex * @param endIndex * @since 8.0.0 */ public static final <T> String join(T[] array, char separator, int startIndex, int endIndex) { if (array == null) return null; final int items = endIndex - startIndex; if (items <= 0) return EMPTY; final StringBuilder buf = new StringBuilder(items * 16); for (int i = startIndex; i < endIndex; i++) { if (i > startIndex) { buf.append(separator); } buf.append(array[i]); } return buf.toString(); } /** * Converts the given byte[] to UTF-8 string, if possible. Or return with the default charset. * <p>This is a method helper for JDK 5</p> * @since 8.0.2 */ public static final String toString(byte[] data) { try { return new String(data, "UTF-8"); } catch (UnsupportedEncodingException e) { return new String(data); } } /** Returns the ending parenthesis (such as }), * or (char)0 if cc is not the beginning parenthesis (such as {). */ private static final char getEndingParenthesis(char cc) { return cc == '{' ? '}': cc == '(' ? ')': cc == '[' ? ']': (char)0; } /** Skip the string enclosed by a pair of parenthesis and * return index after the ending parenthesis. * @param j the index of the starting parenthesis */ private static int skipParenthesis(String src, int j, char beg, char end) { for (int len = src.length(), depth = 0; ++j < len;) { final char cc = src.charAt(j); if (cc == '\\') ++j; //skip next else if (cc == beg) ++depth; else if (cc == end && --depth < 0) break; } return j; } private static final boolean isSeparator(char cc, char[] separators) { for (int j = 0; j < separators.length; ++j) { if (cc == separators[j] || (separators[j] == ' ' && Character.isWhitespace(cc))) return true; } return false; } /** The result of {@link #substring}. */ public static class Result { /** The next index. */ public int next; /** The converted string. */ public String token; /** The separator found. If no separator but end-of-line found, * ((char)0) is returned. */ public char separator; protected Result(int next, String token, char separator) { this.next = next; this.token = token; this.separator = separator; } protected Result(int next, char separator) { this.next = next; this.separator = separator; } //-- Object --// public String toString() { return "[next="+next+", token="+token+" separator="+separator+']'; } } }