//------------------------------------------------------------------------------ // Copyright (c) 2005, 2007 IBM Corporation and others. // All rights reserved. This program and the accompanying materials // are made available under the terms of the Eclipse Public License v1.0 // which accompanies this distribution, and is available at // http://www.eclipse.org/legal/epl-v10.html // // Contributors: // IBM Corporation - initial implementation //------------------------------------------------------------------------------ package org.eclipse.epf.common.utils; import java.net.URI; import java.util.List; import java.util.Map; import java.util.regex.Pattern; import org.eclipse.core.runtime.IPath; import org.eclipse.core.runtime.Path; import org.eclipse.core.runtime.Platform; import org.eclipse.epf.common.CommonPlugin; import com.ibm.icu.lang.UCharacter; import com.ibm.icu.util.StringTokenizer; /** * A helper class for manuipulating strings. * * @author Kelvin Low * @author Jinhua Xi * @since 1.0 */ public class StrUtil { public static final String EMPTY_STRING = ""; //$NON-NLS-1$ public static final String TAB = "\t"; //$NON-NLS-1$ private static final String REGEXP_ANY_SPECIAL_CHARS = "(`|~|!|@|#|\\$|%|\\^|&|\\*|\\(|\\)|\\+|=|\\[|\\]|\\||\\:|\"|<|>|\\?|/|'|\\s|\\\\)+"; //$NON-NLS-1$ private static final String REGEXP_INVALID_PUBLISHED_PATH_CHARS = "(\\[|#|\\*|\\?|\"|<|>|\\||!|%|/|\\])+"; //$NON-NLS-1$ private static final String REGEXP_INVALID_PUBLISHED_PATH_CHARS_LINUX = "(\\[|#|\\*|\\?|\"|<|>|\\||!|%|\\])+"; //$NON-NLS-1$ private static final String REGEXP_INVALID_FILENAME_CHARS = "(\\[|#|/|\\\\|\\:|\\*|\\?|\"|<|>|\\||\\]|\\s)+"; //$NON-NLS-1$ private static final char[] HEX_DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' }; public static final String LINE_FEED = System.getProperty("line.separator"); //$NON-NLS-1$ public static final String ESCAPED_LF = " "; //$NON-NLS-1$ public static final String ESCAPED_CR = " "; //$NON-NLS-1$ public static final String ESCAPED_LINE_FEED = LINE_FEED.replace( "\n", ESCAPED_LF).replace("\r", ESCAPED_CR); //$NON-NLS-1$ //$NON-NLS-2$ public static final String LINE_FEED_REGEX = LINE_FEED.replaceAll( "\\\\", "\\\\"); //$NON-NLS-1$ //$NON-NLS-2$ public static final String ESCAPED_LINE_FEED_REGEX = ESCAPED_LINE_FEED; public static final String HTML_BREAK = "<br/>";//$NON-NLS-1$ public static final String HTML_COPY = "©";//$NON-NLS-1$ public static final String HTML_EURO = "€";//$NON-NLS-1$ public static final String HTML_REG = "®";//$NON-NLS-1$ public static final String HTML_TRADEMARK = "™";//$NON-NLS-1$ public static boolean during_migration = false; private static StrUtilOptions options; /** * Private constructor to prevent this class from being instantiated. All * methods in this class should be static. */ private StrUtil() { } /** * Tests for null string. * <p> * A null string is defined as one that has an empty reference or has zero * length. * * @param str * a string * @return <code>true</code> if the given string is a null string */ public static boolean isNull(String str) { return str == null || str.length() == 0; } /** * Tests for blank string. * <p> * A blank string is defined as one that has an empty reference or has zero * length after the leading and trailing space characters are trimmed. * * @param str * a string * @return <code>true</code> if the given string is a blank string */ public static boolean isBlank(String str) { return str == null || str.trim().length() == 0; } /** * Removes the leading and trailing space characters from a string. * * @param str * a string * @return a string with no leading and trailing space characters */ public static String trim(String str) { return str == null ? null : str.trim(); } /** * Removes whitespace characters (TAB, CR, LF and SPACE) from a string. * * @param str * a string * @return a string with whitespace characters removed */ public static String removeWhiteSpaceChars(String str) { int len = (str == null) ? 0 : str.length(); for (int i = 0; i < len; i++) { switch (str.charAt(i)) { case '\t': case '\r': case '\n': case ' ': break; default: return str; } } return ""; //$NON-NLS-1$ } /** * Removes CR, LF from a string. * * @param str * a string * @return a string with CR, LF removed */ public static String removeNewlines(String str) { StringBuffer sb = new StringBuffer(); int len = (str == null) ? 0 : str.length(); for (int i = 0; i < len; i++) { char c = str.charAt(i); switch (c) { case '\r': case '\n': break; default: sb.append(c); } } return sb.toString(); } /** * Splits a string into an array of string tokens. * * @param str * a string * @param sep * a string containing the string separators * @param count * the desired number of string tokens * @return an array of string tokens */ public static String[] split(String str, String sep, int count) { if (str == null || count == 0 || count < -1) { return null; } StringTokenizer tokenizer = new StringTokenizer(str, sep, count == -1 ? false : true); if (count == -1) { count = tokenizer.countTokens(); } String[] result = new String[count]; int i = 0; while (tokenizer.hasMoreTokens()) { String t = tokenizer.nextToken(); if (i < count) { if ((t.length() == 1) && (sep.indexOf(t) != -1)) { continue; } result[i++] = t; } else { result[count - 1] += t; } } return result; } /** * Splits a string into an array of string tokens. * * @param str * a string * @param sep * a string containing the string separators * @return an array of string tokens */ public static String[] split(String str, String sep) { return split(str, sep, -1); } /** * Replaces a substring within a string with another substring. * <p> * Note: Only the first occurrence of the substring will be replaced. * * @param str * a string * @param src * the substring to replace * @param tgt * the substring to use for the replacement * @return a string with the first substring replaced * * TODO: Review implementation. Call String.replaceFirst() instead? */ public static String replace(String str, String src, String tgt) { if ((str == null || str.length() == 0) || (src == null || src.length() == 0)) { return str; } String tmpStr = str; int index; while ((index = tmpStr.indexOf(src)) != -1) { tmpStr = tmpStr.substring(0, index) + tgt + tmpStr.substring(index + src.length()); } return tmpStr; } /** * Returns the integer value of a string. * * @param str * a string * @param defaultValue * a default integer value for the string * @return the integer value of the given string */ public static int getIntValue(String str, int defaultValue) { if (StrUtil.isBlank(str)) { return defaultValue; } try { return Integer.parseInt(str); } catch (NumberFormatException e) { return defaultValue; } } /** * Returns an array of bytes representing the UTF-8 encoding of a string. * * @param str * a string * @return a byte array containing the UTF-8 encoding of the given string */ public static byte[] getUTF8Bytes(String str) { char[] c = str.toCharArray(); int len = c.length; int count = 0; for (int i = 0; i < len; i++) { int ch = c[i]; if (ch <= 0x7f) { count++; } else if (ch <= 0x7ff) { count += 2; } else { count += 3; } } byte[] b = new byte[count]; int off = 0; for (int i = 0; i < len; i++) { int ch = c[i]; if (ch <= 0x7f) { b[off++] = (byte) ch; } else if (ch <= 0x7ff) { b[off++] = (byte) ((ch >> 6) | 0xc0); b[off++] = (byte) ((ch & 0x3f) | 0x80); } else { b[off++] = (byte) ((ch >> 12) | 0xe0); b[off++] = (byte) (((ch >> 6) & 0x3f) | 0x80); b[off++] = (byte) ((ch & 0x3f) | 0x80); } } return b; } /** * Returns the hexidecimal character representation for an integer. * * @param value * an integer * @return the hexidecimal representation */ private static char toHex(int value) { return HEX_DIGITS[(value & 0xF)]; } /** * Returns the escaped Unicode representation of a string. * * @param str * a string * @param skipASCII * if <code>true</code>, avoid escaping the ASCII characters * @return the escaped Unicode representation of the given string */ public static String toEscapedUnicode(String str, boolean skipASCII) { int len = str.length(); StringBuffer result = new StringBuffer(len * 2); for (int i = 0; i < len; i++) { char ch = str.charAt(i); if (skipASCII && ch < 0x007E) { result.append(ch); } else { result.append("\\u"); //$NON-NLS-1$ result.append(toHex((ch >> 12) & 0xF)); result.append(toHex((ch >> 8) & 0xF)); result.append(toHex((ch >> 4) & 0xF)); result.append(toHex(ch & 0xF)); } } return result.toString(); } /** * Returns the escaped HTML representation of a string. * * @param html * a HTML string * @return the escaped Unicode representation of the given HTML string */ public static String getEscapedHTML(String html) { if (html == null || html.length() == 0) { return ""; //$NON-NLS-1$ } StrUtilOptions options = getOptions(); StringBuffer result = new StringBuffer(); int length = html.length(); for (int i = 0; i < length; i++) { char ch = html.charAt(i); switch (ch) { case '%': if (i + 4 < length) { String hexStr = html.substring(i + 1, i + 5); boolean validHextStr = true; for (int j = 0; j < hexStr.length(); j++) { char c = hexStr.charAt(j); if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F'))) { validHextStr = false; break; } } if (options == null) { //code below will treat "%20de" as " de" //this may lose some double bytes character(e.g. Chinese), which start with %20, but keep all url links //so far open this convertion not only during library migration to support file like "my design.gif" if (/*during_migration && */validHextStr) { if (hexStr.startsWith("20")) { //$NON-NLS-1$ result.append("%20"); //$NON-NLS-1$ i += 2; break; } } } else { int ix = options.getRteUrlDecodingOption(); if (ix == 1) { validHextStr = false; } else if (ix == 2) { String key = getHexStr("%" + hexStr); if (key != null && options.getRteUrlDecodingHexMap().containsKey(key)) { validHextStr = false; } } } if (validHextStr) { try { int codePoint = Integer.parseInt(hexStr, 16); char[] c = UCharacter.toChars(codePoint); result.append(c); i += 4; break; } catch (NumberFormatException e) { // wasn't a valid hex string.. // fall through to the result.append(ch) } catch (Exception e) { CommonPlugin.getDefault().getLogger().logError(e); } } } result.append(ch); break; case '\u00a9': result.append(HTML_COPY); break; case '\u00ae': result.append(HTML_REG); break; case '\u20ac': result.append(HTML_EURO); break; case '\u2122': result.append(HTML_TRADEMARK); break; default: result.append(ch); break; } } return result.toString(); } /** * Returns the plain text from HTML text. * <p> * Note: All HTML tags will be stripped. * * @param html * the HTML text. * @return the plain text representation of the given HTML text */ public static String getPlainText(String html) { if (html == null) { return ""; //$NON-NLS-1$ } final Pattern p_plaintext_filter = Pattern.compile( "<[^>]*?>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); //$NON-NLS-1$ final Pattern p_plaintext_filter2 = Pattern.compile( "&.{1,5}[^;];", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); //$NON-NLS-1$ final Pattern p_plaintext_filter3 = Pattern.compile( "\\s+", Pattern.CASE_INSENSITIVE | Pattern.DOTALL); //$NON-NLS-1$ String result = html.replaceAll(p_plaintext_filter.pattern(), " ") //$NON-NLS-1$ .replaceAll(p_plaintext_filter2.pattern(), " ") //$NON-NLS-1$ .replaceAll(p_plaintext_filter3.pattern(), " "); //$NON-NLS-1$ return result; } /** * Converts a string into a valid file name. * * @param str * a string * @return a valid file name derived from the given string */ public static String makeValidFileName(String str) { if (str == null) { return ""; //$NON-NLS-1$ } return getPlainText(str) .replaceAll(REGEXP_INVALID_FILENAME_CHARS, " ").trim(); //$NON-NLS-1$ } public static String removeSpecialCharacters(String str) { if (str == null) { return ""; //$NON-NLS-1$ } return getPlainText(str) .replaceAll(REGEXP_ANY_SPECIAL_CHARS, " ").trim(); //$NON-NLS-1$ } /** * Returns true if the path does not contain any invalid filename * characters. * * @param path * the file path * @return <code>true</code> if the given path contains only valid * filename characters */ public static boolean isValidPublishPath(String path) { // return path.replaceAll(invalidPublishPathCharsRegExp, // "").equals(path); if (Platform.getOS().equals(Platform.WS_WIN32)) { return path .replaceAll(REGEXP_INVALID_PUBLISHED_PATH_CHARS, "").equals(path); //$NON-NLS-1$ } // else default to Linux return path .replaceAll(REGEXP_INVALID_PUBLISHED_PATH_CHARS_LINUX, "").equals(path); //$NON-NLS-1$ } /** * Returns the lower-case of str unless the current locale is German * * @param str * @return */ public static String toLower(String str) { if (Platform.getNL().startsWith("de")) { //$NON-NLS-1$ return str; } return str.toLowerCase(); } /** * escape the " and ' * * @param str * @return */ public static String escape(String str) { return str.replaceAll("'", "\\\\\'").replaceAll("\"", "\\\\\""); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ } /** * Converts a <code>List</code> to an String array. * * @param list * a <code>List</code> * @return a String array */ public static String[] convertListToStrArray(List list) { if (list != null) { int cnt = list.size(); String[] strArray = new String[cnt]; for (int i = 0; i < cnt; i++) { String str = (String) list.get(i); strArray[i] = new String(str); } return strArray; } else { return null; } } public static boolean isValidName(String name) { return name.replaceAll(REGEXP_ANY_SPECIAL_CHARS, "").equals(name); //$NON-NLS-1$ } /** * added some test code * * @param args */ public static void main(String[] args) { String text = "A&% `&# $~!@#$%^&*()_-+={}[\"]:|\\:;\"'<,>.?/ \t\r\nZ"; //$NON-NLS-1$ System.out.println("[" + text + "] <== text"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println("[" + text.replaceAll(REGEXP_ANY_SPECIAL_CHARS, " ") //$NON-NLS-1$ //$NON-NLS-2$ + "] <== All"); //$NON-NLS-1$ System.out.println("[" //$NON-NLS-1$ + text.replaceAll(REGEXP_INVALID_FILENAME_CHARS, " ") //$NON-NLS-1$ + "] <== File Name"); //$NON-NLS-1$ System.out.println("[" //$NON-NLS-1$ + text.replaceAll(REGEXP_INVALID_PUBLISHED_PATH_CHARS, " ") //$NON-NLS-1$ + "] <== path"); //$NON-NLS-1$ System.out.println("[" //$NON-NLS-1$ + text.replaceAll(REGEXP_INVALID_PUBLISHED_PATH_CHARS_LINUX, " ") + "] <== path, Linux"); //$NON-NLS-1$ //$NON-NLS-2$ System.out.println(""); //$NON-NLS-1$ } /** * Converts the platform line-separator (\n or \n\r or \r) to <br/> * * @param text * @return */ public static String convertNewlinesToHTML(String text) { if (text != null) { text = text.replaceAll(LINE_FEED_REGEX, HTML_BREAK + LINE_FEED); text = text.replaceAll(ESCAPED_LINE_FEED_REGEX, HTML_BREAK + ESCAPED_LINE_FEED); } return text; } public static URI toURI(String pathStr) { if (pathStr != null && pathStr.length() > 0) { IPath path = Path.fromOSString(pathStr); try { return path.toFile().toURI(); } catch (Exception e) { CommonPlugin.getDefault().getLogger().logError(e); } } return null; } public static String escapeChar(String text, char c) { int i=text.indexOf(c); if ( i < 0 ) { return text; } int start = 0; StringBuffer buffer = new StringBuffer(); while ( i > start ) { buffer.append(text.substring(start, i)).append("\\"); //$NON-NLS-1$ start = i; i=text.indexOf(c, start+1); } buffer.append(text.substring(start)); return buffer.toString(); } public static String getHexStr(String str) { if (str.length() < 3) { return null; } if (str.charAt(0) != '%') { return null; } StringBuffer b = new StringBuffer(); b.append('%'); for (int i = 1 ; i <= 2; i++) { char c = str.charAt(i); if (c >= 'a' && c <= 'z') { c -= 'a'; c += 'A'; } boolean valid = (c >= '0' && c <= '9') || (c >= 'A' && c <= 'F'); if (!valid) { return null; } b.append(c); } return b.toString(); } public interface StrUtilOptions { int getRteUrlDecodingOption(); Map<String, String> getRteUrlDecodingHexMap(); } public static StrUtilOptions getOptions() { return options; } public static void setOptions(StrUtilOptions options) { StrUtil.options = options; } }