/* * Copyright (c) 2009 Nokia Corporation and/or its subsidiary(-ies). * All rights reserved. * This component and the accompanying materials are made available * under the terms of the License "Eclipse Public License v1.0" * which accompanies this distribution, and is available * at the URL "http://www.eclipse.org/legal/epl-v10.html". * * Initial Contributors: * Nokia Corporation - initial contribution. * * Contributors: * * Description: * */ package ejs.base.utils; import java.util.*; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Text handling utilities * */ public class TextUtils { /** Pattern describing a run of illegal identifier characters */ private static final Pattern illegalIdentifierCharsPattern = Pattern.compile("[^A-Za-z0-9_]+"); //$NON-NLS-1$ /** * Return true if text is null or zero-length */ public static boolean isEmpty(String text) { return text == null || text.length() == 0; } /** * Return the given text or an empty string, never null */ public static String safeStr(String text) { return text != null? text : ""; } /** * Return string length or zero for null string * @param text * @return length, or 0 */ public static int strlen(String text) { return text != null? text.length() : 0; } /** * Strip the rightmost extension from a file name, * e.g. foo.a.b -> foo.a. * If the string is an extension only, e.g. .foo then * nothing is stripped. * @param text * @return new string, without trailing period */ public static String stripExtension(String text) { String result = text; if (text != null) { int pos = text.lastIndexOf('.'); if (pos >= 1) { result = text.substring(0, pos); } } return result; } /** * Get the rightmost extension from a file name. * If the string is an extension only, e.g. .foo then * the whole string is returned. If there's no '.' then * null is returned * @param text * @return the extension, without a period */ public static String getExtension(String text) { String result = null; if (text != null) { int pos = text.lastIndexOf('.'); if (pos >= 0) { result = text.substring(pos+1); } } return result; } /** * Clean up text from an XML node. Removes leading and trailing * text and converts all runs of whitespace to single spaces. * * @param text input string * @return cleaned-up string */ public static String cleanUpXMLText(String text) { if (text == null) return null; // map embedded whitespace to space characters Pattern patt = Pattern.compile("\\s+", Pattern.MULTILINE); //$NON-NLS-1$ Matcher matcher = patt.matcher(text); text = matcher.replaceAll(" "); //$NON-NLS-1$ // remove leading & trailing whitespace return text.trim(); } /** * Parse a string into an integer, returning a default * value instead of throwing an exception for invalid strings. */ public static int parseInt(String s, int defaultValue) { int result = defaultValue; // avoid spurious IllegalArgumentException if (s != null) { try { result = Integer.parseInt(s); } catch (NumberFormatException x) { } } return result; } /** * Parse a string into a float, returning a default * value instead of throwing an exception for invalid strings. */ public static float parseFloat(String s, float defaultValue) { float result = defaultValue; try { result = Float.parseFloat(s); } catch (NumberFormatException x) { } return result; } /** * Escape all the characters in the string which might be * used in regular expressions * @param text * @return escaped text */ public static String regexEscape(String text) { StringBuffer buffer = new StringBuffer(); char[] chars = text.toCharArray(); for (int i = 0; i < chars.length; i++) { if ("[{()}]\\+*^$|".indexOf(chars[i]) >= 0) { //$NON-NLS-1$ buffer.append('\\'); } buffer.append(chars[i]); } return buffer.toString(); } /** * Change all the newlines to "\n" * @param text * @return text with canonical newlines */ public static String canonicalizeNewlines(String text) { return canonicalizeNewlines(text, "\n"); //$NON-NLS-1$ } /** * Change all the newlines to the given eol sequence * @param text * @param eol * @return text with canonical newlines */ public static String canonicalizeNewlines(String text, String eol) { Matcher matcher = ANY_NEWLINE_MATCHING_PATTERN.matcher(text); return matcher.replaceAll(eol); //$NON-NLS-1$ } static final Pattern patternUnixNewlineMatch = Pattern.compile("\n"); //$NON-NLS-1$ static final Pattern patternUnixNewlinesMatch = Pattern.compile("\n+"); //$NON-NLS-1$ public static final String LINE_ENDING_PATTERN_STRING = "(\r\n|\r|\n)"; //$NON-NLS-1$ /** Pattern which matches a single instance of any newline pattern. */ public static final Pattern ANY_NEWLINE_MATCHING_PATTERN = Pattern.compile(LINE_ENDING_PATTERN_STRING); //$NON-NLS-1$ /** * Return a pattern which matches the given literal text, * except that the particular newline in use (represented by '\n' in the * literal) can match any newline style in the target. * @param lit * @param newlineRuns if true, match >=1 newlines for each \n, else match * only the exact number provided * @return pattern */ public static Pattern getNewlineIndependentPattern(String lit, boolean newlineRuns) { String rxLit = regexEscape(lit); Matcher matcher = patternUnixNewlineMatch.matcher(rxLit); String repl = LINE_ENDING_PATTERN_STRING; rxLit = matcher.replaceAll(newlineRuns ? repl + "+" : repl); //$NON-NLS-1$ return Pattern.compile(rxLit); } /** * Return the string in Titlecase, i.e. the first character is capitalized. * If the first character is not a letter, there is no change. * @param name * @return titlecased string */ public static String titleCase(String name) { if (name.length() == 0) return name; return Character.toUpperCase(name.charAt(0)) + name.substring(1); } /** * Return the string in inverse titlecase, i.e. the first character is lowercased. * If the first character is not a letter, there is no change. * @param name * @return inverse titlecased string */ public static String inverseTitleCase(String name) { if (name.length() == 0) return name; return Character.toLowerCase(name.charAt(0)) + name.substring(1); } /** * Tell if the given name is a legal identifier, according to the * common rules that it must start with an letter or underscore * and be followed by alphanumeric characters or underscores. * @param name * @return true: legal */ public static boolean isLegalIdentifier(String name) { return !illegalIdentifierCharsPattern.matcher(name).find(); } /** * Return a version of the name with all the runs of illegal * characters changed to underscores. * @param name * @return identifier-legal string */ public static String legalizeIdentifier(String name) { // replace runs of non-identifier characters with "_"; Matcher matcher = illegalIdentifierCharsPattern.matcher(name); name = matcher.replaceAll("_"); //$NON-NLS-1$ if (name.length() > 0 && Character.isDigit(name.charAt(0))) return "_" + name; //$NON-NLS-1$ else return name; } /** * Escape a string 'val' into C/C++ style, doubling * backslashes and escaping the quote character. * @param val incoming "pure" string * @param quote the quote character * @return the string with interesting characters escaped */ static public String escape(String val, char quote) { StringBuffer buff = new StringBuffer(); for (int i = 0; i < val.length(); i++) { char ch = val.charAt(i); if (ch == quote || ch == '\\') { buff.append('\\'); buff.append(ch); } else if (ch < 32) { switch (ch) { case '\t': buff.append("\\t"); //$NON-NLS-1$ break; case '\n': buff.append("\\n"); //$NON-NLS-1$ break; case '\r': buff.append("\\r"); //$NON-NLS-1$ break; case '\f': buff.append("\\f"); //$NON-NLS-1$ break; case '\b': buff.append("\\b"); //$NON-NLS-1$ break; default: buff.append("\\"); buff.append(String.format("%03o", Integer.valueOf(ch))); break; } } else if (isLineOrParaSeparator(ch)) { buff.append("\\u"); //$NON-NLS-1$ buff.append(Integer.toHexString(ch)); } else buff.append(ch); } return buff.toString(); } private static boolean isLineOrParaSeparator(char ch) { int type = Character.getType(ch); return ch > 127 && (type == Character.PARAGRAPH_SEPARATOR || type == Character.LINE_SEPARATOR); } /** * Quote a string, escaping it into C/C++ style * @param val incoming "pure" string * @param quote the quote character * @return an escaped string surrounded with quotes. */ static public String quote(String val, char quote) { StringBuffer buff = new StringBuffer(); buff.append(quote); buff.append(escape(val, quote)); buff.append(quote); return buff.toString(); } /** * Unescape escaped chars in a string 'val'. * */ static public String unescape(String val, char quote) { StringBuffer buff = new StringBuffer(); for (int i = 0; i < val.length(); i++) { char ch = val.charAt(i); if (ch == '\\' && i + 1 < val.length()) { ch = val.charAt(++i); if (Character.digit(ch, 8) >= 0 && i + 2 < val.length()) { String code = val.substring(i, i+3); i += 2; try { int octal = Integer.parseInt(code, 8); buff.append((char) octal); } catch (NumberFormatException x) { // UtilsPlugin.log(x); } } else { switch (ch) { case 't': buff.append('\t'); break; case 'n': buff.append('\n'); break; case 'r': buff.append('\r'); break; case 'f': buff.append('\f'); break; case 'b': buff.append('\b'); break; case 'u': try { String code = val.substring(i+1, i+5); buff.append((char) Integer.parseInt(code, 16)); i+=4; } catch (NumberFormatException e) { buff.append('\\'); buff.append('u'); } break; case '\\': buff.append('\\'); break; case '"': buff.append('"'); break; case '\'': buff.append('\''); break; default: buff.append('\\'); buff.append(ch); break; } } } else { buff.append(ch); } } return buff.toString(); } /** * Remove any surrounding quotes from a string * @param string * @param quote quoting character * @return updated string */ public static String unquote(String string, char quote) { if (string.length() >= 2 && string.charAt(0) == quote && string.charAt(string.length() - 1) == quote) //return unescape(string.substring(1, string.length() - 1), quote); return string.substring(1, string.length() - 1); else return string; } /** * Escape quoted strings in text 'val', doubling backslashes. * @param val incoming string * @param quote the quote character * @return the string with escapes inside quoted strings double-escaped */ static public String escapeStrings(String val, char quote) { StringBuffer buff = new StringBuffer(); boolean inQuote = false; for (int i = 0; i < val.length(); i++) { char ch = val.charAt(i); if (ch == quote) { // changing quote state inQuote = true; buff.append(ch); } else if (inQuote) { // handle escape chars inside string if (ch == '\\') { buff.append('\\'); buff.append(ch); if (i + 1 < val.length() && val.charAt(i+1) == quote) { // if quote is already escaped, pass it through (don't toggle quote state) buff.append(quote); i++; } } else { buff.append(ch); } } else { // not quoted buff.append(ch); } } return buff.toString(); } /** * Format a string for dumping to console, e.g. * by adding a newline whereever a newline escape appears. * @param string * @return formatted string */ public static String formatForDump(String string) { String[] lines = string.split("\\n"); //$NON-NLS-1$ StringBuilder builder = new StringBuilder(); boolean hadLine = false; for (int i = 0; i < lines.length; i++) { if (lines[i].length() > 0) { if (hadLine) builder.append('\n'); builder.append(lines[i]); builder.append("\\n"); //$NON-NLS-1$ hadLine = true; } } return builder.toString(); } /** * Return a string equivalent to the input, but with * all illegal XML characters escaped. */ public static String escapeXML(String s) { StringBuffer result = new StringBuffer(); int len = s != null? s.length() : 0; for (int i = 0; i < len; i++) { char ch = s.charAt(i); switch (ch) { case '&': result.append("&"); //$NON-NLS-1$ break; case '"': result.append("""); //$NON-NLS-1$ break; case '\'': result.append("'"); //$NON-NLS-1$ break; case '<': result.append("<"); //$NON-NLS-1$ break; case '>': result.append(">"); //$NON-NLS-1$ break; default: result.append(ch); } } return result.toString(); } /** * Format a list of items into a text string that can be displayed * in a dialog. * @param messages * @return string with embedded tabs and newlines */ public static String formatTabbedList(Collection<?> items) { StringBuilder builder = new StringBuilder(); for (Iterator<?> iter = items.iterator(); iter.hasNext();) { String item = iter.next().toString(); builder.append('\t'); builder.append(item); builder.append('\n'); } return builder.toString(); } /** * Title case every word in a sentence, except for conjunctions * @param string * @return String Titlecased Like This */ public static String titleCaseSentence(String string) { Pattern conjunction = Pattern.compile("(in|to|of|and|or|but|if|as)"); Pattern pattern = Pattern.compile("\\b"); String[] pieces = pattern.split(string); StringBuilder builder = new StringBuilder(); for (int i = 0; i < pieces.length; i++) { if (!conjunction.matcher(pieces[i]).matches()) pieces[i] = titleCase(pieces[i]); builder.append(pieces[i]); } return builder.toString(); } /** * @param string input string with '\' + return breaking lines * @return string with '\' + return breaks removed */ public static String catenateBrokenLines(String string) { if (string == null) return null; return string.replaceAll("\\\\" + LINE_ENDING_PATTERN_STRING, ""); } /** * Catenate all the Object.toStrings() together, with an optional string in between. */ public static String catenateStrings(Object[] objects, String separator) { StringBuilder builder = new StringBuilder(); boolean first = true; for (Object o : objects) { if (first) first = false; else if (separator != null) builder.append(separator); builder.append(o); } return builder.toString(); } /** * Catenate all the Object.toStrings() together, with an optional string in between. */ public static String catenateStrings(Collection<?> list, String separator) { StringBuilder builder = new StringBuilder(); boolean first = true; for (Object o : list) { if (first) first = false; else if (separator != null) builder.append(separator); builder.append(o); } return builder.toString(); } /** * Create a reasonable human-readable label from an identifier. * @param name * @return */ public static String createLabelFromIdentifier(String name) { if (name == null) return ""; // strip qualifier and dotted prefix int idx; idx = name.indexOf(':'); if (idx >= 0) name = name.substring(0, idx); idx = name.lastIndexOf('.'); if (idx >= 0) name = name.substring(idx + 1); if (name.isEmpty()) return ""; StringBuilder label = new StringBuilder(); StringBuilder word = new StringBuilder(); char lastCh = 0; for (char ch : name.toCharArray()) { if (ch == '_') { label.append(fixupWordCapitalization(word.toString(), label.length() == 0)); word.setLength(0); label.append(' '); lastCh = ch; continue; } else if (lastCh != 0 && Character.isUpperCase(ch) && !Character.isUpperCase(lastCh)) { label.append(fixupWordCapitalization(word.toString(), label.length() == 0)); word.setLength(0); label.append(' '); } word.append(Character.toLowerCase(ch)); lastCh = ch; } label.append(fixupWordCapitalization(word.toString(), label.length() == 0)); return label.toString(); } /** * @param string * @return */ private static Object fixupWordCapitalization(String string, boolean first) { if (string.matches("if|of|the|a|an")) return string; // acronyms boolean plural = false; if (string.endsWith("s")) { string = string.substring(0, string.length() - 1); plural = true; } if (string.matches("(?i)uri|url|xml")) { string = string.toUpperCase(); } else { string = titleCase(string); } if (plural) { return string + "s"; } return string; } /** * @param string * @param count * @return */ public static String repeatString(String string, int count) { if (count == 0) return ""; else if (count == 1) return string; StringBuilder sb = new StringBuilder(); while (count-- > 0) sb.append(string); return sb.toString(); } public static String binaryToString(byte[] content) { return binaryToString(content, 0, content.length); } public static String binaryToString(byte[] content, int offset, int length) { StringBuilder sb = new StringBuilder(); for (int o = 0; o < length; o++) { sb.append(HexUtils.toHex2(content[offset + o])); } return sb.toString(); } }