/* ********************************************************************** ** ** Copyright notice ** ** ** ** (c) 2005-2009 RSSOwl Development Team ** ** http://www.rssowl.org/ ** ** ** ** All rights reserved ** ** ** ** This program and the accompanying materials are made available under ** ** the terms of the Eclipse Public License v1.0 which accompanies this ** ** distribution, and is available at: ** ** http://www.rssowl.org/legal/epl-v10.html ** ** ** ** A copy is found in the file epl-v10.html and important notices to the ** ** license from the team is found in the textfile LICENSE.txt distributed ** ** in this package. ** ** ** ** This copyright notice MUST APPEAR in all copies of the file! ** ** ** ** Contributors: ** ** RSSOwl Development Team - initial API and implementation ** ** ** ** ********************************************************************** */ package org.rssowl.core.util; import org.apache.lucene.analysis.StopAnalyzer; import org.rssowl.core.connection.MonitorCanceledException; import org.rssowl.core.internal.Activator; import java.io.IOException; import java.io.Reader; import java.io.StringReader; import java.util.ArrayList; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; /** * Utility Class for working with <code>Strings</code>. * * @author bpasero */ public class StringUtils { private static final String EMPTY_STRING = ""; //$NON-NLS-1$ /** * An array containing some common English words that are not usually useful * for searching. */ public static final String[] ENGLISH_STOP_WORDS = StopAnalyzer.ENGLISH_STOP_WORDS; /* This utility class constructor is hidden */ private StringUtils() { // Protect default constructor } /** * If <code>string</code> is <code>null</code>, returns <code>null</code>. * Otherwise, returns the result of {@link String#trim()}. * * @param string String to be trimmed or <code>null</code>. * @return <code>null</code> or trimmed String. */ public static final String safeTrim(String string) { return string == null ? null : string.trim(); } /** * Returns TRUE in case the given String has a value that is not "" or * <code>NULL</code>. * * @param str The String to check * @return boolean TRUE in case the String has an value not "" or * <code>NULL</code>. */ public static boolean isSet(String str) { return (str != null && str.length() > 0); } /** * This returns a new string with all surrounding whitespace removed and * internal whitespace normalized to a single space. If only whitespace * exists, the empty string is returned. * <p> * Per XML 1.0 Production 3 whitespace includes: #x20, #x9, #xD, #xA * </p> * <p> * See <code>org.jdom.Text</code> * </p> * * @param str string to be normalized. * @return normalized string or empty string */ public static String normalizeString(String str) { if (str == null) return EMPTY_STRING; char[] n = new char[str.length()]; boolean white = true; int pos = 0; for (int i = 0, c = str.length(); i < c; ++i) { char element = str.charAt(i); if (" \t\n\r".indexOf(element) != -1) { //$NON-NLS-1$ if (!white) { n[pos++] = ' '; white = true; } } else { n[pos++] = element; white = false; } } if (white && pos > 0) pos--; return new String(n, 0, pos); } /** * Trim the given String to the given Limit. Make it human readable, such as * it is tried to trim the text after a whitespace, in order to keep entire * words. * * @param str The String to Trim * @param limit The max. number of characters * @return String The human readable trimmed String */ public static String smartTrim(String str, int limit) { /* String does not contain a whitespace or is small */ if (str.indexOf(' ') == -1 || str.length() < limit) return str; /* Substring to Limit */ str = str.substring(0, limit); /* Cut after a whitespace */ for (int a = limit - 1; a >= 0; a--) if (str.charAt(a) == ' ') return str.substring(0, a) + "..."; //$NON-NLS-1$ return str; } /** * Remove HTML tags from the given String and replace Entities with their * corresponding values. * * @param str The String to remove the Tags from * @param replaceEntities <code>true</code> to replace entities and * <code>false</code> otherwise. * @return Returns a String that is no longer containing any HTML or Entities. */ public static String stripTags(String str, boolean replaceEntities) { return filterTags(str, null, replaceEntities); } /** * Remove HTML tags from the given String and replace Entities with their * corresponding values. If the set of Strings is provided (not null), only * these tags will be stripped. * * @param str The String to remove the Tags from * @param tags the set of HTML tags to strip out of the given String or * <code>null</code> to strip all HTML tags. * @param replaceEntities <code>true</code> to replace entities and * <code>false</code> otherwise. * @return the String with HTML Tags and Entities replaced. */ public static String filterTags(String str, Set<String> tags, boolean replaceEntities) { /* Check String first */ if (!StringUtils.isSet(str)) return str; Reader stripReader; if (tags == null || tags.isEmpty()) stripReader = new HTMLStripReader(new StringReader(str), replaceEntities); else stripReader = new HTMLFilterReader(new StringReader(str), tags, replaceEntities); try { return readString(stripReader); } catch (IOException e) { if (!(e instanceof MonitorCanceledException)) Activator.getDefault().logError(e.getMessage(), e); return str; } finally { try { stripReader.close(); } catch (IOException e) { if (!(e instanceof MonitorCanceledException)) Activator.getDefault().logError(e.getMessage(), e); } } } /** * Checks whether the given String is of the Format "R,G,B" with each of the * components being an parseable Integer. * * @param rgb The String to check for a Valid RGB Value. * @return <code>TRUE</code> if the given String is a valid RGB Value. */ public static boolean isValidRGB(String rgb) { if (rgb == null) return true; String split[] = rgb.split(","); //$NON-NLS-1$ if (split.length != 3) return false; try { Integer.parseInt(split[0]); Integer.parseInt(split[1]); Integer.parseInt(split[2]); } catch (NumberFormatException e) { return false; } return true; } /** * This method does exactly the same as String.replaceAll() with the * difference that no regular expressions are used to perform the replacement. * * @param strings The source Strings to search and replace * @param search The search term that should get replaced * @param replace The value that replaces the search term * @return Set The new Strings with all replaced search terms */ public static Set<String> replaceAll(Set<String> strings, String search, String replace) { Set<String> replacedStrings = new HashSet<String>(strings.size()); for (String string : strings) { replacedStrings.add(replaceAll(string, search, replace)); } return replacedStrings; } /** * This method does exactly the same as String.replaceAll() with the * difference that no regular expressions are used to perform the replacement. * * @param str The source String to search and replace * @param search The search term that should get replaced * @param replace The value that replaces the search term * @return String The new String with all replaced search terms */ public static String replaceAll(String str, String search, String replace) { int start = 0; int pos; StringBuilder result = null; while ((pos = str.indexOf(search, start)) >= 0) { if (result == null) result = new StringBuilder(str.length()); result.append(str.substring(start, pos)); result.append(replace); start = pos + search.length(); } if (result != null) result.append(str.substring(start)); return result != null ? result.toString() : str; } /** * Convert a String to int and return <code>-1</code> in case the input String * is not a number. * * @param str The String to convert. * @return int The converted integer or <code>-1</code> in case the input * String is not a number. */ public static int stringToInt(String str) { try { return Integer.parseInt(str); } catch (NumberFormatException e) { return -1; } } /** * Tokenizes the given String at a whitespace character, but keeps phrases * surrounded by quotes together. * * @param str the String to tokenize. * @param keepQuotes if <code>true</code> the quotes will be part of the token * and <code>false</code> if to remove them. * @return A list of tokens, including phrases surrounded by quotes if any. */ public static List<String> tokenizePhraseAware(String str, boolean keepQuotes) { if (!StringUtils.isSet(str)) return Collections.emptyList(); str = normalizeString(str); boolean inQuotes = false; List<String> tokens = new ArrayList<String>(1); StringBuilder builder = new StringBuilder(); for (int i = 0; i < str.length(); i++) { char c = str.charAt(i); /* Opening Quote */ if (c == '"' && !inQuotes) { inQuotes = true; if (keepQuotes) builder.append(c); } /* Closing Quote */ else if (c == '"' && inQuotes) { inQuotes = false; if (keepQuotes) builder.append(c); } /* Whitespace outside Quotes */ else if (c == ' ' && !inQuotes) { tokens.add(builder.toString()); builder.setLength(0); } /* Whitespace inside Quotes */ else if (c == ' ' && inQuotes) { builder.append(c); } /* Any other Character */ else { builder.append(c); } } if (builder.length() > 0) tokens.add(builder.toString()); return tokens; } /** * @param str the string to escape for use in HTML. * @return the escaped string that can safely be used in HTML. */ public static String htmlEscape(String str) { if (!StringUtils.isSet(str)) return str; str = StringUtils.replaceAll(str, "<", "<"); //$NON-NLS-1$ //$NON-NLS-2$ str = StringUtils.replaceAll(str, ">", ">"); //$NON-NLS-1$ //$NON-NLS-2$ return str; } /** * @param reader the {@link Reader} to read from. * @return a {@link String} as result from reading. * @throws IOException in case of an error. */ public static String readString(Reader reader) throws IOException { StringBuilder str = new StringBuilder(); int len = 0; char[] buf = new char[1000]; while ((len = reader.read(buf)) != -1) str.append(buf, 0, len); return str.toString(); } /** * @param str the {@link String} to check for. * @return <code>true</code> if the provided {@link String} is a phrase search * and <code>false</code> otherwise. */ public static boolean isPhraseSearch(String str) { if (!StringUtils.isSet(str)) return false; str = str.trim(); /* Check for Phrase Quotes */ return (str.startsWith("\"") && str.endsWith("\"") && str.length() != 1); //$NON-NLS-1$ //$NON-NLS-2$ } /** * @param str the {@link String} to check for. * @return <code>true</code> if the provided {@link String} contains special * characters and phrase search tokens and <code>false</code> otherwise. */ public static boolean isPhraseSearchWithWildcardToken(String str) { if (!isPhraseSearch(str)) return false; /* Check for Wildcard Chars */ return str.contains("*") || str.contains("?"); //$NON-NLS-1$ //$NON-NLS-2$ } /** * @param str the {@link String} to check for. * @return <code>true</code> if the provided {@link String} only contains of * wildcards and <code>false</code> otherwise. */ public static boolean isWildcardsOnly(String str) { if (!StringUtils.isSet(str)) return false; str = str.trim(); for (int i = 0; i < str.length(); i++) { char c = str.charAt(i); /* Non Wildcard Found */ if (c != '*' && c != '?') return false; } return true; } /** * @param str the {@link String} to check for. * @return <code>true</code> if the provided {@link String} contains special * characters and wildcard tokens and <code>false</code> otherwise. */ public static boolean isSpecialCharacterSearchWithWildcardToken(String str) { if (!StringUtils.isSet(str)) return false; str = str.trim(); boolean containsSpecialChars = false; boolean containsWildcards = false; for (int i = 0; i < str.length(); i++) { char c = str.charAt(i); /* Wildcard Found */ if (c == '*' || c == '?') { containsWildcards = true; if (containsSpecialChars) return true; continue; } /* Dot and At are working ok (exceptions) */ if (c == '.' || c == '@') continue; /* Special Char Found */ if ((c > 32 && c < 48) || // !, ", #, $, %, &, ', (, ), *, +, ,, -, ., / (c > 57 && c < 65) || // :, ;, <, =, >, ?, @ (c > 90 && c < 97) || // [, \, ], ^, _, ` (c > 122 && c < 127) || // {, |, }, ~ (String.valueOf(c).equals("ยง")) //Not part of ASCII //$NON-NLS-1$ ) { containsSpecialChars = true; if (containsWildcards) return true; } } return false; } /** * @param str the {@link String} to check for. * @return <code>true</code> in case the provided {@link String} supports * trailing wildcards and <code>false</code> otherwise. */ public static boolean supportsTrailingWildcards(String str) { if (StringUtils.isSet(str) && !str.endsWith("*") && !str.endsWith("?") && !StringUtils.isPhraseSearch(str)) { //$NON-NLS-1$ //$NON-NLS-2$ str = str + "*"; //$NON-NLS-1$ if (!StringUtils.isSpecialCharacterSearchWithWildcardToken(str)) return true; } return false; } }