/* * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see <http://www.gnu.org/licenses/>. */ package com.frostwire.util; import java.io.UnsupportedEncodingException; import java.net.URLDecoder; import java.net.URLEncoder; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.StringTokenizer; /** * Provides static methods to split, check for substrings, change case and * compare strings, along with additional string utility methods. */ public class StringUtils { /** Returns true if input contains the given pattern, which may contain the * wildcard character '*'. TODO: need more formal definition. Examples: * * <pre> * StringUtils.contains("", "") ==> true * StringUtils.contains("abc", "") ==> true * StringUtils.contains("abc", "b") ==> true * StringUtils.contains("abc", "d") ==> false * StringUtils.contains("abcd", "a*d") ==> true * StringUtils.contains("abcd", "*a**d*") ==> true * StringUtils.contains("abcd", "d*a") ==> false * </pre> */ public static final boolean contains(String input, String pattern) { return contains(input, pattern, false); } /** Exactly like contains(input, pattern), but case is ignored if * ignoreCase==true. */ public static final boolean contains(String input, String pattern, boolean ignoreCase) { //More efficient algorithms are possible, e.g. a modified version of the //Rabin-Karp algorithm, but they are unlikely to be faster with such //short strings. Also, some contant time factors could be shaved by //combining the second FOR loop below with the subset(..) call, but that //just isn't important. The important thing is to avoid needless //allocations. final int n = pattern.length(); //Where to resume searching after last wildcard, e.g., just past //the last match in input. int last = 0; //For each token in pattern starting at i... for (int i = 0; i < n;) { //1. Find the smallest j>i s.t. pattern[j] is space, *, or +. char c = ' '; int j = i; for (; j < n; j++) { char c2 = pattern.charAt(j); if (c2 == ' ' || c2 == '+' || c2 == '*') { c = c2; break; } } //2. Match pattern[i..j-1] against input[last...]. int k = subset(pattern, i, j, input, last, ignoreCase); if (k < 0) return false; //3. Reset the starting search index if got ' ' or '+'. //Otherwise increment past the match in input. if (c == ' ' || c == '+') last = 0; else if (c == '*') last = k + j - i; i = j + 1; } return true; } public static boolean containsCharacters(String input, char[] chars) { char[] inputChars = input.toCharArray(); Arrays.sort(inputChars); for (int i = 0; i < chars.length; i++) { if (Arrays.binarySearch(inputChars, chars[i]) >= 0) return true; } return false; } /** * @requires TODO3: fill this in * @effects returns the the smallest i>=bigStart * s.t. little[littleStart...littleStop-1] is a prefix of big[i...] * or -1 if no such i exists. If ignoreCase==false, case doesn't matter * when comparing characters. */ private static final int subset(String little, int littleStart, int littleStop, String big, int bigStart, boolean ignoreCase) { //Equivalent to // return big.indexOf(little.substring(littleStart, littleStop), bigStart); //but without an allocation. //Note special case for ignoreCase below. if (ignoreCase) { final int n = big.length() - (littleStop - littleStart) + 1; outerLoop: for (int i = bigStart; i < n; i++) { //Check if little[littleStart...littleStop-1] matches with shift i final int n2 = littleStop - littleStart; for (int j = 0; j < n2; j++) { char c1 = big.charAt(i + j); char c2 = little.charAt(littleStart + j); if (c1 != c2 && c1 != toOtherCase(c2)) //Ignore case. See below. continue outerLoop; } return i; } return -1; } else { final int n = big.length() - (littleStop - littleStart) + 1; outerLoop: for (int i = bigStart; i < n; i++) { final int n2 = littleStop - littleStart; for (int j = 0; j < n2; j++) { char c1 = big.charAt(i + j); char c2 = little.charAt(littleStart + j); if (c1 != c2) //Consider case. See above. continue outerLoop; } return i; } return -1; } } /** If c is a lower case ASCII character, returns Character.toUpperCase(c). * Else if c is an upper case ASCII character, returns Character.toLowerCase(c), * Else returns c. * Note that this is <b>not internationalized</b>; but it is fast. */ public static final char toOtherCase(char c) { int i = c; final int A = 'A'; //65 final int Z = 'Z'; //90 final int a = 'a'; //97 final int z = 'z'; //122 final int SHIFT = a - A; if (i < A) //non alphabetic return c; else if (i <= Z) //upper-case return (char) (i + SHIFT); else if (i < a) //non alphabetic return c; else if (i <= z) //lower-case return (char) (i - SHIFT); else //non alphabetic return c; } /** * Exactly like split(s, Character.toString(delimiter)) */ public static String[] split(String s, char delimiter) { return split(s, Character.toString(delimiter)); } /** * Returns the tokens of s delimited by the given delimiter, without * returning the delimiter. Repeated sequences of delimiters are treated * as one. Examples: * <pre> * split("a//b/ c /","/")=={"a","b"," c "} * split("a b", "/")=={"a b"}. * split("///", "/")=={}. * </pre> * * <b>Note that whitespace is preserved if it is not part of the delimiter.</b> * An older version of this trim()'ed each token of whitespace. */ public static String[] split(String s, String delimiters) { //Tokenize s based on delimiters, adding to buffer. StringTokenizer tokenizer = new StringTokenizer(s, delimiters); List<String> tokens = new ArrayList<String>(); while (tokenizer.hasMoreTokens()) tokens.add(tokenizer.nextToken()); return tokens.toArray(new String[0]); } /** * Exactly like splitNoCoalesce(s, Character.toString(delimiter)) */ public static String[] splitNoCoalesce(String s, char delimiter) { return splitNoCoalesce(s, Character.toString(delimiter)); } /** * Similar to split(s, delimiters) except that subsequent delimiters are not * coalesced, so the returned array may contain empty strings. If s starts * (ends) with a delimiter, the returned array starts (ends) with an empty * strings. If s contains N delimiters, N+1 strings are always returned. * Examples: * * <pre> * split("a//b/ c /","/")=={"a","","b"," c ", ""} * split("a b", "/")=={"a b"}. * split("///", "/")=={"","","",""}. * </pre> * * @return an array A s.t. s.equals(A[0]+d0+A[1]+d1+...+A[N]), where * for all dI, dI.size()==1 && delimiters.indexOf(dI)>=0; and for * all c in A[i], delimiters.indexOf(c)<0 */ public static String[] splitNoCoalesce(String s, String delimiters) { //Tokenize s based on delimiters, adding to buffer. StringTokenizer tokenizer = new StringTokenizer(s, delimiters, true); List<String> tokens = new ArrayList<String>(); //True if last token was a delimiter. Initialized to true to force //an empty string if s starts with a delimiter. boolean gotDelimiter = true; while (tokenizer.hasMoreTokens()) { String token = tokenizer.nextToken(); //Is token a delimiter? if (token.length() == 1 && delimiters.indexOf(token) >= 0) { //If so, add blank only if last token was a delimiter. if (gotDelimiter) tokens.add(""); gotDelimiter = true; } else { //If not, add "real" token. tokens.add(token); gotDelimiter = false; } } //Add trailing empty string UNLESS s is the empty string. if (gotDelimiter && !tokens.isEmpty()) tokens.add(""); return tokens.toArray(new String[0]); } /** * Returns true iff s starts with prefix, ignoring case. * @return true iff s.toUpperCase().startsWith(prefix.toUpperCase()) */ public static boolean startsWithIgnoreCase(String s, String prefix) { final int pl = prefix.length(); if (s.length() < pl) return false; for (int i = 0; i < pl; i++) { char sc = s.charAt(i); char pc = prefix.charAt(i); if (sc != pc) { sc = Character.toUpperCase(sc); pc = Character.toUpperCase(pc); if (sc != pc) { sc = Character.toLowerCase(sc); pc = Character.toLowerCase(pc); if (sc != pc) return false; } } } return true; } /** * Replaces all occurrences of old_str in str with new_str * * @param str the String to modify * @param old_str the String to be replaced * @param new_str the String to replace old_str with * * @return the modified str. */ public static String replace(String str, String old_str, String new_str) { int o = 0; StringBuilder buf = new StringBuilder(); for (int i = str.indexOf(old_str); i > -1; i = str.indexOf(old_str, i + 1)) { if (i > o) { buf.append(str.substring(o, i)); } buf.append(new_str); o = i + old_str.length(); } buf.append(str.substring(o, str.length())); return buf.toString(); } /** * Returns a truncated string, up to the maximum number of characters */ public static String truncate(final String string, final int maxLen) { if (string.length() <= maxLen) return string; else return string.substring(0, maxLen); } /** * Helper method to obtain the starting index of a substring within another * string, ignoring their case. This method is expensive because it has * to set each character of each string to lower case before doing the * comparison. Uses the default <code>Locale</code> for case conversion. * * @param str the string in which to search for the <tt>substring</tt> * argument * @param substring the substring to search for in <tt>str</tt> * @return if the <tt>substring</tt> argument occurs as a substring within * <tt>str</tt>, then the index of the first character of the first such * substring is returned; if it does not occur as a substring, -1 is * returned */ public static int indexOfIgnoreCase(String str, String substring) { return indexOfIgnoreCase(str, substring, Locale.getDefault()); } /** * Helper method to obtain the starting index of a substring within another * string, ignoring their case. This method is expensive because it has * to set each character of each string to lower case before doing the * comparison. * * @param str the string in which to search for the <tt>substring</tt> * argument * @param substring the substring to search for in <tt>str</tt> * @param locale the <code>Locale</code> to use when converting the * case of <code>str</code> and <code>substring</code>. This is necessary because * case conversion is <code>Locale</code> specific. * @return if the <tt>substring</tt> argument occurs as a substring within * <tt>str</tt>, then the index of the first character of the first such * substring is returned; if it does not occur as a substring, -1 is * returned */ public static int indexOfIgnoreCase(String str, String substring, Locale locale) { // Look for the index after the expensive conversion to lower case. return str.toLowerCase(locale).indexOf(substring.toLowerCase(locale)); } /** * Utility wrapper for getting a String object out of * byte [] using the ascii encoding. */ public static String getASCIIString(byte[] bytes) { return getEncodedString(bytes, "ISO-8859-1"); } /** * Utility wrapper for getting a String object out of * byte [] using the UTF-8 encoding. */ public static String getUTF8String(byte[] bytes) { return getEncodedString(bytes, "UTF-8"); } /** * @return a string with an encoding we know we support. */ private static String getEncodedString(byte[] bytes, String encoding) { try { return new String(bytes, encoding); } catch (UnsupportedEncodingException impossible) { throw new RuntimeException(impossible); } } /** * Returns the tokens of array concanated to a delimited by the given * delimiter, without Examples: * * <pre> * explode({ "a", "b" }, " ") == "a b" * explode({ "a", "b" }, "") == "ab" * </pre> */ public static String explode(String[] array, String delimeter) { StringBuilder sb = new StringBuilder(); if (array.length > 0) { sb.append(array[0]); for (int i = 1; i < array.length; i++) { sb.append(delimeter); sb.append(array[i]); } } return sb.toString(); } /** * Returns the tokens of a collection concanated to a delimited by the given * delimiter. */ public static String explode(Collection<String> collection, String delimiter) { StringBuilder sb = new StringBuilder(); if (!collection.isEmpty()) { Iterator<String> i = collection.iterator(); sb.append(i.next()); while (i.hasNext()) { sb.append(delimiter); sb.append(i.next()); } } return sb.toString(); } /** * Check if a String is null or empty (the length is null). * * @param s the string to check * @return true if it is null or empty */ public static boolean isNullOrEmpty(String s, boolean trim) { return s == null || (trim ? s.trim().length() == 0 : s.length() == 0); } public static boolean isNullOrEmpty(String s) { return isNullOrEmpty(s, false); } public static String removeDoubleSpaces(String s) { return s != null ? s.replaceAll("\\s+", " ") : null; } public static String buildSet(List<?> list) { StringBuilder sb = new StringBuilder("("); int i = 0; for (Object id : list) { sb.append(id); if (i++ < (list.size() - 1)) { sb.append(","); } } sb.append(")"); return sb.toString(); } public static String getLocaleString(Map<String, String> strMap, String defaultStr) { String localeLanguageCode = Locale.getDefault().getLanguage(); if (StringUtils.isNullOrEmpty(localeLanguageCode, true)) { localeLanguageCode = "en"; } String str = strMap.get(localeLanguageCode); if (StringUtils.isNullOrEmpty(str, true)) { str = defaultStr; } return str; } /** * Like URLEncoder.encode, except translates spaces into %20 instead of + * * @param s * @return */ public static String encodeUrl(String s) { if (s == null) { return ""; } try { return URLEncoder.encode(s, "UTF-8").replaceAll("\\+", "%20"); } catch (UnsupportedEncodingException e) { return s.replaceAll("\\+", "%20"); } } public static String decodeUrl(String s) { if (s == null) { return ""; } try { return URLDecoder.decode(s, "UTF-8"); } catch (UnsupportedEncodingException e) { return s; } } }