/*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.frostwire.util;
import java.io.UnsupportedEncodingException;
import java.net.URLDecoder;
import java.net.URLEncoder;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
/**
* Provides static methods to split, check for substrings, change case and
* compare strings, along with additional string utility methods.
*/
public class StringUtils {
/** Returns true if input contains the given pattern, which may contain the
* wildcard character '*'. TODO: need more formal definition. Examples:
*
* <pre>
* StringUtils.contains("", "") ==> true
* StringUtils.contains("abc", "") ==> true
* StringUtils.contains("abc", "b") ==> true
* StringUtils.contains("abc", "d") ==> false
* StringUtils.contains("abcd", "a*d") ==> true
* StringUtils.contains("abcd", "*a**d*") ==> true
* StringUtils.contains("abcd", "d*a") ==> false
* </pre>
*/
public static final boolean contains(String input, String pattern) {
return contains(input, pattern, false);
}
/** Exactly like contains(input, pattern), but case is ignored if
* ignoreCase==true. */
public static final boolean contains(String input, String pattern, boolean ignoreCase) {
//More efficient algorithms are possible, e.g. a modified version of the
//Rabin-Karp algorithm, but they are unlikely to be faster with such
//short strings. Also, some contant time factors could be shaved by
//combining the second FOR loop below with the subset(..) call, but that
//just isn't important. The important thing is to avoid needless
//allocations.
final int n = pattern.length();
//Where to resume searching after last wildcard, e.g., just past
//the last match in input.
int last = 0;
//For each token in pattern starting at i...
for (int i = 0; i < n;) {
//1. Find the smallest j>i s.t. pattern[j] is space, *, or +.
char c = ' ';
int j = i;
for (; j < n; j++) {
char c2 = pattern.charAt(j);
if (c2 == ' ' || c2 == '+' || c2 == '*') {
c = c2;
break;
}
}
//2. Match pattern[i..j-1] against input[last...].
int k = subset(pattern, i, j, input, last, ignoreCase);
if (k < 0)
return false;
//3. Reset the starting search index if got ' ' or '+'.
//Otherwise increment past the match in input.
if (c == ' ' || c == '+')
last = 0;
else if (c == '*')
last = k + j - i;
i = j + 1;
}
return true;
}
public static boolean containsCharacters(String input, char[] chars) {
char[] inputChars = input.toCharArray();
Arrays.sort(inputChars);
for (int i = 0; i < chars.length; i++) {
if (Arrays.binarySearch(inputChars, chars[i]) >= 0)
return true;
}
return false;
}
/**
* @requires TODO3: fill this in
* @effects returns the the smallest i>=bigStart
* s.t. little[littleStart...littleStop-1] is a prefix of big[i...]
* or -1 if no such i exists. If ignoreCase==false, case doesn't matter
* when comparing characters.
*/
private static final int subset(String little, int littleStart, int littleStop, String big, int bigStart, boolean ignoreCase) {
//Equivalent to
// return big.indexOf(little.substring(littleStart, littleStop), bigStart);
//but without an allocation.
//Note special case for ignoreCase below.
if (ignoreCase) {
final int n = big.length() - (littleStop - littleStart) + 1;
outerLoop: for (int i = bigStart; i < n; i++) {
//Check if little[littleStart...littleStop-1] matches with shift i
final int n2 = littleStop - littleStart;
for (int j = 0; j < n2; j++) {
char c1 = big.charAt(i + j);
char c2 = little.charAt(littleStart + j);
if (c1 != c2 && c1 != toOtherCase(c2)) //Ignore case. See below.
continue outerLoop;
}
return i;
}
return -1;
} else {
final int n = big.length() - (littleStop - littleStart) + 1;
outerLoop: for (int i = bigStart; i < n; i++) {
final int n2 = littleStop - littleStart;
for (int j = 0; j < n2; j++) {
char c1 = big.charAt(i + j);
char c2 = little.charAt(littleStart + j);
if (c1 != c2) //Consider case. See above.
continue outerLoop;
}
return i;
}
return -1;
}
}
/** If c is a lower case ASCII character, returns Character.toUpperCase(c).
* Else if c is an upper case ASCII character, returns Character.toLowerCase(c),
* Else returns c.
* Note that this is <b>not internationalized</b>; but it is fast.
*/
public static final char toOtherCase(char c) {
int i = c;
final int A = 'A'; //65
final int Z = 'Z'; //90
final int a = 'a'; //97
final int z = 'z'; //122
final int SHIFT = a - A;
if (i < A) //non alphabetic
return c;
else if (i <= Z) //upper-case
return (char) (i + SHIFT);
else if (i < a) //non alphabetic
return c;
else if (i <= z) //lower-case
return (char) (i - SHIFT);
else
//non alphabetic
return c;
}
/**
* Exactly like split(s, Character.toString(delimiter))
*/
public static String[] split(String s, char delimiter) {
return split(s, Character.toString(delimiter));
}
/**
* Returns the tokens of s delimited by the given delimiter, without
* returning the delimiter. Repeated sequences of delimiters are treated
* as one. Examples:
* <pre>
* split("a//b/ c /","/")=={"a","b"," c "}
* split("a b", "/")=={"a b"}.
* split("///", "/")=={}.
* </pre>
*
* <b>Note that whitespace is preserved if it is not part of the delimiter.</b>
* An older version of this trim()'ed each token of whitespace.
*/
public static String[] split(String s, String delimiters) {
//Tokenize s based on delimiters, adding to buffer.
StringTokenizer tokenizer = new StringTokenizer(s, delimiters);
List<String> tokens = new ArrayList<String>();
while (tokenizer.hasMoreTokens())
tokens.add(tokenizer.nextToken());
return tokens.toArray(new String[0]);
}
/**
* Exactly like splitNoCoalesce(s, Character.toString(delimiter))
*/
public static String[] splitNoCoalesce(String s, char delimiter) {
return splitNoCoalesce(s, Character.toString(delimiter));
}
/**
* Similar to split(s, delimiters) except that subsequent delimiters are not
* coalesced, so the returned array may contain empty strings. If s starts
* (ends) with a delimiter, the returned array starts (ends) with an empty
* strings. If s contains N delimiters, N+1 strings are always returned.
* Examples:
*
* <pre>
* split("a//b/ c /","/")=={"a","","b"," c ", ""}
* split("a b", "/")=={"a b"}.
* split("///", "/")=={"","","",""}.
* </pre>
*
* @return an array A s.t. s.equals(A[0]+d0+A[1]+d1+...+A[N]), where
* for all dI, dI.size()==1 && delimiters.indexOf(dI)>=0; and for
* all c in A[i], delimiters.indexOf(c)<0
*/
public static String[] splitNoCoalesce(String s, String delimiters) {
//Tokenize s based on delimiters, adding to buffer.
StringTokenizer tokenizer = new StringTokenizer(s, delimiters, true);
List<String> tokens = new ArrayList<String>();
//True if last token was a delimiter. Initialized to true to force
//an empty string if s starts with a delimiter.
boolean gotDelimiter = true;
while (tokenizer.hasMoreTokens()) {
String token = tokenizer.nextToken();
//Is token a delimiter?
if (token.length() == 1 && delimiters.indexOf(token) >= 0) {
//If so, add blank only if last token was a delimiter.
if (gotDelimiter)
tokens.add("");
gotDelimiter = true;
} else {
//If not, add "real" token.
tokens.add(token);
gotDelimiter = false;
}
}
//Add trailing empty string UNLESS s is the empty string.
if (gotDelimiter && !tokens.isEmpty())
tokens.add("");
return tokens.toArray(new String[0]);
}
/**
* Returns true iff s starts with prefix, ignoring case.
* @return true iff s.toUpperCase().startsWith(prefix.toUpperCase())
*/
public static boolean startsWithIgnoreCase(String s, String prefix) {
final int pl = prefix.length();
if (s.length() < pl)
return false;
for (int i = 0; i < pl; i++) {
char sc = s.charAt(i);
char pc = prefix.charAt(i);
if (sc != pc) {
sc = Character.toUpperCase(sc);
pc = Character.toUpperCase(pc);
if (sc != pc) {
sc = Character.toLowerCase(sc);
pc = Character.toLowerCase(pc);
if (sc != pc)
return false;
}
}
}
return true;
}
/**
* Replaces all occurrences of old_str in str with new_str
*
* @param str the String to modify
* @param old_str the String to be replaced
* @param new_str the String to replace old_str with
*
* @return the modified str.
*/
public static String replace(String str, String old_str, String new_str) {
int o = 0;
StringBuilder buf = new StringBuilder();
for (int i = str.indexOf(old_str); i > -1; i = str.indexOf(old_str, i + 1)) {
if (i > o) {
buf.append(str.substring(o, i));
}
buf.append(new_str);
o = i + old_str.length();
}
buf.append(str.substring(o, str.length()));
return buf.toString();
}
/**
* Returns a truncated string, up to the maximum number of characters
*/
public static String truncate(final String string, final int maxLen) {
if (string.length() <= maxLen)
return string;
else
return string.substring(0, maxLen);
}
/**
* Helper method to obtain the starting index of a substring within another
* string, ignoring their case. This method is expensive because it has
* to set each character of each string to lower case before doing the
* comparison. Uses the default <code>Locale</code> for case conversion.
*
* @param str the string in which to search for the <tt>substring</tt>
* argument
* @param substring the substring to search for in <tt>str</tt>
* @return if the <tt>substring</tt> argument occurs as a substring within
* <tt>str</tt>, then the index of the first character of the first such
* substring is returned; if it does not occur as a substring, -1 is
* returned
*/
public static int indexOfIgnoreCase(String str, String substring) {
return indexOfIgnoreCase(str, substring, Locale.getDefault());
}
/**
* Helper method to obtain the starting index of a substring within another
* string, ignoring their case. This method is expensive because it has
* to set each character of each string to lower case before doing the
* comparison.
*
* @param str the string in which to search for the <tt>substring</tt>
* argument
* @param substring the substring to search for in <tt>str</tt>
* @param locale the <code>Locale</code> to use when converting the
* case of <code>str</code> and <code>substring</code>. This is necessary because
* case conversion is <code>Locale</code> specific.
* @return if the <tt>substring</tt> argument occurs as a substring within
* <tt>str</tt>, then the index of the first character of the first such
* substring is returned; if it does not occur as a substring, -1 is
* returned
*/
public static int indexOfIgnoreCase(String str, String substring, Locale locale) {
// Look for the index after the expensive conversion to lower case.
return str.toLowerCase(locale).indexOf(substring.toLowerCase(locale));
}
/**
* Utility wrapper for getting a String object out of
* byte [] using the ascii encoding.
*/
public static String getASCIIString(byte[] bytes) {
return getEncodedString(bytes, "ISO-8859-1");
}
/**
* Utility wrapper for getting a String object out of
* byte [] using the UTF-8 encoding.
*/
public static String getUTF8String(byte[] bytes) {
return getEncodedString(bytes, "UTF-8");
}
/**
* @return a string with an encoding we know we support.
*/
private static String getEncodedString(byte[] bytes, String encoding) {
try {
return new String(bytes, encoding);
} catch (UnsupportedEncodingException impossible) {
throw new RuntimeException(impossible);
}
}
/**
* Returns the tokens of array concanated to a delimited by the given
* delimiter, without Examples:
*
* <pre>
* explode({ "a", "b" }, " ") == "a b"
* explode({ "a", "b" }, "") == "ab"
* </pre>
*/
public static String explode(String[] array, String delimeter) {
StringBuilder sb = new StringBuilder();
if (array.length > 0) {
sb.append(array[0]);
for (int i = 1; i < array.length; i++) {
sb.append(delimeter);
sb.append(array[i]);
}
}
return sb.toString();
}
/**
* Returns the tokens of a collection concanated to a delimited by the given
* delimiter.
*/
public static String explode(Collection<String> collection, String delimiter) {
StringBuilder sb = new StringBuilder();
if (!collection.isEmpty()) {
Iterator<String> i = collection.iterator();
sb.append(i.next());
while (i.hasNext()) {
sb.append(delimiter);
sb.append(i.next());
}
}
return sb.toString();
}
/**
* Check if a String is null or empty (the length is null).
*
* @param s the string to check
* @return true if it is null or empty
*/
public static boolean isNullOrEmpty(String s, boolean trim) {
return s == null || (trim ? s.trim().length() == 0 : s.length() == 0);
}
public static boolean isNullOrEmpty(String s) {
return isNullOrEmpty(s, false);
}
public static String removeDoubleSpaces(String s) {
return s != null ? s.replaceAll("\\s+", " ") : null;
}
public static String buildSet(List<?> list) {
StringBuilder sb = new StringBuilder("(");
int i = 0;
for (Object id : list) {
sb.append(id);
if (i++ < (list.size() - 1)) {
sb.append(",");
}
}
sb.append(")");
return sb.toString();
}
public static String getLocaleString(Map<String, String> strMap, String defaultStr) {
String localeLanguageCode = Locale.getDefault().getLanguage();
if (StringUtils.isNullOrEmpty(localeLanguageCode, true)) {
localeLanguageCode = "en";
}
String str = strMap.get(localeLanguageCode);
if (StringUtils.isNullOrEmpty(str, true)) {
str = defaultStr;
}
return str;
}
/**
* Like URLEncoder.encode, except translates spaces into %20 instead of +
*
* @param s
* @return
*/
public static String encodeUrl(String s) {
if (s == null) {
return "";
}
try {
return URLEncoder.encode(s, "UTF-8").replaceAll("\\+", "%20");
} catch (UnsupportedEncodingException e) {
return s.replaceAll("\\+", "%20");
}
}
public static String decodeUrl(String s) {
if (s == null) {
return "";
}
try {
return URLDecoder.decode(s, "UTF-8");
} catch (UnsupportedEncodingException e) {
return s;
}
}
}