package org.limewire.util;
import java.io.UnsupportedEncodingException;
import java.lang.reflect.Array;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.text.Collator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;
/**
* Provides static methods to split, check for substrings, change case and
* compare strings, along with additional string utility methods.
*/
public class StringUtils {
/**
* Collator used for internationalized comparison. It is read by
* compareFullPrimary and replaced as a whole by setLocale, which is why the
* field is volatile rather than final.
*/
private volatile static Collator COLLATOR;
/**
* Holds one CharsetEncoder per thread, created on first use by that thread.
* NOTE(review): despite the name, the charset is ISO-8859-1, which can also
* encode U+0080..U+00FF - confirm whether US-ASCII was intended.
*/
private static final ThreadLocal<CharsetEncoder> ASCII_ENCODER = new ThreadLocal<CharsetEncoder>() {
@Override
protected CharsetEncoder initialValue() {
return Charset.forName("ISO-8859-1").newEncoder();
}
};
// Initial collator: default locale, full decomposition, primary strength
// (the same configuration setLocale applies to its replacement).
static {
COLLATOR = Collator.getInstance(Locale.getDefault());
COLLATOR.setDecomposition(Collator.FULL_DECOMPOSITION);
COLLATOR.setStrength(Collator.PRIMARY);
}
/**
 * Replaces the collator used for string matching with one created for the
 * given locale, configured with full decomposition and primary strength.
 *
 * @param locale the locale whose collation rules should be used
 */
public static void setLocale(Locale locale) {
    final Collator replacement = Collator.getInstance(locale);
    replacement.setStrength(Collator.PRIMARY);
    replacement.setDecomposition(Collator.FULL_DECOMPOSITION);
    // Assign last so that only a fully configured collator becomes visible.
    COLLATOR = replacement;
}
/**
 * Case-sensitive pattern containment test. The pattern is a sequence of
 * tokens separated by ' ', '+' or the wildcard '*'; every token has to
 * occur in <code>input</code>. Examples:
 *
 * <pre>
 * StringUtils.contains("", "") ==> true
 * StringUtils.contains("abc", "") ==> true
 * StringUtils.contains("abc", "b") ==> true
 * StringUtils.contains("abc", "d") ==> false
 * StringUtils.contains("abcd", "a*d") ==> true
 * StringUtils.contains("abcd", "*a**d*") ==> true
 * StringUtils.contains("abcd", "d*a") ==> false
 * </pre>
 *
 * @param input the text to search in
 * @param pattern the pattern to look for
 * @return true if input contains the pattern
 */
public static boolean contains(String input, String pattern) {
    final boolean ignoreCase = false;
    return contains(input, pattern, ignoreCase);
}
/**
 * Like contains(input, pattern), with optional ASCII case-insensitivity.
 * <p>
 * The pattern is cut into tokens at each ' ', '+' or '*'. Every token must
 * be found in <code>input</code>. After a ' ' or '+' the next token is
 * searched from the start of the input again; after a '*' the next token
 * must occur behind the end of the previous match.
 *
 * @param input the text to search in
 * @param pattern the pattern to look for
 * @param ignoreCase true to ignore the case of ASCII letters
 * @return true if every token of the pattern was matched
 */
public static boolean contains(String input, String pattern, boolean ignoreCase) {
    final int patternLength = pattern.length();
    // Index in input at which the search for the next token starts.
    int searchFrom = 0;
    int tokenStart = 0;
    while (tokenStart < patternLength) {
        // Locate the end of the current token and remember what ended it.
        // Running off the end of the pattern counts as a ' ' separator.
        int tokenEnd = tokenStart;
        char separator = ' ';
        while (tokenEnd < patternLength) {
            final char candidate = pattern.charAt(tokenEnd);
            if (candidate == ' ' || candidate == '+' || candidate == '*') {
                separator = candidate;
                break;
            }
            tokenEnd++;
        }

        final int matchAt = subset(pattern, tokenStart, tokenEnd, input, searchFrom, ignoreCase);
        if (matchAt < 0) {
            return false;
        }

        // Only a wildcard forces the next token to follow this match.
        if (separator == '*') {
            searchFrom = matchAt + (tokenEnd - tokenStart);
        } else {
            searchFrom = 0;
        }
        tokenStart = tokenEnd + 1;
    }
    return true;
}
/**
 * Tells whether <code>input</code> contains at least one of the given
 * characters.
 *
 * @param input the string to inspect
 * @param chars the characters to look for
 * @return true if any element of chars occurs in input
 */
public static boolean containsCharacters(String input, char[] chars) {
    // Binary search needs a sorted copy of the input characters.
    final char[] sortedInput = input.toCharArray();
    Arrays.sort(sortedInput);
    boolean found = false;
    for (int idx = 0; idx < chars.length && !found; idx++) {
        found = Arrays.binarySearch(sortedInput, chars[idx]) >= 0;
    }
    return found;
}
/**
 * Finds the first position at or after <code>bigStart</code> where
 * <code>little[littleStart...littleStop-1]</code> occurs in
 * <code>big</code>, without allocating a substring.
 *
 * @param little the string holding the token to look for
 * @param littleStart index of the first token character (inclusive)
 * @param littleStop index just past the last token character (exclusive)
 * @param big the string to search in
 * @param bigStart the first index in big to consider
 * @param ignoreCase if true, ASCII letters match regardless of case (see
 *        toOtherCase); if false, characters must be identical
 * @return the smallest matching index i>=bigStart, or -1 if there is none
 */
private static int subset(String little, int littleStart, int littleStop, String big,
        int bigStart, boolean ignoreCase) {
    final int tokenLength = littleStop - littleStart;
    // Last shift at which the token still fits into big.
    final int lastShift = big.length() - tokenLength;
    for (int shift = bigStart; shift <= lastShift; shift++) {
        boolean matches = true;
        for (int offset = 0; matches && offset < tokenLength; offset++) {
            final char fromBig = big.charAt(shift + offset);
            final char fromLittle = little.charAt(littleStart + offset);
            matches = fromBig == fromLittle
                    || (ignoreCase && fromBig == toOtherCase(fromLittle));
        }
        if (matches) {
            return shift;
        }
    }
    return -1;
}
/**
 * Flips the case of an ASCII letter: 'a'..'z' becomes 'A'..'Z' and vice
 * versa. Every other character is returned unchanged. This is <b>not
 * internationalized</b>, but it is fast.
 *
 * @param c the character to convert
 * @return c with its ASCII case flipped, or c itself if it is no ASCII letter
 */
public static char toOtherCase(char c) {
    final int caseDistance = 'a' - 'A';
    if (c >= 'A' && c <= 'Z') {
        return (char) (c + caseDistance);
    }
    if (c >= 'a' && c <= 'z') {
        return (char) (c - caseDistance);
    }
    return c;
}
/**
 * Convenience overload of split(String, String) for a single delimiter
 * character.
 *
 * @param s the string to split
 * @param delimiter the delimiter character
 * @return the tokens of s
 */
public static String[] split(String s, char delimiter) {
    final String delimiters = String.valueOf(delimiter);
    return split(s, delimiters);
}
/**
 * Splits <code>s</code> at every character contained in
 * <code>delimiters</code>. The delimiters themselves are not returned and
 * runs of consecutive delimiters are treated as one, so the result never
 * contains empty strings. Examples:
 *
 * <pre>
 * split("a//b/ c /","/")=={"a","b"," c "}
 * split("a b", "/")=={"a b"}.
 * split("///", "/")=={}.
 * </pre>
 *
 * <b>Note:</b> whitespace is preserved if it is not part of the delimiters;
 * tokens are not trimmed.
 *
 * @param s the string to split
 * @param delimiters the set of delimiter characters
 * @return the tokens of s in order of appearance
 */
public static String[] split(String s, String delimiters) {
    final List<String> pieces = new ArrayList<String>();
    for (StringTokenizer st = new StringTokenizer(s, delimiters); st.hasMoreTokens();) {
        pieces.add(st.nextToken());
    }
    final String[] result = new String[pieces.size()];
    return pieces.toArray(result);
}
/**
 * Convenience overload of splitNoCoalesce(String, String) for a single
 * delimiter character.
 *
 * @param s the string to split
 * @param delimiter the delimiter character
 * @return the tokens of s, including empty ones
 */
public static String[] splitNoCoalesce(String s, char delimiter) {
    final String delimiters = String.valueOf(delimiter);
    return splitNoCoalesce(s, delimiters);
}
/**
 * Similar to split(s, delimiters) except that consecutive delimiters are
 * not coalesced, so the returned array may contain empty strings. If s
 * starts (ends) with a delimiter, the returned array starts (ends) with an
 * empty string. A non-empty s containing N delimiters yields N+1 strings;
 * the empty string yields an empty array. Examples:
 *
 * <pre>
 * splitNoCoalesce("a//b/ c /","/")=={"a","","b"," c ", ""}
 * splitNoCoalesce("a b", "/")=={"a b"}.
 * splitNoCoalesce("///", "/")=={"","","",""}.
 * splitNoCoalesce("", "/")=={}.
 * </pre>
 *
 * @param s the string to split
 * @param delimiters the set of delimiter characters
 * @return for non-empty s an array A s.t. s.equals(A[0]+d0+A[1]+d1+...+A[N]),
 *         where every dI is a single character of delimiters and no A[i]
 *         contains a delimiter character
 */
public static String[] splitNoCoalesce(String s, String delimiters) {
    final List<String> pieces = new ArrayList<String>();
    // Starts out true so that a leading delimiter produces an empty string.
    boolean previousWasDelimiter = true;
    // The tokenizer is asked to hand back the delimiters as tokens too.
    for (StringTokenizer st = new StringTokenizer(s, delimiters, true); st.hasMoreTokens();) {
        final String current = st.nextToken();
        final boolean isDelimiter = current.length() == 1 && delimiters.indexOf(current) >= 0;
        if (!isDelimiter) {
            pieces.add(current);
        } else if (previousWasDelimiter) {
            // Two delimiters in a row enclose an empty token.
            pieces.add("");
        }
        previousWasDelimiter = isDelimiter;
    }
    // A trailing delimiter is followed by an empty token, unless s was empty.
    if (previousWasDelimiter && !pieces.isEmpty()) {
        pieces.add("");
    }
    return pieces.toArray(new String[pieces.size()]);
}
/**
 * Compares two strings with the class-wide collator, which is configured
 * for full decomposition and primary strength. The comparison therefore
 * ignores case as well as differences like FULLWIDTH vs HALFWIDTH.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return a negative value, zero or a positive value as s1 sorts before,
 *         equal to or after s2
 */
public static int compareFullPrimary(String s1, String s2) {
    final Collator active = COLLATOR;
    return active.compare(s1, s2);
}
/**
 * Returns true iff <code>s</code> starts with <code>prefix</code>,
 * ignoring case. Characters are compared one by one: first as they are,
 * then upper-cased, then lower-cased again; no strings are allocated.
 *
 * @param s the string to inspect
 * @param prefix the expected prefix
 * @return true iff s starts with prefix when case is ignored
 */
public static boolean startsWithIgnoreCase(String s, String prefix) {
    final int prefixLength = prefix.length();
    if (prefixLength > s.length()) {
        return false;
    }
    for (int idx = 0; idx < prefixLength; idx++) {
        final char actual = s.charAt(idx);
        final char expected = prefix.charAt(idx);
        if (actual == expected) {
            continue;
        }
        final char actualUpper = Character.toUpperCase(actual);
        final char expectedUpper = Character.toUpperCase(expected);
        if (actualUpper == expectedUpper) {
            continue;
        }
        // Some characters only agree after folding back to lower case.
        if (Character.toLowerCase(actualUpper) != Character.toLowerCase(expectedUpper)) {
            return false;
        }
    }
    return true;
}
/**
 * Replaces all non-overlapping occurrences of old_str in str with new_str,
 * scanning from left to right. After a match the search resumes behind the
 * matched text, so <code>replace("aaa", "aa", "b")</code> yields
 * <code>"ba"</code>.
 *
 * @param str the String to modify
 * @param old_str the String to be replaced; if it is empty, str is returned
 *        unchanged
 * @param new_str the String to replace old_str with
 *
 * @return the modified str, or str itself if nothing was replaced
 */
public static String replace(String str, String old_str, String new_str) {
    final int oldLength = old_str.length();
    // An empty search string matches at every index and would never let the
    // scan advance, so there is nothing sensible to replace.
    if (oldLength == 0) {
        return str;
    }
    int match = str.indexOf(old_str);
    if (match < 0) {
        return str;
    }
    final StringBuilder buf = new StringBuilder(str.length());
    int copiedUpTo = 0;
    while (match >= 0) {
        buf.append(str.substring(copiedUpTo, match));
        buf.append(new_str);
        copiedUpTo = match + oldLength;
        // Resume behind the match; resuming at match + 1 would find
        // occurrences overlapping the text that was just replaced.
        match = str.indexOf(old_str, copiedUpTo);
    }
    buf.append(str.substring(copiedUpTo));
    return buf.toString();
}
/**
 * Shortens a string to at most <code>maxLen</code> characters.
 *
 * @param string the string to shorten
 * @param maxLen the maximum number of characters to keep
 * @return string itself if it is short enough, otherwise its first maxLen
 *         characters
 */
public static String truncate(final String string, final int maxLen) {
    return string.length() > maxLen ? string.substring(0, maxLen) : string;
}
/**
 * Finds the first occurrence of a substring within another string,
 * ignoring case. Both strings are converted to lower case with the default
 * <code>Locale</code> before searching, which makes this method expensive.
 *
 * @param str the string in which to search for the <tt>substring</tt>
 *        argument
 * @param substring the substring to search for in <tt>str</tt>
 * @return the index of the first character of the first occurrence of
 *         <tt>substring</tt> in <tt>str</tt>, or -1 if it does not occur
 */
public static int indexOfIgnoreCase(String str, String substring) {
    final Locale defaultLocale = Locale.getDefault();
    return indexOfIgnoreCase(str, substring, defaultLocale);
}
/**
 * Finds the first occurrence of a substring within another string,
 * ignoring case. Both strings are converted to lower case before
 * searching, which makes this method expensive. The returned index refers
 * to the lower-cased copy of <code>str</code>.
 *
 * @param str the string in which to search for the <tt>substring</tt>
 *        argument
 * @param substring the substring to search for in <tt>str</tt>
 * @param locale the <code>Locale</code> to use when converting the case of
 *        <code>str</code> and <code>substring</code>. This is necessary
 *        because case conversion is <code>Locale</code> specific.
 * @return the index of the first character of the first occurrence of
 *         <tt>substring</tt> in <tt>str</tt>, or -1 if it does not occur
 */
public static int indexOfIgnoreCase(String str, String substring, Locale locale) {
    final String haystack = str.toLowerCase(locale);
    final String needle = substring.toLowerCase(locale);
    return haystack.indexOf(needle);
}
/**
 * Decodes the whole byte array into a String using the ISO-8859-1
 * encoding.
 *
 * @param bytes the bytes to decode
 * @return the decoded string
 */
public static String getASCIIString(byte[] bytes) {
    final String encoding = "ISO-8859-1";
    return getEncodedString(bytes, encoding);
}
/**
 * Decodes a section of a byte array into a String using the ISO-8859-1
 * encoding.
 *
 * @param bytes the array holding the bytes to decode
 * @param offset index of the first byte to decode
 * @param length number of bytes to decode
 * @return the decoded string
 */
public static String getASCIIString(byte[] bytes, int offset, int length) {
    final Charset latin1 = Charset.forName("ISO-8859-1");
    return new String(bytes, offset, length, latin1);
}
/**
 * Decodes the whole byte array into a String using the UTF-8 encoding.
 *
 * @param bytes the bytes to decode
 * @return the decoded string
 */
public static String getUTF8String(byte[] bytes) {
    final String encoding = "UTF-8";
    return getEncodedString(bytes, encoding);
}
/**
 * Decodes a section of a byte array into a String using the UTF-8
 * encoding.
 *
 * @param bytes the array holding the bytes to decode
 * @param offset index of the first byte to decode
 * @param length number of bytes to decode
 * @return the decoded string
 */
public static String getUTF8String(byte[] bytes, int offset, int length) {
    final Charset utf8 = Charset.forName("UTF-8");
    return new String(bytes, offset, length, utf8);
}
/**
 * Decodes bytes with a named encoding, turning the checked
 * UnsupportedEncodingException into a RuntimeException.
 *
 * @param bytes the bytes to decode
 * @param encoding the name of the encoding to use
 * @return the decoded string
 * @throws RuntimeException wrapping the UnsupportedEncodingException if the
 *         encoding is not available
 */
private static String getEncodedString(byte[] bytes, String encoding) {
    String decoded;
    try {
        decoded = new String(bytes, encoding);
    } catch (UnsupportedEncodingException impossible) {
        throw new RuntimeException(impossible);
    }
    return decoded;
}
/**
 * Joins the string representations of the array elements, putting the
 * delimiter between neighbouring elements. Examples:
 *
 * <pre>
 * explode({ "a", "b" }, " ") == "a b"
 * explode({ "a", "b" }, "") == "ab"
 * </pre>
 *
 * @param array the elements to join
 * @param delimeter the text to put between two elements
 * @return the joined string, "" for an empty array
 */
public static String explode(Object[] array, String delimeter) {
    final StringBuilder joined = new StringBuilder();
    for (int idx = 0; idx < array.length; idx++) {
        if (idx > 0) {
            joined.append(delimeter);
        }
        joined.append(array[idx]);
    }
    return joined.toString();
}
/**
 * Concatenates/joins the elements of <code>iterable</code> together,
 * separated by <code>delimiter</code>, without limiting the number of
 * elements or their length.
 *
 * <pre>
 * explode({ "a", "b" }, " ") == "a b"
 * explode({ "a", "b" }, "") == "ab"
 * </pre>
 *
 * @param iterable the items to join
 * @param delimiter the sequence to put between elements
 * @return "" if iterable doesn't have elements
 */
public static <T> String explode(Iterable<T> iterable, String delimiter) {
    final int unlimited = Integer.MAX_VALUE;
    return explode(iterable, delimiter, unlimited, unlimited, "");
}
/**
 * Concatenates/joins the elements of <code>iterable</code> together,
 * separated by <code>delimiter</code>, limiting both the number of elements
 * and the length of each element.
 * <p>
 * An element longer than <code>maxCols</code> is cut to its first
 * <code>maxCols</code> characters and followed by "..."; an element that
 * fits is appended unchanged. If there are more than <code>maxRows</code>
 * elements, <code>moreRowsMsg</code> is appended (after a delimiter) in
 * place of the remaining ones.
 *
 * @param <T> the element type
 * @param iterable the list of items to join
 * @param delimiter the sequence to put between elements
 * @param maxRows the maximum number of elements to explode
 * @param maxCols the maximum number of characters in each element
 * @param moreRowsMsg the message to display if elements have been skipped
 * @return the joined string, "" if iterable doesn't have elements
 */
public static <T> String explode(Iterable<T> iterable, String delimiter, int maxRows, int maxCols, String moreRowsMsg) {
    final Iterator<T> iterator = iterable.iterator();
    final StringBuilder builder = new StringBuilder();
    int rowCount = 0;
    while (iterator.hasNext()) {
        if (rowCount != 0) {
            builder.append(delimiter);
        }
        if (++rowCount > maxRows) {
            builder.append(moreRowsMsg);
            break;
        }
        final String nextLine = String.valueOf(iterator.next());
        // Only mark an element with "..." when characters were really cut
        // off; one of exactly maxCols characters is complete.
        if (nextLine.length() > maxCols) {
            builder.append(nextLine.substring(0, maxCols));
            builder.append("...");
        } else {
            builder.append(nextLine);
        }
    }
    return builder.toString();
}
/**
 * Repeats the string representation of <code>object</code>
 * <code>times</code> times, separating the repetitions with
 * <code>delimiter</code>.
 *
 * @param object the object to repeat
 * @param delimiter the text to put between two repetitions
 * @param times the number of repetitions, at least 1
 * @return the repeated string
 * @throws AssertionError when times is smaller than 1 and assertions are
 *         enabled
 */
public static <T> String explode(T object, String delimiter, int times) {
    assert times >= 1;
    if (times == 1) {
        return String.valueOf(object);
    }
    final StringBuilder repeated = new StringBuilder().append(object);
    int written = 1;
    while (written < times) {
        repeated.append(delimiter).append(object);
        written++;
    }
    return repeated.toString();
}
/**
 * A wrapped version of {@link String#getBytes(String)} for UTF-8 that
 * changes the unlikely encoding exception into a runtime exception.
 *
 * @param string the string to encode, may be null
 * @return the UTF-8 bytes of string, or an empty array if string is null
 */
public static byte[] toUTF8Bytes(String string) {
    if (string != null) {
        try {
            return string.getBytes("UTF-8");
        } catch (UnsupportedEncodingException ex) {
            throw new RuntimeException("UTF-8 not supported?", ex);
        }
    }
    return new byte[0];
}
/**
 * A wrapped version of {@link String#getBytes(String)} for US-ASCII that
 * changes the unlikely encoding exception into a runtime exception.
 *
 * @param string the string to encode, may be null
 * @return the US-ASCII bytes of string, or an empty array if string is null
 */
public static byte[] toAsciiBytes(String string) {
    if (string != null) {
        try {
            return string.getBytes("US-ASCII");
        } catch (UnsupportedEncodingException ex) {
            throw new RuntimeException("US-ASCII not supported?", ex);
        }
    }
    return new byte[0];
}
/**
 * A wrapped version of {@link String#String(byte[], String)} for UTF-8
 * that changes the unlikely encoding exception into a runtime exception.
 *
 * @param bytes the bytes to decode, may be null
 * @return the decoded string, or null if bytes is null
 */
public static String toUTF8String(byte[] bytes) {
    if (bytes != null) {
        try {
            return new String(bytes, "UTF-8");
        } catch (UnsupportedEncodingException ex) {
            throw new RuntimeException("UTF-8 not supported?", ex);
        }
    }
    return null;
}
/**
* Per-thread identity map of the objects that toStringBlackAndWhite is
* currently rendering on this thread; it is consulted to detect recursion
* into an object that is already being rendered. Holds null while no such
* call is in progress on the thread.
*/
private static ThreadLocal<IdentityHashMap<Object, Object>> threadLocal = new ThreadLocal<IdentityHashMap<Object, Object>>();
/**
 * Creates a string representation of the object <code>thiz</code> from
 * its declared, non-static fields.
 * <p>
 * Can optionally be given a whitelist: if it is non-empty, only fields
 * whose current value is contained in the whitelist become part of the
 * string output.
 * <p>
 * Note: Should synchronize calling method if the fields of the instance can
 * be modified by other threads.
 * <p>
 * Note: Creates a temporary copy of arrays of primitive elements.
 * <p>
 * Calls {@link Object#toString()} on fields.
 *
 * @param thiz the object to describe
 * @param whitelist the field values to include; empty means all
 * @return the string representation
 */
public static String toString(Object thiz, Object... whitelist) {
    final List<Object> included = Arrays.asList(whitelist);
    final List<Object> excluded = Collections.emptyList();
    return toStringBlackAndWhite(thiz, included, excluded);
}
/**
 * Creates a string representation of the object <code>thiz</code> from
 * its declared, non-static fields.
 * <p>
 * Can optionally be given a blacklist: fields whose current value is
 * contained in the blacklist are left out of the string output.
 * <p>
 * Note: Should synchronize calling method if the fields of the instance can
 * be modified by other threads.
 * <p>
 * Note: Creates a temporary copy of arrays of primitive elements.
 * <p>
 * Calls {@link Object#toString()} on fields.
 *
 * @param thiz the object to describe
 * @param blacklist the field values to leave out
 * @return the string representation
 */
public static String toStringBlacklist(Object thiz, Object... blacklist) {
    final List<Object> included = Collections.emptyList();
    final List<Object> excluded = Arrays.asList(blacklist);
    return toStringBlackAndWhite(thiz, included, excluded);
}
/**
 * Creates a string representation of the object <code>thiz</code> of the
 * form <code>SimpleClassName {field=value, ...}</code>, built from the
 * non-static fields declared directly on the class of <code>thiz</code>.
 * <p>
 * Filtering works on field <em>values</em>: a field is left out when its
 * current value is contained in <code>blacklist</code>, or when
 * <code>whitelist</code> is non-empty and does not contain the value.
 * <p>
 * If rendering a field value leads back to an object that is still being
 * rendered on the current thread, the nested call returns "circular
 * structure". An object that is merely referenced more than once is
 * rendered normally each time.
 * <p>
 * Note: Should synchronize calling method if the fields of the instance can
 * be modified by other threads.
 * <p>
 * Note: Creates a temporary copy of arrays of primitive elements.
 * <p>
 * Calls {@link Object#toString()} on fields.
 *
 * @param thiz the object to describe
 * @param whitelist the field values to include; empty means all
 * @param blacklist the field values to leave out
 * @return the string representation
 */
private static String toStringBlackAndWhite(Object thiz,
        Collection<? extends Object> whitelist, Collection<? extends Object> blacklist) {
    IdentityHashMap<Object, Object> handledObjects = threadLocal.get();
    // The outermost call on a thread creates the map and discards it again.
    final boolean outermostCall = handledObjects == null;
    if (outermostCall) {
        handledObjects = new IdentityHashMap<Object, Object>();
        threadLocal.set(handledObjects);
    }
    if (handledObjects.containsKey(thiz)) {
        // thiz is still being rendered further up the call stack.
        return "circular structure";
    }
    handledObjects.put(thiz, thiz);
    try {
        Map<String, String> fields = new LinkedHashMap<String, String>();
        for (Field field : thiz.getClass().getDeclaredFields()) {
            if (Modifier.isStatic(field.getModifiers())) {
                continue;
            }
            try {
                Object value = readFieldValue(field, thiz);
                if (!blacklist.contains(value)
                        && (whitelist.isEmpty() || whitelist.contains(value))) {
                    fields.put(field.getName(), describeFieldValue(value));
                }
            } catch (IllegalArgumentException e) {
                // Not expected: the field was obtained from thiz's own class.
                e.printStackTrace();
            } catch (IllegalAccessException e) {
                // Not expected: the field has been made accessible.
                e.printStackTrace();
            }
        }
        return thiz.getClass().getSimpleName() + " " + fields.toString();
    } finally {
        // Rendering of thiz is finished; forgetting it here keeps a later,
        // non-circular reference to the same object from being misreported.
        handledObjects.remove(thiz);
        if (outermostCall) {
            threadLocal.remove();
        }
    }
}

/** Reads a field reflectively and restores its accessibility flag, even on failure. */
private static Object readFieldValue(Field field, Object owner) throws IllegalAccessException {
    boolean accessible = field.isAccessible();
    field.setAccessible(true);
    try {
        return field.get(owner);
    } finally {
        field.setAccessible(accessible);
    }
}

/** Renders a field value; arrays are rendered element by element like a list. */
private static String describeFieldValue(Object value) {
    if (value == null) {
        return String.valueOf(value);
    }
    Class<?> clazz = value.getClass();
    if (!clazz.isArray()) {
        return String.valueOf(value);
    }
    if (!clazz.getComponentType().isPrimitive()) {
        return String.valueOf(Arrays.asList((Object[]) value));
    }
    // Primitive arrays cannot be viewed as Object[]; box the elements.
    int length = Array.getLength(value);
    List<Object> copy = new ArrayList<Object>(length);
    for (int i = 0; i < length; i++) {
        copy.add(Array.get(value, i));
    }
    return String.valueOf(copy);
}
/**
 * Returns true if the given string is null or its trimmed representation
 * is empty, i.e. it consists only of characters that
 * {@link String#trim()} removes (code points up to and including the space
 * character). No trimmed copy is allocated.
 *
 * @param s the string to test, may be null
 * @return true if s is null, empty or whitespace-only in the sense of trim()
 */
public static boolean isEmpty(String s) {
    if (s == null) {
        return true;
    }
    for (int i = 0, length = s.length(); i < length; i++) {
        // trim() strips every character <= ' ', not just the space itself.
        if (s.charAt(i) > ' ') {
            return false;
        }
    }
    return true;
}
/**
 * Tells whether every character of <code>sequence</code> is a 7-bit
 * US-ASCII character (U+0000..U+007F), which is the range that
 * toAsciiBytes can encode without loss.
 *
 * @param sequence the characters to test
 * @return true if <code>sequence</code> can be encoded as ASCII only; true
 *         for an empty sequence
 */
public static boolean isAsciiOnly(CharSequence sequence) {
    final int length = sequence.length();
    for (int i = 0; i < length; i++) {
        // Anything above 0x7F is outside US-ASCII, including the
        // ISO-8859-1 range U+0080..U+00FF.
        if (sequence.charAt(i) > 0x7F) {
            return false;
        }
    }
    return true;
}
/**
 * Counts how often a character occurs in a character sequence.
 *
 * @param sequence the characters to scan
 * @param c the character to count
 * @return the number of occurrences of <code>c</code> in
 *         <code>sequence</code>
 */
public static int countOccurrences(CharSequence sequence, char c) {
    int total = 0;
    int idx = 0;
    while (idx < sequence.length()) {
        if (c == sequence.charAt(idx)) {
            total++;
        }
        idx++;
    }
    return total;
}
/**
 * Returns the lower-case hexadecimal representation of the given bytes,
 * two digits per byte, high nibble first.
 *
 * @param block the bytes to convert
 * @return a string of <code>2 * block.length</code> hex digits
 */
public static String toHexString(byte[] block) {
    final String digits = "0123456789abcdef";
    final char[] out = new char[block.length * 2];
    int pos = 0;
    for (int idx = 0; idx < block.length; idx++) {
        // Mask to 0..255 so that negative bytes index the table correctly.
        final int unsigned = block[idx] & 0xff;
        out[pos++] = digits.charAt(unsigned >>> 4);
        out[pos++] = digits.charAt(unsigned & 0x0f);
    }
    return new String(out);
}
/**
 * Returns the bytes encoded by the given hex string, reading two digits
 * per byte. The string is assumed to be a proper hex string; a trailing
 * single digit of an odd-length string is not converted.
 *
 * @param hexString the hex digits to decode
 * @return an array of <code>hexString.length() / 2</code> bytes
 */
public static byte[] fromHexString(String hexString) {
    final int byteCount = hexString.length() / 2;
    final byte[] decoded = new byte[byteCount];
    int pos = 0;
    for (int idx = 0; idx < byteCount; idx++) {
        final String pair = hexString.substring(pos, pos + 2);
        decoded[idx] = (byte) Integer.parseInt(pair, 16);
        pos += 2;
    }
    return decoded;
}
}