// StringUtils.java example
//
// Explorer
// LimeWire-Pirate-Edition-master
package org.limewire.util;

import java.io.UnsupportedEncodingException;
import java.lang.reflect.Array;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.text.Collator;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.IdentityHashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.StringTokenizer;

/**
 * Provides static methods to split, check for substrings, change case and
 * compare strings, along with additional string utility methods.
 * <p>
 * Arguments are generally not checked for <code>null</code>; the few methods
 * that accept <code>null</code> say so in their documentation.
 */
public class StringUtils {

    /** Charset behind the UTF-8 conversion helpers. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * Charset behind the helpers that are named "ASCII" but actually operate
     * on ISO-8859-1 (Latin-1), a superset of ASCII.
     */
    private static final Charset ISO_8859_1 = Charset.forName("ISO-8859-1");

    /** Charset used by {@link #toAsciiBytes(String)}. */
    private static final Charset US_ASCII = Charset.forName("US-ASCII");

    /** Lower-case hexadecimal digits, indexed by nibble value. */
    private static final char[] HEX_DIGITS = "0123456789abcdef".toCharArray();

    /** Distance between an upper-case ASCII letter and its lower-case form. */
    private static final int ASCII_CASE_SHIFT = 'a' - 'A';

    /**
     * Collator used for internationalization. The reference is swapped as a
     * whole by {@link #setLocale(Locale)}, hence volatile.
     */
    private static volatile Collator COLLATOR = createCollator(Locale.getDefault());

    /**
     * Per-thread encoder used by {@link #isAsciiOnly(CharSequence)}. A
     * CharsetEncoder is stateful and not safe for concurrent use, so every
     * thread gets its own instance.
     */
    private static final ThreadLocal<CharsetEncoder> ASCII_ENCODER = new ThreadLocal<CharsetEncoder>() {
        @Override
        protected CharsetEncoder initialValue() {
            return ISO_8859_1.newEncoder();
        }
    };

    /**
     * Objects whose string representation is currently being built on this
     * thread by {@link #toStringBlackAndWhite(Object, Collection, Collection)}.
     * Only set while such a call is in progress.
     */
    private static final ThreadLocal<IdentityHashMap<Object, Object>> TO_STRING_IN_PROGRESS =
            new ThreadLocal<IdentityHashMap<Object, Object>>();

    /**
     * Builds a collator for <code>locale</code> that uses full decomposition
     * and only looks at primary differences.
     */
    private static Collator createCollator(Locale locale) {
        Collator collator = Collator.getInstance(locale);
        collator.setDecomposition(Collator.FULL_DECOMPOSITION);
        collator.setStrength(Collator.PRIMARY);
        return collator;
    }

    /** Updates the locale that string-matching will use. */
    public static void setLocale(Locale locale) {
        // Configure the new collator completely before publishing it.
        COLLATOR = createCollator(locale);
    }

    /**
     * Returns true if input contains the given pattern, which may contain the
     * wildcard character '*'. The pattern is cut into tokens at every ' ',
     * '+' and '*'. Every token must occur in input; a token that follows a
     * '*' must occur after the match of the token before it, while a token
     * that follows ' ' or '+' may occur anywhere. Examples:
     * 
     * <pre>
     *  StringUtils.contains("", "") ==> true
     *  StringUtils.contains("abc", "") ==> true
     *  StringUtils.contains("abc", "b") ==> true
     *  StringUtils.contains("abc", "d") ==> false
     *  StringUtils.contains("abcd", "a*d") ==> true
     *  StringUtils.contains("abcd", "*a**d*") ==> true
     *  StringUtils.contains("abcd", "d*a") ==> false
     *  StringUtils.contains("abcd", "d a") ==> true
     * </pre>
     * 
     * @param input the string to search in
     * @param pattern the tokens to look for
     * @return true if every token of pattern was found
     */
    public static boolean contains(String input, String pattern) {
        return contains(input, pattern, false);
    }

    /**
     * Exactly like contains(input, pattern), but the case of ASCII letters is
     * ignored if ignoreCase==true (see {@link #toOtherCase(char)}).
     */
    public static boolean contains(String input, String pattern, boolean ignoreCase) {
        // More efficient algorithms are possible, e.g. a modified version of
        // the Rabin-Karp algorithm, but they are unlikely to be faster with
        // such short strings. The important thing is to avoid needless
        // allocations.

        final int patternLength = pattern.length();
        // Where to resume searching in input after the last wildcard.
        int searchFrom = 0;
        int tokenStart = 0;
        while (tokenStart < patternLength) {
            // 1. Find the end of the token and the separator that ends it.
            // Running off the end of the pattern counts as a ' '.
            char separator = ' ';
            int tokenEnd = tokenStart;
            while (tokenEnd < patternLength) {
                char candidate = pattern.charAt(tokenEnd);
                if (candidate == ' ' || candidate == '+' || candidate == '*') {
                    separator = candidate;
                    break;
                }
                tokenEnd++;
            }

            // 2. Match the token against input[searchFrom...].
            int matchIndex = subset(pattern, tokenStart, tokenEnd, input, searchFrom, ignoreCase);
            if (matchIndex < 0) {
                return false;
            }

            // 3. After '*' continue just past the match; after ' ' or '+'
            // the next token may match anywhere.
            if (separator == '*') {
                searchFrom = matchIndex + (tokenEnd - tokenStart);
            } else {
                searchFrom = 0;
            }
            tokenStart = tokenEnd + 1;
        }
        return true;
    }

    /**
     * Returns true if <code>input</code> contains at least one of the given
     * characters.
     * 
     * @param input the string to inspect
     * @param chars the characters to look for
     * @return true if any element of chars occurs in input
     */
    public static boolean containsCharacters(String input, char[] chars) {
        // Sort a copy of the input once so that each lookup is a binary search.
        char[] sortedInput = input.toCharArray();
        Arrays.sort(sortedInput);
        for (char wanted : chars) {
            if (Arrays.binarySearch(sortedInput, wanted) >= 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Equivalent to
     * <code>big.indexOf(little.substring(littleStart, littleStop), bigStart)</code>
     * but without an allocation, and optionally ignoring ASCII case.
     * 
     * @return the smallest i&gt;=bigStart s.t.
     *         little[littleStart...littleStop-1] is a prefix of big[i...] or
     *         -1 if no such i exists. If ignoreCase==true, the case of ASCII
     *         letters doesn't matter when comparing characters.
     */
    private static int subset(String little, int littleStart, int littleStop, String big,
            int bigStart, boolean ignoreCase) {
        final int littleLength = littleStop - littleStart;
        // Exclusive upper bound for start positions that leave enough room.
        final int limit = big.length() - littleLength + 1;
        outerLoop: for (int shift = bigStart; shift < limit; shift++) {
            for (int j = 0; j < littleLength; j++) {
                char bigChar = big.charAt(shift + j);
                char littleChar = little.charAt(littleStart + j);
                if (bigChar != littleChar
                        && !(ignoreCase && bigChar == toOtherCase(littleChar))) {
                    continue outerLoop;
                }
            }
            return shift;
        }
        return -1;
    }

    /**
     * If c is a lower case ASCII character, returns Character.toUpperCase(c).
     * Else if c is an upper case ASCII character, returns
     * Character.toLowerCase(c), Else returns c. Note that this is <b>not
     * internationalized</b>; but it is fast.
     */
    public static char toOtherCase(char c) {
        if (c >= 'A' && c <= 'Z') {
            return (char) (c + ASCII_CASE_SHIFT);
        }
        if (c >= 'a' && c <= 'z') {
            return (char) (c - ASCII_CASE_SHIFT);
        }
        return c; // non alphabetic
    }

    /**
     * Exactly like split(s, Character.toString(delimiter)).
     */
    public static String[] split(String s, char delimiter) {
        return split(s, Character.toString(delimiter));
    }

    /**
     * Returns the tokens of s delimited by the given delimiter, without
     * returning the delimiter. Repeated sequences of delimiters are treated as
     * one. Examples:
     * 
     * <pre>
     *    split("a//b/ c /","/")=={"a","b"," c "}
     *    split("a b", "/")=={"a b"}.
     *    split("///", "/")=={}.
     * </pre>
     * 
     * <b>Note:</b> whitespace is preserved if it is not part of the delimiter.
     * <p>
     * An older version of this trim()'ed each token of whitespace.
     * 
     * @param s the string to split
     * @param delimiters the characters that separate tokens; each character
     *        is a delimiter on its own
     * @return the non-empty tokens of s, in order
     */
    public static String[] split(String s, String delimiters) {
        StringTokenizer tokenizer = new StringTokenizer(s, delimiters);
        List<String> tokens = new ArrayList<String>();
        while (tokenizer.hasMoreTokens()) {
            tokens.add(tokenizer.nextToken());
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * Exactly like splitNoCoalesce(s, Character.toString(delimiter)).
     */
    public static String[] splitNoCoalesce(String s, char delimiter) {
        return splitNoCoalesce(s, Character.toString(delimiter));
    }

    /**
     * Similar to split(s, delimiters) except that subsequent delimiters are not
     * coalesced, so the returned array may contain empty strings. If s starts
     * (ends) with a delimiter, the returned array starts (ends) with an empty
     * string. If a non-empty s contains N delimiters, N+1 strings are
     * returned; the empty string yields an empty array. Examples:
     * 
     * <pre>
     *    splitNoCoalesce("a//b/ c /","/")=={"a","","b"," c ", ""}
     *    splitNoCoalesce("a b", "/")=={"a b"}.
     *    splitNoCoalesce("///", "/")=={"","","",""}.
     *    splitNoCoalesce("", "/")=={}.
     * </pre>
     * 
     * @return an array A s.t. s.equals(A[0]+d0+A[1]+d1+...+A[N]), where for all
     *         dI, dI.size()==1 &amp;&amp; delimiters.indexOf(dI)&gt;=0; and for
     *         all c in A[i], delimiters.indexOf(c)&lt;0
     */
    public static String[] splitNoCoalesce(String s, String delimiters) {
        // Ask the tokenizer to hand back the delimiters as tokens, too.
        StringTokenizer tokenizer = new StringTokenizer(s, delimiters, true);
        List<String> tokens = new ArrayList<String>();
        // True if the last token was a delimiter. Starts out true to force an
        // empty string if s starts with a delimiter.
        boolean lastWasDelimiter = true;
        while (tokenizer.hasMoreTokens()) {
            String token = tokenizer.nextToken();
            boolean isDelimiter = token.length() == 1 && delimiters.indexOf(token) >= 0;
            if (!isDelimiter) {
                tokens.add(token);
            } else if (lastWasDelimiter) {
                // Two delimiters in a row enclose an empty token.
                tokens.add("");
            }
            lastWasDelimiter = isDelimiter;
        }
        // Add trailing empty string UNLESS s is the empty string.
        if (lastWasDelimiter && !tokens.isEmpty()) {
            tokens.add("");
        }
        return tokens.toArray(new String[tokens.size()]);
    }

    /**
     * This method will compare the two strings using full decomposition and
     * only look at primary differences. The comparison will ignore case as
     * well as differences like FULLWIDTH vs HALFWIDTH.
     * 
     * @return a negative value, zero or a positive value as s1 sorts before,
     *         equal to or after s2 under the current collator
     */
    public static int compareFullPrimary(String s1, String s2) {
        return COLLATOR.compare(s1, s2);
    }

    /**
     * Returns true iff <code>s</code> starts with prefix, ignoring case.
     * Characters are compared one by one: as is, then upper-cased, then
     * lower-cased.
     * 
     * @return true iff s starts with prefix when case is ignored
     */
    public static boolean startsWithIgnoreCase(String s, String prefix) {
        final int prefixLength = prefix.length();
        if (s.length() < prefixLength) {
            return false;
        }
        for (int i = 0; i < prefixLength; i++) {
            char sc = s.charAt(i);
            char pc = prefix.charAt(i);
            if (sc == pc) {
                continue;
            }
            char upperS = Character.toUpperCase(sc);
            char upperP = Character.toUpperCase(pc);
            if (upperS == upperP) {
                continue;
            }
            // Some characters only compare equal once they are lower-cased.
            if (Character.toLowerCase(upperS) != Character.toLowerCase(upperP)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Replaces all occurrences of old_str in str with new_str. Occurrences
     * are found from left to right and do not overlap, so
     * <code>replace("aaa", "aa", "b")</code> is <code>"ba"</code>.
     * 
     * @param str the String to modify
     * @param old_str the String to be replaced; if it is empty, str is
     *        returned unchanged
     * @param new_str the String to replace old_str with
     * 
     * @return the modified str.
     */
    public static String replace(String str, String old_str, String new_str) {
        final int oldLength = old_str.length();
        // An empty search string matches at every index and would never let
        // the loop below make progress.
        int match = oldLength == 0 ? -1 : str.indexOf(old_str);
        if (match < 0) {
            return str;
        }

        StringBuilder buf = new StringBuilder(str.length());
        int copiedUpTo = 0;
        while (match >= 0) {
            buf.append(str, copiedUpTo, match);
            buf.append(new_str);
            copiedUpTo = match + oldLength;
            // Resume after the replaced text so that matches never overlap.
            match = str.indexOf(old_str, copiedUpTo);
        }
        buf.append(str, copiedUpTo, str.length());
        return buf.toString();
    }

    /**
     * Returns a truncated string, up to the maximum number of characters.
     * 
     * @param string the string to shorten
     * @param maxLen the maximum length of the result
     * @return string itself if it is not longer than maxLen, otherwise its
     *         first maxLen characters
     */
    public static String truncate(final String string, final int maxLen) {
        return string.length() <= maxLen ? string : string.substring(0, maxLen);
    }

    /**
     * Helper method to obtain the starting index of a substring within another
     * string, ignoring their case. This method is expensive because it has to
     * set each character of each string to lower case before doing the
     * comparison. Uses the default <code>Locale</code> for case conversion.
     * 
     * @param str the string in which to search for the <tt>substring</tt>
     *        argument
     * @param substring the substring to search for in <tt>str</tt>
     * @return if the <tt>substring</tt> argument occurs as a substring within
     *         <tt>str</tt>, then the index of the first character of the first
     *         such substring is returned; if it does not occur as a substring,
     *         -1 is returned
     */
    public static int indexOfIgnoreCase(String str, String substring) {
        return indexOfIgnoreCase(str, substring, Locale.getDefault());
    }

    /**
     * Helper method to obtain the starting index of a substring within another
     * string, ignoring their case. This method is expensive because it has to
     * set each character of each string to lower case before doing the
     * comparison. The returned index refers to the lower-cased copy of
     * <tt>str</tt>.
     * 
     * @param str the string in which to search for the <tt>substring</tt>
     *        argument
     * @param substring the substring to search for in <tt>str</tt>
     * @param locale the <code>Locale</code> to use when converting the case of
     *        <code>str</code> and <code>substring</code>. This is necessary
     *        because case conversion is <code>Locale</code> specific.
     * @return if the <tt>substring</tt> argument occurs as a substring within
     *         <tt>str</tt>, then the index of the first character of the first
     *         such substring is returned; if it does not occur as a substring,
     *         -1 is returned
     */
    public static int indexOfIgnoreCase(String str, String substring, Locale locale) {
        String lowerStr = str.toLowerCase(locale);
        String lowerSubstring = substring.toLowerCase(locale);
        return lowerStr.indexOf(lowerSubstring);
    }

    /**
     * Utility wrapper for getting a String object out of byte [] using the
     * ISO-8859-1 encoding, which maps every byte to the character with the
     * same value and therefore agrees with ASCII for bytes 0-127.
     */
    public static String getASCIIString(byte[] bytes) {
        return new String(bytes, ISO_8859_1);
    }

    /**
     * Like {@link #getASCIIString(byte[])}, but only decodes
     * <code>length</code> bytes starting at <code>offset</code>.
     */
    public static String getASCIIString(byte[] bytes, int offset, int length) {
        return new String(bytes, offset, length, ISO_8859_1);
    }

    /**
     * Utility wrapper for getting a String object out of byte [] using the
     * UTF-8 encoding.
     */
    public static String getUTF8String(byte[] bytes) {
        return new String(bytes, UTF_8);
    }

    /**
     * Like {@link #getUTF8String(byte[])}, but only decodes
     * <code>length</code> bytes starting at <code>offset</code>.
     */
    public static String getUTF8String(byte[] bytes, int offset, int length) {
        return new String(bytes, offset, length, UTF_8);
    }

    /**
     * Returns the tokens of array concatenated to a string, delimited by the
     * given delimiter. Examples:
     * 
     * <pre>
     *     explode({ "a", "b" }, " ") == "a b"
     *     explode({ "a", "b" }, "") == "ab"
     * </pre>
     * 
     * @return "" if array is empty
     */
    public static String explode(Object[] array, String delimeter) {
        StringBuilder sb = new StringBuilder();
        for (int i = 0; i < array.length; i++) {
            if (i > 0) {
                sb.append(delimeter);
            }
            sb.append(array[i]);
        }
        return sb.toString();
    }

    /**
     * Concatenates/joins the elements of <code>iterable</code> together,
     * separated by <code>delimiter</code>
     * 
     * <pre>
     *     explode({ "a", "b" }, " ") == "a b"
     *     explode({ "a", "b" }, "") == "ab"
     * </pre>
     * 
     * @return "" if iterable doesn't have elements
     */
    public static <T> String explode(Iterable<T> iterable, String delimiter) {
        return explode(iterable, delimiter, Integer.MAX_VALUE, Integer.MAX_VALUE, "");
    }

    /**
     * Concatenates/joins the elements of <code>iterable</code> together,
     * separated by <code>delimiter</code>, limiting both the number of
     * elements and the length of each element.
     * 
     * @param <T> the element type
     * @param iterable the list of items to join
     * @param delimiter the sequence to put between elements
     * @param maxRows the maximum number of elements to explode
     * @param maxCols the maximum number of characters in each element; longer
     *        elements are cut to this length and followed by "..."
     * @param moreRowsMsg the message to display if elements have been skipped
     * @return the joined string; "" if iterable doesn't have elements
     */
    public static <T> String explode(Iterable<T> iterable, String delimiter, int maxRows, int maxCols, String moreRowsMsg) {
        StringBuilder builder = new StringBuilder();
        int rowCount = 0;
        for (Iterator<T> iterator = iterable.iterator(); iterator.hasNext();) {
            if (rowCount != 0) {
                builder.append(delimiter);
            }
            if (++rowCount > maxRows) {
                // Row budget used up: say so once and drop the rest.
                builder.append(moreRowsMsg);
                break;
            }
            String row = String.valueOf(iterator.next());
            if (row.length() > maxCols) {
                // Only mark rows that really lost characters.
                builder.append(row, 0, maxCols).append("...");
            } else {
                builder.append(row);
            }
        }
        return builder.toString();
    }

    /**
     * Concatenates the string representation of <code>object</code> 
     * <code>times</code> times together, separating it with <code>delimiter</code>.
     * 
     * @throws AssertionError when times is smaller than 1 and assertions are
     *         enabled
     */
    public static <T> String explode(T object, String delimiter, int times) {
        assert times >= 1;
        if (times == 1) {
            return String.valueOf(object);
        }
        StringBuilder builder = new StringBuilder();
        builder.append(object);
        for (int i = 1; i < times; i++) {
            builder.append(delimiter);
            builder.append(object);
        }
        return builder.toString();
    }

    /**
     * Encodes <code>string</code> as UTF-8. Returns empty array if the passed
     * in string is null.
     */
    public static byte[] toUTF8Bytes(String string) {
        return string == null ? new byte[0] : string.getBytes(UTF_8);
    }

    /**
     * Encodes <code>string</code> as US-ASCII; characters outside of ASCII
     * are replaced by the charset's replacement byte. Returns empty array if
     * the passed in string is null.
     */
    public static byte[] toAsciiBytes(String string) {
        return string == null ? new byte[0] : string.getBytes(US_ASCII);
    }

    /**
     * Decodes <code>bytes</code> as UTF-8. Returns null if the passed in array
     * is null.
     */
    public static String toUTF8String(byte[] bytes) {
        return bytes == null ? null : new String(bytes, UTF_8);
    }

    /**
     * Creates a string representation of the object <code>thiz</code> of the
     * form <code>SimpleClassName {field=value, ...}</code>, listing the
     * non-static fields declared by the object's own class.
     * <p>
     * Can optionally be given a whitelist: if it is not empty, only fields
     * whose current <b>value</b> equals one of its elements are part of the
     * string output.
     * <p>
     * Note: Should synchronize calling method if the fields of the instance can
     * be modified by other threads.
     * <p>
     * Note: Creates a temporary copy of arrays of primitive elements.
     * <p>
     * Calls {@link Object#toString()} on fields.
     */
    public static String toString(Object thiz, Object... whitelist) {
        return toStringBlackAndWhite(thiz, Arrays.asList(whitelist), Collections.emptyList());
    }

    /**
     * Creates a string representation of the object <code>thiz</code> of the
     * form <code>SimpleClassName {field=value, ...}</code>, listing the
     * non-static fields declared by the object's own class.
     * <p>
     * Can optionally be given a blacklist: fields whose current <b>value</b>
     * equals one of its elements are not part of the string output.
     * <p>
     * Note: Should synchronize calling method if the fields of the instance can
     * be modified by other threads.
     * <p>
     * Note: Creates a temporary copy of arrays of primitive elements.
     * <p>
     * Calls {@link Object#toString()} on fields.
     */
    public static String toStringBlacklist(Object thiz, Object... blacklist) {
        return toStringBlackAndWhite(thiz, Collections.emptyList(), Arrays.asList(blacklist));
    }

    /**
     * Creates a string representation of the object <code>thiz</code>.
     * <p>
     * A field is listed if it is not static, its value is not contained in
     * <code>blacklist</code> and, unless <code>whitelist</code> is empty, its
     * value is contained in <code>whitelist</code>.
     * <p>
     * If a field's {@link Object#toString()} leads back into this method for
     * an object that is still being processed on the current thread,
     * "circular structure" is returned for it instead of recursing forever.
     * An object that is merely referenced more than once is printed each
     * time.
     * <p>
     * Note: Should synchronize calling method if the fields of the instance can
     * be modified by other threads.
     * <p>
     * Note: Creates a temporary copy of arrays of primitive elements.
     */
    private static String toStringBlackAndWhite(Object thiz,
            Collection<? extends Object> whitelist, Collection<? extends Object> blacklist) {
        IdentityHashMap<Object, Object> inProgress = TO_STRING_IN_PROGRESS.get();
        // The outermost call on a thread owns the map and discards it again.
        final boolean outermostCall = inProgress == null;
        if (outermostCall) {
            inProgress = new IdentityHashMap<Object, Object>();
            TO_STRING_IN_PROGRESS.set(inProgress);
        } else if (inProgress.containsKey(thiz)) {
            return "circular structure";
        }
        inProgress.put(thiz, thiz);
        try {
            Map<String, String> fields = new LinkedHashMap<String, String>();
            for (Field field : thiz.getClass().getDeclaredFields()) {
                if (Modifier.isStatic(field.getModifiers())) {
                    // Static state is not part of the instance; leave it alone.
                    continue;
                }
                try {
                    Object value = readField(field, thiz);
                    boolean wanted = !blacklist.contains(value)
                            && (whitelist.isEmpty() || whitelist.contains(value));
                    if (wanted) {
                        fields.put(field.getName(), describeValue(value));
                    }
                } catch (IllegalArgumentException e) {
                    // No logger is available here; keep the best-effort
                    // behaviour of reporting the problem and skipping the field.
                    e.printStackTrace();
                } catch (IllegalAccessException e) {
                    e.printStackTrace();
                }
            }
            return thiz.getClass().getSimpleName() + " " + fields.toString();
        } finally {
            // Forget the object once it is done so that a second, non-circular
            // reference to it is printed normally.
            inProgress.remove(thiz);
            if (outermostCall) {
                TO_STRING_IN_PROGRESS.remove();
            }
        }
    }

    /**
     * Reads <code>field</code> of <code>owner</code> regardless of its
     * visibility and restores the field's accessibility flag afterwards.
     */
    private static Object readField(Field field, Object owner) throws IllegalAccessException {
        boolean wasAccessible = field.isAccessible();
        field.setAccessible(true);
        try {
            return field.get(owner);
        } finally {
            field.setAccessible(wasAccessible);
        }
    }

    /**
     * Returns the text used for a field value: arrays are rendered like
     * lists, everything else through {@link String#valueOf(Object)}.
     */
    private static String describeValue(Object value) {
        if (value == null || !value.getClass().isArray()) {
            return String.valueOf(value);
        }
        if (!value.getClass().getComponentType().isPrimitive()) {
            return String.valueOf(Arrays.asList((Object[]) value));
        }
        // Primitive arrays cannot be viewed as Object[]; box a copy instead.
        int length = Array.getLength(value);
        List<Object> copy = new ArrayList<Object>(length);
        for (int i = 0; i < length; i++) {
            copy.add(Array.get(value, i));
        }
        return String.valueOf(copy);
    }

    /**
     * Returns true if the given string is null or its trimmed representation
     * is empty, i.e. it consists only of characters that
     * {@link String#trim()} strips (space and everything below it, such as
     * tab and line breaks).
     */
    public static boolean isEmpty(String s) {
        if (s == null) {
            return true;
        }
        for (int i = 0, length = s.length(); i < length; i++) {
            if (s.charAt(i) > ' ') {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns true if <code>sequence</code> can be encoded by this class's
     * "ASCII" encoder.
     * <p>
     * NOTE(review): the encoder is created for ISO-8859-1, so characters up
     * to U+00FF are accepted, not only 7-bit ASCII. Confirm with callers
     * before tightening this to US-ASCII.
     * 
     * @return true if every character of <code>sequence</code> can be encoded
     */
    public static boolean isAsciiOnly(CharSequence sequence) {
        return ASCII_ENCODER.get().canEncode(sequence);
    }

    /**
     * @return the number of occurrences of <code>c</code> in
     *         <code>sequence</code>
     */
    public static int countOccurrences(CharSequence sequence, char c) {
        int count = 0;
        for (int i = 0, length = sequence.length(); i < length; i++) {
            if (sequence.charAt(i) == c) {
                count++;
            }
        }
        return count;
    }

    /**
     * Returns a hexString representation of the given byteArray: two
     * lower-case hexadecimal digits per byte, high nibble first.
     */
    public static String toHexString(byte[] block) {
        char[] hex = new char[block.length * 2];
        int out = 0;
        for (byte b : block) {
            hex[out++] = HEX_DIGITS[(b >> 4) & 0x0f];
            hex[out++] = HEX_DIGITS[b & 0x0f];
        }
        return new String(hex);
    }

    /**
     * Returns a byte array from the given hexString, two digits per byte.
     * Assume string is a proper hexString; if its length is odd, the last
     * character is ignored.
     * 
     * @throws NumberFormatException if a character that is used is not a
     *         hexadecimal digit
     */
    public static byte[] fromHexString(String hexString) {
        byte[] bytes = new byte[hexString.length() / 2];
        for (int i = 0; i < bytes.length; i++) {
            int high = hexDigit(hexString, 2 * i);
            int low = hexDigit(hexString, 2 * i + 1);
            bytes[i] = (byte) ((high << 4) | low);
        }
        return bytes;
    }

    /**
     * Returns the value (0-15) of the hexadecimal digit at <code>index</code>
     * of <code>hexString</code>.
     * 
     * @throws NumberFormatException if the character is not a hex digit
     */
    private static int hexDigit(String hexString, int index) {
        int value = Character.digit(hexString.charAt(index), 16);
        if (value < 0) {
            throw new NumberFormatException("Invalid hex digit at index " + index + " of \""
                    + hexString + "\"");
        }
        return value;
    }
}