StringUtils.java example

Explorer
Pydev-master
- plugins
/******************************************************************************
* Copyright (C) 2012-2013  Fabio Zadrozny and others
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
*     Fabio Zadrozny <fabiofz@gmail.com>    - initial API and implementation
*     Jonah Graham <jonah@kichwacoders.com> - ongoing maintenance
******************************************************************************/
package org.python.pydev.shared_core.string;

import java.io.BufferedReader;
import java.io.Reader;
import java.io.StringReader;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
import java.util.stream.Collectors;

import javax.swing.text.Document;
import javax.swing.text.EditorKit;
import javax.swing.text.html.HTMLEditorKit;

import org.eclipse.core.runtime.Assert;
import org.python.pydev.shared_core.cache.Cache;
import org.python.pydev.shared_core.cache.LRUCache;
import org.python.pydev.shared_core.log.Log;
import org.python.pydev.shared_core.structure.Tuple;

public final class StringUtils {

    /**
     * Constant holding the empty string.
     */
    public static final String EMPTY = "";

    /**
     * Iterator which walks over the lines of a string.
     *
     * Each returned line keeps its terminator (<tt>\r\n</tt>, <tt>\r</tt> or <tt>\n</tt>); a trailing
     * line without a terminator is returned as it is. Removing elements is not supported.
     *
     * @author fabioz
     */
    private static final class IterLines implements Iterator<String> {
        /** Text whose lines are being iterated. */
        private final String text;
        /** Length of {@link #text}. */
        private final int length;
        /** Offset of the first char which was not consumed yet. */
        private int offset;
        /** Whether the look-ahead for the upcoming line was already done. */
        private boolean lookedAhead;
        /** Result of the last look-ahead (only meaningful when {@link #lookedAhead} is set). */
        private boolean available;
        /** Line found by the last look-ahead (null once handed out). */
        private String pending;

        private IterLines(String string) {
            this.text = string;
            this.length = string.length();
        }

        @Override
        public boolean hasNext() {
            if (lookedAhead) {
                return available;
            }
            lookedAhead = true;
            available = advance();
            return available;
        }

        /**
         * Consumes the next line (if any), storing it in {@link #pending}.
         *
         * @return true if a line was found and false if the text was exhausted.
         */
        private boolean advance() {
            pending = null;
            final int begin = offset;

            while (offset < length) {
                final char ch = text.charAt(offset);
                offset++;

                if (ch == '\n') {
                    pending = text.substring(begin, offset);
                    return true;
                }
                if (ch == '\r') {
                    //A \r directly followed by \n is a single terminator.
                    if (offset < length && text.charAt(offset) == '\n') {
                        offset++;
                    }
                    pending = text.substring(begin, offset);
                    return true;
                }
            }

            if (begin == offset) {
                //Nothing left to return.
                return false;
            }
            //Last line (without a terminator): move past the end so that the next call finds nothing.
            pending = text.substring(begin, offset);
            offset++;
            return true;
        }

        @Override
        public String next() {
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            final String line = pending;
            pending = null;
            lookedAhead = false;
            return line;
        }

        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }
    }

    /**
     * Provides a lazy view over the lines of the given string (a new iterator is created each time
     * the returned Iterable is iterated).
     *
     * @param string string to be split.
     * @return an Iterable where each element is a line.
     *
     * @note the new line characters are kept in the returned lines.
     *
     * IMPORTANT: The line returned will be a substring of the initial line, so, it's recommended that a copy
     * is created if it should be kept in memory (otherwise the full initial string will also be kept in memory).
     */
    public static Iterable<String> iterLines(final String string) {
        return () -> new IterLines(string);
    }

    /**
     * Same as Python join: concatenates the entries of the passed array, putting the delimiter between
     * consecutive entries.
     *
     * Note: optimized to have less allocations/method calls
     * (i.e.: not using FastStringBuffer, pre-allocating proper size and doing string.getChars directly).
     *
     * Having a return type != from String (i.e.: char[].class or FastStringBuffer.class) is a bit faster
     * as it won't do an additional array/copy for the final result.
     *
     * @param delimiter the text to put between the entries (may be empty).
     * @param splitted the entries to join.
     * @param returnType null or String.class to get a String, char[].class to get a char array or
     * FastStringBuffer.class to get a FastStringBuffer.
     * @return the joined contents as an instance of the requested type (this also holds when the array
     * has 0 or 1 entries, so the result may always be cast to the requested type).
     * @throws RuntimeException if the return type requested is not one of the supported ones.
     */
    public static Object join(String delimiter, String[] splitted, Class<? extends Object> returnType) {
        final int len = splitted.length;
        if (len < 2) {
            final String single = len == 0 ? "" : splitted[0];
            if (returnType == null || returnType == String.class) {
                //Fast path: the String may be returned directly (no copy needed).
                return single;
            }
            //Honor the requested type: returning a String here would break callers which cast the result.
            return wrapJoinedChars(single.toCharArray(), returnType);
        }

        final int delimiterLen = delimiter.length();
        int totalSize = delimiterLen * (len - 1);
        for (int i = 0; i < len; i++) {
            totalSize += splitted[i].length();
        }

        final char[] buf = new char[totalSize];
        int count = 0;

        //Copy the first item
        String string = splitted[0];
        int strLen = string.length();
        string.getChars(0, strLen, buf, count);
        count += strLen;

        switch (delimiterLen) {
            case 0:
                //Special case when the delimiter is empty (i.e.: doesn't need to be copied).
                for (int i = 1; i < len; i++) {
                    string = splitted[i];
                    strLen = string.length();
                    string.getChars(0, strLen, buf, count);
                    count += strLen;
                }
                break;

            case 1:
                //Special case with single-char delimiter (as it's pretty common)
                final char delimiterChar = delimiter.charAt(0);
                for (int i = 1; i < len; i++) {
                    buf[count] = delimiterChar;
                    count++;

                    string = splitted[i];
                    strLen = string.length();
                    string.getChars(0, strLen, buf, count);
                    count += strLen;
                }
                break;

            case 2:
                //Special case with double-char delimiter (usually: \r\n)
                final char delimiterChar0 = delimiter.charAt(0);
                final char delimiterChar1 = delimiter.charAt(1);
                for (int i = 1; i < len; i++) {
                    buf[count] = delimiterChar0;
                    buf[count + 1] = delimiterChar1;
                    count += 2;

                    string = splitted[i];
                    strLen = string.length();
                    string.getChars(0, strLen, buf, count);
                    count += strLen;
                }
                break;

            default:
                //Copy the remaining ones with the delimiter in place.
                for (int i = 1; i < len; i++) {
                    strLen = delimiterLen;
                    delimiter.getChars(0, strLen, buf, count);
                    count += strLen;

                    string = splitted[i];
                    strLen = string.length();
                    string.getChars(0, strLen, buf, count);
                    count += strLen;
                }
                break;

        }

        return wrapJoinedChars(buf, returnType);
    }

    /**
     * Converts the chars computed in a join to the type requested by the caller.
     *
     * @param buf the joined chars (handed over as is when a char[] or FastStringBuffer is requested).
     * @param returnType null, String.class, FastStringBuffer.class or char[].class.
     * @return the chars wrapped in the requested type.
     * @throws RuntimeException if the return type requested is not one of the supported ones.
     */
    private static Object wrapJoinedChars(char[] buf, Class<? extends Object> returnType) {
        if (returnType == null || returnType == String.class) {
            return new String(buf);

        } else if (returnType == FastStringBuffer.class) {
            return new FastStringBuffer(buf);

        } else if (returnType == char[].class) {
            return buf;

        } else {
            throw new RuntimeException("Don't know how to handle return type: " + returnType);
        }
    }

    /**
     * Same as Python join, but only for a range of the passed array: joins the entries starting at
     * startAtSegment (inclusive) up to endAtSegment (exclusive) with the passed delimiter.
     *
     * @param delimiter the text to put between the entries.
     * @param splitted the array with the entries.
     * @param startAtSegment index of the first entry to join.
     * @param endAtSegment index after the last entry to join (capped to the array length).
     * @return the joined string.
     */
    public static String join(String delimiter, String[] splitted, int startAtSegment, int endAtSegment) {
        final int endExclusive = Math.min(splitted.length, endAtSegment);
        final String[] segments = Arrays.copyOfRange(splitted, startAtSegment, endExclusive);
        return StringUtils.join(delimiter, segments);
    }

    /**
     * Same as Python join: joins the string representation of each element of the collection with
     * the passed delimiter.
     *
     * @param delimiter the text to put between the elements.
     * @param splitted the elements to join.
     * @return the joined string (empty if the collection has no elements).
     */
    @SuppressWarnings({ "rawtypes", "unchecked" })
    public static String join(String delimiter, Collection splitted) {
        final int total = splitted.size();
        if (total != 0) {
            return join(delimiter, splitted.toArray(new Object[total]));
        }
        return "";
    }

    /**
     * Same as Python join: joins the passed strings with the given delimiter, returning a String.
     */
    public static String join(String delimiter, String[] splitted) {
        //A null return type asks for a String result.
        final Object joined = join(delimiter, splitted, null);
        return (String) joined;
    }

    /**
     * Same as Python join: joins the string representation (toString) of each passed object with
     * the given delimiter. Null objects are added as "null".
     */
    public static String join(String delimiter, Object... splitted) {
        final String[] asStrings = new String[splitted.length];
        int idx = 0;
        for (Object item : splitted) {
            asStrings[idx] = item == null ? "null" : item.toString();
            idx++;
        }
        return join(delimiter, asStrings);
    }

    /**
     * Formats a string, replacing %s with the arguments passed.
     *
     * %% is also changed to % (without consuming any argument).
     *
     * If % is followed by any other char, the % and the next char are ignored.
     *
     * A % which is the last char of the string is kept as it is.
     *
     * @param str string to be formatted
     * @param args arguments passed (used in order, one for each %s found; null is added as "null")
     * @return a string with the %s replaced by the arguments passed
     * @throws ArrayIndexOutOfBoundsException if there are more %s in the string than arguments.
     */
    public static String format(final String str, Object... args) {
        final int length = str.length();
        final StringBuilder buffer = new StringBuilder(length + (16 * args.length));
        int nextArg = 0;
        int literalStart = 0; //Start of the literal text which was not copied to the buffer yet.
        int i = 0;

        while (i < length) {
            if (str.charAt(i) != '%' || i + 1 >= length) {
                //Regular char (or a trailing %, which is treated as a literal).
                i++;
                continue;
            }

            //Flush the literal text found before the %.
            buffer.append(str, literalStart, i);

            switch (str.charAt(i + 1)) {
                case 's':
                    buffer.append(args[nextArg]);
                    nextArg++;
                    break;
                case '%':
                    //Escaped %: must not consume an argument (otherwise the next %s would skip one).
                    buffer.append('%');
                    break;
                default:
                    //Unknown directive: the % and the char which follows it are dropped.
                    break;
            }
            i += 2;
            literalStart = i;
        }

        buffer.append(str, literalStart, length);
        return buffer.toString();
    }

    /**
     * A faster alternative for parsing positive longs (without exponential notation and only on decimal notation).
     * Attempting to parse a long that's negative or has exponential notation will throw a NumberFormatException.
     *
     * Note that it doesn't check for long overflow (so, values higher than MAX_LONG will overflow silently).
     *
     * @param buf buffer with the digits to be parsed.
     * @return the parsed value.
     * @throws NumberFormatException if the buffer is empty or has any char which is not in the 0-9 range.
     */
    public static long parsePositiveLong(FastStringBuffer buf) {
        final char[] chars = buf.getInternalCharsArray();
        final int len = buf.length();
        if (len == 0) {
            throw new NumberFormatException("Empty string received");
        }

        long value = 0;
        for (int i = 0; i < len; i++) {
            final int digit = chars[i] - '0';
            if (digit < 0 || digit > 9) {
                throw new NumberFormatException("Error getting positive int from: " + buf);
            }
            value = value * 10 + digit;
        }
        return value;
    }

    /**
     * A faster alternative for parsing positive ints (without exponential notation and only on decimal notation).
     * Attempting to parse an int that's negative or has exponential notation will throw a NumberFormatException.
     *
     * Note that it doesn't check for int overflow (so, values higher than MAX_INT will overflow silently).
     *
     * @param buf buffer with the digits to be parsed.
     * @return the parsed value.
     * @throws NumberFormatException if the buffer is empty or has any char which is not in the 0-9 range.
     */
    public static int parsePositiveInt(FastStringBuffer buf) {
        final char[] chars = buf.getInternalCharsArray();
        final int len = buf.length();
        if (len == 0) {
            throw new NumberFormatException("Empty string received");
        }

        int value = 0;
        for (int i = 0; i < len; i++) {
            final int digit = chars[i] - '0';
            if (digit < 0 || digit > 9) {
                throw new NumberFormatException("Error getting positive int from: " + buf);
            }
            value = value * 10 + digit;
        }
        return value;
    }

    /**
     * Counts the line breaks in the passed string: \r\n, a lone \r and a lone \n each count as one.
     *
     * @param replacementString the string to be checked.
     * @return the number of line breaks in the passed string.
     */
    public static int countLineBreaks(final String replacementString) {
        int total = 0;
        //Whether the previous char was a \r (in which case a \n is part of the same line break).
        boolean previousWasCR = false;

        for (int i = 0, len = replacementString.length(); i < len; i++) {
            switch (replacementString.charAt(i)) {
                case '\r':
                    total++;
                    previousWasCR = true;
                    break;
                case '\n':
                    if (!previousWasCR) {
                        total++;
                    }
                    previousWasCR = false;
                    break;
                default:
                    previousWasCR = false;
                    break;
            }
        }
        return total;
    }

    /** Lock which guards the accesses to the md5 cache. */
    private static final Object md5CacheLock = new Object();
    /** Cache from the input string to its computed md5 representation. */
    private static final LRUCache<String, String> md5Cache = new LRUCache<String, String>(1000);

    /**
     * Computes the MD5 of the UTF-8 bytes of the passed string.
     *
     * Note that the result is not the usual hexadecimal form: the digest is written in base 36
     * (Character.MAX_RADIX) so that the string is as short as possible while using only 0-9 and a-z.
     *
     * Results are cached, and both the cache lookup and the computation are done with the cache lock held.
     *
     * @param str the string for which the md5 is wanted.
     * @return the md5 of the string in base 36.
     * @throws RuntimeException wrapping any exception raised while computing the digest.
     */
    public static String md5(String str) {
        synchronized (StringUtils.md5CacheLock) {
            final String cached = md5Cache.getObj(str);
            if (cached != null) {
                return cached;
            }

            final String computed;
            try {
                final MessageDigest digester = MessageDigest.getInstance("MD5");
                final byte[] hash = digester.digest(str.getBytes(StandardCharsets.UTF_8));
                //MAX_RADIX because we'll generate the shortest string possible... (while still
                //using only numbers 0-9 and letters a-z)
                computed = new BigInteger(1, hash).toString(Character.MAX_RADIX).toLowerCase();
                md5Cache.add(str, computed);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
            return computed;
        }
    }

    /**
     * Splits the passed string on the given char, limiting the number of parts created.
     *
     * Consecutive occurrences of the char are treated as a single separator and empty parts are
     * never added. When the limit is about to be reached and there's still a separator after the
     * current part, the remainder of the string (including any separators in it) is added as the last part.
     *
     * @param string the string to be split.
     * @param toSplit the char used as the separator.
     * @param maxPartsToSplit the maximum number of parts in the result (must be > 0).
     * @return the parts found (empty list if the string is empty).
     */
    public static List<String> split(final String string, final char toSplit, int maxPartsToSplit) {
        Assert.isTrue(maxPartsToSplit > 0);
        final int len = string.length();
        if (len == 0) {
            return new ArrayList<>(0);
        }

        final ArrayList<String> parts = new ArrayList<String>(maxPartsToSplit);
        final int partsBeforeLast = maxPartsToSplit - 1;

        int i = 0;
        while (i < len) {
            if (string.charAt(i) == toSplit) {
                //Skip separators (this is what collapses consecutive separators).
                i++;
                continue;
            }

            //Found the start of a part: walk until the next separator (or the end of the string).
            final int partStart = i;
            do {
                i++;
            } while (i < len && string.charAt(i) != toSplit);

            if (i < len && parts.size() == partsBeforeLast) {
                //Limit reached with a separator still ahead: the last part is everything which remains.
                parts.add(string.substring(partStart, len));
                return parts;
            }
            parts.add(string.substring(partStart, i));
        }
        return parts;
    }

    /**
     * Splits the passed string based on the toSplit string (empty parts are never added).
     *
     * Corner-cases:
     * if the entry is an empty string, the return is an empty list.
     * if the delimiter has a single char, the work is delegated to the char-based split.
     * if the delimiter is empty, the return is a list with the original string as its only entry.
     *
     * @param string the string to be split.
     * @param toSplit the delimiter.
     * @return the parts found.
     */
    public static List<String> split(final String string, final String toSplit) {
        final int len = string.length();
        if (len == 0) {
            return new ArrayList<>(0);
        }

        final int delimiterLen = toSplit.length();
        if (delimiterLen == 1) {
            return split(string, toSplit.charAt(0));
        }

        final ArrayList<String> parts = new ArrayList<String>();
        if (delimiterLen == 0) {
            parts.add(string);
            return parts;
        }

        final char firstDelimiterChar = toSplit.charAt(0);
        int partStart = 0;
        int i = 0;
        while (i < len) {
            //Checking the first char before calling matches avoids a call for most positions.
            if (string.charAt(i) == firstDelimiterChar && matches(string, toSplit, i)) {
                if (partStart != i) {
                    parts.add(string.substring(partStart, i));
                }
                //Resume right after the delimiter.
                i += delimiterLen;
                partStart = i;
            } else {
                i++;
            }
        }

        if (partStart < len) {
            parts.add(string.substring(partStart, len));
        }
        return parts;
    }

    /**
     * Checks whether toSplit is found in string exactly at the index i.
     *
     * @return true if all the chars of toSplit are found in string starting at i and false otherwise
     * (which includes the case where there aren't enough chars left in string).
     */
    private static boolean matches(final String string, final String toSplit, int i) {
        final int needed = toSplit.length();
        if (string.length() - i < needed) {
            return false;
        }
        int j = 0;
        while (j < needed && string.charAt(i + j) == toSplit.charAt(j)) {
            j++;
        }
        return j == needed;
    }

    /**
     * Splits keeping empty partitions.
     *
     * Notes:
     * If ending with the char to split, adds an empty partition to the end.
     * An empty string results in an empty list.
     *
     * I.e.:
     * aaa|   will give "aaa", ""
     *
     * @param string the string to be split.
     * @param toSplit the char used as the separator.
     * @return the partitions found (always one more than the number of separators unless the string is empty).
     */
    public static List<String> splitKeepEmpty(String string, char toSplit) {
        if (string.length() == 0) {
            return new ArrayList<>(0);
        }
        final ArrayList<String> parts = new ArrayList<String>();

        int partStart = 0;
        int separatorAt = string.indexOf(toSplit, partStart);
        while (separatorAt != -1) {
            parts.add(string.substring(partStart, separatorAt));
            partStart = separatorAt + 1;
            separatorAt = string.indexOf(toSplit, partStart);
        }
        //What's after the last separator (empty if the string ends with the separator).
        parts.add(string.substring(partStart));
        return parts;
    }

    /**
     * Splits some string given some char (that char will not appear in the returned strings)
     * Empty strings are also never added (so, consecutive separators act as a single one and
     * leading/trailing separators are ignored).
     *
     * @param string the string to be split.
     * @param toSplit the char used as the separator.
     * @return the parts found (empty list if the string is empty or has only separators).
     */
    public static List<String> split(String string, char toSplit) {
        final int len = string.length();
        if (len == 0) {
            return new ArrayList<>(0);
        }
        final ArrayList<String> parts = new ArrayList<String>();

        int i = 0;
        while (i < len) {
            if (string.charAt(i) == toSplit) {
                //Skip separators.
                i++;
                continue;
            }

            //Found the start of a part: walk until the next separator (or the end of the string).
            final int partStart = i;
            do {
                i++;
            } while (i < len && string.charAt(i) != toSplit);
            parts.add(string.substring(partStart, i));
        }
        return parts;
    }

    /**
     * Splits the given string on whitespaces (as defined by Character.isWhitespace).
     *
     * Consecutive whitespaces act as a single separator and empty strings are never added.
     *
     * @param string string to be split.
     * @return list with the parts of the string which were separated by whitespaces (the whitespaces
     * themselves are not part of the returned strings).
     */
    public static List<String> splitInWhiteSpaces(String string) {
        final ArrayList<String> parts = new ArrayList<String>();
        final int len = string.length();

        int i = 0;
        while (i < len) {
            if (Character.isWhitespace(string.charAt(i))) {
                //Skip whitespaces.
                i++;
                continue;
            }

            //Found the start of a part: walk until the next whitespace (or the end of the string).
            final int partStart = i;
            do {
                i++;
            } while (i < len && !Character.isWhitespace(string.charAt(i)));
            parts.add(string.substring(partStart, i));
        }
        return parts;
    }

    /**
     * Splits the given string in a list where each element is a line.
     *
     * Lines may be terminated by \r\n, \r or \n. A last line without a terminator is also added.
     *
     * @param string string to be split.
     * @return list of strings where each string is a line.
     *
     * @note the new line characters are also added to the returned string.
     */
    public static List<String> splitInLines(String string) {
        final ArrayList<String> lines = new ArrayList<String>();
        final int len = string.length();

        int lineStart = 0;
        int i = 0;
        while (i < len) {
            final char ch = string.charAt(i);
            i++;

            if (ch == '\r') {
                //A \r directly followed by \n is a single terminator.
                if (i < len && string.charAt(i) == '\n') {
                    i++;
                }
            } else if (ch != '\n') {
                //Regular char: still in the same line.
                continue;
            }

            lines.add(string.substring(lineStart, i));
            lineStart = i;
        }

        if (lineStart < len) {
            lines.add(string.substring(lineStart, len));
        }
        return lines;
    }

    /**
     * Splits the given string in a list where each element is a line.
     *
     * Lines may be terminated by \r\n, \r or \n. A last line without a terminator is also added
     * (but nothing is added for what comes after the last terminator if it's empty).
     *
     * @param string string to be split.
     * @param addNewLines defines if new lines should be added to the returned strings.
     * @return list of strings where each string is a line.
     *
     */
    public static List<String> splitInLines(String string, boolean addNewLines) {
        if (addNewLines) {
            return splitInLines(string);
        }
        final ArrayList<String> lines = new ArrayList<String>();
        final int len = string.length();

        int lineStart = 0;
        int i = 0;
        while (i < len) {
            final char ch = string.charAt(i);
            if (ch != '\r' && ch != '\n') {
                //Regular char: still in the same line.
                i++;
                continue;
            }

            //The line contents go up to (but don't include) the terminator.
            lines.add(string.substring(lineStart, i));
            i++;
            if (ch == '\r' && i < len && string.charAt(i) == '\n') {
                //A \r directly followed by \n is a single terminator.
                i++;
            }
            lineStart = i;
        }

        if (lineStart < len) {
            lines.add(string.substring(lineStart, len));
        }
        return lines;
    }

    /**
     * This is usually what's on disk (the 3 bytes of the UTF-8 BOM, each one as a char).
     */
    public static String BOM_UTF8 = String.valueOf(new char[] { 0xEF, 0xBB, 0xBF });
    /**
     * When we convert a string from the disk to a java string, if it had an UTF-8 BOM, it'll have that BOM converted
     * to this BOM. See: org.python.pydev.parser.PyParser27Test.testBom()
     */
    public static String BOM_UNICODE = String.valueOf(new char[] { 0xFEFF });

    /**
     * Removes the BOM from the start of the passed contents.
     *
     * Note that only {@link #BOM_UTF8} is checked ({@link #BOM_UNICODE} is left in place).
     *
     * @param contents the contents which may start with the BOM.
     * @return the contents without the leading BOM (or the contents unchanged if they don't start with it).
     */
    public static String removeBom(String contents) {
        final String bom = BOM_UTF8;
        return contents.startsWith(bom) ? contents.substring(bom.length()) : contents;
    }

    /**
     * Small cache to hold strings only with spaces (so that each width has a created string).
     */
    private static Cache<Integer, String> widthToSpaceString = new LRUCache<Integer, String>(8);

    /**
     * Creates a string of spaces of the designated length (strings created are kept in a small cache
     * and reused in subsequent calls with the same width while they're still in it).
     *
     * @param width number of spaces you want to create a string of
     * @return the created string
     */
    public static String createSpaceString(int width) {
        final String cached = StringUtils.widthToSpaceString.getObj(width);
        if (cached != null) {
            return cached;
        }

        final FastStringBuffer spaces = new FastStringBuffer(width);
        spaces.appendN(' ', width);
        final String created = spaces.toString();
        StringUtils.widthToSpaceString.add(width, created);
        return created;
    }

    /**
     * Provides a string with the passed opening char followed by the char which closes it.
     *
     * @param c one of: { ( [ < ' "
     * @return the 2-char string with the opening char and its closing peer.
     * @throws NoPeerAvailableException if the char passed is not one of the supported ones.
     */
    public static String getWithClosedPeer(char c) {
        final String withPeer;
        switch (c) {
            case '{':
                withPeer = "{}";
                break;
            case '(':
                withPeer = "()";
                break;
            case '[':
                withPeer = "[]";
                break;
            case '<':
                withPeer = "<>";
                break;
            case '\'':
                withPeer = "''";
                break;
            case '"':
                withPeer = "\"\"";
                break;
            default:
                throw new NoPeerAvailableException("Unable to find peer for :" + c);
        }
        return withPeer;
    }

    /**
     * @return whether the passed char is one of the opening chars: ( [ { <
     */
    public static boolean isOpeningPeer(char lastChar) {
        return "([{<".indexOf(lastChar) != -1;
    }

    /**
     * @return whether the passed char is one of the closing chars: ) ] } >
     */
    public static boolean isClosingPeer(char lastChar) {
        return ")]}>".indexOf(lastChar) != -1;
    }

    /**
     * Provides the char which is the peer of the passed char.
     *
     * Brackets ({ } ( ) [ ] < >) map to their counterpart (opening to closing and closing to opening)
     * and ' " / ` map to themselves.
     *
     * @param c the char for which the peer is wanted.
     * @return the peer of the passed char.
     * @throws NoPeerAvailableException if the char passed has no known peer.
     */
    public static char getPeer(char c) {
        //Each opening char is at an even index and is directly followed by its closing char,
        //so, flipping the lowest bit of the index gives the index of the counterpart.
        final String brackets = "{}()[]<>";
        final int bracketAt = brackets.indexOf(c);
        if (bracketAt != -1) {
            return brackets.charAt(bracketAt ^ 1);
        }

        //Chars which are their own peer.
        if ("'\"/`".indexOf(c) != -1) {
            return c;
        }

        throw new NoPeerAvailableException("Unable to find peer for :" + c);
    }

    /**
     * Counts the number of occurrences of a certain character in a StringBuffer.
     *
     * @param c the character to search for
     * @param line the buffer to search in
     * @return an integer (int) representing the number of occurrences of this character
     */
    public static int countChars(char c, StringBuffer line) {
        int found = 0;
        for (int i = line.length() - 1; i >= 0; i--) {
            if (c == line.charAt(i)) {
                found++;
            }
        }
        return found;
    }

    /**
     * Counts the number of occurrences of a certain character in a FastStringBuffer.
     *
     * @param c the character to search for
     * @param line the buffer to search in
     * @return an integer (int) representing the number of occurrences of this character
     */
    public static int countChars(char c, FastStringBuffer line) {
        int found = 0;
        for (int i = line.length() - 1; i >= 0; i--) {
            if (c == line.charAt(i)) {
                found++;
            }
        }
        return found;
    }

    /**
     * Counts the number of occurrences of a certain character in a string.
     *
     * @param c the character to search for
     * @param line the string to search in
     * @return an integer (int) representing the number of occurrences of this character
     */
    public static int countChars(char c, String line) {
        int found = 0;
        for (int at = line.indexOf(c); at != -1; at = line.indexOf(c, at + 1)) {
            found++;
        }
        return found;
    }

    /** Matches a single line break: \r\n, \n or \r. */
    private static final Pattern compiled = Pattern.compile("\\r?\\n|\\r");

    /**
     * Replaces each line break (\r\n, \n or \r) in the text with the passed replacement.
     *
     * @param text the text in which line breaks should be replaced.
     * @param repl the replacement. It's used literally (i.e.: $ and \ have no special meaning).
     * @return the text with the line breaks replaced.
     */
    public static String replaceNewLines(String text, String repl) {
        //Without quoting, Matcher.replaceAll would treat $ as a group reference and \ as an escape char.
        return compiled.matcher(text).replaceAll(Matcher.quoteReplacement(repl));
    }

    /**
     * Replaces the occurrences of a string with another one (the work is delegated to
     * FastStringBuffer.replaceAll).
     *
     * @param string the string in which the replacement should be done.
     * @param replace the string to be searched.
     * @param with the string to be used in place of the one searched.
     * @return the resulting string.
     */
    public static String replaceAll(String string, String replace, String with) {
        return new FastStringBuffer(string, 16).replaceAll(replace, with).toString();
    }

    /**
     * Shortens a name to be shown in the UI, replacing its middle with " ... ".
     *
     * Names shorter than maxLen are returned unchanged. Names with maxLen chars or more keep their
     * start and end and have the middle replaced so that the result has maxLen chars.
     *
     * If maxLen is too small to hold the " ... " marker (i.e.: lower than 5), the name is just cut
     * to its first maxLen chars (an empty string is returned if maxLen is 0 or negative).
     *
     * @param nameForUI the name to be shortened.
     * @param maxLen the length from which names are shortened.
     * @return the (possibly shortened) name.
     */
    public static String shorten(String nameForUI, int maxLen) {
        final int nameLen = nameForUI.length();
        if (nameLen < maxLen) {
            return nameForUI;
        }

        final int markerLen = 5; //Length of " ... "
        if (maxLen < markerLen) {
            //No room for the marker: computing first/last would lead to negative substring indexes.
            return nameForUI.substring(0, Math.max(0, maxLen));
        }

        final int available = maxLen - markerLen;
        final int first = available / 2;
        final int last = available / 2 + (available % 2);

        return nameForUI.substring(0, first) + " ... " + nameForUI.substring(nameLen - last, nameLen);
    }

    /**
     * Removes the chars at the end of the string which are lower or equal to ' ' (which includes
     * spaces, tabs and new lines).
     *
     * @param input the string to be trimmed.
     * @return the string without the trailing chars lower or equal to ' '.
     */
    public static String rightTrim(final String input) {
        int end = input.length();
        while (end > 0 && input.charAt(end - 1) <= ' ') {
            end--;
        }
        return input.substring(0, end);
    }

    /**
     * Removes the chars at the beginning of the string which are lower or equal to ' ' (which includes
     * spaces, tabs and new lines).
     *
     * @param input the string to be trimmed.
     * @return the string without the leading chars lower or equal to ' '.
     */
    public static String leftTrim(String input) {
        final int len = input.length();
        int start = 0;
        for (; start < len; start++) {
            if (input.charAt(start) > ' ') {
                break;
            }
        }
        return input.substring(start, len);
    }

    /**
     * Find the nth index of character in string
     * @param string to search
     * @param character to search for
     * @param nth count (1 means the first occurrence)
     * @return count <= 0 returns -1. count > number of occurrences of character returns -1.
     * Otherwise return index of nth occurrence of character
     */
    public static int nthIndexOf(final String string, final char character, int nth) {
        int pos = -1;
        for (int found = 0; found < nth; found++) {
            //Search from right after the previous occurrence.
            pos = string.indexOf(character, pos + 1);
            if (pos == -1) {
                return -1;
            }
        }
        return pos;
    }

    /**
     * Counts the number of occurrences of a character in a string.
     *
     * @param name the string to search in
     * @param c the character to search for
     * @return the number of times the character appears in the string.
     */
    public static int count(String name, char c) {
        int found = 0;
        for (char current : name.toCharArray()) {
            if (current == c) {
                found++;
            }
        }
        return found;
    }

    /**
     * Returns whether the given input (to the number of bytes passed in len) is to be considered a valid text string
     * (otherwise, it's considered a binary string).
     *
     * Each byte is checked as a latin1 (ISO-8859-1) char.
     *
     * If no bytes are available, it's considered valid.
     *
     * @param buffer the bytes to be checked.
     * @param len the number of bytes to check (capped to the length of the buffer).
     * @return true if all the bytes checked are in a range considered valid for text and false otherwise.
     */
    public static boolean isValidTextString(byte[] buffer, int len) {
        if (len <= 0) {
            return true;
        }
        final int limit = Math.min(len, buffer.length);

        for (int i = 0; i < limit; i++) {
            //In latin1 the char code is the unsigned value of the byte.
            final int code = buffer[i] & 0xFF;

            //based on http://casa.colorado.edu/~ajsh/iso8859-1.html
            //and http://www.ic.unicamp.br/~stolfi/EXPORT/www/ISO-8859-1-Encoding.html
            //9 - 15: \t\r\n and other feeds
            //32 - 127: standard
            //128 - 159: ok (but windows only)
            //160 - 255: ok
            final boolean feedRange = code >= 9 && code <= 15;
            final boolean textRange = code >= 32; //Can't be higher than 255 (it comes from a single byte).
            if (!feedRange && !textRange) {
                return false;
            }
        }

        return true;
    }

    /**
     * <p>Find the last position of a character which matches a given regex.</p>
     *
     * <p>This method is similar to {@link java.lang.String#lastIndexOf(String)}
     * except it allows for comparing characters akin to <i>wildcard</i> searches, i.e.
     * find the position of the last character classified as alphanumeric, without
     * the need to implement dozens of method variations where each method takes the
     * same parameters but does a slightly different search.</p>
     *
     * @param string - the string to search through, e.g. the <i>haystack</i>
     * @param regex -  a string containing a compilable {@link java.util.regex.Pattern}.
     * @return the last position of the character that matches the pattern<br>
     *         or <tt>-1</tt> if no match or some of the parameters are invalid
     *         (null, empty or a regex which can't be compiled).
     * @note the string is iterated over one char at a time (from the end to the start),
     * so the pattern will be compared at most to one character strings.
     */
    public static int lastIndexOf(final String string, final String regex) {
        if (string == null || regex == null || string.length() == 0 || regex.length() == 0) {
            return -1;
        }

        final Pattern pattern;
        try {
            pattern = Pattern.compile(regex);
        } catch (PatternSyntaxException invalidRegex) {
            //An invalid regex is treated the same way as having no match.
            return -1;
        }

        for (int i = string.length() - 1; i >= 0; i--) {
            final String singleChar = String.valueOf(string.charAt(i));
            if (pattern.matcher(singleChar).matches()) {
                return i;
            }
        }
        return -1;
    }

    /**
     * <p>Joins the elements of an {@code Iterable} using {@code delimiter} as the separator.</p>
     *
     * <p>Each element is converted with {@link String#valueOf(Object)} (so, a null element is
     * added as {@code "null"}).</p>
     *
     * @see http://snippets.dzone.com/posts/show/91
     *
     * @param delimiter - string used as separator
     * @param objs - a collection which implements {@link java.lang.Iterable}
     * @param <T> - type in collection
     *
     * @throws IllegalArgumentException if {@code objs} or {@code delimiter} is {@code null}.
     *
     * @return the joined string (an empty string if there are no elements).
     */
    public static <T> String joinIterable(final String delimiter, final Iterable<T> objs)
            throws IllegalArgumentException {
        if (objs == null) {
            throw new IllegalArgumentException("objs can't be null!");
        }
        if (delimiter == null) {
            throw new IllegalArgumentException("delimiter can't be null");
        }

        final Iterator<T> elements = objs.iterator();
        if (!elements.hasNext()) {
            return "";
        }

        // The first element goes in without a leading delimiter.
        final String first = String.valueOf(elements.next());
        final FastStringBuffer joined = new FastStringBuffer(first, first.length());
        while (elements.hasNext()) {
            joined.append(delimiter);
            joined.append(String.valueOf(elements.next()));
        }
        return joined.toString();
    }

    /**
     * <p>Repeats a substring (a.k.a. <i>substring multiplication</i>).</p>
     *
     * <p>Special argument values:</p>
     *
     * <ul>
     * <li>an empty string is returned if {@code str} is empty or if {@code times <= 0};</li>
     * <li>if {@code str} is {@code null}, the string {@code "null"} is what gets repeated.</li>
     * </ul>
     *
     * @param str - the substring to repeat
     * @param times - how many copies
     * @return the repeated string
     */
    public static String repeatString(final String str, int times) {
        if (times <= 0) {
            return "";
        }
        final String unit = String.valueOf(str); // null becomes "null"
        if (unit.isEmpty()) {
            return "";
        }

        final FastStringBuffer repeated = new FastStringBuffer();
        repeated.appendN(unit, times);
        return repeated.toString();
    }

    /**
     * Counts the number of %s in the string.
     *
     * Only a '%' immediately followed by a lowercase 's' is counted (so, "%%s" has 1 and
     * "%S" has none).
     *
     * @param str the string to be analyzed (must not be null).
     * @return the number of %s in the string
     */
    public static int countPercS(final String str) {
        final String placeholder = "%s";
        int found = 0;
        int at = str.indexOf(placeholder);
        while (at != -1) {
            found++;
            // Resume right after the placeholder which was just counted.
            at = str.indexOf(placeholder, at + placeholder.length());
        }
        return found;
    }

    /**
     * Removes everything from the rightmost '.' (inclusive) up to the end of the given string.
     *
     * E.g.: bbb.t would return bbb
     *
     * If it has no '.', the original string is returned unchanged.
     */
    public static String stripExtension(String input) {
        final char extensionSeparator = '.';
        return stripFromRigthCharOnwards(input, extensionSeparator);
    }

    /**
     * Gets what comes after the last '.' in the given name.
     *
     * @param name the name from which the extension should be gotten (must not be null).
     * @return null if the name has no '.', an empty string if the last '.' is the last char of
     * the name and the contents after the last '.' otherwise.
     */
    public static String getFileExtension(String name) {
        final int lastDot = name.lastIndexOf('.');
        if (lastDot < 0) {
            return null;
        }
        final int extensionStart = lastDot + 1;
        return extensionStart == name.length() ? "" : name.substring(extensionStart);
    }

    /**
     * Finds the last occurrence of a char in a string.
     *
     * @param input the string to be searched (must not be null).
     * @param ch the char to look for.
     * @return the index of the rightmost occurrence of ch in the input or -1 if it's not there
     * (which is always the case for an empty input).
     */
    public static int rFind(String input, char ch) {
        // Same result as the previous hand-written backwards scan.
        return input.lastIndexOf(ch);
    }

    /**
     * Removes everything from the rightmost occurrence of the given char (inclusive) onwards.
     *
     * @return the input up to (but not including) the position given by rFind or the input
     * itself if rFind returns -1.
     */
    private static String stripFromRigthCharOnwards(String input, char ch) {
        final int cutAt = rFind(input, ch);
        return cutAt == -1 ? input : input.substring(0, cutAt);
    }

    /**
     * Removes everything from the rightmost '/' (inclusive) up to the end of the given string.
     *
     * If it has no '/', the original string is returned unchanged.
     */
    public static String stripFromLastSlash(String input) {
        final char slash = '/';
        return stripFromRigthCharOnwards(input, slash);
    }

    /**
     * Removes the occurrences of the passed char in the end of the string.
     *
     * @param input the string to be trimmed (must not be null).
     * @param charToTrim the char to be removed from the end.
     * @return the input without any trailing charToTrim (an empty string if the input only has
     * that char).
     */
    public static String rightTrim(String input, char charToTrim) {
        int end = input.length();
        // Walk back while the last kept char is still the one to be trimmed.
        while (end > 0 && input.charAt(end - 1) == charToTrim) {
            end--;
        }
        return input.substring(0, end);
    }

    /**
     * Removes the new line chars ('\r' and '\n') found in the end of the string.
     *
     * @param input the string to be trimmed (must not be null).
     * @return the input without any trailing '\r' or '\n' (other whitespaces are kept).
     */
    public static String rightTrimNewLineChars(String input) {
        int end = input.length();
        while (end > 0) {
            final char last = input.charAt(end - 1);
            if (last != '\r' && last != '\n') {
                break;
            }
            end--;
        }
        return input.substring(0, end);
    }

    /**
     * Removes the occurrences of the passed char in the start and in the end of the string
     * (the start is trimmed first and the end is trimmed afterwards).
     */
    public static String leftAndRightTrim(String input, char charToTrim) {
        final String withoutLeading = leftTrim(input, charToTrim);
        return rightTrim(withoutLeading, charToTrim);
    }

    /**
     * Removes the occurrences of the passed char in the start of the string.
     *
     * @param input the string to be trimmed (must not be null).
     * @param charToTrim the char to be removed from the start.
     * @return the input without any leading charToTrim.
     */
    public static String leftTrim(String input, char charToTrim) {
        final int length = input.length();
        int start = 0;
        for (; start < length; start++) {
            if (input.charAt(start) != charToTrim) {
                break;
            }
        }
        return input.substring(start);
    }

    /**
     * Changes all backward slashes (\) for forward slashes (/)
     *
     * @param string the string where the slashes should be replaced (must not be null).
     * @return the replaced string (the same instance received if it has no backward slash).
     */
    public static String replaceAllSlashes(String string) {
        // String.replace(char, char) already returns the same instance when there's nothing to
        // replace, so, strings without backward slashes are not copied.
        return string.replace('\\', '/');
    }

    /**
     * Given some html, extracts its text.
     *
     * The html is parsed with the swing HTMLEditorKit and the text of the resulting document
     * is returned.
     *
     * @return the text of the parsed document or an empty string if anything fails while
     * parsing it (which includes a null html).
     */
    public static String extractTextFromHTML(String html) {
        String text = "";
        try {
            final EditorKit htmlKit = new HTMLEditorKit();
            final Document parsed = htmlKit.createDefaultDocument();

            // The Document class does not yet handle charset's properly.
            parsed.putProperty("IgnoreCharsetDirective", Boolean.TRUE);

            // Parse the HTML (from a reader on the passed contents) into the document.
            final Reader contents = new StringReader(html);
            htmlKit.read(contents, parsed, 0);

            // The document now holds only the text.
            final int textLength = parsed.getLength();
            text = parsed.getText(0, textLength);
        } catch (Exception ignored) {
            // Best effort: any failure results in the empty string being returned.
            text = "";
        }
        return text;
    }

    /**
     * Helper to process parts of a string.
     */
    public static interface ICallbackOnSplit {

        /**
         * @param substring the part found
         * @return false to stop processing the string (and true to check the next part).
         */
        boolean call(String substring);

    }

    /**
     * Splits some string given some char (that char will not appear in the parts passed to the
     * callback) and calls the callback for each part found (in the order they appear).
     *
     * Consecutive, leading and trailing occurrences of the char to split are skipped, so, the
     * callback is never called with an empty string.
     *
     * @param string the string to be split (must not be null).
     * @param toSplit the char which separates the parts.
     * @param onSplit the callback called for each part (returning false from it stops the
     * processing).
     * @return true if the onSplit callback only returned true (and false if it stopped before).
     */
    public static boolean split(String string, char toSplit, ICallbackOnSplit onSplit) {
        final int length = string.length();
        int pos = 0;
        while (pos < length) {
            if (string.charAt(pos) == toSplit) {
                pos++; // separators never start a part
                continue;
            }

            // Found the start of a part: advance up to the next separator (or the end).
            final int partStart = pos;
            do {
                pos++;
            } while (pos < length && string.charAt(pos) != toSplit);

            //if it spans everything, it is equal to the original (no char to split)
            final boolean wholeString = partStart == 0 && pos == length;
            final String part = wholeString ? string : string.substring(partStart, pos);
            if (!onSplit.call(part)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Splits some string given many chars (any of the passed chars is a separator and none of
     * them appears in the returned parts).
     *
     * Consecutive, leading and trailing separators are skipped, so, empty strings are never
     * added to the result.
     *
     * @param string the string to be split (must not be null).
     * @param toSplit the chars which separate the parts.
     * @return a new (modifiable) list with the parts found, in the order they appear.
     */
    public static List<String> split(String string, char... toSplit) {
        final List<String> parts = new ArrayList<String>();
        final int length = string.length();

        int pos = 0;
        while (pos < length) {
            if (contains(string.charAt(pos), toSplit)) {
                pos++; // separators never start a part
                continue;
            }

            // Found the start of a part: advance up to the next separator (or the end).
            final int partStart = pos;
            do {
                pos++;
            } while (pos < length && !contains(string.charAt(pos), toSplit));

            //if it spans everything, it is equal to the original (no separators)
            final boolean wholeString = partStart == 0 && pos == length;
            parts.add(wholeString ? string : string.substring(partStart, pos));
        }
        return parts;
    }

    /**
     * @return whether the char c is one of the chars in the passed array.
     */
    private static boolean contains(char c, char[] toSplit) {
        final int size = toSplit.length;
        for (int i = 0; i < size; i++) {
            if (toSplit[i] == c) {
                return true;
            }
        }
        return false;
    }

    /**
     * Splits the string on the given char and drops any part which is an empty string (parts
     * which only have whitespaces are kept).
     *
     * @return the list created by the split with the remaining parts.
     */
    public static List<String> splitAndRemoveEmptyNotTrimmed(String string, char c) {
        final List<String> parts = split(string, c);
        parts.removeIf(part -> part.length() == 0);
        return parts;
    }

    /**
     * Splits the string on the given char and drops any part which is empty after being
     * trimmed (the parts which are kept are not trimmed themselves).
     *
     * @return the list created by the split with the remaining parts.
     */
    public static List<String> splitAndRemoveEmptyTrimmed(String string, char c) {
        final List<String> parts = split(string, c);
        parts.removeIf(part -> part.trim().length() == 0);
        return parts;
    }

    /**
     * Splits some string in 2 parts on the first occurrence of the given char (which is not
     * included in either part). If the separator is not found, everything is put in the 1st
     * part (and the 2nd part is an empty string).
     */
    public static Tuple<String, String> splitOnFirst(String fullRep, char toSplit) {
        final int separatorAt = fullRep.indexOf(toSplit);
        if (separatorAt == -1) {
            return new Tuple<String, String>(fullRep, "");
        }
        final String before = fullRep.substring(0, separatorAt);
        final String after = fullRep.substring(separatorAt + 1);
        return new Tuple<String, String>(before, after);
    }

    /**
     * Splits some string in 2 parts on the first occurrence of the given separator string
     * (which is not included in either part). If the separator is not found, everything is put
     * in the 1st part (and the 2nd part is an empty string).
     */
    public static Tuple<String, String> splitOnFirst(String fullRep, String toSplit) {
        final int separatorAt = fullRep.indexOf(toSplit);
        if (separatorAt == -1) {
            return new Tuple<String, String>(fullRep, "");
        }
        final String before = fullRep.substring(0, separatorAt);
        final String after = fullRep.substring(separatorAt + toSplit.length());
        return new Tuple<String, String>(before, after);
    }

    /**
     * Splits the string on its dots (delegating to splitAndRemoveEmptyTrimmed), i.e.: close to
     * what string.split("\\.") would do, but without yielding empty strings.
     */
    public static List<String> dotSplit(String string) {
        final char dot = '.';
        return splitAndRemoveEmptyTrimmed(string, dot);
    }

    /**
     * Creates a copy of an array of chars with one more char in its end.
     *
     * @param c The chars to where the new char should be appended (it's not changed).
     * @param toAdd the char to be added
     * @return a new array with the passed char appended.
     */
    public static char[] addChar(char[] c, char toAdd) {
        final int oldLength = c.length;
        final char[] extended = Arrays.copyOf(c, oldLength + 1);
        extended[oldLength] = toAdd;
        return extended;
    }

    /**
     * Creates a copy of an array of strings with one more string in its end.
     *
     * @param c The strings to where the new string should be appended (it's not changed).
     * @param toAdd the string to be added
     * @return a new array with the passed string appended.
     */
    public static String[] addString(String[] c, String toAdd) {
        final int oldLength = c.length;
        final String[] extended = Arrays.copyOf(c, oldLength + 1);
        extended[oldLength] = toAdd;
        return extended;
    }

    /**
     * Removes all the new line chars ('\r' and '\n') from the given string (wherever they are).
     *
     * @param message the string from where the new line chars should be removed (must not be
     * null).
     * @return the message without any '\r' or '\n'.
     */
    public static String removeNewLineChars(String message) {
        // Literal replacements: there's no need to go through regexps for that.
        return message.replace("\r", "").replace("\n", "");
    }

    // Kind of the last char handled in asStyleLowercaseUnderscores.
    private static final int STATE_LOWER = 0;
    private static final int STATE_UPPER = 1;
    private static final int STATE_NUMBER = 2;

    /**
     * Converts a name to the lowercase_with_underscores style: uppercase chars are lowercased
     * and an underscore is added whenever a run of uppercase chars or a run of digits starts
     * (unless it starts the result or already comes after an underscore).
     *
     * E.g.: myVar1 would become my_var_1
     */
    public static String asStyleLowercaseUnderscores(String string) {
        final int len = string.length();
        final FastStringBuffer out = new FastStringBuffer(len * 2);

        int previousKind = STATE_LOWER;
        for (int i = 0; i < len; i++) {
            final char original = string.charAt(i);

            // Classify the char and decide what'll actually be written for it.
            final int kind;
            final char written;
            if (Character.isUpperCase(original)) {
                kind = STATE_UPPER;
                written = Character.toLowerCase(original);
            } else if (Character.isDigit(original)) {
                kind = STATE_NUMBER;
                written = original;
            } else {
                kind = STATE_LOWER;
                written = original;
            }

            // Only the start of an uppercase or digit run may need a separator before it.
            final boolean startsRun = kind != STATE_LOWER && kind != previousKind;
            if (startsRun && out.length() > 0 && out.lastChar() != '_') {
                out.append('_');
            }
            out.append(written);
            previousKind = kind;
        }
        return out.toString();
    }

    /**
     * Checks whether all the letters of the string are uppercase (chars which are not letters
     * are not taken into account).
     *
     * @return false if any letter in the string is not uppercase and true otherwise (so, true
     * is returned for a string without letters).
     */
    public static boolean isAllUpper(String string) {
        for (int i = string.length() - 1; i >= 0; i--) {
            final char ch = string.charAt(i);
            if (!Character.isLetter(ch) || Character.isUpperCase(ch)) {
                continue;
            }
            return false;
        }
        return true;
    }

    /**
     * Checks whether the passed char is an ascii letter (a-z or A-Z).
     *
     * How come that the Character class doesn't have this?
     */
    public static boolean isAsciiLetter(int c) {
        if (c >= 'a') {
            return c <= 'z';
        }
        return c >= 'A' && c <= 'Z';
    }

    /**
     * Checks whether the passed char is an ascii letter (a-z or A-Z) or an underscore.
     *
     * How come that the Character class doesn't have this?
     */
    public static boolean isAsciiLetterOrUnderline(int c) {
        if (c == '_') {
            return true;
        }
        final boolean upper = 'A' <= c && c <= 'Z';
        final boolean lower = 'a' <= c && c <= 'z';
        return upper || lower;
    }

    /**
     * Checks whether the passed char is an ascii letter, an underscore or a digit.
     *
     * Note that the digit check is done with Character.isDigit (so, it's not restricted to the
     * ascii digits).
     */
    public static boolean isAsciiLetterOrUnderlineOrNumber(int c) {
        if (isAsciiLetterOrUnderline(c)) {
            return true;
        }
        return Character.isDigit(c);
    }

    /**
     * Converts a name to the camelCase style (with the first letter in lowercase).
     *
     * Underscores in the start and in the end of the name are kept, while the ones in the
     * middle are removed, with the char which follows them being uppercased. A name for which
     * isAllUpper is true is lowercased before being converted.
     *
     * E.g.: my_var_name would become myVarName and _private_x_ would become _privateX_
     */
    public static String asStyleCamelCaseFirstLower(String string) {
        final String text = isAllUpper(string) ? string.toLowerCase() : string;
        final int len = text.length();
        final FastStringBuffer out = new FastStringBuffer(len);

        int pos = 0;

        //underscores at the start
        while (pos < len && text.charAt(pos) == '_') {
            out.append('_');
            pos++;
        }

        // The first char which is not an underscore is always lowercased.
        if (pos < len) {
            out.append(Character.toLowerCase(text.charAt(pos)));
            pos++;
        }

        int pendingUnderscores = 0;
        for (; pos < len; pos++) {
            final char ch = text.charAt(pos);
            if (ch == '_') {
                pendingUnderscores++;
            } else if (pendingUnderscores > 0) {
                out.append(Character.toUpperCase(ch));
                pendingUnderscores = 0;
            } else {
                out.append(ch);
            }
        }

        if (pendingUnderscores > 0) {
            //underscores at the end
            out.appendN('_', pendingUnderscores);
        }
        return out.toString();
    }

    /**
     * Converts a name to the CamelCase style with the first char in uppercase (i.e.: the
     * result of asStyleCamelCaseFirstLower with its first char uppercased).
     */
    public static String asStyleCamelCaseFirstUpper(String string) {
        final String camel = asStyleCamelCaseFirstLower(string);
        if (camel.length() == 0) {
            return camel;
        }
        final char head = Character.toUpperCase(camel.charAt(0));
        return head + camel.substring(1);
    }

    /**
     * @return whether the last char of the passed buffer is the given char (always false for
     * an empty buffer).
     */
    public static boolean endsWith(FastStringBuffer str, char c) {
        final int len = str.length();
        return len != 0 && str.charAt(len - 1) == c;
    }

    /**
     * @return whether the last char of the passed string is the given char (always false for
     * an empty string).
     */
    public static boolean endsWith(final String str, char c) {
        final int lastIndex = str.length() - 1;
        return lastIndex >= 0 && str.charAt(lastIndex) == c;
    }

    /**
     * @return whether the last char of the passed buffer is the given char (always false for
     * an empty buffer).
     */
    public static boolean endsWith(final StringBuffer str, char c) {
        final int lastIndex = str.length() - 1;
        return lastIndex >= 0 && str.charAt(lastIndex) == c;
    }

    /**
     * Decodes the passed bytes into a string without throwing exceptions.
     *
     * The bytes are decoded with the passed charset or with ISO-8859-1 if the charset is null.
     * If the charset name is illegal or not supported, the problem is logged and ISO-8859-1 is
     * used instead (it can decode any sequence of bytes). Previously the fallback tried to use
     * the same charset name which had just failed, so, it could never succeed.
     *
     * @param b the bytes to be decoded.
     * @param baseCharset the name of the charset to be used (may be null).
     * @return the decoded string or a message saying that it was not possible to decode the
     * bytes if the decoding itself fails (i.e.: if the passed bytes are null).
     */
    public static String safeDecodeByteArray(byte[] b, String baseCharset) {
        Charset charset = StandardCharsets.ISO_8859_1;
        if (baseCharset != null) {
            try {
                charset = Charset.forName(baseCharset);
            } catch (IllegalArgumentException e) {
                //Illegal or unsupported charset name: go for something which shouldn't fail!
                Log.log(e);
            }
        }
        try {
            // Malformed input does not throw here (it's replaced while decoding).
            return new String(b, charset);
        } catch (Exception e) {
            Log.log(e);
            //Shouldn't ever happen!
            return "Unable to decode bytearray from Python.";
        }
    }

    /**
     * @return whether any char of the passed string is a whitespace (as defined by
     * Character.isWhitespace).
     */
    public static boolean containsWhitespace(final String name) {
        int remaining = name.length();
        while (remaining > 0) {
            remaining--;
            if (Character.isWhitespace(name.charAt(remaining))) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return the passed string with its first char converted to uppercase (an empty string is
     * returned as is).
     */
    public static String getWithFirstUpper(final String creationStr) {
        if (creationStr.length() == 0) {
            return creationStr;
        }
        final StringBuilder result = new StringBuilder(creationStr.length());
        result.append(Character.toUpperCase(creationStr.charAt(0)));
        result.append(creationStr, 1, creationStr.length());
        return result.toString();
    }

    /**
     * Adds the given indent to the start of all the lines of the source (the first line
     * included).
     */
    public static String indentTo(String source, String indent) {
        final boolean indentFirstLine = true;
        return indentTo(source, indent, indentFirstLine);
    }

    /**
     * Adds the given indent to the start of the lines of the source.
     *
     * The lines are the ones given by splitInLines and they're added as they are (nothing is
     * added between them).
     *
     * @param source the contents to be indented.
     * @param indent the indent to be added to the start of the lines. If it's null or empty,
     * the source itself is returned.
     * @param indentFirstLine whether the first line should also be indented.
     * @return the indented contents.
     */
    public static String indentTo(final String source, final String indent, boolean indentFirstLine) {
        // The null check must come before anything is accessed in the indent (previously its
        // length was gotten before the check, so, a null indent resulted in an exception).
        if (indent == null || indent.length() == 0) {
            return source;
        }
        final int indentLen = indent.length();
        List<String> splitInLines = splitInLines(source);
        final int sourceLen = source.length();
        FastStringBuffer buf = new FastStringBuffer(sourceLen + (splitInLines.size() * indentLen) + 2);

        for (int i = 0; i < splitInLines.size(); i++) {
            String line = splitInLines.get(i);
            if (indentFirstLine || i > 0) {
                buf.append(indent);
            }
            buf.append(line);
        }
        return buf.toString();
    }

    /**
     * @return the passed string with its contents reversed (the reversal is done by a
     * FastStringBuffer created with the string).
     */
    public static String reverse(String lineContentsToCursor) {
        final FastStringBuffer buffer = new FastStringBuffer(lineContentsToCursor, 0);
        return buffer.reverse().toString();
    }

    /**
     * Split so that we can create multiple WildcardQuery.
     *
     * The parts are the runs of chars which are either accepted by
     * Character.isJavaIdentifierPart or wildcards (* or ?). Any other char is considered to be
     * a separator and is ignored.
     *
     * Note that although wildcards are accepted, a part which contains only wildcards is
     * ignored.
     *
     * @param string the string to be split (must not be null).
     * @return a new list with the parts found, in the order they appear.
     */
    public static List<String> splitForIndexMatching(String string) {
        final int length = string.length();
        if (length == 0) {
            return new ArrayList<>(0);
        }
        final List<String> parts = new ArrayList<String>();

        // Start of the part being scanned (-1 while we're between parts).
        int partStart = -1;

        // There's one extra iteration (pos == length) which acts as a separator to close a part
        // which goes up to the end of the string.
        for (int pos = 0; pos <= length; pos++) {
            boolean partChar = false;
            if (pos < length) {
                final char ch = string.charAt(pos);
                partChar = ch == '*' || ch == '?' || Character.isJavaIdentifierPart(ch);
            }

            if (partChar) {
                if (partStart == -1) {
                    partStart = pos;
                }
                continue;
            }

            if (partStart != -1) {
                final String part = string.substring(partStart, pos);
                if (!containsOnlyWildCards(part)) {
                    //Don't add if it has only wildcards in it.
                    parts.add(part);
                }
                partStart = -1;
            }
        }
        return parts;
    }

    /**
     * Checks that the token has at least one part which may be searched for (as given by
     * splitForIndexMatching).
     *
     * @throws RuntimeException if no part is found in the token.
     */
    public static void checkTokensValidForWildcardQuery(String token) {
        final List<String> searchableParts = StringUtils.splitForIndexMatching(token);
        final boolean hasParts = searchableParts != null && searchableParts.size() > 0;
        if (hasParts) {
            return;
        }
        throw new RuntimeException(StringUtils.format(
                "Token: %s is not a valid token to search for.", token));
    }

    /**
     * @return true if all the chars of the string are wildcards (* or ?) and false if any
     * other char is found (so, true is returned for an empty string).
     */
    public static boolean containsOnlyWildCards(String string) {
        for (int i = string.length() - 1; i >= 0; i--) {
            final char ch = string.charAt(i);
            if (ch != '*' && ch != '?') {
                return false;
            }
        }
        return true;
    }

    /**
     * Reads all the lines available in the reader and returns them concatenated.
     *
     * Note that the lines are joined without any separator (so, the line terminators are not a
     * part of the returned string) and that the reader is not closed here.
     */
    public static String readAll(Reader reader) {
        //java8 idiom to read all lines.
        final List<String> lines = new BufferedReader(reader).lines().collect(Collectors.toList());
        return String.join("", lines);
    }

}