TextUtil.java example

Explorer
anyedittools-master
- AnyEditTools
  - src
    - de
      - loskutov
        anyedit
        AnyEditToolsPlugin.java
        IAnyEditConstants.java
        IOpenEditorParticipant.java
        Messages.java
        actions
        AbstractAction.java
        AbstractOpenAction.java
        AbstractReplaceAction.java
        AbstractTextAction.java
        Base64UnEncode.java
        ChangeCase.java
        ConvertAllAction.java
        ConvertAllInFolderAction.java
        ConvertUnicode.java
        CountAllInFolderAction.java
        DefaultOpenEditorParticipant.java
        OpenFile.java
        OpenType.java
        SaveToFileAction.java
        SaveToFileParticipant.java
        Spaces.java
        ToggleWhitespace.java
        UnEscape.java
        compare
        CompareWithAction.java
        CompareWithClipboardAction.java
        CompareWithEditorAction.java
        CompareWithExternalAction.java
        CompareWithResourceAction.java
        internal
        AnyeditStartup.java
        IDirtyWorkaround.java
        InternalOpenType.java
        StartupHelper2.java
        replace
        ReplaceWithAction.java
        ReplaceWithClipboardAction.java
        ReplaceWithEditorAction.java
        ReplaceWithExternalAction.java
        ReplaceWithResourceAction.java
        sort
        AbstractSortAction.java
        AbstractSortComparator.java
        SortAlphabeticallyCaseInsensitiveAscending.java
        SortAlphabeticallyCaseInsensitiveDescending.java
        SortAlphabeticallyCaseSensitiveAscending.java
        SortAlphabeticallyCaseSensitiveDescending.java
        SortLineLengthAscending.java
        SortLineLengthDescending.java
        SortNumericallyAscending.java
        SortNumericallyDescending.java
        compare
        AnyeditCompareInput.java
        ClipboardStreamContent.java
        ContentWrapper.java
        EditableSharedDocumentAdapter.java
        ExternalFileStreamContent.java
        FileStreamContent.java
        StreamContent.java
        TextStreamContent.java
        console
        SplitStackPropertyTester.java
        SplitStackTraceHandler.java
        jdt
        JdtUtils.java
        SelectWorkingSetsAction.java
        TypeFactory.java
        ui
        editor
        AbstractEditor.java
        EditorAdapterFactory.java
        EditorPropertyTester.java
        preferences
        AndyEditPreferenceInitializer.java
        AnyEditPreferencePage.java
        CombinedPreferences.java
        ProjectPreferencePage.java
        SupportPanel.java
        util
        SelectionTester.java
        wizards
        ExportPage.java
        IWSAction.java
        ImportPage.java
        WSPage.java
        WorkingSetExportWizard.java
        WorkingSetImportWizard.java
        util
        Base64Preferences.java
        EclipseUtils.java
        LineReplaceResult.java
        TextReplaceResultSet.java
        TextUtil.java
  - test
    - de
      - loskutov
        anyedit
        util
        TestUtils.java
/*******************************************************************************
 * Copyright (c) 2009 Andrey Loskutov.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * Contributor:  Andrey Loskutov - initial API and implementation
 *******************************************************************************/

package de.loskutov.anyedit.util;

import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.UnsupportedCharsetException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

import org.eclipse.core.resources.ResourcesPlugin;
import org.eclipse.jface.preference.IPreferenceStore;

import de.loskutov.anyedit.AnyEditToolsPlugin;
import de.loskutov.anyedit.IAnyEditConstants;
import de.loskutov.anyedit.Messages;


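/**
 * Text helper routines: identifier case conversions, detection and trimming of
 * file paths, Java type names and line references in a line of text, Base64 and
 * unicode-escape conversions, and tab/space conversion.
 *
 * @author Andrey
 */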
public class TextUtil {


    /** Encoding reported by {@link ResourcesPlugin#getEncoding()}, read once at class load. */
    public static final String SYSTEM_CHARSET = ResourcesPlugin.getEncoding();

    /** The predefined line delimiters */
    private static final char[] CR = { '\r' };

    private static final char[] LF = { '\n' };

    private static final char[] CRLF = { '\r', '\n' };

    private static final char[] EMPTY = {};

    /** Default set of characters of which at least one must occur in a path (see {@link #isPath(String)}). */
    public static final String DEFAULT_CHARACTERS_REQUIRED_IN_PATH = ".";

    /** Default characters that must not occur in a path when running on Windows. */
    public static final String WINDOOF_DEF = " \n\"'*?><|=(){};&$,%@";
    /** Default characters that must not occur in a path on other platforms: the Windows set plus ':'. */
    public static final String LINUX_DEF =   " \n\"'*?><|=(){};&$,%@:"; // ':' is invalid in some cases too
    /** Platform dependent default, chosen once at class load via {@code EclipseUtils.isWindows()}. */
    public static final String DEFAULT_CHARACTERS_DISALLOWED_IN_PATH = EclipseUtils.isWindows()?
            WINDOOF_DEF : LINUX_DEF;

    /** Default regex for the separator between a file name and a line number: a colon or a run of whitespace. */
    public static final String DEFAULT_LINE_SEPARATOR_REGEX = ":|\\s+";

    /** A string ending with one of these characters is rejected by {@link #isFilePath(String)}. */
    private static final String INVALID_PATH_ENDS_CHARACTERS = "/\\";
    //    private static final String VARIABLE_DELIMITERS = "${}()";

    public static final boolean DEFAULT_UNICODIFY_ALL = false;

    public static final int DEFAULT_BASE64_LINE_LENGTH = 100;

    /** Lazily created shared instance, see {@code getInstance()}. */
    private static TextUtil instance;

    /** If true, {@link #isPath(String)} additionally demands one of the "required" characters. */
    public boolean useRequiredInPathChars;

    private String charsDisallowedInPath;

    private String charsRequiredInPath;

    /** Separator regex used by {@link #findLineReferenceRegex(String, int)}; not assigned a default here. */
    private String lineSeparatorRegex;

    /** Maximum number of characters per line produced by {@link #base64trim(String, String)}. */
    private int base64LineLength;

    /** If true, {@link #toUnicode(String)} escapes every character, not only the non printable ones. */
    private boolean unicodifyAll;

    /** Matches a single newline, carriage return, space or tab character. */
    private static final Pattern WHITE_SPACE_PATTERN = Pattern.compile("(\\n|\\r| |\\\t)");
    /** Matches a backslash followed by 'u' and two to four hexadecimal digits. */
    private static final Pattern UNICODE_PATTERN = Pattern.compile("\\\\u[0-9a-fA-F]{2,4}");

    // XXX Java 6 doesn't support named groups!!!: (?<one>) causes crash
    //    private static final Pattern VARIABLE_PATTERN = Pattern.compile("(~/)|\\$((?<one>\\w+)|[\\{\\(](?<two>\\w+)[\\)\\}])");
    /**
     * Matches variable references in a line: "~/" (group 1), $HOME (name in group 3,
     * formerly named group "one") or $(HOME) / ${HOME} (name in group 4, formerly named
     * group "two"). The opening and closing bracket are matched independently, so mixed
     * forms such as "${HOME)" match too.
     */
    private static final Pattern VARIABLE_PATTERN = Pattern.compile("(~/)|\\$((\\w+)|[\\{\\(](\\w+)[\\)\\}])");

    /**
     * Creates an instance pre-populated with the built-in defaults. The values are
     * expected to be overwritten from the preference store by {@link #updateTextUtils()}.
     */
    private TextUtil() {
        useRequiredInPathChars = true;
        charsDisallowedInPath = DEFAULT_CHARACTERS_DISALLOWED_IN_PATH;
        charsRequiredInPath = DEFAULT_CHARACTERS_REQUIRED_IN_PATH;
        // Without this default the field stays null until the preferences are read, and
        // findLineReferenceRegex() would then build the pattern "(null)(\d+)".
        lineSeparatorRegex = DEFAULT_LINE_SEPARATOR_REGEX;
        base64LineLength = DEFAULT_BASE64_LINE_LENGTH;
        unicodifyAll = DEFAULT_UNICODIFY_ALL;
    }

    /**
     * Returns the shared instance, creating it on first use. The method is
     * synchronized, so the instance is created at most once.
     */
    private static synchronized TextUtil getInstance() {
        if (instance != null) {
            return instance;
        }
        instance = new TextUtil();
        return instance;
    }

    /**
     * Converts "camel" notation to "underscore" notation, e.g. "beMyCamel" becomes
     * "be_my_camel".
     * <p>
     * An upper case character is replaced by '_' plus its lower case form only if it is
     * neither the first nor the last character, both neighbours are Java identifier
     * parts and the following character is not upper case. All other characters are
     * copied unchanged.
     *
     * @param string
     *            non null text in the "camel" notation
     * @return the converted text
     */
    public static String fromCamelToUnderscore(String string) {
        final int lastIndex = string.length() - 1;
        StringBuilder out = new StringBuilder(string.length());
        for (int pos = 0; pos <= lastIndex; pos++) {
            char current = string.charAt(pos);
            boolean interior = pos > 0 && pos < lastIndex;
            if (interior && Character.isUpperCase(current)) {
                char before = string.charAt(pos - 1);
                char after = string.charAt(pos + 1);
                boolean insideIdentifier = Character.isJavaIdentifierPart(after)
                        && Character.isJavaIdentifierPart(before);
                if (insideIdentifier && !Character.isUpperCase(after)) {
                    out.append('_');
                    current = Character.toLowerCase(current);
                }
            }
            out.append(current);
        }
        return out.toString();
    }

    /**
     * Converts "underscore" notation to "camel" notation, e.g. "be_my_camel" becomes
     * "beMyCamel".
     * <p>
     * An underscore is dropped (and the next kept character upper-cased) if it is
     * neither the first nor the last character and is followed by a Java identifier
     * part. Every other character is lower-cased, unless it follows a character which is
     * not a Java identifier part - such a character is copied unchanged.
     *
     * @param string
     *            non null text in the "underscore" notation
     * @return the converted text
     */
    public static String fromUnderscoreToCamel(String string) {
        final int length = string.length();
        final int lastIndex = length - 1;
        StringBuilder out = new StringBuilder(length);
        boolean capitalizeNext = false;
        for (int pos = 0; pos < length; pos++) {
            final char current = string.charAt(pos);
            final boolean dropped = pos > 0 && pos < lastIndex && current == '_'
                    && Character.isJavaIdentifierPart(string.charAt(pos + 1));
            if (!dropped) {
                if (capitalizeNext) {
                    out.append(Character.toUpperCase(current));
                } else if (pos > 0 && !Character.isJavaIdentifierPart(string.charAt(pos - 1))) {
                    // first character after a non identifier character keeps its case
                    out.append(current);
                } else {
                    out.append(Character.toLowerCase(current));
                }
            }
            capitalizeNext = dropped;
        }
        return out.toString();
    }

    /**
     * Switches the first character of every whitespace separated word between camel
     * notation ("beMyCamel") and Pascal notation ("BeMyCamel").
     * <p>
     * Only the first non whitespace character of a word is considered, and only if it is
     * a letter; all other characters are copied unchanged.
     *
     * @param string non null text to convert
     * @param toCamel determines the direction of conversion: true - word starts become
     *            lower case (camel), false - word starts become upper case (Pascal)
     * @return the converted text
     */
    public static String fromCamelCaseToPascalCaseBidirectional(String string, boolean toCamel) {
        final char[] chars = string.toCharArray();
        boolean atWordStart = true;
        for (int pos = 0; pos < chars.length; pos++) {
            final char current = chars[pos];
            if (Character.isWhitespace(current)) {
                atWordStart = true;
                continue;
            }
            if (!atWordStart) {
                continue;
            }
            atWordStart = false;
            if (!Character.isLetter(current)) {
                continue;
            }
            if (toCamel) {
                if (Character.isUpperCase(current)) {
                    chars[pos] = Character.toLowerCase(current);
                }
            } else if (Character.isLowerCase(current)) {
                chars[pos] = Character.toUpperCase(current);
            }
        }
        return new String(chars);
    }

    /**
     * Checks whether the given string may be a file or directory path.
     * <p>
     * The trimmed string must be 1 to 400 characters long and must not contain any of the
     * characters from {@link #getCharsDisallowedInPath()}. If
     * {@link #isUseRequiredInPathChars()} is set, it must also contain at least one of the
     * characters from {@link #getCharsRequiredInPath()}.
     *
     * @param path string to check, may be null
     * @return false if the string is probably not a file/directory path, i.e. it
     * contains characters which are not allowed etc.
     */
    public boolean isPath(String path) {
        if (path == null) {
            return false;
        }
        final String candidate = path.trim();
        final int length = candidate.length();
        if (length == 0 || length > 400) {
            return false;
        }

        for (char forbidden : getCharsDisallowedInPath().toCharArray()) {
            if (candidate.indexOf(forbidden) >= 0) {
                return false;
            }
        }
        if (!isUseRequiredInPathChars()) {
            return true;
        }
        for (char needed : getCharsRequiredInPath().toCharArray()) {
            if (candidate.indexOf(needed) >= 0) {
                return true;
            }
        }
        return false;
    }

    /**
     * Checks whether the given string may be a <b>file</b> path: the trimmed string must
     * not end with a slash or backslash and must pass {@link #isPath(String)}.
     *
     * @param path string to check, may be null
     * @return false if the string is probably not a <b>file</b> path, i.e. it contains
     * characters which are not allowed etc.
     */
    public boolean isFilePath(String path) {
        if (path == null) {
            return false;
        }
        final String candidate = path.trim();
        if (candidate.length() == 0) {
            return false;
        }
        final char lastChar = candidate.charAt(candidate.length() - 1);
        if (INVALID_PATH_ENDS_CHARACTERS.indexOf(lastChar) >= 0) {
            return false;
        }
        return isPath(candidate);
    }

    /**
     * Checks whether the given string may be a simple <b>Java type</b> name: after
     * trimming it must start with a Java identifier start character and contain only
     * Java identifier part characters (so no dots).
     *
     * @param type string to check, may be null
     * @return false if the string is probably not a <b>Java type</b> name, i.e. it
     * contains characters which are not allowed etc.
     */
    public boolean isJavaType(String type) {
        if (type == null) {
            return false;
        }
        final String name = type.trim();
        if (name.length() == 0 || !Character.isJavaIdentifierStart(name.charAt(0))) {
            return false;
        }
        int pos = 1;
        while (pos < name.length() && Character.isJavaIdentifierPart(name.charAt(pos))) {
            pos++;
        }
        // true only if the scan was not stopped by an invalid character
        return pos == name.length();
    }

    /**
     * Strips whitespace and characters which are not allowed in a path from both ends of
     * the given string.
     * <p>
     * On Windows all '/' are replaced with '\' first and a single leading '\' is
     * removed. The '$' character is tolerated at the start, ')' and '}' are tolerated at
     * the end.
     *
     * @param path string to trim, may be null
     * @return the trimmed copy of the string; null or empty input is returned as is
     * (empty input after trimming whitespace)
     */
    public String trimPath(String path) {
        if (path == null) {
            return path; // nothing to trim
        }
        path = path.trim();
        if (path.length() == 0) {
            return path; // nothing to trim
        }

        if (EclipseUtils.isWindows()) {
            path = path.replace('/', '\\');
            // make "\test.txt" to "test.txt" but do not touch "\\share\text.txt"
            // "\test.txt" causes problems by selecting file in "open resource" dialog
            if (path.length() > 1 && path.charAt(0) == '\\' && path.charAt(1) != '\\') {
                path = path.substring(1);
            }
        }

        // leading characters: '$' may start a path
        final String leadingJunk = getCharsDisallowedInPath().replace("$", "");
        while (path.length() > 0 && leadingJunk.indexOf(path.charAt(0)) >= 0) {
            path = path.substring(1);
        }
        if (path.length() == 0) {
            return path; // nothing left
        }

        // trailing characters: ')' and '}' may end a path
        final String trailingJunk = getCharsDisallowedInPath().replace(")", "").replace("}", "");
        while (path.length() > 0
                && trailingJunk.indexOf(path.charAt(path.length() - 1)) >= 0) {
            path = path.substring(0, path.length() - 1);
        }

        final String withoutWhitespace = path.trim();
        if (withoutWhitespace.length() != path.length()) {
            // whitespace not listed as disallowed was uncovered: start again
            return trimPath(withoutWhitespace);
        }
        return withoutWhitespace;
    }


    /**
     * Simple mutable holder for a line of text and a caret position inside this line.
     */
    public static class LineAndCaret {

        /** the text of the line */
        public String line;

        /** the caret position, used as an index into {@link #line} */
        public int caret;

        /**
         * @param line the text of the line
         * @param caret the caret position in the line
         */
        public LineAndCaret(String line, int caret) {
            this.caret = caret;
            this.line = line;
        }
    }

    /**
     * Tries to find a file path around the caret in the given line.
     * <p>
     * Variable references in the line are resolved first. Then the nearest characters
     * from {@link #getCharsDisallowedInPath()} are searched on both sides of the caret;
     * the text between them is the path candidate.
     *
     * @param position line and caret to inspect, must not be null
     * @return the trimmed path, or null if the input is unusable (line is null or
     * shorter than 2 characters, caret outside of the line) or no file path was found
     */
    public String findPath(/* @Nonnull */ LineAndCaret position) {
        if (badData(position)) {
            return null; // unusable input
        }
        final LineAndCaret resolved = resolveVariables(position);
        if (badData(resolved)) {
            return null; // unusable after variable resolution
        }

        final String text = resolved.line;
        final int caret = resolved.caret;
        final String disallowed = getCharsDisallowedInPath();
        final char[] separators = disallowed.toCharArray();

        // nearest 'invalid' path character before the caret: the biggest index wins
        int leftBound = -1;
        for (char separator : separators) {
            leftBound = Math.max(leftBound, indexOf(text, separator, caret, leftBound, false));
        }

        // nearest 'invalid' path character after the caret: the smallest index wins
        int rightBound = text.length();
        for (char separator : separators) {
            int hit = indexOf(text, separator, caret, rightBound, true);
            if (hit != -1 && hit < rightBound) {
                rightBound = hit;
            }
        }

        // on Windows ':' additionally terminates the candidate in forward direction
        if (EclipseUtils.isWindows() && disallowed.indexOf(':') < 0) {
            int hit = indexOf(text, ':', caret, rightBound, true);
            if (hit != -1 && hit < rightBound) {
                rightBound = hit;
            }
        }

        if (leftBound == -1 && rightBound == text.length()) {
            // no delimiters at all: the whole line is the candidate
            return trimPath(text);
        }
        if (rightBound - leftBound > 1) {
            String candidate = text.substring(leftBound + 1, rightBound);
            // check for needed path characters (like '.') inside
            if (isFilePath(candidate)) {
                return trimPath(candidate);
            }
        }
        return null;
    }

    /**
     * @param position non null line and caret
     * @return true if the line is null or shorter than two characters, or if the caret
     * is not a valid index in the line
     */
    private boolean badData(LineAndCaret position) {
        final String text = position.line;
        if (text == null || text.length() < 2) {
            return true;
        }
        final int caret = position.caret;
        return caret < 0 || caret >= text.length();
    }

    /**
     * Replaces all variable references matched by {@code VARIABLE_PATTERN} ("~/", $NAME,
     * $(NAME), ${NAME}) in the line with the values returned by {@code getEnv(String)}
     * and moves the caret accordingly.
     * <p>
     * If the caret was inside a replaced reference, it is placed on the last character
     * of the inserted value; if it was behind a reference, it is shifted by the length
     * difference.
     *
     * @param position non null line and caret, the line must not be null
     * @return the given object if the line contains no variables, otherwise a new
     * object with the resolved line and the adjusted caret
     */
    private static LineAndCaret resolveVariables(LineAndCaret position) {
        String line = position.line;
        Matcher matcher = VARIABLE_PATTERN.matcher(line);
        if (!matcher.find()) {
            return position;
        }
        int newCaret = position.caret;
        // Matcher.appendReplacement() accepts StringBuffer only on older Java versions
        StringBuffer sb = new StringBuffer();
        do {
            // group 1: "~/", group 3: $NAME, group 4: $(NAME) or ${NAME}
            String var = matcher.group(1);
            if (var == null) {
                var = matcher.group(3);
            }
            if (var == null) {
                var = matcher.group(4);
            }
            if (var == null) {
                // paranoia
                break;
            }
            int start = matcher.start();
            int end = matcher.end();
            // unresolved variables are replaced with the text "null"
            String value = getEnv(var);
            // The value must be quoted: appendReplacement() interprets '\' and '$' in the
            // replacement, which would swallow the backslashes of a Windows path (like
            // the user home) or fail on a '$' in the value.
            matcher.appendReplacement(sb, Matcher.quoteReplacement(value));
            if (position.caret >= start && position.caret < end) {
                // caret inside current variable: place it at the end of the current input
                newCaret = sb.length() - 1;
            } else if (position.caret >= end) {
                // caret after current variable: update it with the diff
                newCaret += value.length() - (end - start);
            }
        } while (matcher.find());
        matcher.appendTail(sb);

        return new LineAndCaret(sb.toString(), newCaret);
    }

    /**
     * @param var either "~/" or the name of an environment variable
     * @return for "~/" the value of the "user.home" system property followed by "/",
     * otherwise the value of the environment variable; the text "null" if the variable
     * is not set
     */
    private static String getEnv(String var) {
        final boolean homeShortcut = "~/".equals(var);
        if (!homeShortcut) {
            return String.valueOf(System.getenv(var));
        }
        return System.getProperty("user.home") + "/";
    }

    /**
     * Extracts a Java type name from the given string by running
     * {@link #findJavaType(String, int)} with a virtual caret in the middle of the
     * trimmed string.
     *
     * @param type string to inspect, may be null
     * @return null for null input, the empty string for blank input, otherwise the
     * result of {@link #findJavaType(String, int)}
     */
    public String trimJavaType(String type) {
        if (type == null) {
            return type; // nothing to trim
        }
        final String trimmed = type.trim();
        if (trimmed.length() == 0) {
            return trimmed; // nothing to trim
        }
        // trick: compute virtual "caret" in the middle of string
        return findJavaType(trimmed, trimmed.length() / 2);
    }

    /**
     * Searches for the first line reference in the text, like
     * <pre>
     * foo/Foo.java RegexSeparator 156
     * </pre>
     * A reference is the separator regex ({@link #getLineSeparatorRegex()}) directly
     * followed by digits. If the separator can't be compiled as a regular expression,
     * the search falls back to using the separator as a plain string.
     *
     * @param line text to search in; null yields -1
     * @param startOffset used only by the plain string fallback, the regex search always
     * starts at the beginning of the line
     * @return integer value guessed as line reference in text (this is not an offset in
     * the given line!!!), or -1 if nothing was found
     */
    public int findLineReferenceRegex(String line, int startOffset) {
        if (line == null) {
            // same answer as the plain string fallback gives, instead of an NPE
            return -1;
        }
        final Pattern p;
        try {
            p = Pattern.compile("(" + getLineSeparatorRegex() + ")(\\d+)");
        } catch (PatternSyntaxException e) {
            // since input values in preferences dialog are checked for valid patterns
            // exception normally can't happen.
            return findLineReference(line, startOffset);
        }
        Matcher m = p.matcher(line);
        if (!m.find()) {
            return -1;
        }
        int groupCount = m.groupCount();
        if (groupCount == 0) {
            return -1;
        }
        // the digits are always the last group, whatever groups the separator contains
        String group = m.group(groupCount);
        if (group == null) {
            return -1;
        }
        try {
            return Integer.parseInt(group);
        } catch (NumberFormatException e) {
            // too many digits for an int: not a line number
            return -1;
        }
    }

    /**
     * Searches for a line reference in the text, like
     * <pre>
     * foo/Foo.java:156
     * </pre>
     * The separator ({@link #getLineSeparatorRegex()}) is used as a plain string here.
     * The number following the first separator after the start offset is preferred;
     * otherwise the first number starting at or after the start offset is used. Digits
     * directly following a letter (like 64 in Base64.java) are considered a part of a
     * name and are skipped.
     *
     * @param line text to search in, may be null
     * @param startOffset index in the line where the search starts
     * @return integer value guessed as line reference in text (this is not an offset in
     * the given line!!!), or -1 if the arguments are invalid, nothing was found or the
     * number does not fit into an int
     */
    private int findLineReference(String line, int startOffset) {
        if (line == null || line.length() == 0 || startOffset >= line.length()
                || startOffset < 0) {
            return -1; // unusable input
        }

        // search for first separator, if any
        int doppIndx = line.indexOf(getLineSeparatorRegex(), startOffset);

        // means > -1 and not the same occurrence
        if (doppIndx > startOffset) {
            // try to find most common occurrence: after first separator
            int firstTry = findLineReference(line, doppIndx);
            // found? ok.
            if (firstTry >= 0) {
                return firstTry;
            }
            // else: we don't have line info after separator or it is before!
        }

        final int length = line.length();
        int startChar = -1;
        int stopChar = -1;
        for (int i = startOffset; i < length; i++) {
            if (Character.isDigit(line.charAt(i))) {
                if (startChar < 0) {
                    // look at the previous character: if it is a letter, the following
                    // digits cannot be a line number, they are a part of a path or
                    // java name like 64 in Base64.java:125
                    if (i > 0 && Character.isLetter(line.charAt(i - 1))) {
                        // skip the whole digit run, not only its first digit
                        while (i + 1 < length && Character.isDigit(line.charAt(i + 1))) {
                            i++;
                        }
                        continue;
                    }
                    startChar = i;
                }
                stopChar = i + 1;
            } else if (startChar >= 0) {
                stopChar = i;
                break;
            }
        }
        if (startChar >= 0 && stopChar > 0) {
            try {
                return Integer.parseInt(line.substring(startChar, stopChar));
            } catch (NumberFormatException e) {
                // too many digits for an int: not a line number
                return -1;
            }
        }
        return -1;
    }

    /**
     * Extracts the run of Java identifier characters around the caret.
     * <p>
     * The run is extended from the caret in both directions while the characters are
     * Java identifier parts, then its start is moved forward to the first Java
     * identifier start character, if there is one.
     *
     * @param line text to inspect, may be null
     * @param caretOffset index in the line
     * @return the found text, the line itself if the run covers the entire line, or null
     * if the arguments are invalid or the caret is not on an identifier character
     */
    public String findJavaType(String line, int caretOffset) {
        if (line == null || line.length() == 0) {
            return null; // unusable input
        }
        final int length = line.length();
        if (caretOffset < 0 || caretOffset >= length) {
            return null; // caret outside of the line
        }

        // first index at or after the caret which is not an identifier part
        int end = caretOffset;
        while (end < length && Character.isJavaIdentifierPart(line.charAt(end))) {
            end++;
        }

        // first index at or before the caret which is not an identifier part
        int begin = caretOffset;
        while (begin >= 0 && Character.isJavaIdentifierPart(line.charAt(begin))) {
            begin--;
        }
        begin = Math.max(begin, 0);

        // find first valid first java character
        for (int pos = begin; pos < end; pos++) {
            if (Character.isJavaIdentifierStart(line.charAt(pos))) {
                begin = pos;
                break;
            }
        }

        if (begin == 0 && end == length) {
            return line;
        }
        return end > begin ? line.substring(begin, end) : null;
    }

    /**
     * Searches for a character in a part of the line, in the given direction.
     *
     * @param line non null text to search in
     * @param c character to search for
     * @param startOffset first index to check (inclusive)
     * @param stopOffset index where the search stops (exclusive)
     * @param forward true to search with increasing index, false with decreasing index
     * @return index of the first match in search direction, or -1 if there is none
     */
    public static int indexOf(String line, char c, int startOffset, int stopOffset,
            boolean forward) {
        if (forward) {
            for (int pos = startOffset; pos < stopOffset; pos++) {
                if (line.charAt(pos) == c) {
                    return pos;
                }
            }
        } else {
            for (int pos = startOffset; pos > stopOffset; pos--) {
                if (line.charAt(pos) == c) {
                    return pos;
                }
            }
        }
        return -1;
    }

    /** @return characters which must not occur in a path */
    public String getCharsDisallowedInPath() {
        return this.charsDisallowedInPath;
    }

    /** @return characters of which at least one must occur in a path */
    public String getCharsRequiredInPath() {
        return this.charsRequiredInPath;
    }

    /** @return separator (regular expression) expected before a line number */
    public String getLineSeparatorRegex() {
        return this.lineSeparatorRegex;
    }

    /** @return true if the "required" characters are checked by {@link #isPath(String)} */
    public boolean isUseRequiredInPathChars() {
        return this.useRequiredInPathChars;
    }

    /** @param string characters which must not occur in a path */
    public void setCharsDisallowedInPath(String string) {
        this.charsDisallowedInPath = string;
    }

    /** @param string characters of which at least one must occur in a path */
    public void setCharsRequiredInPath(String string) {
        this.charsRequiredInPath = string;
    }

    /** @param string separator (regular expression) expected before a line number */
    public void setLineSeparatorRegex(String string) {
        this.lineSeparatorRegex = string;
    }

    /** @param b true if the "required" characters should be checked by {@link #isPath(String)} */
    public void setUseRequiredInPathChars(boolean b) {
        this.useRequiredInPathChars = b;
    }

    /**
     * Decodes Base64 text with the help of {@code Base64Preferences}.
     *
     * @param base64 the (possibly) Base64 encoded text
     * @param charset name of the charset used to convert the decoded bytes to text; if
     * it is null or not supported, the platform default charset is used
     * @return the decoded text, or the input itself if no bytes could be decoded from it
     */
    /* @SuppressFBWarnings("NP_NULL_PARAM_DEREF_ALL_TARGETS_DANGEROUS") */
    public String base64decode(String base64, String charset) {
        Base64Preferences prefs = new Base64Preferences();
        prefs.put(null, base64);
        byte[] byteArray = prefs.getByteArray(null, null);
        if (byteArray == null) {
            // not base64 encoded => return input back
            return base64;
        }
        if (charset == null) {
            // new String(byte[], null) would throw a NullPointerException
            return new String(byteArray);
        }
        try {
            return new String(byteArray, charset);
        } catch (UnsupportedEncodingException e) {
            return new String(byteArray);
        }
    }

    /**
     * Encodes the text to Base64 with the help of {@code Base64Preferences}.
     *
     * @param plainText non null text to encode
     * @param charset name of the charset used to convert the text to bytes; if it is
     * null or not supported, the platform default charset is used
     * @return the value returned by {@code Base64Preferences} for the stored bytes
     */
    public String base64encode(String plainText, String charset) {
        // The charset has to be honoured here: base64decode() converts the bytes back
        // with the given charset, so using the platform default for encoding would
        // break the round trip for non ASCII text.
        byte[] bytes;
        if (charset == null) {
            bytes = plainText.getBytes();
        } else {
            try {
                bytes = plainText.getBytes(charset);
            } catch (UnsupportedEncodingException e) {
                // same fallback as in base64decode()
                bytes = plainText.getBytes();
            }
        }
        Base64Preferences prefs = new Base64Preferences();
        prefs.putByteArray(null, bytes);
        return prefs.get(null, null);
    }

    /**
     * Removes all newline, carriage return, space and tab characters from the text and
     * re-wraps it into lines of at most {@code base64LineLength} characters.
     *
     * @param text non null text to re-wrap
     * @param lineDelim non null delimiter inserted between the lines
     * @return the wrapped text without a trailing delimiter
     */
    public String base64trim(String text, String lineDelim) {
        final String compact = WHITE_SPACE_PATTERN.matcher(text).replaceAll("");
        final int total = compact.length();
        StringBuilder wrapped = new StringBuilder();
        for (int from = 0; from < total; from += base64LineLength) {
            int to = (from + base64LineLength) >= total ? total : from + base64LineLength;
            wrapped.append(compact.substring(from, to)).append(lineDelim);
        }
        final String result = wrapped.toString();
        // drop the delimiter appended after the last line
        return result.endsWith(lineDelim)
                ? result.substring(0, result.length() - lineDelim.length())
                : result;
    }

    /**
     * Replaces characters with their unicode escapes (backslash, 'u' and four lower case
     * hexadecimal digits).
     * <p>
     * If {@code unicodifyAll} is set, every character is escaped. Otherwise only
     * characters above 0x7e and non whitespace characters below 0x20 are escaped.
     *
     * @param input non null text to convert
     * @return the converted text
     */
    public String toUnicode(String input) {
        StringBuilder escaped = new StringBuilder();
        for (char ch : input.toCharArray()) {
            boolean control = ch < 0x20 && !Character.isWhitespace(ch);
            boolean beyondPrintableAscii = ch > 0x7e;
            if (unicodifyAll || control || beyondPrintableAscii) {
                escaped.append("\\u").append(leading4Zeros(Integer.toHexString(ch)));
            } else {
                escaped.append(ch);
            }
        }
        return escaped.toString();
    }

    /**
     * Pads the given text with zeros on the left to a length of four characters.
     *
     * @param hexString max 4 characters length
     * @return the characters of the string, preceded by zeros
     */
    private char[] leading4Zeros(String hexString) {
        final char[] padded = { '0', '0', '0', '0' };
        final int digits = hexString.length();
        // copy the digits right aligned over the zeros
        hexString.getChars(0, digits, padded, padded.length - digits);
        return padded;
    }

    /**
     * Replaces unicode escapes (backslash, 'u' and two to four hexadecimal digits) in
     * the text with the characters they denote.
     *
     * @param charset may be null. If null, no checks for the supported encoding would be
     * performed
     * @param input non null
     * @return the text with the escapes replaced
     * @throws UnsupportedOperationException if given charset does not support characters
     * from given text
     */
    public String fromUnicode(String charset, String input)
            throws UnsupportedOperationException {
        final StringBuffer decoded = new StringBuffer();
        final Matcher escapes = UNICODE_PATTERN.matcher(input);
        while (escapes.find()) {
            final String escape = escapes.group();
            String replacement;
            try {
                String hex = escape.substring(2);
                String character = String.valueOf((char) Integer.parseInt(hex, 16));
                if (charset != null) {
                    String problem = canEncode(charset, character, hex);
                    if (problem != null) {
                        throw new UnsupportedOperationException(problem);
                    }
                }
                replacement = character;
            } catch (NumberFormatException e) {
                // not a number: keep the escape as it was
                replacement = escape;
            }
            escapes.appendReplacement(decoded, quoteReplacement(replacement));
        }
        escapes.appendTail(decoded);
        return decoded.toString();
    }

    /**
     * Escapes every backslash and dollar sign in the text with a backslash, so that the
     * text is taken literally when used as a regular expression replacement.
     *
     * @param s non null text
     * @return the text itself if it contains neither '\' nor '$', otherwise the escaped
     * copy
     */
    // TODO already exists in 1.5 JDK, but here to be compatible with 1.4
    public static String quoteReplacement(String s) {
        final boolean needsQuoting = s.indexOf('\\') >= 0 || s.indexOf('$') >= 0;
        if (!needsQuoting) {
            return s;
        }
        StringBuilder quoted = new StringBuilder(s.length() + 10);
        for (char c : s.toCharArray()) {
            if (c == '\\' || c == '$') {
                quoted.append('\\');
            }
            quoted.append(c);
        }
        return quoted.toString();
    }

    /**
     * Checks whether the text can be encoded with the given charset.
     *
     * @param charset non null name of the charset
     * @param text non null text to check
     * @param unicodeValue hexadecimal value of the checked character, used in the error
     * message only
     * @return null if text could be encoded, error message otherwise
     */
    public static String canEncode(String charset, CharSequence text, String unicodeValue) {
        final Charset target;
        try {
            target = Charset.forName(charset);
        } catch (UnsupportedCharsetException e) {
            return "Charset '" + charset + "' is not supported.";
        } catch (IllegalCharsetNameException e) {
            return "Charset name '" + charset + "' is illegal.";
        }
        final boolean encodable = target.canEncode() && target.newEncoder().canEncode(text);
        return encodable ? null
                : "Charset '" + charset + "' does not support encoding for \\u" + unicodeValue + ".";
    }

    /**
     * Checks whether the given text can be compiled as a regular expression. If not, the
     * problem is reported via {@code AnyEditToolsPlugin.errorDialog}.
     *
     * @param regex regular expression to check
     * @return true if the expression could be compiled
     */
    public static boolean isValidLineSeparatorRegex(String regex) {
        try {
            Pattern.compile(regex);
        } catch (PatternSyntaxException e) {
            AnyEditToolsPlugin.errorDialog(Messages.OpenLineSeparatorRegex_WarningInvalidRegex, e);
            return false;
        }
        return true;
    }

    /**
     * Copies the current values from the plugin preference store into the shared
     * instance. A Base64 line length which is not positive is replaced with
     * {@link #DEFAULT_BASE64_LINE_LENGTH}.
     */
    public static synchronized void updateTextUtils() {
        final TextUtil shared = getInstance();
        final IPreferenceStore prefs = AnyEditToolsPlugin.getDefault().getPreferenceStore();

        final String disallowed = prefs.getString(IAnyEditConstants.CHARACTERS_DISALLOWED_IN_PATH);
        shared.setCharsDisallowedInPath(disallowed);
        final String required = prefs.getString(IAnyEditConstants.CHARACTERS_REQUIRED_IN_PATH);
        shared.setCharsRequiredInPath(required);
        final String separator = prefs.getString(IAnyEditConstants.LINE_SEPARATOR_REGEX);
        shared.setLineSeparatorRegex(separator);

        final boolean useRequired = prefs.getBoolean(IAnyEditConstants.USE_REQUIRED_IN_PATH_CHARACTERS);
        shared.setUseRequiredInPathChars(useRequired);

        final int configuredLength = prefs.getInt(IAnyEditConstants.BASE64_LINE_LENGTH);
        // paranoia: never keep a line length which is not positive
        shared.base64LineLength = configuredLength > 0 ? configuredLength
                : DEFAULT_BASE64_LINE_LENGTH;
        shared.unicodifyAll = prefs.getBoolean(IAnyEditConstants.UNICODIFY_ALL);
    }

    /**
     * Refreshes the settings via {@link #updateTextUtils()} and hands out the
     * instance returned by {@link #getInstance()} afterwards.
     *
     * @return the instance with freshly loaded preferences
     */
    public static TextUtil getDefaultTextUtilities() {
        updateTextUtils();
        final TextUtil refreshed = getInstance();
        return refreshed;
    }

    /**
     * Replaces the tabs in the leading whitespace of the given line with spaces
     * and, if requested, every other tab of the line too. The buffer is
     * modified in place.
     *
     * @param line the line to convert, non null
     * @param tabWidth number of spaces used for one tab
     * @param removeTrailing true to strip the trailing whitespace first
     * @param ignoreBlankLines passed to {@link #removeTrailingSpace(StringBuffer, boolean)}
     * @param replaceAllTabs true to replace also the tabs after the leading whitespace
     * @param useModulo4Tabs passed to the calculation of the new indentation width
     * @return true if a leading tab was seen or one of the helpers reported a change
     */
    public static boolean convertTabsToSpaces(StringBuffer line, int tabWidth,
            boolean removeTrailing, boolean ignoreBlankLines, boolean replaceAllTabs, boolean useModulo4Tabs) {
        boolean modified = removeTrailing && removeTrailingSpace(line, ignoreBlankLines);

        // measure the leading whitespace (length is taken after trailing cleanup)
        final int length = line.length();
        int leadingSpaces = 0;
        int leadingTabs = 0;
        int indentEnd = 0;
        while (indentEnd < length) {
            char current = line.charAt(indentEnd);
            if (current == '\t') {
                leadingTabs++;
            } else if (current == ' ') {
                leadingSpaces++;
            } else {
                break;
            }
            indentEnd++;
        }

        if (leadingTabs > 0) {
            modified = true;
            int width = calculateSpaces4Tabs(leadingSpaces, leadingTabs, tabWidth,
                    useModulo4Tabs);
            // drop the old indentation and put the pure space indentation in place
            line.delete(0, indentEnd);
            line.insert(0, fillWith(width, ' '));
        }

        if (replaceAllTabs) {
            // if the whole line was whitespace, start over from the beginning
            int from = indentEnd < length ? indentEnd : 0;
            modified |= replaceAllTabs(line, from, tabWidth);
        }
        return modified;
    }

    /**
     * Computes the width (in spaces) of an indentation which consists of the
     * given amount of spaces and tabs.
     *
     * @param spacesCount number of spaces in the indentation
     * @param tabsCount number of tabs in the indentation
     * @param tabWidth number of spaces used for one tab
     * @param useModulo4Tabs true to drop the spaces which do not fill a complete tab width
     * @return number of spaces to use for the indentation
     */
    private static int calculateSpaces4Tabs(int spacesCount, int tabsCount, int tabWidth,
            boolean useModulo4Tabs) {
        final int tabSpaces = tabsCount * tabWidth;
        if (useModulo4Tabs) {
            /*
             * This works well if and only if all three conditions below are met:
             * 1) the same tab size was used as the one set in AnyEdit preferences
             * 2) spaces were not mixed "cross over" with tabs multiple times in a line
             * 3) spaces precede tabs
             */
            return spacesCount - (spacesCount % tabWidth) + tabSpaces;
        }
        return spacesCount + tabSpaces;
    }

    /**
     * Computes how many tabs are used for the given amount of spaces: one tab
     * for each complete tab width plus one more tab if some spaces are left.
     *
     * @param spacesCount number of spaces to replace
     * @param tabWidth number of spaces used for one tab
     * @return number of tabs
     */
    private static int calculateTabs4Spaces(int spacesCount, int tabWidth) {
        final int fullTabs = spacesCount / tabWidth;
        final boolean hasRest = spacesCount % tabWidth != 0;
        return hasRest ? fullTabs + 1 : fullTabs;
    }

    /**
     * Replaces every tab found at or after the given index with
     * <code>tabWidth</code> spaces. The buffer is modified in place.
     *
     * @param line the line to modify
     * @param start first index to check
     * @param tabWidth number of spaces used for one tab
     * @return true if at least one tab was replaced
     */
    private static boolean replaceAllTabs(StringBuffer line, int start, int tabWidth) {
        // created lazily, most lines do not contain any tab
        String replacement = null;
        boolean modified = false;
        int pos = start;
        while (pos < line.length()) {
            if (line.charAt(pos) == '\t') {
                if (replacement == null) {
                    replacement = new String(fillWith(tabWidth, ' '));
                }
                line.replace(pos, pos + 1, replacement);
                modified = true;
            }
            pos++;
        }
        return modified;
    }

    /**
     * Replaces every run of spaces found at or after the given index with
     * tabs. The amount of tabs for a run is computed by
     * <code>calculateTabs4Spaces</code>. The buffer is modified in place.
     *
     * @param line the line to modify
     * @param start first index to check
     * @param tabWidth number of spaces used for one tab
     * @return true if at least one run of spaces was replaced
     */
    private static boolean replaceAllSpaces(StringBuffer line, int start, int tabWidth) {
        boolean modified = false;
        int pos = start;
        while (pos < line.length()) {
            if (line.charAt(pos) != ' ') {
                pos++;
                continue;
            }
            // find the end (exclusive) of the current run of spaces
            final int runStart = pos;
            while (pos < line.length() && line.charAt(pos) == ' ') {
                pos++;
            }
            int tabs = calculateTabs4Spaces(pos - runStart, tabWidth);
            line.replace(runStart, pos, String.valueOf(fillWith(tabs, '\t')));
            modified = true;
            // continue with the character which follows the inserted tabs
            pos = runStart + tabs;
        }
        return modified;
    }

    /**
     * Removes the spaces and tabs at the end of the given line. A line
     * terminator at the end of the line is kept. The buffer is modified in
     * place.
     *
     * @param line the line to modify, non null
     * @param ignoreBlankLine true to leave lines untouched which consist of
     *            spaces and tabs only
     * @return true if the line was changed
     */
    public static boolean removeTrailingSpace(StringBuffer line, boolean ignoreBlankLine) {
        final int lineLength = line.length();
        // index where the line terminator starts (exclusive end of the content)
        final int contentEnd = lineLength - getLineEnd(line).length;

        // walk back over the trailing spaces and tabs
        int keepEnd = contentEnd;
        while (keepEnd > 0) {
            char candidate = line.charAt(keepEnd - 1);
            if (candidate != ' ' && candidate != '\t') {
                break;
            }
            keepEnd--;
        }

        if (keepEnd == contentEnd) {
            // no trailing whitespace at all
            return false;
        }
        if (ignoreBlankLine && keepEnd == 0) {
            // the content is whitespace only and should stay as it is
            return false;
        }
        line.delete(keepEnd, contentEnd);
        return true;
    }

    /**
     * Replaces the spaces in the leading whitespace of the given line with
     * tabs and, if requested, the remaining runs of spaces too. The buffer is
     * modified in place.
     *
     * @param line the line to convert, non null
     * @param tabWidth number of spaces used for one tab
     * @param removeTrailing true to strip the trailing whitespace first
     * @param ignoreBlankLines passed to {@link #removeTrailingSpace(StringBuffer, boolean)}
     * @param replaceAllSpaces true to convert also incomplete groups of spaces
     *            and the spaces after the leading whitespace
     * @return true if a leading space was seen or one of the helpers reported a change
     */
    public static boolean convertSpacesToTabs(StringBuffer line, int tabWidth,
            boolean removeTrailing, boolean ignoreBlankLines, boolean replaceAllSpaces) {
        boolean changed = false;
        if (removeTrailing) {
            changed = removeTrailingSpace(line, ignoreBlankLines);
        }
        int lineLength = line.length();
        int spacesCount = 0;
        int tabsCount = 0;
        int lastIdx = 0;
        char lastChar = '?';
        // count spaces and tabs of the leading whitespace; after the loop
        // lastIdx is the index of the first other character (or the line length)
        // and lastChar is the last character which was read
        for (; lastIdx < lineLength; lastIdx++) {
            lastChar = line.charAt(lastIdx);
            if (lastChar == ' ') {
                // NOTE(review): the flag is set as soon as a leading space is
                // seen, so the two javadoc branches below can return true even
                // if the resulting line is the same as before - confirm if intended
                changed = true;
                spacesCount++;
            } else if (lastChar == '\t') {
                tabsCount++;
            } else {
                break;
            }
        }

        if (spacesCount > 0) {
            // a '*' directly after the indentation is treated as javadoc / block comment line
            boolean isComment = lastChar == '*';
            // number of complete tabs which can be built from the spaces
            int additionalTabs = spacesCount / tabWidth;
            if(additionalTabs == 0 && tabsCount == 0){
                // only few spaces (less then one tab) and no tabs at all
                if(replaceAllSpaces) {
                    // use one tab for them; no spaces are left over
                    additionalTabs = 1;
                    spacesCount = 0;
                } else {
                    // XXX remove leading spaces, except for javadoc
                    if(!isComment){
                        line.delete(0, lastIdx);
                        changed = true;
                    }
                    return changed;
                }
            }
            if (additionalTabs == 0 && !replaceAllSpaces) {
                // existing tabs plus few spaces: keep the tabs only
                line.delete(0, tabsCount + spacesCount);
                if(tabsCount > 0) {
                    line.insert(0, fillWith(tabsCount, '\t'));
                }
                // XXX add extra space for javadoc
                if(isComment){
                    line.insert(tabsCount, fillWith(1, ' '));
                }
                return true;
            }
            tabsCount += additionalTabs;
            // modulo rest
            int extraSpaces = spacesCount % tabWidth;

            // delete whitespace to 'last' index, replace with tabs
            line.delete(0, lastIdx);
            line.insert(0, fillWith(tabsCount, '\t'));
            // if some last spaces exists, add them back
            if (extraSpaces > 0) {
                if(replaceAllSpaces){
                    // the rest becomes one more tab instead of spaces
                    line.insert(tabsCount, fillWith(1, '\t'));
                } else {
                    line.insert(tabsCount, fillWith(extraSpaces, ' '));
                }
            }
        }
        if (replaceAllSpaces) {
            // convert the spaces which follow the first tabsCount characters
            changed |= replaceAllSpaces(line, tabsCount, tabWidth);
        }
        return changed;
    }

    /**
     * Determines the line terminator at the end of the given line.
     *
     * @param line the line to check, may be null
     * @return one of the CRLF, LF or CR constants, or EMPTY if the line is
     *         null, empty or does not end with a line terminator
     */
    private static char[] getLineEnd(StringBuffer line) {
        if (line == null || line.length() == 0) {
            return EMPTY;
        }
        final int tail = line.length() - 1;
        final char last = line.charAt(tail);
        if (last == '\r') {
            return CR; // mac
        }
        if (last != '\n') {
            return EMPTY;
        }
        // '\n' at the end: windows if a '\r' is directly before it, unix otherwise
        boolean afterCarriageReturn = tail > 0 && line.charAt(tail - 1) == '\r';
        return afterCarriageReturn ? CRLF : LF;
    }

    /**
     * Counts how often a character is contained in a string.
     *
     * @param s the string to search in, may be null
     * @param c the character to look for
     * @return number of occurrences of c in s, 0 if s is null
     */
    public static int count(String s, char c) {
        if (s == null) {
            return 0;
        }
        int total = 0;
        // jump from one occurrence to the next one
        int hit = s.indexOf(c);
        while (hit >= 0) {
            total++;
            hit = s.indexOf(c, hit + 1);
        }
        return total;
    }

    /**
     * Creates an array which contains the given character only.
     *
     * @param length size of the array to create
     * @param c the character to put on each position
     * @return char array with specified amount of given characters.
     */
    private static char[] fillWith(int length, char c) {
        final char[] filled = new char[length];
        for (int idx = filled.length - 1; idx >= 0; idx--) {
            filled[idx] = c;
        }
        return filled;
    }

    /**
     * Characters used for escape operations
     */
    private static final String[][] HTML_ESCAPE_CHARS = { {
        "<", "<" }, {
            ">", ">" }, {
                "&", "&" }, {
                    """, "\"" }, {
                        "à", "\u00e0" }, {
                            "À", "\u00c0" }, {
                                "â", "\u00e2" }, {
                                    "ä", "\u00e4" }, {
                                        "Ä", "\u00c4" }, {
                                            "Â", "\u00c2" }, {
                                                "å", "\u00e5" }, {
                                                    "Å", "\u00c5" }, {
                                                        "æ", "\u00e6" }, {
                                                            "Æ", "\u00c6" }, {
                                                                "ç", "\u00e7" }, {
                                                                    "Ç", "\u00c7" }, {
                                                                        "é", "\u00e9" }, {
                                                                            "É", "\u00c9" }, {
                                                                                "á", "\u00e1" }, {
                                                                                    "Á", "\u00c1" }, {
                                                                                        "è", "\u00e8" }, {
                                                                                            "È", "\u00c8" }, {
                                                                                                "ê", "\u00ea" }, {
                                                                                                    "Ê", "\u00ca" }, {
                                                                                                        "ë", "\u00eb" }, {
                                                                                                            "Ë", "\u00cb" }, {
                                                                                                                "ï", "\u00ef" }, {
                                                                                                                    "Ï", "\u00cf" }, {
                                                                                                                        "í", "\u00ed" }, {
                                                                                                                            "Í", "\u00cd" }, {
                                                                                                                                "ã", "\u00e3" }, {
                                                                                                                                    "Ã", "\u00c3" }, {
                                                                                                                                        "õ", "\u00f5" }, {
                                                                                                                                            "Õ", "\u00d5" }, {
                                                                                                                                                "ó", "\u00f3" }, {
                                                                                                                                                    "Ó", "\u00d3" }, {
                                                                                                                                                        "ô", "\u00f4" }, {
                                                                                                                                                            "Ô", "\u00d4" }, {
                                                                                                                                                                "ö", "\u00f6" }, {
                                                                                                                                                                    "Ö", "\u00d6" }, {
                                                                                                                                                                        "ø", "\u00f8" }, {
                                                                                                                                                                            "Ø", "\u00d8" }, {
                                                                                                                                                                                "ß", "\u00df" }, {
                                                                                                                                                                                    "ù", "\u00f9" }, {
                                                                                                                                                                                        "Ù", "\u00d9" }, {
                                                                                                                                                                                            "ú", "\u00fa" }, {
                                                                                                                                                                                                "Ú", "\u00da" }, {
                                                                                                                                                                                                    "û", "\u00fb" }, {
                                                                                                                                                                                                        "Û", "\u00db" }, {
                                                                                                                                                                                                            "ü", "\u00fc" }, {
                                                                                                                                                                                                                "Ü", "\u00dc" }, {
                                                                                                                                                                                                                    " ", " " }, {
                                                                                                                                                                                                                        "®", "\u00AE" }, {
                                                                                                                                                                                                                            "©", "\u00A9" }, {
                                                                                                                                                                                                                                "€", "\u20A0" }, {
                                                                                                                                                                                                                                    "€", "\u20AC" }

    };

    /**
     * Get html entity for escape character
     * @return null, if no entity found for given character
     */
    public static String getEntityForChar(char ch) {
        switch (ch) {
        case '<':
            return "<";
        case '>':
            return ">";
        case '&':
            return "&";
        case '"':
            return """;
        case '\u00e0':
            return "à";
        case '\u00e1':
            return "á";
        case '\u00c0':
            return "À";
        case '\u00c1':
            return "Á";
        case '\u00e2':
            return "â";
        case '\u00c2':
            return "Â";
        case '\u00e4':
            return "ä";
        case '\u00c4':
            return "Ä";
        case '\u00e5':
            return "å";
        case '\u00c5':
            return "Å";
        case '\u00e3':
            return "ã";
        case '\u00c3':
            return "Ã";
        case '\u00e6':
            return "æ";
        case '\u00c6':
            return "Æ";
        case '\u00e7':
            return "ç";
        case '\u00c7':
            return "Ç";
        case '\u00e9':
            return "é";
        case '\u00c9':
            return "É";
        case '\u00e8':
            return "è";
        case '\u00c8':
            return "È";
        case '\u00ea':
            return "ê";
        case '\u00ca':
            return "Ê";
        case '\u00eb':
            return "ë";
        case '\u00cb':
            return "Ë";
        case '\u00ed':
            return "í";
        case '\u00cd':
            return "Í";
        case '\u00ef':
            return "ï";
        case '\u00cf':
            return "Ï";
        case '\u00f5':
            return "õ";
        case '\u00d5':
            return "Õ";
        case '\u00f3':
            return "ó";
        case '\u00f4':
            return "ô";
        case '\u00d3':
            return "Ó";
        case '\u00d4':
            return "Ô";
        case '\u00f6':
            return "ö";
        case '\u00d6':
            return "Ö";
        case '\u00f8':
            return "ø";
        case '\u00d8':
            return "Ø";
        case '\u00df':
            return "ß";
        case '\u00f9':
            return "ù";
        case '\u00d9':
            return "Ù";
        case '\u00fa':
            return "ú";
        case '\u00da':
            return "Ú";
        case '\u00fb':
            return "û";
        case '\u00db':
            return "Û";
        case '\u00fc':
            return "ü";
        case '\u00dc':
            return "Ü";
        case '\u00AE':
            return "®";
        case '\u00A9':
            return "©";
        case '\u20A0':
            return "€";
        case '\u20AC':
            return "€";
            // case '' :   return "€";
            // case '\u20AC': return "€";  // euro

            // be carefull with this one (non-breaking white space)
            //case ' ' :    return " ";
        default: {
            //Submitted by S. Bayer.
            int ci = 0xffff & ch;
            if (ci < 160) {
                // nothing special only 7 Bit
                return null;
            }
            // Not 7 Bit use the unicode system
            return "&#" + ci + ";";
        }
        }
    }

    /**
     * Replaces each character, for which {@link #getEntityForChar(char)} knows
     * an entity, with this entity. All other characters are copied as they are.
     *
     * @param s string to be modified, may be null
     * @return string with escape characters, changed to html entities, or null
     *         if the given string was null
     */
    public static String escapeText(String s) {
        if (s == null) {
            // nothing to escape
            return null;
        }
        final int length = s.length();
        StringBuffer escaped = new StringBuffer();
        for (int pos = 0; pos < length; pos++) {
            char current = s.charAt(pos);
            String entity = getEntityForChar(current);
            if (entity == null) {
                escaped.append(current);
            } else {
                escaped.append(entity);
            }
        }
        return escaped.toString();
    }

    /**
     * Changes the html entities known from HTML_ESCAPE_CHARS to the characters
     * they stand for. Unknown entities and single '&amp;' characters are kept.
     * <p>
     * The text is processed in one pass from left to right and the replaced
     * text is never scanned again. Therefore an escaped entity like
     * <code>&amp;amp;lt;</code> is unescaped exactly once (to
     * <code>&amp;lt;</code>), and the amount of entities in the text is not
     * limited by the stack size as it would be with a recursive implementation.
     *
     * @param s string to unescape, may be null
     * @return new string with html entities changed to escape characters, the
     *         given string if nothing was replaced
     */
    public static String unescapeText(String s) {
        if (s == null) {
            return null;
        }
        int amp = s.indexOf('&');
        if (amp < 0) {
            return s;
        }
        StringBuffer sb = new StringBuffer(s.length());
        // everything before this index is already copied to the buffer
        int copiedTo = 0;
        while (amp >= 0) {
            int semicolon = s.indexOf(';', amp);
            if (semicolon < 0) {
                // no terminator anymore: there can't be any entity in the rest
                break;
            }
            String candidate = s.substring(amp, semicolon + 1);
            String replacement = null;
            for (int k = 0; k < HTML_ESCAPE_CHARS.length; k++) {
                if (HTML_ESCAPE_CHARS[k][0].equals(candidate)) {
                    replacement = HTML_ESCAPE_CHARS[k][1];
                    break;
                }
            }
            if (replacement == null) {
                // not an entity we know: keep this '&' and try the next one,
                // which could be located before the semicolon just found
                amp = s.indexOf('&', amp + 1);
            } else {
                sb.append(s.substring(copiedTo, amp)).append(replacement);
                copiedTo = semicolon + 1;
                // continue behind the entity, never inside of the replacement
                amp = s.indexOf('&', copiedTo);
            }
        }
        if (copiedTo == 0) {
            // nothing was replaced
            return s;
        }
        return sb.append(s.substring(copiedTo)).toString();
    }

    /**
     * Get index of the first character of the next word, where words are
     * separated by whitespace (see {@link Character#isWhitespace(char)}).
     * <p>
     * If startOffset directly follows lastIdx, the search starts inside of the
     * word found before: the rest of this word is skipped and the next word is
     * searched after the following whitespace.
     *
     * @param line the text to search in
     * @param startOffset first index to check (inclusive)
     * @param lastIdx index returned for the previous word
     * @return -1 if no such (non-whitespace) character found from given
     * startOffset (inclusive)
     */
    private static int indexOfNextWord(String line, int startOffset, int lastIdx) {
        boolean insidePreviousWord = lastIdx + 1 == startOffset;
        for (int pos = startOffset, end = line.length(); pos < end; pos++) {
            if (Character.isWhitespace(line.charAt(pos))) {
                // whitespace terminates the word we possibly started in
                insidePreviousWord = false;
            } else if (!insidePreviousWord) {
                return pos;
            }
        }
        return -1;
    }

    /**
     * Changes the first character of each word (words are separated by
     * whitespace) to upper case, if it is a lower case character.
     *
     * @param line the text to capitalize, non null
     * @return the capitalized text, or the given string if nothing was changed
     */
    public static String capitalize(String line) {
        StringBuffer result = new StringBuffer(line);
        boolean modified = false;
        // indexOfNextWord returns -1 as soon as the start offset reaches the
        // end of the line, so no extra length check is needed here
        for (int wordStart = indexOfNextWord(line, 0, 0); wordStart >= 0;
                wordStart = indexOfNextWord(line, wordStart + 1, wordStart)) {
            char first = line.charAt(wordStart);
            if (Character.isLowerCase(first)) {
                result.setCharAt(wordStart, Character.toUpperCase(first));
                modified = true;
            }
        }
        return modified ? result.toString() : line;
    }

    /**
     * Changes each lower case character to upper case and each upper case
     * character to lower case.
     * <p>
     * The text is processed by code points, not by chars, so characters
     * outside of the Basic Multilingual Plane (which are stored as surrogate
     * pairs) are converted too. For all other text the result is the same as
     * with a conversion char by char.
     *
     * @param line the text to convert, non null
     * @return the converted text, or the given string if no character was changed
     */
    public static String invertCase(String line) {
        final int length = line.length();
        StringBuffer inverted = new StringBuffer(length);
        boolean changed = false;
        for (int i = 0; i < length;) {
            int codePoint = line.codePointAt(i);
            int swapped = codePoint;
            if (Character.isLowerCase(codePoint)) {
                swapped = Character.toUpperCase(codePoint);
            } else if (Character.isUpperCase(codePoint)) {
                swapped = Character.toLowerCase(codePoint);
            }
            if (swapped != codePoint) {
                changed = true;
            }
            inverted.appendCodePoint(swapped);
            // a supplementary code point occupies two chars
            i += Character.charCount(codePoint);
        }
        if (changed) {
            return inverted.toString();
        }
        return line;
    }

}