StringUtils.java example

Explorer
sejda-master
/*
 * This file is part of the Sejda source code
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.sejda.core.support.util;

import java.util.*;
import java.util.regex.Pattern;

/**
 * Static helpers for cleaning up and inspecting strings.
 *
 * This class cannot be instantiated.
 */
public final class StringUtils {

    /** Line feed, tab and carriage return: these are dropped entirely. */
    private static final Pattern CONTROL_CHARS = Pattern.compile("[\\n\\t\\r]");

    /**
     * Any single Unicode separator (general category Z, which includes U+00A0 'non-breaking-space')
     * or any character matched by the regex class \s.
     */
    private static final Pattern ANY_WHITESPACE = Pattern.compile("[\\p{Z}\\s]");

    private StringUtils() {
        // hide
    }

    /**
     * Removes the control characters \n, \t and \r, then replaces every remaining whitespace character,
     * including Unicode separators such as 'non-breaking-space' (U+00A0), with a plain regular 'space'.
     *
     * Replacement is one-for-one: runs of whitespace are not collapsed.
     *
     * @param in the string to normalize, must not be null
     * @return the normalized string
     * @throws NullPointerException if {@code in} is null
     */
    public static String normalizeWhitespace(String in) {
        // step 1: drop \n, \t and \r without leaving anything in their place
        String withoutControls = CONTROL_CHARS.matcher(in).replaceAll("");
        // step 2: map each remaining separator/whitespace character to ' '.
        // A character class is required here: the sequence "\p{Z}\s" would only match a separator
        // followed by a whitespace character and leave lone separators (eg: U+2003) untouched.
        return ANY_WHITESPACE.matcher(withoutControls).replaceAll(" ");
    }

    /**
     * Renders every code point of the input as {@code \U+<HEX>}, with uppercase hex digits and no padding.
     * Useful to debug weird strings.
     *
     * @param in the string to render, may be null
     * @return the concatenated code point representation, or null if {@code in} is null
     */
    public static String asUnicodes(String in) {
        if (in == null) {
            return null;
        }

        StringBuilder result = new StringBuilder();
        // iterate by code point so that surrogate pairs are reported as a single value
        for (int offset = 0; offset < in.length(); ) {
            int codepoint = in.codePointAt(offset);
            // Locale.ROOT keeps the hex digits independent of the default locale's casing rules
            result.append("\\U+").append(Integer.toHexString(codepoint).toUpperCase(Locale.ROOT));
            offset += Character.charCount(codepoint);
        }
        return result.toString();
    }

    /**
     * Returns the set of characters that exist in s1 but not in s2, in order of first appearance in s1.
     *
     * The comparison works on {@code char} values (UTF-16 code units), so a character outside the
     * Basic Multilingual Plane is compared as its two surrogate halves.
     *
     * @param s1 the string whose characters are examined, must not be null
     * @param s2 the string the characters are looked up in, must not be null
     * @return a new, modifiable set with the characters of s1 that do not occur in s2
     * @throws NullPointerException if s1 is null, or if s2 is null and s1 is not empty
     */
    public static Set<Character> difference(String s1, String s2) {
        Set<Character> result = new LinkedHashSet<>();
        for (char c : s1.toCharArray()) {
            // indexOf(char) avoids creating a one-character String for every lookup
            if (s2.indexOf(c) < 0) {
                result.add(c);
            }
        }

        return result;
    }
}