BibtexCaseChanger.java example

Explorer
jabref-master
- src
package org.jabref.logic.bst;

import java.util.Locale;
import java.util.Optional;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

/**
 * Changes the case of a string the way BibTeX's {@code change.case$} built-in does: text inside
 * braces is left alone, except for "special characters" (a top-level brace group that starts with
 * a backslash, e.g. <code>{\oe}</code>), whose contents are converted as well.
 *
 * Use the static entry point {@link #changeCase(String, FORMAT_MODE)}; every call works on a fresh
 * instance, because the conversion keeps its state ({@code prevColon}, {@code braceLevel}) in fields.
 */
public final class BibtexCaseChanger {

    private static final Log LOGGER = LogFactory.getLog(BibtexCaseChanger.class);

    // True while the last non-whitespace character seen at brace level 0 was a colon.
    // Only consulted for TITLE_LOWERS. It starts out true, so a character that follows
    // leading whitespace keeps its case, just like one that follows ": ".
    private boolean prevColon = true;

    // Current brace nesting depth while scanning the input
    private int braceLevel;

    public enum FORMAT_MODE {
        // First character and character after a ":" as upper case - everything else in lower case. Obey {}.
        TITLE_LOWERS('t'),

        // All characters lower case - Obey {}
        ALL_LOWERS('l'),

        // all characters upper case - Obey {}
        ALL_UPPERS('u');

        // the following would have to be done if the functionality of CaseChangers would be included here
        // However, we decided against it and will probably do the other way round: https://github.com/JabRef/jabref/pull/215#issuecomment-146981624

        // Each word should start with a capital letter
        //EACH_FIRST_UPPERS('f'),

        // Converts all words to upper case, but converts articles, prepositions, and conjunctions to lower case
        // Capitalizes first and last word
        // Does not change words starting with "{"
        // DIFFERENCE to old CaseChangers.TITLE: last word is NOT capitalized in all cases
        //TITLE_UPPERS('T');

        private final char asChar;

        FORMAT_MODE(char asChar) {
            this.asChar = asChar;
        }

        /**
         * @return the single-character BST code of this mode ('t', 'l' or 'u')
         */
        public char asChar() {
            return asChar;
        }

        /**
         * Convert bstFormat char into ENUM
         *
         * @param bstFormat the BST format character
         * @return the mode whose {@link #asChar()} equals {@code bstFormat}
         * @throws IllegalArgumentException if char is not 't', 'l', 'u'
         */
        public static FORMAT_MODE getFormatModeForBSTFormat(final char bstFormat) {
            for (FORMAT_MODE mode : FORMAT_MODE.values()) {
                if (mode.asChar == bstFormat) {
                    return mode;
                }
            }
            throw new IllegalArgumentException("Unknown BST format character: '" + bstFormat + "'");
        }
    }

    private BibtexCaseChanger() {
    }

    /**
     * Changes case of the given string s
     *
     * @param s the string to handle
     * @param format the format
     * @return the converted string
     */
    public static String changeCase(String s, FORMAT_MODE format) {
        return (new BibtexCaseChanger()).doChangeCase(s, format);
    }

    /**
     * Scans {@code s} once from left to right and builds the converted string.
     * Unbalanced braces are only logged; the conversion still returns a result.
     */
    private String doChangeCase(String s, FORMAT_MODE format) {
        char[] c = s.toCharArray();
        int n = c.length;
        StringBuilder sb = new StringBuilder(n);

        int i = 0;
        while (i < n) {
            if (c[i] == '{') {
                braceLevel++;
                // A special character is a top-level group that starts with a backslash and
                // has room for at least "{\x}" in the remaining input.
                boolean startsSpecialChar = (braceLevel == 1) && ((i + 4) <= n) && (c[i + 1] == '\\');
                // In title mode a special character in "keep as typed" position (start of the
                // string, or after colon + whitespace) is copied verbatim like any other group.
                boolean keptAsTyped = (format == FORMAT_MODE.TITLE_LOWERS)
                        && ((i == 0) || (prevColon && Character.isWhitespace(c[i - 1])));
                if (startsSpecialChar && !keptAsTyped) {
                    i = convertSpecialChar(sb, c, i, format);
                } else {
                    sb.append(c[i]);
                    i++;
                }
                // An opening brace always ends the "after a colon" state, also when a special
                // character was converted (BibTeX resets prev_colon on every path here).
                prevColon = false;
                continue;
            }
            if (c[i] == '}') {
                sb.append(c[i]);
                i++;
                if (braceLevel == 0) {
                    LOGGER.warn("Too many closing braces in string: " + s);
                } else {
                    braceLevel--;
                }
                prevColon = false;
                continue;
            }
            if (braceLevel == 0) {
                i = convertCharIfBraceLevelIsZero(c, i, sb, format);
                continue;
            }
            // Inside an ordinary brace group: copy unchanged
            sb.append(c[i]);
            i++;
        }
        if (braceLevel > 0) {
            LOGGER.warn("Not enough closing braces in string: " + s);
        }
        return sb.toString();
    }

    /**
     * We're dealing with a special character (usually either an undotted `\i'
     * or `\j', or an accent like one in Table~3.1 of the \LaTeX\ manual, or a
     * foreign character like one in Table~3.2) if the first character after the
     * |left_brace| is a |backslash|; the special character ends with the
     * matching |right_brace|. How we handle what is in between depends on the
     * special character. In general, this code will do reasonably well if there
     * is other stuff, too, between braces, but it doesn't try to do anything
     * special with |colon|s.
     *
     * @param sb the buffer the converted text is appended to
     * @param c the input characters
     * @param start the current position. It points to the opening brace
     * @param format the target case
     * @return the position after the special character (after its closing brace, or the
     *         end of the input if the group is never closed)
     */
    private int convertSpecialChar(StringBuilder sb, char[] c, int start, FORMAT_MODE format) {
        int i = start;

        sb.append(c[i]);
        i++; // skip over open brace

        while ((i < c.length) && (braceLevel > 0)) {
            sb.append(c[i]);
            i++;
            // skip over the |backslash|

            // i may equal c.length here if the input ends on a backslash;
            // findSpecialChar then reports that nothing was found.
            Optional<String> s = BibtexCaseChanger.findSpecialChar(c, i);
            if (s.isPresent()) {
                i = convertAccented(c, i, s.get(), sb, format);
            }

            // Convert everything up to the next control sequence or the end of the group,
            // keeping track of nested braces so that the matching closing brace is found.
            while ((i < c.length) && (braceLevel > 0) && (c[i] != '\\')) {
                if (c[i] == '}') {
                    braceLevel--;
                } else if (c[i] == '{') {
                    braceLevel++;
                }
                i = convertNonControl(c, i, sb, format);
            }
        }
        return i;
    }

    /**
     * Convert the given string according to the format character (title, lower,
     * up) and append the result to the stringBuffer, return the updated
     * position.
     *
     * @param c the input characters
     * @param start the position at which {@code s} starts in {@code c}
     * @param s the special character name, as returned by {@link #findSpecialChar(char[], int)}
     * @param sb the buffer to append to; its last character is the backslash preceding {@code s}
     * @param format the target case
     * @return the new position
     */
    private int convertAccented(char[] c, int start, String s, StringBuilder sb, FORMAT_MODE format) {
        int pos = start;
        pos += s.length();

        // The substring tests below rely on s being one of the names produced by findSpecialChar.
        switch (format) {
        case TITLE_LOWERS:
        case ALL_LOWERS:
            if ("L O OE AE AA".contains(s)) {
                sb.append(s.toLowerCase(Locale.ROOT));
            } else {
                sb.append(s);
            }
            break;
        case ALL_UPPERS:
            if ("l o oe ae aa".contains(s)) {
                sb.append(s.toUpperCase(Locale.ROOT));
            } else if ("i j ss".contains(s)) {
                // These become plain upper-case letters, so the control sequence goes away.
                sb.deleteCharAt(sb.length() - 1); // Kill backslash
                sb.append(s.toUpperCase(Locale.ROOT));
                // Drop the whitespace that terminated the control sequence
                while ((pos < c.length) && Character.isWhitespace(c[pos])) {
                    pos++;
                }
            } else {
                sb.append(s);
            }
            break;
        default:
            LOGGER.info("convertAccented - Unknown format: " + format);
            break;
        }
        return pos;
    }

    /**
     * Converts the single character at {@code start} (part of a special character, but not of a
     * control sequence) and appends it to {@code sb}.
     *
     * @return the position of the next character; always {@code start + 1}, so that the calling
     *         loop makes progress even for an unknown format
     */
    private int convertNonControl(char[] c, int start, StringBuilder sb, FORMAT_MODE format) {
        switch (format) {
        case TITLE_LOWERS:
        case ALL_LOWERS:
            sb.append(Character.toLowerCase(c[start]));
            break;
        case ALL_UPPERS:
            sb.append(Character.toUpperCase(c[start]));
            break;
        default:
            LOGGER.info("convertNonControl - Unknown format: " + format);
            break;
        }
        return start + 1;
    }

    /**
     * Converts the single character at {@code start}, which lies outside of all braces, and
     * appends it to {@code sb}. In title mode this also maintains {@code prevColon}.
     *
     * @return the position of the next character
     */
    private int convertCharIfBraceLevelIsZero(char[] c, int start, StringBuilder sb, FORMAT_MODE format) {
        int i = start;
        switch (format) {
        case TITLE_LOWERS:
            // Keep the case of the very first character and of a character after colon + whitespace
            if ((i == 0) || (prevColon && Character.isWhitespace(c[i - 1]))) {
                sb.append(c[i]);
            } else {
                sb.append(Character.toLowerCase(c[i]));
            }
            // Whitespace leaves prevColon untouched, so ":  x" still counts as "after a colon"
            if (c[i] == ':') {
                prevColon = true;
            } else if (!Character.isWhitespace(c[i])) {
                prevColon = false;
            }
            break;
        case ALL_LOWERS:
            sb.append(Character.toLowerCase(c[i]));
            break;
        case ALL_UPPERS:
            sb.append(Character.toUpperCase(c[i]));
            break;
        default:
            LOGGER.info("convertCharIfBraceLevelIsZero - Unknown format: " + format);
            break;
        }
        i++;
        return i;
    }

    /**
     * Determine whether there starts a special char at pos (e.g., oe, AE). Return it as string.
     * If nothing found, return Optional.empty()
     *
     * Two-letter names (oe, OE, ae, AE, ss, AA, aa) are tried before the one-letter names
     * (i, j, o, O, l, L). Only the characters at {@code pos} and {@code pos + 1} are inspected.
     *
     * Also used by BibtexPurify
     *
     * @param c the current "String"
     * @param pos the position; a position outside of {@code c} yields Optional.empty()
     * @return the special LaTeX character or Optional.empty()
     */
    public static Optional<String> findSpecialChar(char[] c, int pos) {
        if ((pos < 0) || (pos >= c.length)) {
            // Nothing can start outside of the input (e.g. right after a trailing backslash)
            return Optional.empty();
        }
        if ((pos + 1) < c.length) {
            String pair = new String(c, pos, 2);
            switch (pair) {
            case "oe":
            case "OE":
            case "ae":
            case "AE":
            case "ss":
            case "AA":
            case "aa":
                return Optional.of(pair);
            default:
                break;
            }
        }
        if ("ijoOlL".indexOf(c[pos]) >= 0) {
            return Optional.of(String.valueOf(c[pos]));
        }
        return Optional.empty();
    }
}