package org.jabref.logic.bst;
import java.util.Locale;
import java.util.Optional;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * Changes the case of a string the way BibTeX's case-changing built-in does: text inside
 * braces is protected, except for "special characters" (a top-level brace group whose first
 * character is a backslash), which are converted with knowledge of a few LaTeX control
 * sequences such as oe, AE, ss, i and j.
 *
 * Use the static entry point {@link #changeCase(String, FORMAT_MODE)}; every call works on a
 * fresh private instance, so the scanning state below is never shared between calls.
 */
public final class BibtexCaseChanger {

    private static final Log LOGGER = LogFactory.getLog(BibtexCaseChanger.class);

    /**
     * Whether the most recent significant character was a colon. Starts as {@code true}.
     * It is set when a colon is seen at brace level 0 in TITLE_LOWERS mode, cleared by any
     * other non-whitespace character there, and cleared by every brace.
     */
    private boolean prevColon = true;

    /** Current brace nesting depth while scanning the input. */
    private int braceLevel;

    public enum FORMAT_MODE {
        // First character and character after a ":" as upper case - everything else in lower case. Obey {}.
        TITLE_LOWERS('t'),

        // All characters lower case - Obey {}
        ALL_LOWERS('l'),

        // All characters upper case - Obey {}
        ALL_UPPERS('u');

        // The following would have to be done if the functionality of CaseChangers were included here.
        // However, we decided against it and will probably do it the other way round:
        // https://github.com/JabRef/jabref/pull/215#issuecomment-146981624

        // Each word should start with a capital letter
        //EACH_FIRST_UPPERS('f'),

        // Converts all words to upper case, but converts articles, prepositions, and conjunctions to lower case
        // Capitalizes first and last word
        // Does not change words starting with "{"
        // DIFFERENCE to old CaseChangers.TITLE: last word is NOT capitalized in all cases
        //TITLE_UPPERS('T');

        private final char asChar;

        FORMAT_MODE(char asChar) {
            this.asChar = asChar;
        }

        /**
         * @return the single-character BST code of this mode ('t', 'l' or 'u')
         */
        public char asChar() {
            return asChar;
        }

        /**
         * Converts a BST format character into the matching enum constant.
         *
         * @param bstFormat the BST format character
         * @return the mode whose {@link #asChar()} equals {@code bstFormat}
         * @throws IllegalArgumentException if char is not 't', 'l', 'u'
         */
        public static FORMAT_MODE getFormatModeForBSTFormat(final char bstFormat) {
            for (FORMAT_MODE mode : FORMAT_MODE.values()) {
                if (mode.asChar == bstFormat) {
                    return mode;
                }
            }
            throw new IllegalArgumentException("Unknown BST format character: '" + bstFormat + "'");
        }
    }

    private BibtexCaseChanger() {
    }

    /**
     * Changes the case of the given string according to the given format.
     *
     * @param s      the string to handle
     * @param format the case format to apply
     * @return the converted string
     */
    public static String changeCase(String s, FORMAT_MODE format) {
        return (new BibtexCaseChanger()).doChangeCase(s, format);
    }

    /**
     * Scans the string once from left to right, tracking the brace level, and dispatches each
     * character to the handler that fits its context. Unbalanced braces are only logged.
     *
     * @param s      the string to convert
     * @param format the case format to apply
     * @return the converted string
     */
    private String doChangeCase(String s, FORMAT_MODE format) {
        char[] c = s.toCharArray();
        StringBuilder sb = new StringBuilder();
        int i = 0;
        int n = s.length();

        while (i < n) {
            if (c[i] == '{') {
                braceLevel++;
                // Only a top-level group starting with a backslash (and long enough to hold
                // at least "{\x}") is a special character; any other brace is copied as is.
                if ((braceLevel != 1) || ((i + 4) > n) || (c[i + 1] != '\\')) {
                    prevColon = false;
                    sb.append(c[i]);
                    i++;
                    continue;
                }
                // In title mode a special character at the very start, or after a colon plus
                // whitespace, keeps its case: copy the brace and let the group pass through
                // unchanged as ordinary braced text.
                if ((format == FORMAT_MODE.TITLE_LOWERS) && ((i == 0) || (prevColon && Character.isWhitespace(c[i - 1])))) {
                    sb.append('{');
                    i++;
                    prevColon = false;
                    continue;
                }
                i = convertSpecialChar(sb, c, i, format);
                continue;
            }
            if (c[i] == '}') {
                sb.append(c[i]);
                i++;
                if (braceLevel == 0) {
                    LOGGER.warn("Too many closing braces in string: " + s);
                } else {
                    braceLevel--;
                }
                prevColon = false;
                continue;
            }
            if (braceLevel == 0) {
                i = convertCharIfBraceLevelIsZero(c, i, sb, format);
                continue;
            }
            // Inside braces (and not a special character): protected, copy verbatim.
            sb.append(c[i]);
            i++;
        }
        if (braceLevel > 0) {
            LOGGER.warn("No enough closing braces in string: " + s);
        }
        return sb.toString();
    }

    /**
     * We're dealing with a special character (usually either an undotted `\i'
     * or `\j', or an accent like one in Table~3.1 of the \LaTeX\ manual, or a
     * foreign character like one in Table~3.2) if the first character after the
     * |left_brace| is a |backslash|; the special character ends with the
     * matching |right_brace|. How we handle what is in between depends on the
     * special character. In general, this code will do reasonably well if there
     * is other stuff, too, between braces, but it doesn't try to do anything
     * special with |colon|s.
     *
     * @param sb     the builder receiving the converted text
     * @param c      the characters of the string being converted
     * @param start  the current position. It points to the opening brace
     * @param format the case format to apply
     * @return the position after the processed part: just behind the matching closing brace,
     *         or the end of the input if the group is never closed
     */
    private int convertSpecialChar(StringBuilder sb, char[] c, int start, FORMAT_MODE format) {
        int i = start;

        sb.append(c[i]);
        i++; // skip over open brace

        while ((i < c.length) && (braceLevel > 0)) {
            // c[i] is a backslash here: the caller checked it for the first pass, and the
            // inner loop below only stops early on a backslash.
            sb.append(c[i]);
            i++;
            // i may now equal c.length when the backslash was the last character of the
            // input; findSpecialChar reports "nothing found" for that position.
            Optional<String> s = BibtexCaseChanger.findSpecialChar(c, i);
            if (s.isPresent()) {
                i = convertAccented(c, i, s.get(), sb, format);
            }

            // Convert everything up to the next backslash or the end of the group.
            while ((i < c.length) && (braceLevel > 0) && (c[i] != '\\')) {
                if (c[i] == '}') {
                    braceLevel--;
                } else if (c[i] == '{') {
                    braceLevel++;
                }
                i = convertNonControl(c, i, sb, format);
            }
        }
        return i;
    }

    /**
     * Convert the given string according to the format character (title, lower,
     * up) and append the result to the string builder, return the updated
     * position.
     *
     * @param c      the characters of the string being converted
     * @param start  the position at which {@code s} starts in {@code c}
     * @param s      the special character found at {@code start}, as returned by
     *               {@link #findSpecialChar(char[], int)}
     * @param sb     the builder receiving the converted text; its last character is the
     *               backslash that introduced {@code s}
     * @param format the case format to apply
     * @return the new position
     */
    private int convertAccented(char[] c, int start, String s, StringBuilder sb, FORMAT_MODE format) {
        int pos = start;
        pos += s.length();

        switch (format) {
            case TITLE_LOWERS:
            case ALL_LOWERS:
                // Upper-case control sequences that have a lower-case counterpart.
                if ("L O OE AE AA".contains(s)) {
                    sb.append(s.toLowerCase(Locale.ROOT));
                } else {
                    sb.append(s);
                }
                break;
            case ALL_UPPERS:
                if ("l o oe ae aa".contains(s)) {
                    sb.append(s.toUpperCase(Locale.ROOT));
                } else if ("i j ss".contains(s)) {
                    // These have no upper-case control sequence: emit plain letters instead.
                    sb.deleteCharAt(sb.length() - 1); // Kill backslash
                    sb.append(s.toUpperCase(Locale.ROOT));
                    // Drop the whitespace that terminated the control sequence.
                    while ((pos < c.length) && Character.isWhitespace(c[pos])) {
                        pos++;
                    }
                } else {
                    sb.append(s);
                }
                break;
            default:
                LOGGER.info("convertAccented - Unknown format: " + format);
                break;
        }
        return pos;
    }

    /**
     * Converts the single character at {@code start} inside a special character group and
     * appends it to the builder.
     *
     * @param c      the characters of the string being converted
     * @param start  the position of the character to convert
     * @param sb     the builder receiving the converted character
     * @param format the case format to apply
     * @return the position of the next character ({@code start + 1})
     */
    private int convertNonControl(char[] c, int start, StringBuilder sb, FORMAT_MODE format) {
        switch (format) {
            case TITLE_LOWERS:
            case ALL_LOWERS:
                sb.append(Character.toLowerCase(c[start]));
                break;
            case ALL_UPPERS:
                sb.append(Character.toUpperCase(c[start]));
                break;
            default:
                LOGGER.info("convertNonControl - Unknown format: " + format);
                // Copy the character unchanged and still advance; otherwise the calling
                // loop would never terminate for an unhandled format.
                sb.append(c[start]);
                break;
        }
        return start + 1;
    }

    /**
     * Converts the single character at {@code start}, which lies outside of any braces, and
     * appends it to the builder. In title mode it also keeps track of colons so that the
     * first character after "colon + whitespace" keeps its case.
     *
     * @param c      the characters of the string being converted
     * @param start  the position of the character to convert
     * @param sb     the builder receiving the converted character
     * @param format the case format to apply
     * @return the position of the next character ({@code start + 1})
     */
    private int convertCharIfBraceLevelIsZero(char[] c, int start, StringBuilder sb, FORMAT_MODE format) {
        int i = start;
        switch (format) {
            case TITLE_LOWERS:
                if ((i == 0) || (prevColon && Character.isWhitespace(c[i - 1]))) {
                    // Start of the string or of a subtitle: keep the character as written.
                    sb.append(c[i]);
                } else {
                    sb.append(Character.toLowerCase(c[i]));
                }
                if (c[i] == ':') {
                    prevColon = true;
                } else if (!Character.isWhitespace(c[i])) {
                    prevColon = false;
                }
                break;
            case ALL_LOWERS:
                sb.append(Character.toLowerCase(c[i]));
                break;
            case ALL_UPPERS:
                sb.append(Character.toUpperCase(c[i]));
                break;
            default:
                LOGGER.info("convertCharIfBraceLevelIsZero - Unknown format: " + format);
                break;
        }
        i++;
        return i;
    }

    /**
     * Determine whether there starts a special char at pos (e.g., oe, AE). Return it as string.
     * If nothing found, return Optional.empty()
     *
     * The two-letter forms (oe, OE, ae, AE, ss, AA, aa) are checked before the single-letter
     * forms (i, j, o, O, l, L).
     *
     * Also used by BibtexPurify
     *
     * @param c   the current "String"
     * @param pos the position
     * @return the special LaTeX character, or {@code Optional.empty()} if none starts at
     *         {@code pos} or {@code pos} lies outside of {@code c}
     */
    public static Optional<String> findSpecialChar(char[] c, int pos) {
        // A position outside the array cannot start a special character. This happens, for
        // example, when a backslash is the very last character of the input.
        if ((pos < 0) || (pos >= c.length)) {
            return Optional.empty();
        }
        if ((pos + 1) < c.length) {
            if ((c[pos] == 'o') && (c[pos + 1] == 'e')) {
                return Optional.of("oe");
            }
            if ((c[pos] == 'O') && (c[pos + 1] == 'E')) {
                return Optional.of("OE");
            }
            if ((c[pos] == 'a') && (c[pos + 1] == 'e')) {
                return Optional.of("ae");
            }
            if ((c[pos] == 'A') && (c[pos + 1] == 'E')) {
                return Optional.of("AE");
            }
            if ((c[pos] == 's') && (c[pos + 1] == 's')) {
                return Optional.of("ss");
            }
            if ((c[pos] == 'A') && (c[pos + 1] == 'A')) {
                return Optional.of("AA");
            }
            if ((c[pos] == 'a') && (c[pos + 1] == 'a')) {
                return Optional.of("aa");
            }
        }
        if ("ijoOlL".indexOf(c[pos]) >= 0) {
            return Optional.of(String.valueOf(c[pos]));
        }
        return Optional.empty();
    }
}