package org.jabref.logic.openoffice;
import java.util.Map;
import org.jabref.logic.layout.LayoutFormatter;
import org.jabref.logic.util.strings.HTMLUnicodeConversionMaps;
import org.jabref.model.strings.StringUtil;
/**
* This formatter preprocesses JabRef fields before they are run through the layout of the
* bibliography style. It handles translation of LaTeX italic/bold commands into HTML tags.
*/
public class OOPreFormatter implements LayoutFormatter {
private static final Map<String, String> CHARS = HTMLUnicodeConversionMaps.LATEX_UNICODE_CONVERSION_MAP;
@Override
public String format(String field) {
int i;
String finalResult = field.replaceAll("&|\\\\&", "&") // Replace & and \& with &
.replace("\\$", "$") // Replace \$ with $
.replaceAll("\\$([^\\$]*)\\$", "\\{$1\\}"); // Replace $...$ with {...} to simplify conversion
StringBuilder sb = new StringBuilder();
StringBuilder currentCommand = null;
char c;
boolean escaped = false;
boolean incommand = false;
for (i = 0; i < finalResult.length(); i++) {
c = finalResult.charAt(i);
if (escaped && (c == '\\')) {
sb.append('\\');
escaped = false;
} else if (c == '\\') {
if (incommand) {
/* Close Command */
String command = currentCommand.toString();
String result = OOPreFormatter.CHARS.get(command);
if (result == null) {
sb.append(command);
} else {
sb.append(result);
}
}
escaped = true;
incommand = true;
currentCommand = new StringBuilder();
} else if (!incommand && ((c == '{') || (c == '}'))) {
//Swallow braces, necessary for replacing encoded characters
} else if (Character.isLetter(c) || (c == '%')
|| StringUtil.SPECIAL_COMMAND_CHARS.contains(String.valueOf(c))) {
escaped = false;
if (!incommand) {
sb.append(c);
} else {
currentCommand.append(c);
testCharCom: if ((currentCommand.length() == 1)
&& StringUtil.SPECIAL_COMMAND_CHARS.contains(currentCommand.toString())) {
// This indicates that we are in a command of the type
// \^o or \~{n}
if (i >= (finalResult.length() - 1)) {
break testCharCom;
}
String command = currentCommand.toString();
i++;
c = finalResult.charAt(i);
String combody;
if (c == '{') {
String part = StringUtil.getPart(finalResult, i, false);
i += part.length();
combody = part;
} else {
combody = finalResult.substring(i, i + 1);
}
String result = OOPreFormatter.CHARS.get(command + combody);
if (result != null) {
sb.append(result);
}
incommand = false;
escaped = false;
} else {
// Are we already at the end of the string?
if ((i + 1) == finalResult.length()) {
String command = currentCommand.toString();
String result = OOPreFormatter.CHARS.get(command);
/* If found, then use translated version. If not,
* then keep
* the text of the parameter intact.
*/
if (result == null) {
sb.append(command);
} else {
sb.append(result);
}
}
}
}
} else {
String argument;
if (!incommand) {
sb.append(c);
} else if (Character.isWhitespace(c) || (c == '{') || (c == '}')) {
String command = currentCommand.toString();
// Test if we are dealing with a formatting
// command.
// If so, handle.
String tag = getHTMLTag(command);
if (!tag.isEmpty()) {
String part = StringUtil.getPart(finalResult, i, true);
i += part.length();
sb.append('<').append(tag).append('>').append(part).append("</").append(tag).append('>');
} else if (c == '{') {
String part = StringUtil.getPart(finalResult, i, true);
i += part.length();
argument = part;
// handle common case of general latex command
String result = OOPreFormatter.CHARS.get(command + argument);
// If found, then use translated version. If not, then keep
// the
// text of the parameter intact.
if (result == null) {
sb.append(argument);
} else {
sb.append(result);
}
} else if (c == '}') {
// This end brace terminates a command. This can be the case in
// constructs like {\aa}. The correct behaviour should be to
// substitute the evaluated command and swallow the brace:
String result = OOPreFormatter.CHARS.get(command);
if (result == null) {
// If the command is unknown, just print it:
sb.append(command);
} else {
sb.append(result);
}
} else {
String result = OOPreFormatter.CHARS.get(command);
if (result == null) {
sb.append(command);
} else {
sb.append(result);
}
sb.append(' ');
}
} /* else if (c == '}') {
System.out.printf("com term by }: '%s'\n", currentCommand.toString());
argument = "";
}*/ else {
/*
* TODO: this point is reached, apparently, if a command is
* terminated in a strange way, such as with "$\omega$".
* Also, the command "\&" causes us to get here. The former
* issue is maybe a little difficult to address, since it
* involves the LaTeX math mode. We don't have a complete
* LaTeX parser, so maybe it's better to ignore these
* commands?
*/
}
incommand = false;
escaped = false;
}
}
return sb.toString().replace("$", "$"); // Replace $ with $
}
private String getHTMLTag(String latexCommand) {
String result = "";
switch (latexCommand) {
// Italic
case "textit":
case "it":
case "emph": // Should really separate between emphasized and italic but since in later stages both are converted to italic...
case "em":
result = "i";
break;
// Bold font
case "textbf":
case "bf":
result = "b";
break;
// Small capitals
case "textsc":
result = "smallcaps"; // Not a proper HTML tag, but used here for convenience
break;
// Underline
case "underline":
result = "u";
break;
// Strikeout, sout is the "standard" command, although it is actually based on the package ulem
case "sout":
result = "s";
break;
// Monospace font
case "texttt":
result = "tt";
break;
// Superscript
case "textsuperscript":
result = "sup";
break;
// Subscript
case "textsubscript":
result = "sub";
break;
default:
break;
}
return result;
}
}