package org.jabref.logic.layout.format;
import java.util.Map;
import org.jabref.logic.layout.LayoutFormatter;
import org.jabref.logic.util.strings.HTMLUnicodeConversionMaps;
import org.jabref.model.strings.StringUtil;
/**
* This formatter escapes characters so they are suitable for HTML.
*/
public class HTMLChars implements LayoutFormatter {

    private static final Map<String, String> HTML_CHARS = HTMLUnicodeConversionMaps.LATEX_HTML_CONVERSION_MAP;

    /**
     * Converts a LaTeX-flavoured field value into HTML.
     *
     * <p>The input is first normalised with plain string replacements (ampersands, line breaks,
     * escaped dollars, {@code $...$} math spans), then scanned character by character. Backslash
     * commands are looked up in the LaTeX-to-HTML conversion map, a small set of formatting
     * commands is turned into HTML tags (see {@link #getHTMLTag(String)}), and braces that are not
     * part of a command are dropped. Finally every remaining {@code ~} becomes {@code &nbsp;}.
     *
     * <p>Every {@code &} in the input is escaped, so text that already contains HTML entities
     * will be escaped a second time.
     *
     * @param inField the raw field text; must not be {@code null}
     * @return the HTML representation of {@code inField}
     */
    @Override
    public String format(String inField) {
        String field = inField.replaceAll("&|\\\\&", "&amp;") // Replace & and \& with &amp;
                .replaceAll("[\\n]{2,}", "<p>") // Replace double line breaks with <p>
                .replace("\n", "<br>") // Replace single line breaks with <br>
                // Replace \$ with an entity (not a raw $) so that escaped dollars
                // cannot be mistaken for math delimiters by the next replacement.
                .replace("\\$", "&dollar;")
                .replaceAll("\\$([^\\$]*)\\$", "\\{$1\\}"); // Replace $...$ with {...} to simplify conversion

        StringBuilder sb = new StringBuilder();
        StringBuilder currentCommand = null; // non-null whenever incommand is true
        boolean escaped = false; // true directly after a backslash
        boolean incommand = false; // true while the name of a \command is being collected

        // Note: i is also advanced inside the loop body when a command argument is consumed.
        for (int i = 0; i < field.length(); i++) {
            char c = field.charAt(i);

            if (escaped && (c == '\\')) {
                // Double backslash: emit a literal backslash.
                sb.append('\\');
                escaped = false;
            } else if (c == '\\') {
                if (incommand) {
                    // A new command starts right after another one: close the pending command.
                    String command = currentCommand.toString();
                    sb.append(translate(command, command));
                }
                escaped = true;
                incommand = true;
                currentCommand = new StringBuilder();
            } else if (!incommand && ((c == '{') || (c == '}'))) {
                // Swallow the brace.
            } else if (Character.isLetter(c) || (c == '%')
                    || StringUtil.SPECIAL_COMMAND_CHARS.contains(String.valueOf(c))) {
                escaped = false;
                if (!incommand) {
                    sb.append(c);
                } else {
                    currentCommand.append(c);
                    boolean singleCharCommand = (currentCommand.length() == 1)
                            && StringUtil.SPECIAL_COMMAND_CHARS.contains(currentCommand.toString());
                    if (singleCharCommand) {
                        // This indicates that we are in a command of the type
                        // \^o or \~{n}
                        if (i < (field.length() - 1)) {
                            String command = currentCommand.toString();
                            i++;
                            char next = field.charAt(i);
                            String commandBody;
                            if (next == '{') {
                                commandBody = StringUtil.getPart(field, i, false);
                                i += commandBody.length();
                            } else {
                                commandBody = field.substring(i, i + 1);
                            }
                            // Unknown combination: keep just the body.
                            sb.append(translate(command + commandBody, commandBody));
                            incommand = false;
                            escaped = false;
                        }
                        // Otherwise the command character is the last character of the
                        // input; there is no body to combine it with, so nothing is emitted.
                    } else if ((i + 1) == field.length()) {
                        // The command name runs up to the end of the string.
                        // If found, use the translated version; if not, keep the command text.
                        String command = currentCommand.toString();
                        sb.append(translate(command, command));
                    }
                }
            } else {
                if (!incommand) {
                    sb.append(c);
                } else if (Character.isWhitespace(c) || (c == '{') || (c == '}')) {
                    String command = currentCommand.toString();
                    // Test if we are dealing with a formatting command. If so, handle.
                    String tag = getHTMLTag(command);
                    if (!tag.isEmpty()) {
                        String part = StringUtil.getPart(field, i, true);
                        i += part.length();
                        sb.append('<').append(tag).append('>').append(part).append("</").append(tag).append('>');
                    } else if (c == '{') {
                        // Handle common case of general latex command.
                        String argument = StringUtil.getPart(field, i, true);
                        i += argument.length();
                        // If not found: an empty argument is probably a separator, such as in
                        // \LaTeX{}, so use the command; otherwise keep the argument intact.
                        String fallback = argument.isEmpty() ? command : argument;
                        sb.append(translate(command + argument, fallback));
                    } else if (c == '}') {
                        // This end brace terminates a command. This can be the case in
                        // constructs like {\aa}. Substitute the evaluated command (or print
                        // it unchanged if unknown) and swallow the brace.
                        sb.append(translate(command, command));
                    } else {
                        // Whitespace terminates the command; a single space is emitted after it.
                        sb.append(translate(command, command));
                        sb.append(' ');
                    }
                } else {
                    /*
                     * TODO: this point is reached, apparently, if a command is
                     * terminated in a strange way, such as with "$\omega$".
                     * Also, the command "\&" causes us to get here. The former
                     * issue is maybe a little difficult to address, since it
                     * involves the LaTeX math mode. We don't have a complete
                     * LaTeX parser, so maybe it's better to ignore these
                     * commands?
                     */
                }
                incommand = false;
                escaped = false;
            }
        }

        return sb.toString().replace("~", "&nbsp;"); // Replace any remaining ~ with &nbsp; (non-breaking spaces)
    }

    /**
     * Looks up {@code key} in the LaTeX-to-HTML conversion map.
     *
     * @param key      the command, or command plus argument, to translate
     * @param fallback the text to use when the map yields no value for {@code key}
     * @return the mapped HTML text, or {@code fallback} if the lookup returns {@code null}
     */
    private static String translate(String key, String fallback) {
        String result = HTML_CHARS.get(key);
        return (result == null) ? fallback : result;
    }

    /**
     * Maps a LaTeX formatting command (without the leading backslash) to an HTML tag name.
     *
     * @param latexCommand the command name, e.g. {@code textit}
     * @return the HTML tag name without angle brackets, or an empty string if the command is
     *         not one of the recognised formatting commands
     */
    private static String getHTMLTag(String latexCommand) {
        String result = "";
        switch (latexCommand) {
            // Italic
            case "textit":
            case "it":
                result = "i";
                break;
            // Emphasize
            case "emph":
            case "em":
                result = "em";
                break;
            // Bold font
            case "textbf":
            case "bf":
                result = "b";
                break;
            // Underline
            case "underline":
                result = "u";
                break;
            // Strikeout, sout is the "standard" command, although it is actually based on the package ulem
            case "sout":
                result = "s";
                break;
            // Monospace font
            case "texttt":
                result = "tt";
                break;
            // Superscript
            case "textsuperscript":
                result = "sup";
                break;
            // Subscript
            case "textsubscript":
                result = "sub";
                break;
            default:
                break;
        }
        return result;
    }
}