package net.sf.jabref.export.layout.format;
import net.sf.jabref.Globals;
import net.sf.jabref.export.layout.LayoutFormatter;
/**
 * Layout formatter that converts a field value containing LaTeX-style markup
 * into text suitable for embedding in HTML.
 * <p>
 * The conversion runs in two stages:
 * <ol>
 * <li>a pre-processing pass that escapes ampersands and turns line breaks into
 * {@code <p>} / {@code <br>} tags, and</li>
 * <li>a character-by-character pass that strips braces, translates backslash
 * commands through the {@code Globals.HTMLCHARS} lookup table and maps a few
 * emphasis commands to {@code <em>} / {@code <b>} elements.</li>
 * </ol>
 * This is not a LaTeX parser; constructs it does not recognise are handled on
 * a best-effort basis (see the comments in {@link #translate(String)}).
 *
 * @version $Revision: 3128 $ ($Date: 2009-11-01 11:38:08 +0100 (So, 01 Nov 2009) $)
 */
public class HTMLChars implements LayoutFormatter {

    /**
     * Formats a field value for HTML output.
     *
     * @param field the raw field text; must not be null (a null value causes a
     *              NullPointerException)
     * @return the text with ampersands escaped, line breaks replaced by HTML
     *         tags and backslash commands translated
     */
    public String format(String field) {
        // Pre-processing is done exactly once here. Nested arguments are
        // handled by translate() so that the "&amp;" produced below is not
        // escaped a second time when getPart() recurses.
        String prepared = field.replaceAll("&|\\\\&", "&amp;")
                .replaceAll("[\\n]{2,}", "<p>")
                .replaceAll("\\n", "<br>");
        return translate(prepared);
    }

    /**
     * Walks through already pre-processed text and translates backslash
     * commands, swallowing braces that do not belong to a command.
     *
     * @param field text that has already been through the pre-processing in
     *              {@link #format(String)}
     * @return the translated text
     */
    private String translate(String field) {
        StringBuilder sb = new StringBuilder();
        StringBuilder currentCommand = null;
        // 'escaped' is true directly after a backslash; 'incommand' is true
        // while the characters being read belong to a command name.
        boolean escaped = false;
        boolean incommand = false;

        for (int i = 0; i < field.length(); i++) {
            char c = field.charAt(i);

            if (escaped && (c == '\\')) {
                // Two backslashes in a row produce a literal backslash.
                sb.append('\\');
                escaped = false;
            } else if (c == '\\') {
                if (incommand) {
                    // A new backslash closes the command collected so far.
                    String command = currentCommand.toString();
                    sb.append(lookup(command, command));
                }
                escaped = true;
                incommand = true;
                currentCommand = new StringBuilder();
            } else if (!incommand && (c == '{' || c == '}')) {
                // Swallow the brace.
            } else if (Character.isLetter(c) || (c == '%')
                    || (Globals.SPECIAL_COMMAND_CHARS.indexOf(String.valueOf(c)) >= 0)) {
                escaped = false;
                if (!incommand) {
                    sb.append(c);
                } else {
                    // We are in a command, so the character is part of its name.
                    currentCommand.append(c);
                    boolean isSpecialCharCommand = (currentCommand.length() == 1)
                            && (Globals.SPECIAL_COMMAND_CHARS.indexOf(currentCommand.toString()) >= 0);
                    if (isSpecialCharCommand) {
                        // Command of the type \^o or \~{n}: the next character
                        // or brace group is the body. If the string ends right
                        // here there is no body and nothing is emitted.
                        if (i < field.length() - 1) {
                            String command = currentCommand.toString();
                            i++;
                            c = field.charAt(i);
                            String combody;
                            if (c == '{') {
                                IntAndString part = getPart(field, i, false);
                                i += part.i;
                                combody = part.s;
                            } else {
                                combody = field.substring(i, i + 1);
                            }
                            // Unknown combinations produce no output.
                            sb.append(lookup(command + combody, ""));
                            incommand = false;
                            escaped = false;
                        }
                    } else if (i + 1 == field.length()) {
                        // End of the string terminates the command: use the
                        // translation if there is one, otherwise keep the name.
                        String command = currentCommand.toString();
                        sb.append(lookup(command, command));
                    }
                }
            } else {
                if (!incommand) {
                    sb.append(c);
                } else if (Character.isWhitespace(c) || (c == '{') || (c == '}')) {
                    String command = currentCommand.toString();
                    if (command.equals("em") || command.equals("emph") || command.equals("textit")) {
                        // Italics commands.
                        IntAndString part = getPart(field, i, true);
                        i += part.i;
                        sb.append("<em>").append(part.s).append("</em>");
                    } else if (command.equals("textbf")) {
                        // Bold command.
                        IntAndString part = getPart(field, i, true);
                        i += part.i;
                        sb.append("<b>").append(part.s).append("</b>");
                    } else if (c == '{') {
                        // General command with a brace argument: use the
                        // translation of command+argument if there is one,
                        // otherwise keep the text of the argument.
                        IntAndString part = getPart(field, i, true);
                        i += part.i;
                        sb.append(lookup(command + part.s, part.s));
                    } else if (c == '}') {
                        // This end brace terminates a command, as in {\aa}.
                        // Emit the evaluated command (or its name if unknown)
                        // and swallow the brace.
                        sb.append(lookup(command, command));
                    } else {
                        // Whitespace terminates the command.
                        sb.append(lookup(command, command));
                        sb.append(' ');
                    }
                } else {
                    /*
                     * TODO: this point is reached, apparently, if a command is
                     * terminated in a strange way, such as with "$\omega$".
                     * Both the command and the terminating character are
                     * dropped. We don't have a complete LaTeX parser (math
                     * mode in particular), so these commands are ignored.
                     */
                }
                incommand = false;
                escaped = false;
            }
        }
        return sb.toString();
    }

    /**
     * Looks a key up in {@code Globals.HTMLCHARS}.
     *
     * @param key      the command, or command plus argument, to look up
     * @param fallback the value to return when the table has no entry
     * @return the table entry for {@code key}, or {@code fallback} if none
     */
    private static String lookup(String key, String fallback) {
        Object result = Globals.HTMLCHARS.get(key);
        return (result != null) ? (String) result : fallback;
    }

    /**
     * Extracts the argument that follows position {@code i} in {@code text}.
     * The character at {@code i} itself (an opening brace or a whitespace
     * character in the callers) and any whitespace directly after it are
     * skipped. Characters are then collected, counting nested braces, until
     * the closing brace that is not matched inside the argument.
     *
     * @param text                    the text being translated
     * @param i                       index of the character preceding the
     *                                argument
     * @param terminateOnEndBraceOnly if false, whitespace outside nested
     *                                braces also ends the argument
     * @return the translated argument together with the number of characters
     *         after position {@code i} that were consumed (skipped whitespace
     *         plus the argument itself, not the terminating character)
     */
    private IntAndString getPart(String text, int i, boolean terminateOnEndBraceOnly) {
        int depth = 0;
        int skipped = 0;
        StringBuilder part = new StringBuilder();

        // Advance to the first character and skip whitespace, remembering how
        // much was skipped so the caller can advance past it as well.
        i++;
        while (i < text.length() && Character.isWhitespace(text.charAt(i))) {
            i++;
            skipped++;
        }

        // Then grab whatever is the first token (counting braces).
        while (i < text.length()) {
            char c = text.charAt(i);
            if (!terminateOnEndBraceOnly && depth == 0 && Character.isWhitespace(c)) {
                // End the argument and leave the whitespace for the caller.
                break;
            }
            if (c == '}' && --depth < 0) {
                break;
            } else if (c == '{') {
                depth++;
            }
            part.append(c);
            i++;
        }
        return new IntAndString(skipped + part.length(), translate(part.toString()));
    }

    /** Pair of a consumed-character count and the translated text. */
    private static final class IntAndString {
        final int i;
        final String s;

        IntAndString(int i, String s) {
            this.i = i;
            this.s = s;
        }
    }
}