/**
* Copyright (c) 2003-2005 Fernando Dobladez
*
* This file is part of AntDoclet.
*
* AntDoclet is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* AntDoclet is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with AntDoclet; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*
*/
package com.neuroning.antdoclet.latex;
import java.awt.*;
import javax.swing.text.*;
import javax.swing.text.html.*;
import javax.swing.text.html.parser.*;
import java.util.*;
import java.io.*;
/**
* This class implements a simple HTML to LaTeX translator.
* It's rather ugly for my taste... but it works well enough for now.
* It should be replaced with new code (maybe limit it to XHTML, and use any
* XML parser to implement it).
*
* It's a modified version of source code I took from Soren Caspersen, who in
* turn took it from Gregg Wonderly (http://texdoclet.dev.java.net/)
*
* It's implemented using the HTML parser that is part of Swing.
*
* Fernando Dobladez <dobladez@gmail.com>
*
*/
public class HTML2Latex extends HTMLEditorKit.ParserCallback {

    /** Characters accepted as the first character of a hexadecimal colour value. */
    private static final String HEX_DIGITS = "abcdefABCDEF0123456789";

    /** Colour names that are passed through unchanged as the argument of <code>\color</code>. */
    private static final Set NAMED_COLORS = new HashSet(Arrays.asList(new String[] {
            "red", "green", "blue", "white", "yellow", "black", "cyan", "magenta" }));

    /** Emitted for a line break while {@link #verbat} is positive. */
    private static final String VERBATIM_NEWLINE = "}\\mbox{}\\newline\n{\\tt\\small ";

    /**
     * Buffer containing the translated HTML. While a table is open this is
     * whatever buffer {@link TableInfo#startTable} returned.
     */
    StringBuffer ret;

    /** Table states that were active when a nested <code>&lt;TABLE&gt;</code> started. */
    Stack tblstk = new Stack();

    /** State of the table currently being translated, or null outside tables. */
    TableInfo tblinfo;

    /** Verbatim nesting level; nothing in this class changes it any more. */
    int verbat = 0;

    /** Index used to name the next colour defined for a <code>&lt;FONT&gt;</code> tag. */
    int colIdx = 0;

    /** Maps <code>"color" + rgb</code> to the LaTeX colour name already defined for it. */
    Hashtable colors = new Hashtable(10);

    String block = "";

    /** HREF of the anchor currently open (without its fragment), or null. */
    String refurl = null;

    /** Value of the <code>donotprinturl</code> attribute of the current anchor, or null. */
    String doNotPrintURL = null;

    /** NAME of the current anchor when it has no HREF, or null. */
    String refname = null;

    String refimg = null;

    /** True between <code>&lt;TEX&gt;</code> and <code>&lt;/TEX&gt;</code>; all other input is dropped. */
    boolean notex = false;

    int imageindex = 0;

    /** When true, anchors are emitted as hyperref commands; otherwise links are set in bold. */
    boolean _hyperref = true;

    /** When false (inside <code>&lt;PRE&gt;</code>), text is copied without LaTeX escaping. */
    boolean escape = true;

    /**
     * Constructs a new instance.
     *
     * @param ret
     *            The <CODE>StringBuffer</CODE> where the translated HTML is
     *            appended.
     */
    public HTML2Latex(StringBuffer ret) {
        this.ret = ret;
    }

    /**
     * Constructs a new instance that appends to a fresh, empty buffer, so the
     * callbacks can be used without assigning {@link #ret} first.
     */
    public HTML2Latex() {
        this(new StringBuffer());
    }

    /**
     * This method handles simple HTML tags (eg. <CODE>&lt;HR&gt;</CODE>-tags).
     * It is called by the parser whenever such a tag is encountered.
     *
     * A <CODE>tex</CODE> tag appends the value of its <CODE>txt</CODE>
     * attribute verbatim and suppresses all other output until the matching
     * end tag is seen.
     */
    public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attrSet,
            int pos) {
        if (tag.toString().equalsIgnoreCase("tex")) {
            if (attrSet.containsAttribute(HTML.Attribute.ENDTAG, "true")) {
                notex = false;
            } else {
                String tex = (String) attrSet.getAttribute("txt");
                // Without this check a missing attribute would append the text "null".
                if (tex != null) {
                    ret.append(tex);
                }
                notex = true;
            }
            return;
        }
        if (notex) {
            return;
        }
        if (tag == HTML.Tag.HR) {
            // FERD. Using hsize is wrong, since the rule may not start on
            // the very left, in which case \hsize would span over
            // the right margin.
            ret.append("\\hspace*{3cm}\\hrulefill\\hspace*{3cm}\\newline\n\n");
        } else if (tag == HTML.Tag.BR) {
            ret.append("\\mbox{}\\newline ");
        }
    }

    /**
     * This method handles HTML tags that mark a beginning (eg. <CODE>&lt;P&gt;</CODE>-tags).
     * It is called by the parser whenever such a tag is encountered.
     * Tags that are not listed here produce no output.
     */
    public void handleStartTag(HTML.Tag tag, MutableAttributeSet attrSet,
            int pos) {
        if (notex) {
            return;
        }
        if (tag == HTML.Tag.PRE) {
            ret.append("\n\\begin{lstlisting}\n");
            escape = false;
        } else if (tag == HTML.Tag.H1) {
            ret.append("\\chapter*{");
        } else if (tag == HTML.Tag.H2) {
            ret.append("\\section*{");
        } else if (tag == HTML.Tag.H3) {
            ret.append("\\subsection*{");
        } else if (tag == HTML.Tag.H4 || tag == HTML.Tag.H5
                || tag == HTML.Tag.H6) {
            ret.append("\\subsubsection*{");
        } else if (tag == HTML.Tag.SUB) {
            ret.append("$_{");
        } else if (tag == HTML.Tag.SUP) {
            ret.append("$^{");
        } else if (tag == HTML.Tag.CENTER) {
            ret.append("\\makebox[\\hsize]{ ");
        } else if (tag == HTML.Tag.TITLE) {
            ret.append("\\chapter{");
        } else if (tag == HTML.Tag.CODE) {
            ret.append("{\\tt\\small ");
        } else if (tag == HTML.Tag.TT) {
            ret.append("{\\tt ");
        } else if (tag == HTML.Tag.P) {
            ret.append("\n\n");
        } else if (tag == HTML.Tag.B || tag == HTML.Tag.STRONG) {
            ret.append("{\\bf ");
        } else if (tag == HTML.Tag.A) {
            startAnchor(attrSet);
        } else if (tag == HTML.Tag.OL) {
            ret.append("\n\\begin{enumerate}");
        } else if (tag == HTML.Tag.DL) {
            ret.append("\n\\begin{itemize}");
        } else if (tag == HTML.Tag.LI) {
            ret.append("\n\\item{\\vskip -.8ex ");
        } else if (tag == HTML.Tag.DT) {
            ret.append("\\item[");
        } else if (tag == HTML.Tag.DD) {
            ret.append("{");
        } else if (tag == HTML.Tag.UL) {
            ret.append("\\begin{itemize}");
        } else if (tag == HTML.Tag.I) {
            ret.append("{\\it ");
        } else if (tag == HTML.Tag.TABLE) {
            // Remember the enclosing table (null at top level) and redirect
            // output to the buffer handed back by the new table.
            tblstk.push(tblinfo);
            tblinfo = new TableInfo();
            ret = tblinfo.startTable(ret, attrSet);
        } else if (tag == HTML.Tag.TH) {
            if (tblinfo != null) {
                tblinfo.startHeadCol(attrSet);
            }
        } else if (tag == HTML.Tag.TD) {
            if (tblinfo != null) {
                tblinfo.startCol(attrSet);
            }
        } else if (tag == HTML.Tag.TR) {
            if (tblinfo != null) {
                tblinfo.startRow(attrSet);
            }
        } else if (tag == HTML.Tag.FONT) {
            ret.append("{");
            String col = (String) attrSet.getAttribute(HTML.Attribute.COLOR);
            if (col != null) {
                appendFontColor(col);
            }
        }
    }

    /**
     * Records the attributes of an opening <CODE>&lt;A&gt;</CODE> tag and
     * appends the command that starts the link or the link target.
     */
    private void startAnchor(MutableAttributeSet attrSet) {
        refurl = (String) attrSet.getAttribute(HTML.Attribute.HREF);
        doNotPrintURL = (String) attrSet.getAttribute("donotprinturl");

        if (refurl == null) {
            // No HREF: treat the tag as a named link target.
            refname = (String) attrSet.getAttribute(HTML.Attribute.NAME);
            if (refname != null && _hyperref) {
                ret.append("\\hyperdef{" + refname + "}{");
            }
            return;
        }
        if (!_hyperref) {
            ret.append("{\\bf ");
            return;
        }

        // Split "url#fragment"; from here on refurl holds only the part
        // before the '#'.
        String sharp = "";
        int hashAt = refurl.indexOf('#');
        if (hashAt >= 0) {
            sharp = refurl.substring(hashAt + 1);
            if (sharp.indexOf('%') >= 0) {
                sharp = ""; // Don't know what to do with '%'
            }
            refurl = refurl.substring(0, hashAt);
        }
        ret.append("\\hyperref{" + refurl + "}{" + sharp + "}{}{");
    }

    /**
     * Appends the colour selection for a <CODE>&lt;FONT COLOR=...&gt;</CODE>
     * value. One of the eight known colour names (matched ignoring case) is
     * used directly. A hexadecimal value, with or without a leading '#', is
     * defined once per instance as an rgb colour and then selected. Any other
     * value is ignored.
     */
    private void appendFontColor(String col) {
        String spec = col.trim();
        String lower = spec.toLowerCase(Locale.ENGLISH);
        if (NAMED_COLORS.contains(lower)) {
            ret.append("\\color{" + lower + "}");
            return;
        }

        String hex = spec.startsWith("#") ? spec.substring(1) : spec;
        if (hex.length() == 0 || HEX_DIGITS.indexOf(hex.charAt(0)) == -1) {
            return;
        }
        Color cc;
        try {
            cc = new Color((int) Long.parseLong(hex, 16));
        } catch (NumberFormatException notHex) {
            // A colour name that merely starts with a hex digit (e.g.
            // "beige"): leave the text in the current colour.
            return;
        }

        String key = "color" + cc.getRGB();
        String name = (String) colors.get(key);
        if (name == null) {
            name = "color" + colIdx;
            colIdx++;
            // The colour model is a braced argument and the components are
            // given in red, green, blue order.
            ret.append("\\definecolor{" + name + "}{rgb}{"
                    + (cc.getRed() / 255.0) + ","
                    + (cc.getGreen() / 255.0) + ","
                    + (cc.getBlue() / 255.0) + "}");
            colors.put(key, name);
        }
        ret.append("\\color{" + name + "}");
    }

    /**
     * This method handles HTML tags that mark an ending (eg. <CODE>&lt;/P&gt;</CODE>-tags).
     * It is called by the parser whenever such a tag is encountered.
     */
    public void handleEndTag(HTML.Tag tag, int pos) {
        if (notex) {
            return;
        }
        if (tag == HTML.Tag.PRE) {
            ret.append("\n\\end{lstlisting}\n");
            escape = true;
        } else if (tag == HTML.Tag.H1 || tag == HTML.Tag.H2
                || tag == HTML.Tag.H3 || tag == HTML.Tag.H4
                || tag == HTML.Tag.H5 || tag == HTML.Tag.H6
                || tag == HTML.Tag.CENTER || tag == HTML.Tag.CODE
                || tag == HTML.Tag.TT || tag == HTML.Tag.B
                || tag == HTML.Tag.STRONG || tag == HTML.Tag.LI
                || tag == HTML.Tag.DD || tag == HTML.Tag.I
                || tag == HTML.Tag.FONT) {
            // All of these opened exactly one brace group.
            ret.append("}");
        } else if (tag == HTML.Tag.SUB || tag == HTML.Tag.SUP) {
            ret.append("}$");
        } else if (tag == HTML.Tag.TITLE) {
            // NOTE(review): this closes the \chapter argument and opens a new
            // group that this class never closes - presumably it is balanced
            // by the surrounding template; confirm.
            ret.append("}{");
        } else if (tag == HTML.Tag.P) {
            ret.append("\n\n");
        } else if (tag == HTML.Tag.A) {
            endAnchor();
        } else if (tag == HTML.Tag.DT) {
            ret.append("]");
        } else if (tag == HTML.Tag.DL || tag == HTML.Tag.UL) {
            ret.append("\n\\end{itemize}\n");
        } else if (tag == HTML.Tag.OL) {
            ret.append("\n\\end{enumerate}\n");
        } else if (tag == HTML.Tag.TABLE) {
            if (tblinfo != null) {
                // Switch back to the buffer returned by the finished table
                // and restore the enclosing table, if any.
                ret = tblinfo.endTable();
                tblinfo = tblstk.isEmpty() ? null : (TableInfo) tblstk.pop();
            }
        } else if (tag == HTML.Tag.TH || tag == HTML.Tag.TD) {
            if (tblinfo != null) {
                tblinfo.endCol();
            }
        } else if (tag == HTML.Tag.TR) {
            if (tblinfo != null) {
                tblinfo.endRow();
            }
        }
    }

    /**
     * Closes the group opened for the current anchor. For a link, the target
     * is printed after it as "(at url)" unless the URL is empty or the tag
     * carried a <CODE>donotprinturl</CODE> attribute.
     */
    private void endAnchor() {
        if (refurl != null) {
            ret.append("} ");
            if (doNotPrintURL == null && !refurl.equals("")) {
                ret.append("(at ");
                ret.append(fixText(refurl));
                ret.append(")");
            }
        } else if (refname != null && _hyperref) {
            // A group was only opened for the name when _hyperref was set.
            ret.append("}");
        }
    }

    /**
     * This method handles all other text. Inside a <CODE>tex</CODE> element
     * the text is dropped, inside <CODE>&lt;PRE&gt;</CODE> it is copied
     * unchanged, and everywhere else characters that are special to LaTeX
     * are escaped.
     */
    public void handleText(char[] data, int pos) {
        if (notex) {
            return;
        }
        if (!escape) {
            ret.append(data);
            return;
        }

        String str = new String(data);
        int len = str.length();
        for (int i = 0; i < len; ++i) {
            char c = str.charAt(i);
            // A blank directly after a control word would be swallowed by
            // TeX, so it is replaced by an explicit "\ " and skipped here.
            boolean spaceFollows = i + 1 < len && str.charAt(i + 1) == ' ';

            switch (c) {
            case '\u00a0': // non-breaking space (U+00A0)
                ret.append("\\phantom{ }");
                break;
            case ' ':
                if (verbat > 0) {
                    ret.append("\\phantom{ }");
                } else {
                    ret.append(' ');
                }
                break;
            case '[':
                i += appendControlWord("\\lbrack", spaceFollows);
                break;
            case ']':
                i += appendControlWord("\\rbrack", spaceFollows);
                break;
            case '_':
            case '%':
            case '$':
            case '#':
            case '}':
            case '{':
            case '&':
                ret.append('\\');
                ret.append(c);
                if (spaceFollows) {
                    ret.append("\\ ");
                    i++;
                }
                break;
            case '\u00e6': // lower-case ae ligature
                i += appendControlWord("\\ae", spaceFollows);
                break;
            case '\u00c6': // upper-case AE ligature
                i += appendControlWord("\\AE", spaceFollows);
                break;
            case '\u00e5': // lower-case a with ring
                i += appendControlWord("\\aa", spaceFollows);
                break;
            case '\u00c5': // upper-case A with ring
                i += appendControlWord("\\AA", spaceFollows);
                break;
            case '\u00f8': // lower-case o with stroke
                i += appendControlWord("\\o", spaceFollows);
                break;
            case '\u00d8': // upper-case O with stroke
                i += appendControlWord("\\O", spaceFollows);
                break;
            case '^':
                ret.append("$\\wedge$");
                break;
            case '~':
                // A bare '~' is a non-breaking space in LaTeX.
                i += appendControlWord("\\textasciitilde", spaceFollows);
                break;
            case '<':
                i += appendControlWord("\\textless", spaceFollows);
                break;
            case '>':
                i += appendControlWord("\\textgreater", spaceFollows);
                break;
            case '\\':
                i += appendControlWord("\\textbackslash", spaceFollows);
                break;
            case '\r':
            case '\n':
                if (verbat > 0) {
                    ret.append(VERBATIM_NEWLINE);
                } else if (tblstk.size() > 0) {
                    // Swallow new lines while tables are in progress,
                    // <tr> controls new line emission.
                    ret.append(" ");
                } else if (i + 1 < len && str.charAt(i + 1) == '\n') {
                    // Two consecutive line ends become one \bl command.
                    ret.append("\\bl ");
                    ++i;
                } else {
                    ret.append(c);
                }
                break;
            default:
                ret.append(c);
                break;
            }
        }
    }

    /**
     * Appends a LaTeX control word followed by a terminator: an explicit
     * "\ " when the source text continues with a blank, a plain blank
     * otherwise.
     *
     * @return 1 when the following source blank was consumed, 0 otherwise.
     */
    private int appendControlWord(String word, boolean spaceFollows) {
        ret.append(word);
        if (spaceFollows) {
            ret.append("\\ ");
            return 1;
        }
        ret.append(' ');
        return 0;
    }

    /**
     * Converts a HTML string into <TEX txt="\LaTeX{}">LaTeX</TEX> using an
     * instance of <CODE>HTML2Latex</CODE>.
     *
     * @param str
     *            The HTML text; null is treated as empty.
     * @return The converted string.
     */
    public static String fixText(String str) {
        if (str == null) {
            return "";
        }
        StringBuffer result = new StringBuffer(str.length());
        HTML2Latex b = new HTML2Latex(result);
        Reader reader = new StringReader(str);
        try {
            new ParserDelegator().parse(reader, b, false);
        } catch (IOException ignored) {
            // Best effort: the input is an in-memory string, and whatever
            // was translated before the failure is still returned.
        }
        return result.toString();
    }
}