package org.karmaexchange.util;
import org.apache.commons.lang.StringEscapeUtils;
import org.jsoup.Jsoup;
import org.jsoup.helper.StringUtil;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Node;
import org.jsoup.nodes.TextNode;
import org.jsoup.select.NodeTraversor;
import org.jsoup.select.NodeVisitor;
/**
* HTML utility functions.
*
* @author Jonathan Hedley, jonathan@hedley.net
* @author Amir Valiani
*/
public class HtmlUtil {
/**
* Convert a plain-text string to an HTML string.
*
* @param palinText a string containing plain text characters
* @return formatted text
*/
public static String toHtml(String plainText) {
// Escape any html chars.
String htmlCharsEscaped = StringEscapeUtils.escapeHtml(plainText);
// Preserve newlines.
return htmlCharsEscaped.replaceAll("\n", "<br>");
}
/**
* Convert an HTML string to a plain-text string.
*
* @param htmlStr a string containing HTML markup
* @return formatted text
*/
public static String toPlainText(String htmlStr) {
Document doc = Jsoup.parse(htmlStr);
PlainTextFormattingVisitor formatter = new PlainTextFormattingVisitor();
NodeTraversor traversor = new NodeTraversor(formatter);
traversor.traverse(doc); // walk the DOM, and call .head() and .tail() for each node
return formatter.toString();
}
// the formatting rules, implemented in a breadth-first DOM traverse
private static class PlainTextFormattingVisitor implements NodeVisitor {
private StringBuilder accum = new StringBuilder(); // holds the accumulated text
// hit when the node is first seen
public void head(Node node, int depth) {
String name = node.nodeName();
if (node instanceof TextNode)
append(((TextNode) node).text()); // TextNodes carry all user-readable text in the DOM.
else if (name.equals("li")) append("\n * ");
}
// hit when all of the node's children (if any) have been visited
public void tail(Node node, int depth) {
String name = node.nodeName();
if (name.equals("br"))
append("\n");
else if (StringUtil.in(name, "p", "h1", "h2", "h3", "h4", "h5"))
append("\n");
}
// appends text to the string builder with a simple word wrap method
private void append(String text) {
if (text.equals(" ") &&
( (accum.length() == 0) ||
StringUtil.in(accum.substring(accum.length() - 1), " ", "\n")))
return; // don't accumulate long runs of empty spaces
accum.append(text);
}
public String toString() {
return accum.toString();
}
}
}