package com.trydone.core.util;
import net.jforum.exceptions.ForumException;
import net.jforum.view.forum.common.ViewCommon;
import org.htmlparser.Node;
import org.htmlparser.lexer.Lexer;
import java.util.MissingResourceException;
import java.util.ResourceBundle;
import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;
public class HtmlUtils {
private static final String BUNDLE_NAME = "com.trydone.core.util.HtmlUtils";
private static ResourceBundle resourceBundle = null;
private static Pattern entityPattern = null;
public static String getText(String html, int size) {
if (html == null || html.trim().length() == 0) return "";
html = new HtmlToText().getTextFromHtml(html);//textFromHTML(html);//getText(html);
return html.substring(0, html.length() > size ? size : html.length());
}
public static String getText(String contents) {
if (contents == null || contents.trim().length() == 0) {
return contents;
}
try {
StringBuffer sb = new StringBuffer(512);
Lexer lexer = new Lexer(contents);
Node node;
while ((node = lexer.nextNode()) != null) {
String ps = node.toPlainTextString();
sb.append(ps);
}
contents = sb.toString();
ViewCommon.replaceAll(contents, "<br/>", "");
ViewCommon.replaceAll(contents, "<p/>", "");
ViewCommon.replaceAll(contents, "<p>", "");
ViewCommon.replaceAll(contents, "</p>", "");
ViewCommon.replaceAll(contents, "</cite>", "");
ViewCommon.replaceAll(contents, "<cite>", "");
} catch (Exception e) {
throw new ForumException("Problems while parsing HTML: " + e, e);
}
return contents;
}
public static String textFromHTML(String s) {
if (s == null) return null;
StringBuffer sb = new StringBuffer();
//�ѿհף����У��س���TABȥ��,
for (int i = 0; i < s.length(); i++) {
char tmpC = s.charAt(i);
if (tmpC != ' ' && tmpC != '\n' && tmpC != '\r' && tmpC != '\t') {
sb.append(s.charAt(i));
}
}
return convertCharacterEntities(stripHTMLTags(sb.toString()));
}
public static String convertCharacterEntities(String s) {
// The resource bundle contains the mappings for symbolic entity
// names like "amp". Note: Must protect matching and MatchResult in
// a critical section, for thread-safety. See javadocs for
// Perl5Util.
synchronized (HtmlUtils.class) {
try {
if (entityPattern == null)
entityPattern = Pattern.compile("&(#?[^; \t]+);");
}
catch (PatternSyntaxException ex) {
// Should not happen unless I've screwed up the pattern.
// Throw a runtime error.
assert (false);
}
}
ResourceBundle bundle = getResourceBundle();
//Properties bundle = HtmlUtils.class.getResourceAsStream(BUNDLE_NAME);
StringBuffer buf = new StringBuffer();
Matcher matcher;
synchronized (HtmlUtils.class) {
matcher = entityPattern.matcher(s);
}
for (; ;) {
String match;
String preMatch;
String postMatch;
if (!matcher.find())
break;
match = matcher.group(1);
preMatch = s.substring(0, matcher.start(1) - 1);
postMatch = s.substring(matcher.end(1) + 1);
if (preMatch != null)
buf.append(preMatch);
if (match.charAt(0) == '#') {
if (match.length() == 1)
buf.append('#');
else {
// It might be a numeric entity code. Try to parse it
// as a number. If the parse fails, just put the whole
// string in the result, as is.
try {
int cc = Integer.parseInt(match.substring(1));
// It parsed. Is it a valid Unicode character?
if (Character.isDefined((char) cc))
buf.append((char) cc);
else {
buf.append("");
buf.append(match);
buf.append(";");
}
}
catch (NumberFormatException ex) {
buf.append("");
buf.append(match);
buf.append(";");
}
}
} else {
// Not a numeric entity. Try to find a matching symbolic
// entity.
try {
String rep = bundle.getString("html_" + match);
buf.append(rep);
}
catch (MissingResourceException ex) {
buf.append("&");
buf.append(match);
buf.append(";");
}
}
if (postMatch == null)
break;
s = postMatch;
matcher.reset(s);
}
if (s.length() > 0)
buf.append(s);
return buf.toString();
}
public static String stripHTMLTags(String s) {
char[] ch = s.toCharArray();
boolean inElement = false;
StringBuffer buf = new StringBuffer();
for (int i = 0; i < ch.length; i++) {
switch (ch[i]) {
case'<':
inElement = true;
break;
case'>':
if (inElement)
inElement = false;
else
buf.append(ch[i]);
break;
default:
if (!inElement)
buf.append(ch[i]);
break;
}
}
return buf.toString();
}
private static ResourceBundle getResourceBundle() {
synchronized (HtmlUtils.class) {
if (resourceBundle == null)
resourceBundle = ResourceBundle.getBundle(BUNDLE_NAME);
}
return resourceBundle;
}
}