package org.oelerich.BBCodeParser; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * This class parses some bbcode formatted text and replaces the bbcode * according to the BBCodeTag objects, that must be registered to the parser. * * It handles malformed input text as well as tag arguments. */ public class BBCodeParser { // the skeleton regex for the bbcodes. %1$s must be replaced by // the allowed bbcodes private String mRegexSkeleton = "(.*?)((\\[\\s*(%1$s)\\s*(=((\\s*((\"[^\"]+?\")|" + "([^,\\]]+?))\\s*,)*(\\s*((\"[^\"]+?\")|([^,\\]]+?))\\s*)))?\\])|" + "(\\[/\\s*((%1$s))\\s*\\]))"; // this map holds our registered tags private Map<String, BBCodeTag> mTags = new HashMap<String, BBCodeTag>(); // register a new tag public void registerTag(BBCodeTag tag) { mTags.put(tag.mTag, tag); } // generate the regex by joining their names and compile the pattern private Pattern generatePattern() { String tags = ""; for (Map.Entry<String, BBCodeTag> entry : mTags.entrySet()) tags += Pattern.quote(entry.getKey()) + "|"; tags = tags.substring(0, tags.length() - 1); Pattern pattern = Pattern.compile(String.format(mRegexSkeleton, tags), Pattern.CASE_INSENSITIVE | Pattern.DOTALL | Pattern.MULTILINE); return pattern; } // parse the input public String parse(String input) throws UnknownErrorException { Integer lastMatched = 0; List<Token> tokens = new ArrayList<Token>(); input = input.replace("<", "<"); input = input.replace(">", ">"); input = input.replace("\n", "<br/>\n"); Pattern pattern = generatePattern(); Matcher matcher = pattern.matcher(input); Node root = new Node(); Node current = root; // tokenize // matcher.group(0) contains all // matcher.group(1) contains the prefixing string // matcher.group(2) contains the raw bbcode tag (like [b]) // matcher.group(4) contains the start bbcode tag (like b) // matcher.group(7) contains the arguments // matcher.group(16) contains the end tag (like b) while (matcher.find()) { // the node is prefixed by some text if (matcher.group(1).length() > 0) { Token t = new Token(Token.TYPE_STRING, matcher.group(1)); tokens.add(t); } if (matcher.group(2).indexOf("[/") > -1) { if (matcher.group(16) == null) System.out.println(matcher.group(2)); Token t = new Token(Token.TYPE_CLOSE, matcher.group(16) .toLowerCase(Locale.US), matcher.group(2)); tokens.add(t); } else { String[] args = {}; if (matcher.group(6) != null && matcher.group(6).length() > 0) args = new String[] { matcher.group(6).replace("\"", "") }; Token t = new Token(Token.TYPE_OPEN, matcher.group(4) .toLowerCase(Locale.US), matcher.group(2), args); tokens.add(t); } lastMatched = matcher.end(); } if (input.substring(lastMatched).length() > 0) { Token t = new Token(Token.TYPE_STRING, input.substring(lastMatched)); tokens.add(t); } // parse while (tokens.size() > 0) { Token t; if (tokens.get(0).mType == Token.TYPE_STRING) { try { current = add_string(current, tokens.get(0).mText); } catch (InvalidTokenException e) { switch (current.mTag.mInvalidStringRecovery) { case BBCodeTag.RECOVERY_ADD: // we add a new token to the list t = new Token(Token.TYPE_OPEN, current.mTag.mInvalidRecoveryAddTag); tokens.add(0, t); break; default: throw new UnknownErrorException(); } continue; } } if (tokens.get(0).mType == Token.TYPE_OPEN) { // we add a new opening tag try { current = add_start(current, tokens.get(0).mTag, tokens.get(0).mArgs, tokens.get(0).mText); } catch (InvalidTokenException e) { int recovery; // this tag is not allowed anywhere in this branch, // so we make it a string. if (!is_allowed_anywhere(current, tokens.get(0).mTag)) recovery = BBCodeTag.RECOVERY_STRING; else recovery = current.mTag.mInvalidStartRecovery; // this hack is needed to prevent an infinite loop // in certain cases. if (current.mTag.mTag.equals(tokens.get(0).mTag) && current.mTag.mInvalidStartRecovery == BBCodeTag.RECOVERY_ADD) recovery = BBCodeTag.RECOVERY_CLOSE; // policy switch (recovery) { case BBCodeTag.RECOVERY_ADD: t = new Token(Token.TYPE_OPEN, current.mTag.mInvalidRecoveryAddTag); tokens.add(0, t); break; case BBCodeTag.RECOVERY_CLOSE: t = new Token(Token.TYPE_CLOSE, current.mTag.mTag); tokens.add(0, t); break; case BBCodeTag.RECOVERY_STRING: tokens.get(0).mType = Token.TYPE_STRING; break; default: throw new UnknownErrorException(); } continue; } catch (InvalidParameterCountException e) { tokens.get(0).mType = Token.TYPE_STRING; continue; } } if (tokens.get(0).mType == Token.TYPE_CLOSE) { // new closing tag try { current = add_end(current, tokens.get(0).mTag, tokens.get(0).mText); } catch (InvalidTokenException e) { int recovery; if (!is_open(current, tokens.get(0).mTag)) recovery = BBCodeTag.RECOVERY_STRING; else recovery = current.mTag.mInvalidEndRecovery; switch (recovery) { case BBCodeTag.RECOVERY_REOPEN: t = new Token(Token.TYPE_CLOSE, current.mTag.mTag); tokens.add(0, t); t = new Token(Token.TYPE_OPEN, current.mTag.mTag); tokens.add(2, t); break; case BBCodeTag.RECOVERY_CLOSE: t = new Token(Token.TYPE_CLOSE, current.mTag.mTag); tokens.add(0, t); break; case BBCodeTag.RECOVERY_STRING: tokens.get(0).mType = Token.TYPE_STRING; break; default: throw new UnknownErrorException(); } continue; } } tokens.remove(0); } // build the string and return return root.toString(); } public Node add_string(Node current, String str) throws InvalidTokenException { if (!is_root(current) && !current.mTag.mAllowedTags.contains("string")) throw new InvalidTokenException(); Node new_node = new Node(str); new_node.mParent = current; current.mChildren.add(new_node); return current; } public Node add_end(Node current, String tagStr, String raw) throws InvalidTokenException { if (is_root(current) || !tagStr.equals(current.mTag.mTag)) throw new InvalidTokenException(); return current.close(raw); } public Node close(Node current) { return current.close(); } public Node add_start(Node current, String tag) { Node new_node = new Node(mTags.get(tag), null, "[" + tag + "]"); new_node.mParent = current; current.mChildren.add(new_node); return new_node; } public Node add_start(Node current, String tagStr, String[] args, String raw) throws InvalidTokenException, InvalidParameterCountException { // create the node BBCodeTag tag = mTags.get(tagStr); Node new_node = new Node(tag, args, raw); // check if bbcode is allowed here if (!is_root(current) && !current.mTag.mAllowedTags.contains(tagStr)) throw new InvalidTokenException(); // check the parameter count if (!tag.mHtml.containsKey(args.length)) throw new InvalidParameterCountException(); new_node.mParent = current; current.mChildren.add(new_node); return new_node; } public Boolean is_open(Node current, String tag) { if (is_root(current)) { return false; } else { if (current.mTag.mTag.equals(tag)) return true; else return is_open(current.mParent, tag); } } public Boolean is_allowed_anywhere(Node current, String tag) { if (is_root(current)) { return false; } else { if (current.mTag.mAllowedTags.contains(tag)) return true; else return is_allowed_anywhere(current.mParent, tag); } } public Boolean is_root(Node current) { return current.mTag == null && current.mText == null; } /** * This class describes one bbcode tag. For each allowed tag it must be * instanciated and its members filled. The tag is then registered to the * parser via the registerTag() function. */ public static class BBCodeTag { public static final int RECOVERY_NONE = 0; public static final int RECOVERY_STRING = 1; public static final int RECOVERY_CLOSE = 2; public static final int RECOVERY_REOPEN = 3; public int mInvalidEndRecovery = BBCodeTag.RECOVERY_STRING; public static final int RECOVERY_ADD = 4; public int mInvalidStringRecovery = BBCodeTag.RECOVERY_NONE; public int mInvalidStartRecovery = BBCodeTag.RECOVERY_STRING; public String mInvalidRecoveryAddTag = ""; public String mTag; public String mDescription = ""; public List<String> mAllowedTags = new ArrayList<String>(); public HashMap<Integer, String> mHtml = new HashMap<Integer, String>(); public String mText = ""; public BBCodeTag allow(String tags) { String[] t = {}; t = tags.split(","); for (String tag : t) { mAllowedTags.add(tag.replace(" ", "")); } return this; } public void html(String htmlcode) { mHtml.put(0, htmlcode); } public void html(Integer nArgs, String htmlcode) { mHtml.put(nArgs, htmlcode); } // for subclasses public String replaceArgument(int id, String what) { return what; } } public static class Token { public static final int TYPE_STRING = 0; public static final int TYPE_OPEN = 1; public static final int TYPE_CLOSE = 2; public int mType; public String mText; public String mTag; public String[] mArgs = {}; public Token(int type, String text) { mType = type; if (mType == Token.TYPE_STRING) { mText = text; } else { mTag = text; if (mType == Token.TYPE_OPEN) { mText = "[" + mTag + "]"; } if (mType == Token.TYPE_CLOSE) { mText = "[/" + mTag + "]"; } } } public Token(int type, String tag, String text) { mType = type; mText = text; mTag = tag; } public Token(int type, String tag, String text, String[] args) { mType = type; mText = text; mTag = tag; mArgs = args; } } /** * This is one Node in our lexigraphical tree. */ private class Node { public List<Node> mChildren = new ArrayList<Node>(); public Node mParent; public BBCodeTag mTag = null; public String mText = null; public String[] mArgs; public String mRawStart = null; public String mRawEnd = null; public Boolean mInvalid = false; // initializer for the root element public Node() { }; // initializer for a new bbcode node public Node(BBCodeTag type, String[] args, String raw) { mTag = type; mArgs = args; mRawStart = raw; } // initializer for a String node. public Node(String text) { mText = text; } public Node close(String raw) { mRawEnd = raw; return mParent; } public Node close() { mRawEnd = "[/" + mTag.mTag + "]"; return mParent; } // the creation of the html string public String toString() { // is this a string? if (mText != null) return mText; // build the result string by concatenating all children String res = ""; for (Node n : mChildren) res = res + n.toString(); // this is just for the root element. if (mTag == null) return res; // return empty tags if (res == "") return ""; // invalid? if (mInvalid) return String.format("%s" + res + "%s", mRawStart, mRawEnd); // replace the arguments if there are some int num_args = 0; if (mArgs != null) num_args = mArgs.length; String html = mTag.mHtml.get(num_args).replace("{0}", mTag.replaceArgument(0, res)); if (num_args > 0) for (int i = 0; i < num_args; i++) html = html.replace("{" + (i + 1) + "}", mTag.replaceArgument(i + 1, mArgs[i])); return html; } } public class InvalidTokenException extends Exception { private static final long serialVersionUID = 42L; public InvalidTokenException() { super("Invalid token"); } } public class InvalidParameterCountException extends Exception { private static final long serialVersionUID = 44L; public InvalidParameterCountException() { super("Invalid parameter count"); } } public class UnknownErrorException extends Exception { private static final long serialVersionUID = 43L; public UnknownErrorException() { super("Unknown error"); } } }