package org.mule.devkit.doclet.markdown; import java.util.ArrayList; import java.util.Collection; import java.util.List; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * Mutable String with common operations used in Markdown processing. */ public class TextEditor { private StringBuffer text; /** * Create a new TextEditor based on the contents of a String or * StringBuffer. * * @param text */ public TextEditor(CharSequence text) { this.text = new StringBuffer(text.toString()); } /** * Give up the contents of the TextEditor. * @return */ @Override public String toString() { return text.toString(); } /** * Replace all occurrences of the regular expression with the replacement. The replacement string * can contain $1, $2 etc. referring to matched groups in the regular expression. * * @param regex * @param replacement * @return */ public TextEditor replaceAll(String regex, String replacement) { if (text.length() > 0) { final String r = replacement; Pattern p = Pattern.compile(regex, Pattern.MULTILINE); Matcher m = p.matcher(text); StringBuffer sb = new StringBuffer(); while (m.find()) { m.appendReplacement(sb, r); } m.appendTail(sb); text = sb; } return this; } /** * Same as replaceAll(String, String), but does not interpret * $1, $2 etc. in the replacement string. * @param regex * @param replacement * @return */ public TextEditor replaceAllLiteral(String regex, final String replacement) { return replaceAll(Pattern.compile(regex, Pattern.MULTILINE), new Replacement() { public String replacement(@SuppressWarnings("unused") Matcher m) { return replacement; } }); } /** * Replace all occurrences of the Pattern. The Replacement object's replace() method is * called on each match, and it provides a replacement, which is placed literally * (i.e., without interpreting $1, $2 etc.) * * @param pattern * @param replacement * @return */ public TextEditor replaceAll(Pattern pattern, Replacement replacement) { Matcher m = pattern.matcher(text); int lastIndex = 0; StringBuffer sb = new StringBuffer(); while (m.find()) { sb.append(text.subSequence(lastIndex, m.start())); sb.append(replacement.replacement(m)); lastIndex = m.end(); } sb.append(text.subSequence(lastIndex, text.length())); text = sb; return this; } /** * Remove all occurrences of the given regex pattern, replacing them * with the empty string. * * @param pattern Regular expression * @return * @see java.util.regex.Pattern */ public TextEditor deleteAll(String pattern) { return replaceAll(pattern, ""); } /** * Convert tabs to spaces given the default tab width of 4 spaces. * @return */ public TextEditor detabify() { return detabify(4); } /** * Convert tabs to spaces. * * @param tabWidth Number of spaces per tab. * @return */ public TextEditor detabify(final int tabWidth) { replaceAll(Pattern.compile("(.*?)\\t"), new Replacement() { public String replacement(Matcher m) { String lineSoFar = m.group(1); int width = lineSoFar.length(); StringBuffer replacement = new StringBuffer(lineSoFar); do { replacement.append(' '); ++width; } while (width % tabWidth != 0); return replacement.toString(); } }); return this; } /** * Remove a number of spaces at the start of each line. * @param spaces * @return */ public TextEditor outdent(int spaces) { return deleteAll("^(\\t|[ ]{1," + spaces + "})"); } /** * Remove one tab width (4 spaces) from the start of each line. * @return */ public TextEditor outdent() { return outdent(4); } /** * Remove leading and trailing space from the start and end of the buffer. Intermediate * lines are not affected. * @return */ public TextEditor trim() { text = new StringBuffer(text.toString().trim()); return this; } /** * Introduce a number of spaces at the start of each line. * @param spaces * @return */ public TextEditor indent(int spaces) { StringBuffer sb = new StringBuffer(spaces); for (int i = 0; i < spaces; i++) { sb.append(' '); } return replaceAll("^", sb.toString()); } /** * Add a string to the end of the buffer. * @param s */ public void append(CharSequence s) { text.append(s); } /** * Parse HTML tags, returning a Collection of HTMLToken objects. * @return */ public Collection<HTMLToken> tokenizeHTML() { List<HTMLToken> tokens = new ArrayList<HTMLToken>(); String nestedTags = nestedTagsRegex(6); Pattern p = Pattern.compile("" + "(?s:<!(--.*?--\\s*)+>)" + "|" + "(?s:<\\?.*?\\?>)" + "|" + nestedTags + "", Pattern.CASE_INSENSITIVE); Matcher m = p.matcher(text); int lastPos = 0; while (m.find()) { if (lastPos < m.start()) { tokens.add(HTMLToken.text(text.substring(lastPos, m.start()))); } tokens.add(HTMLToken.tag(text.substring(m.start(), m.end()))); lastPos = m.end(); } if (lastPos < text.length()) { tokens.add(HTMLToken.text(text.substring(lastPos, text.length()))); } return tokens; } /** * Regex to match a tag, possibly with nested tags such as <a href="<MTFoo>">. * * @param depth - How many levels of tags-within-tags to allow. The example <a href="<MTFoo>"> has depth 2. */ private String nestedTagsRegex(int depth) { if (depth == 0) { return ""; } return "(?:<[a-z/!$](?:[^<>]|" + nestedTagsRegex(depth - 1) + ")*>)"; } /** * Add a string to the start of the first line of the buffer. * @param s */ public void prepend(CharSequence s) { StringBuffer newText = new StringBuffer(); newText.append(s); newText.append(text); text = newText; } /** * Find out whether the buffer is empty. * @return */ public boolean isEmpty() { return text.length() == 0; } }