package com.google.sitebricks.compiler;

import java.util.ArrayDeque;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Deque;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

import com.google.inject.Inject;
import com.google.sitebricks.conversion.TypeConverter;
import com.google.sitebricks.rendering.Strings;

/**
 * Utility tokenizes text into expressions and raw text, and provides other
 * text parsing tools.
 *
 * @author Dhanji R. Prasanna (dhanji at gmail com)
 * @since 1.0
 */
public class Parsing {
  private static TypeConverter converter;

  private Parsing() {
  }

  /**
   * Converts comma-separated name/value pairs into expression/variable bindings.
   *
   * <p>Commas nested inside {@code "..."}, {@code [...]}, {@code {...}} or {@code (...)}
   * do not separate pairs. Each pair is split on its first {@code '='}; name and value
   * are trimmed before being stored.
   *
   * @param expression text of the form {@code name1=expr1, name2=expr2}
   * @return an unmodifiable map in declaration order; empty if {@code Strings.empty}
   *     reports the input as empty
   * @throws IllegalArgumentException if a pair contains no {@code '='}
   */
  public static Map<String, String> toBindMap(String expression) {
    if (Strings.empty(expression)) {
      return Collections.emptyMap();
    }

    //nice to preserve insertion order
    final Map<String, String> map = new LinkedHashMap<String, String>();
    for (String pair : splitPairs(expression)) {
      final String[] nameAndValue = pair.split("=", 2);

      //do some validation
      if (nameAndValue.length != 2) {
        throw new IllegalArgumentException("Invalid parameter binding format: " + pair);
      }

      Strings.nonEmpty(nameAndValue[0],
          "Cannot have an empty left hand side target parameter: " + pair);
      Strings.nonEmpty(nameAndValue[1],
          "Must provide a non-empty right hand side expression: " + pair);

      map.put(nameAndValue[0].trim(), nameAndValue[1].trim());
    }

    return Collections.unmodifiableMap(map);
  }

  //splits the expression on commas that are not nested inside quotes or brackets
  private static List<String> splitPairs(String expression) {
    final Deque<Character> open = new ArrayDeque<Character>();
    final List<String> pairs = new ArrayList<String>();
    final int length = expression.length();

    int start = 0;
    int i = 0;
    while (i < length) {
      final char c = expression.charAt(i);
      trackNesting(c, open);

      if (open.isEmpty() && ',' == c) {
        if (start < i) {
          pairs.add(expression.substring(start, i));
        }

        //skip comma & whitespace if any
        while (i < length && isPairSeparator(expression.charAt(i))) {
          i++;
        }

        //reset new start index; do NOT advance further, so that the first character
        //of the next pair is still examined for quotes/brackets
        start = i;
      } else {
        i++;
      }
    }

    //add last pair if needed
    if (start < length) {
      //chew up leading comma & whitespace if any (bounded, the input may be all separators)
      while (start < length && isPairSeparator(expression.charAt(start))) {
        start++;
      }

      final String pair = expression.substring(start).trim();

      //only consider this a pair if it has something in it!
      //(a single leftover character is dropped silently rather than rejected)
      if (pair.length() > 1) {
        pairs.add(pair);
      }
    }

    return pairs;
  }

  //updates the stack of currently open quotes/brackets for one character
  private static void trackNesting(char c, Deque<Character> open) {
    final boolean inQuotes = !open.isEmpty() && '"' == open.peek().charValue();
    if (inQuotes) {
      //inside a quoted string only the closing quote is significant
      if ('"' == c) {
        open.pop();
      }
      return;
    }

    switch (c) {
      case '"':
      case '[':
      case '{':
      case '(':
        open.push(c);
        break;
      case ']':
      case '}':
      case ')':
        //ignore a stray closer instead of failing on an empty stack
        if (!open.isEmpty()) {
          open.pop();
        }
        break;
      default:
        break;
    }
  }

  private static boolean isPairSeparator(char c) {
    return ',' == c || ' ' == c;
  }

  /**
   * Tokenizes text into raw text chunks interspersed with expression chunks.
   *
   * <p>An expression chunk starts at {@code "${"} and ends at the next {@code '}'}; both
   * delimiters are kept in the chunk handed to {@code CompiledToken.expression}. A
   * {@code '$'} that is not followed by <code>'{'</code> (including one at the very end of
   * the text) is ordinary text.
   *
   * @param warpRawText the template text to break up
   * @param compiler passed through to {@code CompiledToken.expression} for each expression
   * @return the tokens in source order
   * @throws ExpressionCompileException declared for {@code CompiledToken.expression}
   * @throws IllegalStateException if the text ends inside an unterminated expression
   */
  public static List<Token> tokenize(String warpRawText, EvaluatorCompiler compiler)
      throws ExpressionCompileException {
    ArrayList<Token> tokens = new ArrayList<Token>();

    //simple state machine to iterate the text and break it up into chunks
    char[] characters = warpRawText.toCharArray();

    StringBuilder token = new StringBuilder();
    TokenizerState state = TokenizerState.READING_TEXT;
    for (int i = 0; i < characters.length; i++) {
      final char c = characters[i];

      //test for start of an expression (guard the lookahead: '$' may be the last character)
      if (TokenizerState.READING_TEXT == state
          && '$' == c
          && i + 1 < characters.length
          && '{' == characters[i + 1]) {
        //YES it is the start of an expr, so close up the existing token & start a new one
        if (token.length() > 0) {
          tokens.add(CompiledToken.text(token.toString()));
          token = new StringBuilder();
        }

        state = TokenizerState.READING_EXPRESSION;
      }

      //test for end of an expr
      if (TokenizerState.READING_EXPRESSION == state && '}' == c) {
        //YES it is the end of the expr, so close it up and start a new token
        token.append(c);
        tokens.add(CompiledToken.expression(token.toString(), compiler));
        token = new StringBuilder();

        state = TokenizerState.READING_TEXT;
        continue; //dont add the trailing } to the new text field
      }

      //add characters to the token normally
      token.append(c);
    }

    //should never be in reading expr mode at this point
    if (TokenizerState.READING_EXPRESSION == state) {
      throw new IllegalStateException(
          "Error. Expression was not terminated properly: " + token.toString());
    }

    //add last token read if it has any content (is always text)
    if (token.length() > 0) {
      tokens.add(CompiledToken.text(token.toString()));
    }

    // Pack list capacity to size (saves memory).
    tokens.trimToSize();

    return tokens;
  }

  /**
   * Removes the first two characters and the last character, i.e. the {@code "${"} prefix
   * and <code>"}"</code> suffix of an expression. The delimiters are not checked.
   *
   * @param expr text at least three characters long
   * @return the text between the delimiters
   */
  public static String stripExpression(String expr) {
    return expr.substring(2, expr.length() - 1);
  }

  /**
   * @param attribute text to test, must not be null
   * @return true if the text starts with {@code "${"}
   */
  public static boolean isExpression(String attribute) {
    return attribute.startsWith("${");
  }

  /**
   * Removes the first and last character without checking that they are quotes.
   *
   * @param var text at least two characters long; dont pass null or empty string or 1 char
   * @return the text without its first and last character
   */
  public static String stripQuotes(String var) {
    return var.substring(1, var.length() - 1);
  }

  /**
   * Remember this method is not so much about verifying something is XML as it is
   * verifying that something is a NON-Xml template. In other words, read this as
   * whether or not we should *treat* something as XML, then complain that it's malformed
   * later (if necessary).
   *
   * @param template A fully loaded template as a string.
   * @return Returns true if this template should be treated as an XML template.
   *     Templates that are not XML *MUST* begin with a {@code @Meta} annotation.
   */
  public static boolean treatAsXml(String template) {
    return 0 > indexOfMeta(template);
  }

  /**
   * Renders the given token stream by appending the result of {@code Token.render}
   * for each token, in order.
   *
   * @param tokens the tokens to render
   * @param arguments handed unchanged to every token's {@code render} call
   * @return the concatenated output
   */
  public static String render(List<Token> tokens, Map<String, Object> arguments) {
    StringBuilder builder = new StringBuilder();
    for (Token token : tokens) {
      builder.append(token.render(arguments));
    }

    return builder.toString();
  }

  /**
   * Finds a leading {@code @Meta} annotation.
   *
   * <p>Only the first non-whitespace character is considered: the template qualifies when
   * that character starts {@code @Meta} immediately followed by {@code '('} or whitespace.
   * A template that ends right after {@code @Meta}, or is too short to contain it, does
   * not qualify.
   *
   * @param template the template text, must not be null
   * @return the index of the {@code '@'}, or -1 if the template does not begin with
   *     {@code @Meta}
   */
  public static int indexOfMeta(String template) {
    //do a manual character scan; it stops at the first non-whitespace character
    for (int i = 0; i < template.length(); i++) {
      char c = template.charAt(i);

      //skip leading whitespace
      if (isWhitespace(c)) {
        continue;
      }

      //Does this template begin with @Meta or @Meta( --> then it is *not* XML
      if ('@' == c && template.startsWith("Meta", i + 1)) {
        final int after = i + 5;

        //bounds check: the template may end directly after "@Meta"
        if (after < template.length()) {
          final char trailing = template.charAt(after);
          if ('(' == trailing || isWhitespace(trailing)) {
            return i;
          }
        }
      }

      //do not go past the first non-whitespace character (short-circuit)
      return -1;
    }

    //treat everything else as XML
    return -1;
  }

  private static boolean isWhitespace(char c) {
    return ' ' == c || '\n' == c || '\r' == c || '\t' == c;
  }

  private enum TokenizerState {
    READING_TEXT, READING_EXPRESSION
  }

  //URI test regex: (([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?(#[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?
  //Taken from stylus studio message board http://www.stylusstudio.com/xmldev/200108/post10890.html
  private final static Pattern URI_REGEX = Pattern.compile("(([a-zA-Z][0-9a-zA-Z+\\-\\.]*:)?/{0,2}[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?(#[0-9a-zA-Z;/?:@&=+$\\.\\-_!~*'()%]+)?");

  //TODO
  private final static Pattern TEMPLATE_URI_PATTERN = Pattern.compile("(([a-zA-Z][0-9a-zA-Z+\\\\-\\\\.]*:)?/{0,2}[0-9a-zA-Z;" +
      "/?:@&=+$\\\\.\\\\-_!~*'()%]+)?(#[0-9a-zA-Z;/?:@&=+$\\\\.\\\\-_!~*'()%]+)?");

  /**
   * Less expensive method tests whether string is a valid URI, by matching the whole
   * string against {@code URI_REGEX}.
   *
   * @param uri candidate text, may be null
   * @return true if {@code uri} is non-null and matches the pattern
   */
  public static boolean isValidURI(String uri) {
    return (null != uri)
        && URI_REGEX
        .matcher(uri)
        .matches();
  }

  /**
   * @return the converter last stored through {@link #setTypeConverter}, or null if none
   */
  public static TypeConverter getTypeConverter() {
    return Parsing.converter;
  }

  /**
   * Stores the converter in a static field so it is reachable from static code.
   *
   * @param converter the converter to store
   */
  @Inject
  public static void setTypeConverter(TypeConverter converter) {
    Parsing.converter = converter;
  }
}