/*
 * Copyright 2012 James Moger
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.moxie.maxml;

import static java.text.MessageFormat.format;

import java.io.BufferedReader;
import java.io.IOException;
import java.text.DateFormat;
import java.text.DecimalFormat;
import java.text.MessageFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.moxie.utils.StringUtils;

/**
 * MaxmlParser is a simple recursive parser that can deserialize a Maxml
 * document. Maxml is based mostly on YAML but borrows ideas from XML and JSON
 * such as space-insensitivity.
 * <p>
 * A parser instance keeps mutable state (the running line count, the root map
 * used to resolve <code>&amp;references</code>, and several
 * <code>java.text</code> formatters) in instance fields.
 *
 * @author James Moger
 *
 */
public class MaxmlParser {

    /** Matches a trailing <code>[n]</code> list index on a reference path segment. */
    private static final Pattern INDEXED_FIELD = Pattern.compile("(.*)\\[(\\d+)\\]");

    // NOTE(review): the trailing 'Z' is a quoted literal and no time zone is
    // set on this formatter, so the value is presumably interpreted in the
    // JVM default time zone rather than UTC -- confirm this is intended.
    DateFormat canonical = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");

    DateFormat iso8601 = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ssZ");

    DateFormat date = new SimpleDateFormat("yyyy-MM-dd");

    /** Cheap pre-check applied before the date formats are tried. */
    Pattern datePattern = Pattern.compile("^[0-9]{4}-[0-9]{2}-[0-9]{2}");

    /** Matches comma-grouped whole numbers such as <code>1,234,567</code>. */
    Pattern wholePattern = Pattern.compile("^\\d{1,3}(,\\d{1,3})*$");

    // NOTE(review): DecimalFormat takes its grouping separator from the
    // default locale -- confirm comma-grouped numbers parse as intended on
    // locales where ',' is the decimal separator.
    DecimalFormat wholeFormat = new DecimalFormat("#,###,###,###,###,###,###");

    /** Splits on commas that are not enclosed in double quotes. */
    String csvPattern = ",(?=(?:[^\\\"]*\\\"[^\\\"]*[\\\"^,]*\\\")*(?![^\\\"]*\\\"))";

    /** Number of columns a tab contributes in {@link #countWhitespace(String)}. */
    int tabWidth = 4;

    /** Number of lines read so far; used in error messages. */
    int lineCount;

    /** The map created by the outermost {@link #parse(BufferedReader)} call. */
    MaxmlMap rootMap;

    /**
     * Recursive method to parse a Maxml document.
     * <p>
     * Lines are read until the reader is exhausted or a line starting with
     * <code>}</code> closes the current map. Nested maps are parsed by
     * recursive calls that share the same reader. The map created by the
     * first call on this instance becomes the root map against which
     * <code>&amp;references</code> are resolved.
     *
     * @param reader
     *            the source of the document lines
     * @return an object map
     * @throws IOException
     *             declared for callers; any non-Maxml exception raised while
     *             reading or parsing is rethrown wrapped in a MaxmlException
     * @throws MaxmlException
     *             if a line can not be parsed; the message carries the line
     *             number and the offending line
     */
    public MaxmlMap parse(BufferedReader reader) throws IOException, MaxmlException {
        String lastKey = null;
        MaxmlMap map = new MaxmlMap();
        if (rootMap == null) {
            rootMap = map;
        }

        ArrayList<Object> array = null;
        String line = null;
        try {
            while ((line = reader.readLine()) != null) {
                lineCount++;

                // keep the raw line, text block offsets are measured on it
                String untrimmed = line;
                line = line.trim();
                if (line.length() == 0) {
                    // ignore blanks
                    continue;
                }

                char first = line.charAt(0);
                if (first == '#') {
                    // ignore comment
                    continue;
                } else if (line.equals("...")) {
                    // ignore end of document
                    continue;
                } else if (line.equals("---")) {
                    // ignore new document
                    continue;
                } else if (line.equals("\"\"\"") || line.equals("'''")
                        || line.equals("\"\"") || line.equals("''")) {
                    // start text block for the previous key, offset is 0
                    map.put(lastKey, parseTextBlock(reader, 0));
                } else if (first == '}') {
                    // end this map
                    return map;
                } else if (first == '-' || first == '+') {
                    // array element, the array belongs to the previous key
                    if (array == null) {
                        array = new ArrayList<Object>();
                        map.put(lastKey, array);
                    }
                    boolean addAll = first == '+';
                    String rem = line.substring(1).trim();
                    if (rem.equals("{")) {
                        // nested map element
                        array.add(parse(reader));
                    } else if (rem.startsWith("\"\"\"") || rem.startsWith("'''")) {
                        // start text block
                        array.add(rem.substring(3) + parseTextBlock(reader, 0));
                    } else if (rem.startsWith("\"\"") || rem.startsWith("''")) {
                        // start offset text block
                        String delimiter = rem.substring(0, 2);
                        int offset = countWhitespace(
                                untrimmed.substring(0, untrimmed.indexOf(delimiter)));
                        array.add(rem.substring(2) + parseTextBlock(reader, offset));
                    } else {
                        // plain value; a bare "-" yields an empty string,
                        // the same as "key:" does for a field
                        Object value = parseValue(rem);
                        if (addAll && value instanceof Collection) {
                            array.addAll((Collection<?>) value);
                        } else {
                            array.add(value);
                        }
                    }
                } else {
                    // field:value
                    String key;
                    String value;
                    if (first == '"' || first == '\'') {
                        // "key" : value
                        // quoted key because of colons
                        int quote = line.indexOf(first, 1);
                        key = line.substring(1, quote).trim();
                        value = line.substring(quote + 1).trim();
                        int colon = value.indexOf(':');
                        value = value.substring(colon + 1).trim();
                    } else {
                        // key : value
                        // a line without a colon fails in substring and is
                        // reported by the catch block below
                        int colon = line.indexOf(':');
                        key = line.substring(0, colon).trim();
                        value = line.substring(colon + 1).trim();
                    }

                    Object o;
                    if (value.length() == 0) {
                        // empty string
                        o = value;
                    } else if (value.charAt(0) == '{') {
                        // map
                        o = parse(reader);
                    } else if (value.equals("\"\"\"") || value.equals("'''")) {
                        // start text block
                        o = parseTextBlock(reader, 0);
                    } else if (value.equals("\"\"") || value.equals("''")) {
                        // start offset text block, the offset is the column
                        // of the opening delimiter
                        o = parseTextBlock(reader, untrimmed.indexOf(value));
                    } else {
                        // value
                        o = parseValue(value);
                    }

                    // put the value into the map
                    map.put(key, o);

                    // reset
                    lastKey = key;
                    array = null;
                }
            }
            return map;
        } catch (MaxmlException e) {
            throw e;
        } catch (Exception e) {
            throw new MaxmlException(MessageFormat.format(
                    "Parsing failed on line {0,number,0}: {1}", lineCount, line), e);
        }
    }

    /**
     * Parse a Maxml value into an Object.
     * <p>
     * Depending on its form the value becomes: the empty string, null
     * (<code>~</code>), the target of an <code>&amp;reference</code>, an
     * unquoted String, a List (<code>[a, b]</code>), a MaxmlMap
     * (<code>{k:v}</code>), a Boolean, an Integer or Long, a Float or Double,
     * a Date, or the trimmed String itself.
     *
     * @param value
     *            the raw value text
     * @return an object, or null for <code>~</code> and unresolved references
     * @throws MaxmlException
     *             if an inline map entry has no colon
     */
    public Object parseValue(String value) throws MaxmlException {
        value = value.trim();
        if (value.length() == 0) {
            // empty value
            return value;
        }

        if (value.equals("~")) {
            // null
            return null;
        }

        char first = value.charAt(0);
        char last = value.charAt(value.length() - 1);

        // object reference
        if (first == '&' && value.indexOf(' ') == -1) {
            String v = value.substring(1).trim();
            int open = v.indexOf('[');
            int dots = v.indexOf("..");
            int close = v.indexOf(']');
            if (open > -1 && dots > -1 && close > -1) {
                // name[a..b] expands to the list of name<a> .. name<b>
                String name = v.substring(0, open);
                int a = Integer.parseInt(v.substring(open + 1, dots));
                int b = Integer.parseInt(v.substring(dots + 2, close));
                boolean quoted = name.endsWith("'") || name.endsWith("\"");
                List<Object> list = new ArrayList<Object>();
                for (int i = a; i <= b; i++) {
                    String valName;
                    if (quoted) {
                        // keep the index inside the closing quote
                        valName = name.substring(0, name.length() - 1) + i
                                + name.charAt(name.length() - 1);
                    } else {
                        valName = name + i;
                    }
                    list.add(getObject(valName, rootMap));
                }
                return list;
            }
            return getObject(v, rootMap);
        }

        // the length check keeps a lone quote character from matching itself
        // as both the opening and the closing quote
        if (value.length() >= 2 && first == last && (first == '\'' || first == '"')) {
            // quoted string, strip quotes
            return value.substring(1, value.length() - 1).trim();
        }

        if (first == '[' && last == ']') {
            // inline list
            ArrayList<Object> array = new ArrayList<Object>();
            String inside = value.substring(1, value.length() - 1).trim();
            // "".split() returns one empty field, so [] is handled explicitly
            if (inside.length() > 0) {
                // http://www.programmersheaven.com/user/Jonathan/blog/73-Splitting-CSV-with-regex
                for (String field : inside.split(csvPattern)) {
                    array.add(parseValue(field));
                }
            }
            return array;
        }

        if (first == '{' && last == '}') {
            // inline map
            MaxmlMap map = new MaxmlMap();
            String inside = value.substring(1, value.length() - 1).trim();
            // {} is an empty map, not a malformed entry
            if (inside.length() > 0) {
                for (String kvp : inside.split(csvPattern)) {
                    int colon = kvp.indexOf(':');
                    if (colon < 0) {
                        throw new MaxmlException(format(
                                "Illegal value \"{0}\". Inline map must have key:value pairs!\n{1}",
                                kvp, value));
                    }
                    String k = kvp.substring(0, colon).trim();
                    Object o = parseValue(kvp.substring(colon + 1).trim());
                    map.put(k, o);
                }
            }
            return map;
        }

        String vlc = value.toLowerCase();
        if (vlc.equals("true") || vlc.equals("yes") || vlc.equals("on")) {
            return Boolean.TRUE;
        }
        if (vlc.equals("false") || vlc.equals("no") || vlc.equals("off")) {
            return Boolean.FALSE;
        }

        // try parsing a whole number
        try {
            long along;
            if (first != '0' && wholePattern.matcher(value).find()) {
                // whole number comma-formatted
                along = wholeFormat.parse(value).longValue();
            } else {
                // octal, hexadecimal, plain number
                along = Long.decode(value);
            }
            if (along <= Integer.MAX_VALUE && along >= Integer.MIN_VALUE) {
                // if it fits in an int, return an int
                return (int) along;
            }
            return along;
        } catch (Exception ignored) {
            // not a whole number, try the next representation
        }

        // try parsing a decimal value
        try {
            double adouble = Double.parseDouble(value);
            // Float.MIN_VALUE is the smallest POSITIVE float, so the range
            // check is made on the magnitude to cover negative values too
            double magnitude = Math.abs(adouble);
            if (magnitude == 0
                    || (magnitude >= Float.MIN_VALUE && magnitude <= Float.MAX_VALUE)) {
                return (float) adouble;
            }
            return adouble;
        } catch (NumberFormatException ignored) {
            // not a decimal number, try the next representation
        }

        // date/time parsing
        if (datePattern.matcher(value).find()) {
            DateFormat[] formats = { canonical, iso8601, date };
            for (DateFormat df : formats) {
                try {
                    Date aDate = df.parse(value);
                    // reset milliseconds to 0
                    Calendar cal = Calendar.getInstance();
                    cal.setTime(aDate);
                    cal.set(Calendar.MILLISECOND, 0);
                    return cal.getTime();
                } catch (Exception ignored) {
                    // this format does not match, try the next one
                }
            }
        }

        // default to string
        return value;
    }

    /**
     * Reads the body of a text block up to and including its closing
     * delimiter (<code>"""</code>, <code>'''</code>, <code>""</code> or
     * <code>''</code>, either alone on a line or at the end of a line).
     *
     * @param reader
     *            the source of the document lines
     * @param offset
     *            number of leading spaces to strip from each content line
     * @return the text of the block; lines are joined with '\n'
     * @throws IOException
     *             if reading fails
     * @throws MaxmlException
     *             if a content line is not indented by <code>offset</code>
     *             spaces or the reader ends before a closing delimiter
     */
    protected String parseTextBlock(BufferedReader reader, int offset)
            throws IOException, MaxmlException {
        StringBuilder sb = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            lineCount++;

            if (line.equals("\"\"\"") || line.equals("'''")
                    || line.equals("\"\"") || line.equals("''")) {
                // end block; the delimiter line carries no content, so it is
                // not checked for indentation
                return sb.toString();
            }

            int delimiterLength = 0;
            if (line.endsWith("\"\"\"") || line.endsWith("'''")) {
                // end textblock
                delimiterLength = 3;
            } else if (line.endsWith("\"\"") || line.endsWith("''")) {
                // end offset textblock
                delimiterLength = 2;
            }

            if (delimiterLength > 0) {
                // last content line, drop the trailing delimiter
                String content = line.substring(0, line.length() - delimiterLength);
                sb.append(stripWhitespace(offset, content));
                return sb.toString();
            }

            // append line
            sb.append(stripWhitespace(offset, line));
            sb.append('\n');
        }
        throw new MaxmlException(MessageFormat.format(
                "Failed to parse textblock at line {0,number,0}", lineCount));
    }

    /**
     * Counts the whitespace columns in a chunk of text: a space counts as 1,
     * a tab counts as {@link #tabWidth}, any other character is ignored.
     *
     * @param chunk
     *            the text to inspect
     * @return the whitespace column count
     */
    protected int countWhitespace(String chunk) {
        int count = 0;
        for (int i = 0; i < chunk.length(); i++) {
            char c = chunk.charAt(i);
            if (c == ' ') {
                count++;
            } else if (c == '\t') {
                count += tabWidth;
            }
        }
        return count;
    }

    /**
     * Removes the leading <code>offset</code> spaces from a text block line.
     * Lines shorter than <code>offset</code> are returned unchanged.
     *
     * @param offset
     *            number of leading spaces expected; values &lt;= 0 disable
     *            stripping
     * @param line
     *            the raw text block line
     * @return the line without its indentation
     * @throws MaxmlException
     *             if any of the first <code>offset</code> characters is not a
     *             space
     */
    protected String stripWhitespace(int offset, String line) throws MaxmlException {
        if (offset <= 0 || line.length() < offset) {
            return line;
        }

        // attempt to eliminate leading whitespace
        int whiteCount = 0;
        for (int i = 0; i < offset; i++) {
            if (line.charAt(i) == ' ') {
                whiteCount++;
            }
        }
        if (whiteCount == offset) {
            return line.substring(offset);
        }
        throw new MaxmlException(MessageFormat.format(
                "Line {0,number,0} in a textblock is expected to have {1,number,0} indentation spaces, found {2,number,0}!",
                lineCount, offset, whiteCount));
    }

    /**
     * Resolves a reference path against a map.
     * <p>
     * The path may start with a quoted key (<code>'a.b'</code> or
     * <code>"a.b"</code>) optionally followed by a remainder, or be a
     * dot-separated list of keys where each key may carry a
     * <code>[n]</code> list index.
     *
     * @param value
     *            the reference path, an optional leading '.' is ignored
     * @param container
     *            the map to start the lookup in
     * @return the referenced object, or null if the path is empty or nothing
     *         is found
     */
    protected Object getObject(String value, MaxmlMap container) {
        if (value.length() > 0 && value.charAt(0) == '.') {
            value = value.substring(1);
        }
        if (value.length() == 0) {
            // nothing to look up
            return null;
        }

        char first = value.charAt(0);
        if (first == '\'' || first == '"') {
            // quoted key, may itself contain dots
            int end = value.indexOf(first, 1);
            String id = value.substring(1, end);
            Object o = container.get(id);
            String remainder = value.substring(end + 1);
            if (StringUtils.isEmpty(remainder)) {
                return o;
            }
            if (o instanceof List) {
                return getObject(remainder, (List<?>) o);
            }
            // NOTE(review): the remainder is resolved against the original
            // container even when the quoted key maps to a MaxmlMap --
            // confirm whether it should descend into that map instead.
            return getObject(remainder, container);
        }

        Object o = null;
        for (String field : value.split("\\.")) {
            int index = -1;
            Matcher m = INDEXED_FIELD.matcher(field);
            if (m.find()) {
                index = Integer.parseInt(m.group(2));
                field = field.substring(0, field.indexOf('['));
            }
            o = container.get(field);
            if (o instanceof MaxmlMap) {
                // descend for the next path segment
                container = (MaxmlMap) o;
            } else if (o instanceof List && index >= 0) {
                // grab indexed element from list
                o = ((List<?>) o).get(index);
            }
        }
        return o;
    }

    /**
     * Resolves a <code>[n]</code> index expression against a list.
     *
     * @param value
     *            text containing a <code>[n]</code> index
     * @param list
     *            the list to read from
     * @return the element at the index, or null if the text has no index
     */
    protected Object getObject(String value, List<?> list) {
        Matcher m = INDEXED_FIELD.matcher(value);
        if (m.find()) {
            return list.get(Integer.parseInt(m.group(2)));
        }
        return null;
    }
}