/******************************************************************************* * Copyright (c) 2008 Vlad Dumitrescu and others. * All rights reserved. This program and the accompanying materials * are made available under the terms of the Eclipse Public License v1.0 * which accompanies this distribution, and is available at http://www.eclipse.org/legal/epl-v10.html * * Contributors: * Vlad Dumitrescu *******************************************************************************/ package org.erlide.util.erlang; import java.io.IOException; import java.io.StreamTokenizer; import java.io.StringReader; import java.util.ArrayList; import java.util.List; import java.util.Stack; import org.erlide.util.ErlLogger; import com.ericsson.otp.erlang.OtpErlangAtom; import com.ericsson.otp.erlang.OtpErlangException; import com.ericsson.otp.erlang.OtpErlangList; import com.ericsson.otp.erlang.OtpErlangLong; import com.ericsson.otp.erlang.OtpErlangMap; import com.ericsson.otp.erlang.OtpErlangObject; import com.ericsson.otp.erlang.OtpErlangString; import com.ericsson.otp.erlang.OtpErlangTuple; import com.google.common.base.Strings; public class OtpParser { public OtpErlangObject parse(final String s) throws OtpParserException { return doParse(s); } protected static OtpErlangObject doParse(final String s) throws OtpParserException { if (Strings.isNullOrEmpty(s)) { return null; } return parse(scan(s)); } private static OtpErlangObject parse(final List<Token> tokens) throws OtpParserException { if (tokens.isEmpty()) { return null; } OtpErlangObject result = null; final Token t = tokens.remove(0); final String text = t.text; if (text == null) { throw new OtpParserException("null token" + t.toString()); } switch (t.kind) { case ATOM: result = new OtpErlangAtom(text); break; case VARIABLE: result = new OtpPatternVariable(text); break; case STRING: result = new OtpErlangString(text); break; case INTEGER: result = new OtpErlangLong(Long.parseLong(text)); break; case PLACEHOLDER: result = new OtpFormatPlaceholder(text); break; case TUPLESTART: result = parseTuple(tokens, new Stack<OtpErlangObject>()); break; case TUPLEEND: throw new OtpParserException("unexpected " + t.toString()); case LISTSTART: result = parseList(tokens, new Stack<OtpErlangObject>(), null); break; case LISTEND: throw new OtpParserException("unexpected " + t.toString()); case MAP: result = parseMap(tokens, new Stack<OtpErlangObject>()); break; case COMMA: throw new OtpParserException("unexpected " + t.toString()); default: throw new OtpParserException("unknown token" + t.toString()); } return result; } private static OtpErlangObject parseList(final List<Token> tokens, final Stack<OtpErlangObject> stack, final OtpErlangObject tail) throws OtpParserException { if (tokens.isEmpty()) { return null; } final Token t = tokens.get(0); if (t.kind == TokenKind.LISTEND) { tokens.remove(0); try { return new OtpErlangList(stack.toArray(new OtpErlangObject[stack.size()]), tail); } catch (final OtpErlangException e) { ErlLogger.error(e); // can't happen return null; } } OtpErlangObject atail = tail; if (t.kind == TokenKind.CONS) { tokens.remove(0); atail = parse(tokens); } else { stack.push(parse(tokens)); if (tokens.get(0).kind == TokenKind.COMMA) { tokens.remove(0); } else if (tokens.get(0).kind != TokenKind.LISTEND && tokens.get(0).kind != TokenKind.CONS) { throw new OtpParserException("missing comma in list"); } } return parseList(tokens, stack, atail); } private static OtpErlangObject parseTuple(final List<Token> tokens, final Stack<OtpErlangObject> stack) throws OtpParserException { if (tokens.isEmpty()) { return null; } final Token t = tokens.get(0); if (t.kind == TokenKind.TUPLEEND) { tokens.remove(0); return new OtpErlangTuple(stack.toArray(new OtpErlangObject[stack.size()])); } if (t.kind == TokenKind.CONS) { throw new OtpParserException("cons is invalid in tuple"); } stack.push(parse(tokens)); if (tokens.get(0).kind == TokenKind.COMMA) { tokens.remove(0); } else if (tokens.get(0).kind != TokenKind.TUPLEEND) { throw new OtpParserException("missing comma in tuple"); } return parseTuple(tokens, stack); } private static OtpErlangObject parseMap(final List<Token> tokens, final Stack<OtpErlangObject> stack) throws OtpParserException { if (tokens.isEmpty()) { return null; } final Token t = tokens.get(0); if (t.kind == TokenKind.TUPLEEND) { tokens.remove(0); final int size = stack.size(); final OtpErlangObject[] all = stack.toArray(new OtpErlangObject[size]); final OtpErlangObject[] keys = new OtpErlangObject[size / 2]; final OtpErlangObject[] values = new OtpErlangObject[size / 2]; for (int i = 0; i < size / 2; i++) { keys[i] = all[i * 2]; values[i] = all[i * 2 + 1]; } return new OtpErlangMap(keys, values); } stack.push(parse(tokens)); if (tokens.get(0).kind != TokenKind.ARROW) { throw new OtpParserException("badly constructed map"); } tokens.remove(0); stack.push(parse(tokens)); if (tokens.get(0).kind == TokenKind.COMMA) { tokens.remove(0); } else if (tokens.get(0).kind != TokenKind.TUPLEEND) { throw new OtpParserException("missing comma in map"); } return parseMap(tokens, stack); } private static enum TokenKind { ATOM, VARIABLE, STRING, INTEGER, PLACEHOLDER, TUPLESTART, TUPLEEND, LISTSTART, LISTEND, COMMA, CONS, MAP, ARROW, UNKNOWN; } private static class Token { TokenKind kind; int start; int end; String text; @Override public String toString() { return "<" + kind.toString() + ": !" + text + "!>"; } public static Token nextToken(final String s) { if (s == null || s.length() == 0) { return null; } final Token result = new Token(); char c; int i = 0; do { c = s.charAt(i++); if (i >= s.length()) { return null; } } while (c == ' ' || c == '\t' || c == '\n' || c == '\r'); i--; result.start = i; result.end = i; if (c <= 'z' && c >= 'a') { scanAtom(s, result); } else if (c == '\'') { scanQAtom(s, result); } else if (c == '"') { scanString(s, result); } else if (c >= 'A' && c <= 'Z' || c == '_') { scanVariable(s, result); } else if (c <= '9' && c >= '0' || c == '-') { scanInteger(s, result); } else if (c == '~') { scanPlaceholder(s, result); } else if (c == '{') { result.kind = TokenKind.TUPLESTART; result.end = result.start + 1; } else if (c == '}') { result.kind = TokenKind.TUPLEEND; result.end = result.start + 1; } else if (c == '[') { result.kind = TokenKind.LISTSTART; result.end = result.start + 1; } else if (c == ']') { result.kind = TokenKind.LISTEND; result.end = result.start + 1; } else if (c == ',') { result.kind = TokenKind.COMMA; result.end = result.start + 1; } else if (c == '|') { result.kind = TokenKind.CONS; result.end = result.start + 1; } else if (c == '#' && s.charAt(i + 1) == '{') { result.kind = TokenKind.MAP; result.end = result.start + 2; } else if (c == '=' && s.charAt(i + 1) == '>') { result.kind = TokenKind.ARROW; result.end = result.start + 2; } else { result.kind = TokenKind.UNKNOWN; result.end = result.start + 1; } result.text = s.substring(result.start, result.end); final char ch = result.text.charAt(0); if (result.kind == TokenKind.STRING) { result.text = unescape(result.text); } else if (result.kind == TokenKind.PLACEHOLDER) { result.text = result.text.substring(1); } else if (result.kind == TokenKind.ATOM && ch == '\'') { result.text = result.text.substring(1, result.text.length() - 1); } return result; } private static void scanPlaceholder(final String s, final Token result) { result.kind = TokenKind.PLACEHOLDER; char c; c = s.charAt(++result.end); while (result.end <= s.length() && (c >= 'a' && c <= 'z' || c >= '0' && c <= '9')) { c = s.charAt(result.end++); } result.end--; } private static void scanInteger(final String s, final Token result) { char c; c = s.charAt(result.end); result.kind = TokenKind.INTEGER; while (result.end < s.length() && (c >= '0' && c <= '9' || c == '-')) { c = s.charAt(result.end++); } result.end--; } private static void scanVariable(final String s, final Token result) { char c; c = s.charAt(result.end); result.kind = TokenKind.VARIABLE; while (result.end < s.length() && c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_' || c == ':') { c = s.charAt(result.end++); } result.end--; } private static void scanString(final String s, final Token result) { char c; result.kind = TokenKind.STRING; c = s.charAt(++result.end); while (result.end < s.length() && c != '"') { if (c == '\\') { c = s.charAt(result.end++); } c = s.charAt(result.end++); } } private static void scanQAtom(final String s, final Token result) { char c; result.kind = TokenKind.ATOM; c = s.charAt(++result.end); while (result.end < s.length() && c != '\'') { if (c == '\\') { c = s.charAt(result.end++); } c = s.charAt(result.end++); } } private static void scanAtom(final String s, final Token result) { char c; c = s.charAt(result.end); result.kind = TokenKind.ATOM; while (result.end < s.length() && c >= 'a' && c <= 'z' || c >= 'A' && c <= 'Z' || c >= '0' && c <= '9' || c == '_') { c = s.charAt(result.end++); } result.end--; } } private static List<Token> scan(final String s) { String ss = s + " "; final List<Token> result = new ArrayList<>(); Token t = Token.nextToken(ss); while (t != null) { result.add(t); ss = ss.substring(t.end); t = Token.nextToken(ss); } return result; } private static String unescape(final String message) { final StreamTokenizer parser = new StreamTokenizer(new StringReader(message)); String result; try { parser.nextToken(); if (parser.ttype == '"') { result = parser.sval; } else { result = "ERROR!"; } } catch (final IOException e) { result = e.toString(); } return result; } }