// -*- mode: Java; c-basic-offset: 3; tab-width: 8; indent-tabs-mode: nil -*-
// Copyright (C) 2007, 2008 Andreas Krey, Ulm, Germany <a.krey@gmx.de>
/* The toplevel is effectively the contents of
* 'fun (args) { catch exit; $body }'
* which is then invoked with the command line arguments.
*
* ; is a right-associative expression-separator/combinator;
* 'let a = x; f (x)', oops, should create new scope by itself?
* (Getting the scope right is relevant for call/cc and all sorts
* of proper incarnating (which places do see the same incarnation
* of a variable?).)
*/
/* In this implementation we do symbol lookup at runtime, because I am
* too lazy to implement call frame management. Still we need to keep
* a compile-time symbol table for lookup because macros (and potentially
* other stuff) are also scoped. (Types have just been eradicated, so
* they don't appear any more, though.)
*
* Interestingly we don't need to be as exact with the scoping (of let)
* at compile time because the proper incarnation management happens
* at runtime now. (But we really need to have a value chain, because
* that is what lets basically degenerate to when we don't optimize.)
*
* The symbol lookup has one other interesting property: By default,
* we only see the 'global' (that is: toplevel function) symbols
* we define ourselves.
*
* Also, should we scope-check functions right away or later, when
* the surrounding function is done? The latter does not really
* work with strange macros, but we could defer other lookups?
*/
/* We assume an accumulator which holds a current value. That way we don't
* have to deal that much with the last pop-or-not after or before an expression.
*/
/*
* Should it be 'let x = <expr>' on expression-level lets? Statement-level
* lets ought to be dead anyway, but what is the proper right-hand side here?
*/
/*
* For the moment we forbid using parameter names as variable names in
* the top scope of a function, because we essentially make the
* parameters local lets. (And let is not a call-by-need thing,
* we're strict.)
*/
/* We have a small problem with {}. While we can use () mostly just
* as well, {} is needed to indicate a new scope (thus keeping lets from
* leaking into the next 'statement'). Unfortunately, {} is also needed
* for hash/object literals.
*/
/* Finally, it may be possible to allow leaving out semicolons at end
* of line (but only if it would be permissible in the parse at that
* point). May need some fiddling with the tokenizer, though.
*/
/* call/cc. It is 'call/cc (proc)', where proc is invoked with another
* proc (the continuation). Implementation: As a builtin/macro? Need
* to form the continuation into an invocable, and to call the proc.
*
* call/cc (fun (exit) {
* if (whatever) exit ();
* });
*
* There is still an open question how to make call/cc look like a function,
* even when it is a builtin one. In the code we quite probably need a
* special op at some level, and as we can't link code yet, an opcode
* is the only way. Probably it isn't too bad to have call/cc as a builtin
* macro in the global table, but it still disables the reassignment
* (see below). It should be something that can be passed by value
* at runtime (this does not apply to let/var/fun). Basically this
* means that call/cc must just produce this function which is then
* invoked. (This and others could be in a system catalog, and having
* a parameterized 'system' opcode.)
*/
/* Oops: 'let myvar = var;' does not work, as let needs an expression
* on the RHS, and can't take a compile-time symbol value. Have an
* 'alias myvar = var;' or maclet or symlet? Statement vs. expression
* level?
*/
/* And: Tail recursion elimination. Should be just looking for
* direct sequence of call and ret? Ok, need to follow jumps for
* that so it is better done earlier.
*/
package gloop;
import java.io.Reader;
import java.io.IOException;
import java.util.Vector;
import static gloop.Tokenizer.*;
public class Parser {
final Tokenizer tk;
Tokenizer.Token tok;
public Parser (Tokenizer t) throws IOException, Tokenizer.TokEx {
tk = t;
tok = tk.get ();
}
public Token get () throws IOException, TokEx{
return tok = tk.get ();
}
public void parse (Code c, LocalScope sc, String endt)
throws IOException, TokEx
{
c.put ("nullval"); // Initialize acc: Just the default return value
// Tokenizer.println ("parser to " + endt);
while (!tok.is (endt)) {
// // Handle special case of stmt-macro
// // (XXX But let is later going to be an expr-macro!)
// if (tok.is (SYM)) {
// Scope.Ent e = sc.get (tok.val);
// if (e != null) {
// get ();
// Vector<Tokenizer.Token> toklist = e.macstmt (this, c, sc);
// if (toklist != null) {
// // Ok, this is actually some kind of macro;
// // either the list is empty (-> internally done),
// // or it is a replacement.
// tk.push (toklist);
// continue;
// }
// }
// }
pexpr (c, sc);
if (!is (SEMI)) break;
// Tokenizer.println ("Parser at " + tok.tok);
}
chk (endt);
}
public void pexpr (Code c, LocalScope sc)
throws IOException, Tokenizer.TokEx
{
pprim (c, sc);
while (true) {
if (is (AST)) {
c.put ("push");
pexpr (c, sc);
c.put ("mult");
} else {
return;
}
}
}
public void pprim (Code c, LocalScope sc)
throws IOException, Tokenizer.TokEx
{
/* Kernel primitives */
if (tok.is (SYM)) {
/* Macro application loop */
while (tok.is (SYM)) {
String s = tok.val;
get ();
Scope.Ent e = sc.getRec (s);
if (e != null) {
Vector<Tokenizer.Token> toklist = e.macstmt (this, c, sc);
if (toklist == Tokenizer.empty_tokens) {
// Has been handled internally and only consumed tokens;
// we're done.
break;
} else if (toklist != null) {
// Ok, this is actually some kind of macro;
// use the replacement and redo.
tk.push (toklist);
continue;
} else {
// XXX Need to check other special cases,
// or load via Ent.something
// XXX Is it possible to make this case the other way,
// by calling e.makeLoadCode()? Or would that be
// equally ugly?
if (e instanceof LocalScope.FrameEnt) {
LocalScope.FrameEnt fe = (LocalScope.FrameEnt)e;
int z = sc.countTo (fe.scope ());
if (z == 0) {
c.put ("lload", fe.getOffset ());
} else {
c.put ("up", z);
c.put ("load", fe.getOffset ());
}
} else {
throw new IllegalArgumentException (
"internal: not a frame ent");
}
}
} else {
throw new IllegalArgumentException ("undefined: " + s);
}
break;
}
} else if (tok.is (NUM)) {
c.put ("numval", Integer.parseInt (tok.val));
get ();
} else if (tok.is (STR)) {
c.put ("strval", tok.val);
get ();
} else if (is (LPAR)) {
// pexpr (c, sc);
// chk (RPAR);
parse (c, sc, RPAR);
} else {
throw new IllegalArgumentException ("in pexpr (" + tok.tok + ")");
}
/* Postfix loop */
while (true) {
if (is (LPAR)) {
int cnt = 0;
if (!is (RPAR)) {
while (true) {
c.put ("push");
pexpr (c, sc);
c.put ("swap");
cnt ++;
if (!is (COMMA)) break;
}
chk (RPAR);
}
c.put ("call", cnt); // fn in acc, args on stack
} else {
break;
}
}
}
public void chk (String t) throws IOException, Tokenizer.TokEx {
if (!tok.is (t)) {
throw new IllegalArgumentException ("at '" + tok.tok +
"': not a '" + t + "'");
}
get ();
}
public boolean is (String t) throws IOException, Tokenizer.TokEx {
if (!tok.is (t)) {
return false;
}
get ();
return true;
}
public String sym () throws IOException, Tokenizer.TokEx {
if (!tok.is (SYM)) {
throw new IllegalArgumentException ("not a sym");
}
String s = tok.val;
get ();
return s;
}
// public void opttype (Scope sc) throws IOException, Tokenizer.TokEx {
// if (tok.is (TYP)) {
// get ();
// // return Type;
// }
// // return null
// }
public static Runner parse (Reader r) throws IOException, Tokenizer.TokEx {
try {
Tokenizer t = new Tokenizer (r);
Parser p = new Parser (t);
CodeStore cs = new CodeStore ();
Code c = new Code (cs);
p.parse (c, new LocalScope (new GlobalScope ()), Tokenizer.EOF);
c.put ("stop");
c.finish ();
Tokenizer.flush ();
cs.dump ();
return cs.getProg ();
} finally {
Tokenizer.flush ();
}
}
}