/*
 * xtc - The eXTensible Compiler
 * Copyright (C) 2009-2011 New York University
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * version 2.1 as published by the Free Software Foundation.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
 * USA.
 */
package xtc.lang.cpp;

import java.lang.StringBuilder;
import java.io.IOException;

import java.util.Collection;
import java.util.ListIterator;
import java.util.Map;
import java.util.HashMap;
import java.util.Comparator;
import java.util.PriorityQueue;
import java.util.LinkedList;

import xtc.tree.Node;
import xtc.tree.GNode;

import xtc.util.Pair;
import xtc.util.Runtime;

import xtc.lang.cpp.Syntax.Kind;
import xtc.lang.cpp.Syntax.LanguageTag;
import xtc.lang.cpp.Syntax.ConditionalTag;
import xtc.lang.cpp.Syntax.DirectiveTag;
import xtc.lang.cpp.Syntax.Layout;
import xtc.lang.cpp.Syntax.Language;
import xtc.lang.cpp.Syntax.Text;
import xtc.lang.cpp.Syntax.Directive;
import xtc.lang.cpp.Syntax.Conditional;

import xtc.lang.cpp.ContextManager.Context;

import net.sf.javabdd.*;

/**
 * The fork-merge LR (FMLR) parser.  Parses a token stream that may
 * contain preprocessor conditionals by forking one subparser per
 * feasible branch and merging subparsers back together when they
 * reconverge on the same token and state.
 *
 * @author Paul Gazzillo
 * @version $Revision: 1.179 $
 */
public class ForkMergeParser {
  /**
   * The name of the AST conditional node.
   * NOTE(review): mutable public static field — consider making it
   * {@code final} if no caller reassigns it.
   */
  public static String CHOICE_NODE_NAME = "Conditional";

  /** Parsing actions. */
  private static enum ParsingAction {
    NONE, SHIFT, REDUCE, ACCEPT, ERROR
  }

  /** Array of error messages, indexed by the error codes below.
   */
  final private static String[] ERRMSG = {
    "no default action",
    "invalid table entry",
    "error directive"
  };

  /** Error code for no default action. */
  final private static int NODEFAULT = 0;

  /** Error code for an invalid table entry. */
  final private static int INVALID = 1;

  /** Error code for having seen an error directive. */
  final private static int ERRDIRECTIVE = 2;

  /** The state number of the starting state. */
  final private static int STARTSTATE = 0;

  /** The stream from which the parser gets syntactic units. */
  private Stream stream;

  /** The context manager. */
  private ContextManager contextManager;

  /** The actions object for AST-building and parsing context. */
  private Actions actions;

  /** The xtc runtime.  Used for output and flags. */
  private Runtime runtime;

  /**
   * The sequence numbers of nested conditionals.  Used by lazy
   * forking to only fork the closest conditional when it has empty
   * branches.
   */
  private LinkedList<Integer> nestedConditionals;

  /** Whether shared reductions are on or not. */
  private final boolean optimizeSharedReductions;

  /** Whether lazy forking is on or not. */
  private final boolean optimizeLazyForking;

  /** Whether early reduction is on or not. */
  private final boolean optimizeEarlyReduce;

  /** Whether Platoff ordering is on or not. */
  private final boolean optimizePlatoffOrdering;

  /** Whether follow-set caching is on. */
  private final boolean optimizeFollowSetCaching;

  /** The follow-set cache, keyed by conditional sequence number. */
  private Map<Integer, Collection<Lookahead>> followCache;

  /** The skipConditional cache. */
  private Map<Integer, OrderedSyntax> skipConditionalCache;

  /** Show all parsing actions. */
  private final boolean showActions;

  /** Show errors. */
  private final boolean showErrors;

  /** Turn language statistics collection on. */
  private final boolean languageStatistics;

  /** Turn parser statistics collection on. */
  private final boolean parserStatistics;

  /** Turn the subparser kill-switch on.
   */
  private final boolean killswitch;

  /** The kill-switch cutoff for the number of subparsers. */
  private final int killswitchCutoff;

  /** Count the number of parser loop iterations. */
  private int iterations;

  /** Count the number of lazy forks. */
  private int lazyForks;

  /** Count the number of lazy forks with empty branches. */
  private int lazyForksEmptyBranches;

  /** Count the number of conditionals with empty branches. */
  private HashMap<Integer, Boolean> emptyConditionals;

  /** Collect the distribution of subparsers throughout parsing. */
  private HashMap<Integer, Integer> nsubparsers;

  /** Count the number of times FOLLOW is called. */
  private int nfollow;

  /**
   * Create a new parser.
   *
   * @param stream The stream of syntactic units to parse.
   * @param contextManager The presence-condition (BDD) manager.
   * @param actions The AST-building and parsing-context actions.
   * @param runtime The xtc runtime, used for flags and output.
   * @throws NumberFormatException if the -killswitch flag is not a
   *         positive, non-zero integer.
   */
  public ForkMergeParser(Stream stream, ContextManager contextManager,
                         Actions actions, Runtime runtime) {
    this.stream = stream;
    this.contextManager = contextManager;
    this.actions = actions;
    this.runtime = runtime;
    this.nestedConditionals = new LinkedList<Integer>();
    optimizeSharedReductions = runtime.test("optimizeSharedReductions");
    optimizeLazyForking = runtime.test("optimizeLazyForking");
    optimizeEarlyReduce = runtime.test("optimizeEarlyReduce");
    // It has no effect.  Off by default.
    optimizePlatoffOrdering = runtime.test("platoffOrdering");
    // Always leave it on.  No effect on subparser size.  Minimal
    // effect on time.
    optimizeFollowSetCaching = ! runtime.test("noFollowCaching");
    if (optimizeFollowSetCaching) {
      followCache = new HashMap<Integer, Collection<Lookahead>>();
    }
    skipConditionalCache = new HashMap<Integer, OrderedSyntax>();
    languageStatistics = runtime.test("statisticsLanguage");
    parserStatistics = runtime.test("statisticsParser");
    showActions = runtime.test("showActions");
    showErrors = runtime.test("showErrors");
    if (! runtime.hasValue("killswitch")
        || null == runtime.getString("killswitch")) {
      killswitch = false;
      killswitchCutoff = 0;
    } else {
      try {
        killswitch = true;
        killswitchCutoff = Integer.parseInt(runtime.getString("killswitch"));
        if (killswitchCutoff <= 0) {
          throw new NumberFormatException("the -killswitch flag takes a "
                                          + "positive, non-zero integer");
        }
      } catch (NumberFormatException e) {
        // NOTE(review): this rethrow drops the original parse failure
        // as the cause; consider e.initCause or keeping e's message.
        throw new NumberFormatException("the -killswitch flag takes a "
                                        + "positive, non-zero integer");
      }
    }
  }

  /**
   * This comparator enforces the subparser set ordering policy.  TODO
   * have a different subparserComparator instance for each of: Onone,
   * Oearly, Oplatoff, and Oearly with Oplatoff.
   */
  Comparator<Subparser> subparserComparator = new Comparator<Subparser>() {
    public int compare(Subparser o1, Subparser o2) {
      // Primary key: the subparsers' position in the token stream.
      int compare = o1.a.t.compare(o2.a.t);
      if (0 == compare) {
        if (optimizeEarlyReduce) {
          // The early reduce optimization puts reductions first to
          // maximize merge opportunities.
          if (ParsingAction.SHIFT == o1.a.getAction()) {
            return 1;
          } else if (ParsingAction.SHIFT == o2.a.getAction()) {
            return -1;
          } else if (optimizePlatoffOrdering) {
            // Both subparsers must be REDUCEs since accept and error
            // parsers are removed instead of rescheduled.
            // Platoff ordering says longer stacks go first to
            // maximize merge opportunities.
            return o1.s.getHeight() >= o2.s.getHeight() ? -1 : 1;
          }
        } else if (optimizePlatoffOrdering) {
          if (ParsingAction.REDUCE == o1.a.getAction()
              && ParsingAction.REDUCE == o2.a.getAction()) {
            return o1.s.getHeight() >= o2.s.getHeight() ? -1 : 1;
          }
        }
      }
      return compare;
    }

    public boolean equals(Object o) {
      // Identity equality only; no two distinct comparators are equal.
      return this == o;
    }
  };

  /**
   * Parse the syntax stream with the optimized FMLR algorithm.
   *
   * @return The AST.
   * @throws IOException if reading the token stream fails.
   */
  public Object parse() throws IOException {
    // Initialize the first subparser.
    OrderedSyntax startOfFile = new OrderedSyntax(stream);
    Subparser firstSubparser
      = new Subparser(new Lookahead(startOfFile,
                                    contextManager.new Context(true)),
                      new StateStack(STARTSTATE, null, null),
                      contextManager.new Context(true),
                      actions.getInitialContext());
    firstSubparser.a.t = firstSubparser.a.t.getNext();

    // Initialize the set of subparsers.  Use the default initial
    // capacity.
    PriorityQueue<Subparser> subparsers
      = new PriorityQueue<Subparser>(11, subparserComparator);

    subparsers.add(firstSubparser);

    // Create the common root node in case there are multiple accepted
    // subparsers.
    GNode root = GNode.create(CHOICE_NODE_NAME);

    if (parserStatistics) {
      // Initialize statistics collection.
      iterations = 0;
      nsubparsers = new HashMap<Integer, Integer>();
      nfollow = 0;
      lazyForks = 0;
      lazyForksEmptyBranches = 0;
      emptyConditionals = new HashMap<Integer, Boolean>();
    }

    // The main parsing loop.
    // NOTE(review): unlike parseNaively(), this loop does not honor
    // the -killswitch flag — confirm whether that is intentional.
    while (true) {
      if (parserStatistics) {
        // Collect statistics on the number of subparsers.
        iterations++;
        if (! nsubparsers.containsKey(subparsers.size())) {
          nsubparsers.put(subparsers.size(), 0);
        }
        nsubparsers.put(subparsers.size(),
                        nsubparsers.get(subparsers.size()) + 1);
      }

      // Get the earliest token.  The list of subparsers is ordered,
      // so we can get the earliest token from the first subparser in
      // the list.  Each subparser should be on an ordinary token or a
      // start conditional.
      OrderedSyntax earliestToken = subparsers.peek().a.t;

      assert earliestToken.syntax.kind() == Kind.CONDITIONAL
        && earliestToken.syntax.toConditional().tag() == ConditionalTag.START
        || earliestToken.syntax.kind() == Kind.LANGUAGE
        || earliestToken.syntax.kind() == Kind.EOF;

      // Prepare a list of processed subparsers to add to the set of
      // subparsers after the parsing iteration is done.
      LinkedList<Subparser> processedParsers = new LinkedList<Subparser>();

      // This flag tells us that one or more parsers in this iteration
      // performed a reduction.  It is used to implement the
      // earlyReduce optimization.
      boolean seenReduce = false;

      // This flag tells us that there is a conditional that needed
      // its follow-set computed.  This is used to make lazy forks
      // wait until all subparsers on that conditional are ready to
      // fork.  Otherwise, the forked subparsers will get ahead of
      // other subparsers and miss merge opportunities.  Note that
      // without Oearly, this flag won't really do much, since Oearly
      // ensures that subparsers that want to shift come after other
      // subparsers.
      boolean waitToFork = false;

      // For all subparsers on the earliest token.  Pull each
      // subparser off of the priority queue and add it to the
      // processedParsers list.  This list is then added back to the
      // priority queue after processing.
      while (subparsers.size() > 0
             && subparsers.peek().a.t.compare(earliestToken) == 0) {
        // When the earlyReduce optimization is on, give reductions
        // their own parser iteration and then merge.  Don't do any
        // shifting until after a merge attempt.
        if (optimizeEarlyReduce && seenReduce
            && subparsers.peek().a.getAction() == ParsingAction.SHIFT) {
          break;
        }

        // Pull off the earliest subparser.
        Subparser subparser = subparsers.poll();

        // Carry out one parsing iteration for one subparser.
        if (subparser.a.isSet()) {
          switch (subparser.a.getAction()) {
          case REDUCE:
            // Perform a shared reduction.
            reduce(subparser);
            seenReduce = true;
            waitToFork = true;

            // Repartition the follow-set if necessary.  It is
            // necessary if any parsing actions are non-reduce or
            // reduce a different partition.
            boolean repartition = false;
            int lastProduction = -1;
            for (Lookahead n : ((LookaheadSet) subparser.a).set) {
              getAction(n, subparser.s);
              if (ParsingAction.REDUCE != n.getAction()
                  || -1 != lastProduction
                  && n.getActionData() != lastProduction) {
                repartition = true;
                break;
              }
              lastProduction = n.getActionData();
            }

            if (repartition) {
              // Repartition the follow-set and fork subparsers.
              LookaheadSet lookaheadSet = (LookaheadSet) subparser.a;
              Collection<Lookahead> tokenSet
                = partition(lookaheadSet.set, subparser);

              processedParsers.addAll(fork(subparser, tokenSet));

              // Clean up subparser scope and context.  It is already
              // removed from the set of subparsers.
              subparser.a.c.delRef();
              // No need to free LookaheadSet.  They are forked into
              // new subparsers.
              subparser.context.delRef();
              subparser.scope.free();
            } else {
              // All parsing actions still reduce the same production.
              // Leave the subparser alone, but update the token set's
              // parsing action.
              subparser.a.copyAction(((LookaheadSet) subparser.a).set.get(0));
              processedParsers.add(subparser);
            }
            break;

          case SHIFT:
            if (waitToFork) {
              // Don't fork until all subparsers on this conditional
              // are ready to fork.  Reschedule this subparser.
              processedParsers.add(subparser);
            } else {
              // Fork the current conditional's next tokens only.  Put
              // all the rest in a single subparser.
              Collection<Lookahead> set;

              if (subparser.a.t.syntax.kind() == Kind.CONDITIONAL) {
                set = lazyFork((LookaheadSet) subparser.a);
              } else {
                // No need for lazy forking.  This is not a
                // conditional but an implicit conditional caused by a
                // parsing context ambiguity, e.g. C's typedef/var
                // name ambiguity.  Just fork a subparser for each
                // token.
                set = ((LookaheadSet) subparser.a).set;
              }

              processedParsers.addAll(fork(subparser, set));

              // Clean up subparser scope and context.  It is already
              // removed from the set of subparsers.
              subparser.a.c.delRef();
              // No need to free LookaheadSet.  They are forked into
              // new subparsers.
              subparser.context.delRef();
              subparser.scope.free();
            }
            break;

          default:
            // Sets of next tokens only apply to shared reductions and
            // lazy forking.
            throw new RuntimeException();
          }
        } else {
          // Get the follow set, i.e. all the possible next ordinary
          // tokens.
          Collection<Lookahead> followSet;

          // If the token is an ordinary language token, no need to
          // calculate the follow set.  It is simply the set
          // containing the single language token.
          switch (subparser.a.t.syntax.kind()) {
          case EOF:
            // Fall through
          case LANGUAGE:
            followSet = new LinkedList<Lookahead>();
            followSet
              .add(new Lookahead(subparser.a.t, subparser.context.addRef()));
            break;

          case CONDITIONAL:
            if (optimizeFollowSetCaching) {
              Collection<Lookahead> cachedFollowSet;

              if (hasCachedSet(subparser.a.t)) {
                // Use the cached follow-set.
                cachedFollowSet = getCachedSet(subparser.a.t);
              } else {
                // This token's follow-set is not yet cached, so
                // compute it under the TRUE presence condition.
                Context T = contextManager.new Context(true);
                cachedFollowSet = follow(subparser.a.t, T).values();
                T.delRef();

                // Cache the follow-set.
                setCachedSet(subparser.a.t, cachedFollowSet);
                // Remove elements from LRU cache to make room.  Don't
                // forget to clean up BDDs!
              }

              // Conjoin the subparser's presence condition with the
              // follow-set tokens' presence conditions, omitting
              // tokens with a FALSE presence condition, i.e. trimming
              // infeasible paths.
              followSet = new LinkedList<Lookahead>();
              for (Lookahead n : cachedFollowSet) {
                Context and = subparser.context.and(n.c);

                if (and.isFalse()) {
                  // Omit infeasible paths.
                  and.delRef();
                } else {
                  followSet.add(new Lookahead(n.t, and));
                }
              }
            } else {
              // Compute the follow set every time.
              followSet
                = follow(subparser.a.t, subparser.context).values();
            }
            break;

          default:
            // The parser cannot use tokens other than language,
            // conditional, and eof tokens.
            throw new UnsupportedOperationException();
          }

          // Apply the parsing context to tokens in the follow set.
          // This will reclassify tokens and handle implicit
          // conditionals, e.g. due to C typedef ambiguities.
          if (actions.hasContext()) {
            Collection<Lookahead> newTokens
              = subparser.scope.reclassify(followSet);

            // Only create a new list when there are new tokens.
            if (null != newTokens) {
              Collection<Lookahead> newSet = new LinkedList<Lookahead>();

              newSet.addAll(followSet);
              newSet.addAll(newTokens);
              followSet = newSet;
            }
          }

          if (followSet.size() == 1) {
            // Replace subparser's token with the token that has had
            // parsing context applied to it.
            subparser.a.c.delRef();
            subparser.a = followSet.iterator().next();

            // Regular LR.
            getAction(subparser.a, subparser.s);
            switch (subparser.a.getAction()) {
            case SHIFT:
              shift(subparser);

              // Move to the next ordinary token or start conditional
              // (#if).  If the next token ends a branch (#elif or
              // #endif), move to the next ordinary token or start
              // conditional after the conditional.
              subparser.a.t = subparser.a.t.getNext();
              // NOTE(review): reads the `tag` field directly while
              // the switch below uses the tag() accessor — confirm
              // they agree.
              while (subparser.a.t.syntax.kind() == Kind.CONDITIONAL
                     && subparser.a.t.syntax.toConditional().tag
                     != ConditionalTag.START) {
                Conditional conditional = subparser.a.t.syntax.toConditional();

                switch (conditional.tag()) {
                case START:
                  // No need to move.
                  break;

                case NEXT:
                  subparser.a.t = skipConditional(subparser.a.t);
                  break;

                case END:
                  subparser.a.t = subparser.a.t.getNext();
                  break;

                default:
                  // No such conditional tag.
                  throw new UnsupportedOperationException();
                }
              }

              // We no longer know what the next parsing action is.
              // The next token could be either an ordinary token or a
              // conditional.  The action can be used for subparser
              // ordering, so not clearing it can lead to incorrect
              // results.
              subparser.a.clearAction();

              // Re-add the subparser to the ordered set to update its
              // position in the set.
              processedParsers.add(subparser);
              break;

            case REDUCE:
              reduce(subparser);
              seenReduce = true;

              // We no longer know what the next parsing action is.
              // The next token could be either an ordinary token or a
              // conditional.  The action can be used for subparser
              // ordering, so not clearing it can lead to incorrect
              // results.
              subparser.a.clearAction();

              // Re-add the subparser to the ordered set to update its
              // position in the set.
              processedParsers.add(subparser);
              break;

            case ACCEPT:
              root.add(subparser.context.addRef());
              root.add(subparser.s.value);

              // Clean up subparser scope and context.  We will not add
              // it back to the main set of subparsers.
              subparser.a.c.delRef();
              subparser.context.delRef();
              subparser.scope.free();

              runtime.errConsole().pln("ACCEPT").flush();
              break;

            case ERROR:
              // Clean up subparser scope and context.  We will not add
              // it back to the main set of subparsers.
              subparser.a.c.delRef();
              subparser.context.delRef();
              subparser.scope.free();

              if (showErrors) {
                runtime.error("parse error on "
                              + (subparser.a.t.syntax.kind() == Kind.EOF
                                 ? "EOF"
                                 : "\"" + subparser.a.t.syntax + "\"")
                              + " at " + subparser.a.t.syntax.getLocation());
              }
              break;
            }
          } else if (followSet.size() > 1) {
            // A follow set with more than one token implies an
            // explicit or implicit conditional.

            // Fork subparsers on the conditional.
            assert subparser.a.t.syntax.kind() == Kind.LANGUAGE
              || subparser.a.t.syntax.toConditional().tag()
              == ConditionalTag.START;

            // Partition the follow-set.  Naive FMLR will not group
            // any lookaheads, but the shared reductions and lazy
            // forking optimizations will.
            Collection<Lookahead> tokenSet = partition(followSet, subparser);

            // Fork and replace the subparser with newly-forked
            // subparsers, one for each token in the follow set.
            processedParsers.addAll(fork(subparser, tokenSet));

            // Clean up subparser scope and context.  It is removed
            // from the set of subparsers.
            subparser.a.c.delRef();
            subparser.context.delRef();
            subparser.scope.free();

            // Hold off on lazy forking until all subparsers are ready
            // to fork.
            waitToFork = true;
          } else {
            // The follow set should never be less than one.
            throw new RuntimeException();
          }
        }
      }

      // Add the updated subparsers into the priority queue of
      // subparsers.
      for (Subparser subparser : processedParsers) {
        subparsers.add(subparser);
      }

      if (subparsers.size() == 0) {
        // Done
        break;
      }

      // Merge subparsers.  We only need to check the earliest
      // subparsers for a merge.
      merge(subparsers);
    }

    if (parserStatistics) {
      int max = 0;

      runtime.errConsole().pln(String.format("iterations %d", iterations));
      for (Integer size : nsubparsers.keySet()) {
        if (size > max) max = size;
        runtime.errConsole().pln(String.format("subparsers %d %d", size,
                                               nsubparsers.get(size)));
      }
      runtime.errConsole().pln(String.format("max_subparsers %d", max));
      runtime.errConsole().pln(String.format("follow %d", nfollow));
      runtime.errConsole().pln(String.format("lazy_forks %d %d", lazyForks,
                                             lazyForksEmptyBranches));

      int empty = 0;
      for (Integer i : emptyConditionals.keySet()) {
        if (emptyConditionals.get(i)) {
          empty++;
        }
      }
      runtime.errConsole().pln(String.format("empty_conditionals %d %d",
                                             emptyConditionals.size(), empty));
      runtime.errConsole().flush();
    }

    if (actions.hasContext()) {
      actions.getInitialContext().free();
    }

    return root;
  }

  /**
   * Parse the syntax stream with the naive FMLR algorithm, i.e.
   * without the shared-reduction, lazy-forking, or early-reduce
   * optimizations.
   *
   * @return The AST.
   * @throws IOException if reading the token stream fails.
   */
  public Object parseNaively() throws IOException {
    // Initialize the first subparser.
    OrderedSyntax startOfFile = new OrderedSyntax(stream);
    Subparser firstSubparser
      = new Subparser(new Lookahead(startOfFile,
                                    contextManager.new Context(true)),
                      new StateStack(STARTSTATE, null, null),
                      contextManager.new Context(true),
                      actions.getInitialContext());
    firstSubparser.a.t = firstSubparser.a.t.getNext();

    // Initialize the set of subparsers.  Use the default initial
    // capacity.
    PriorityQueue<Subparser> subparsers
      = new PriorityQueue<Subparser>(11, subparserComparator);

    subparsers.add(firstSubparser);

    // Create the common root node in case there are multiple accepted
    // subparsers.
    GNode root = GNode.create(CHOICE_NODE_NAME);

    if (parserStatistics) {
      // Initialize statistics collection.
      iterations = 0;
      nsubparsers = new HashMap<Integer, Integer>();
      nfollow = 0;
    }

    // The main parsing loop.
    while (true) {
      // Abort when the kill-switch cutoff on subparser count is hit.
      if (killswitch && subparsers.size() >= killswitchCutoff) {
        if (parserStatistics) {
          runtime.errConsole().pln(String.format("killswitch_subparsers %d",
                                                 subparsers.size())).flush();
        }
        throw new RuntimeException(String
                                   .format("kill-switch tripped because "
                                           + "subparsers reached %d",
                                           subparsers.size()));
      }

      if (parserStatistics) {
        // Collect statistics on the number of subparsers.
        iterations++;
        if (! nsubparsers.containsKey(subparsers.size())) {
          nsubparsers.put(subparsers.size(), 0);
        }
        nsubparsers.put(subparsers.size(),
                        nsubparsers.get(subparsers.size()) + 1);
      }

      // Get the earliest token.  The list of subparsers is ordered,
      // so we can get the earliest token from the first subparser in
      // the list.  Each subparser should be on an ordinary token or a
      // start conditional.
      OrderedSyntax earliestToken = subparsers.peek().a.t;

      assert earliestToken.syntax.kind() == Kind.CONDITIONAL
        && (earliestToken.syntax.toConditional().tag() == ConditionalTag.START
            || earliestToken.syntax.toConditional().tag()
            == ConditionalTag.NEXT
            || earliestToken.syntax.toConditional().tag()
            == ConditionalTag.END)
        || earliestToken.syntax.kind() == Kind.LANGUAGE
        || earliestToken.syntax.kind() == Kind.EOF;

      // Prepare a list of processed subparsers to add to the set of
      // subparsers after the parsing iteration is done.
      LinkedList<Subparser> processedParsers = new LinkedList<Subparser>();

      // For all subparsers on the earliest token.  Pull each
      // subparser off of the priority queue and add it to the
      // processedParsers list.  This list is then added back to the
      // priority queue after processing.
      while (subparsers.size() > 0
             && subparsers.peek().a.t.compare(earliestToken) == 0) {
        // Pull off the earliest subparser.
        Subparser subparser = subparsers.poll();

        // Carry out one parsing iteration for one subparser.

        // Apply the parsing context to tokens in the follow set.
        // This will reclassify tokens and handle implicit
        // conditionals, e.g. due to C typedef ambiguities.
        if (actions.hasContext()
            && subparser.a.t.syntax.kind() == Kind.LANGUAGE) {
          // Create new list, add token.  Need to fork immediately if
          // there are new tokens here.
          LinkedList<Lookahead> set = new LinkedList<Lookahead>();

          set.add(subparser.a);

          Collection<Lookahead> newTokens = subparser.scope.reclassify(set);

          if (null != newTokens) {
            // There were new tokens during reclassification.  Fork
            // new subparsers immediately.
            processedParsers.addAll(fork(subparser, newTokens));
          }
        }

        if (subparser.a.t.syntax.kind() == Kind.LANGUAGE
            || subparser.a.t.syntax.kind() == Kind.EOF) {
          // Regular LR.
          getAction(subparser.a, subparser.s);
          switch (subparser.a.getAction()) {
          case SHIFT:
            shift(subparser);

            // Move to the next ordinary token or start conditional
            // (#if).  If the next token ends a branch (#elif or
            // #endif), move to the next ordinary token or start
            // conditional after the conditional.
            subparser.a.t = subparser.a.t.getNext();
            // NOTE(review): reads the `tag` field directly while the
            // switch below uses the tag() accessor — confirm they
            // agree.
            while (subparser.a.t.syntax.kind() == Kind.CONDITIONAL
                   && subparser.a.t.syntax.toConditional().tag
                   != ConditionalTag.START) {
              Conditional conditional = subparser.a.t.syntax.toConditional();

              switch (conditional.tag()) {
              case START:
                // No need to move.
                break;

              case NEXT:
                subparser.a.t = skipConditional(subparser.a.t);
                break;

              case END:
                subparser.a.t = subparser.a.t.getNext();
                break;

              default:
                // No such conditional tag.
                throw new UnsupportedOperationException();
              }
            }

            // We no longer know what the next parsing action is.
            // The next token could be either an ordinary token or a
            // conditional.  The action can be used for subparser
            // ordering, so not clearing it can lead to incorrect
            // results.
            subparser.a.clearAction();

            // Re-add the subparser to the ordered set to update its
            // position in the set.
            processedParsers.add(subparser);
            break;

          case REDUCE:
            reduce(subparser);

            // We no longer know what the next parsing action is.
            // The next token could be either an ordinary token or a
            // conditional.  The action can be used for subparser
            // ordering, so not clearing it can lead to incorrect
            // results.
            subparser.a.clearAction();

            // Re-add the subparser to the ordered set to update its
            // position in the set.
            processedParsers.add(subparser);
            break;

          case ACCEPT:
            root.add(subparser.context.addRef());
            root.add(subparser.s.value);

            // Clean up subparser scope and context.  We will not add
            // it back to the main set of subparsers.
            subparser.a.c.delRef();
            subparser.context.delRef();
            subparser.scope.free();

            runtime.errConsole().pln("ACCEPT").flush();
            break;

          case ERROR:
            // Clean up subparser scope and context.  We will not add
            // it back to the main set of subparsers.
            subparser.a.c.delRef();
            subparser.context.delRef();
            subparser.scope.free();

            if (showErrors) {
              runtime.error("parse error on "
                            + (subparser.a.t.syntax.kind() == Kind.EOF
                               ? "EOF"
                               : "\"" + subparser.a.t.syntax + "\"")
                            + " at " + subparser.a.t.syntax.getLocation());
            }
            break;
          }
        } else if (subparser.a.t.syntax.kind() == Kind.CONDITIONAL) {
          switch (subparser.a.t.syntax.toConditional().tag()) {
          case START:
            // Fork subparsers on the conditional.

            // Get the first token in each branch of the conditional.
            // Move to #endif if any branches are empty.
            Collection<Lookahead> tokenSet = new LinkedList<Lookahead>();
            OrderedSyntax a = subparser.a.t;

            // Save the presence conditions of all empty branches.
            Context emptyConditions = contextManager.new Context(false);

            // The union of the branches' conditions.  Used to
            // determine whether there is an implicit else branch or
            // not.
            Context union = a.syntax.toConditional().context.addRef();

            // Loop through each branch of the conditional, find the
            // first token of each.
            while (! (a.syntax.kind() == Kind.CONDITIONAL
                      && a.syntax.toConditional().tag()
                      == ConditionalTag.END)) {
              // Get the presence condition of the branch.  It is
              // conjoined with the presence condition in which it is
              // nested.
              Context nestedPresenceCondition
                = subparser.context.and(a.syntax.toConditional().context);

              // a is on the conditional starting the branch.  Get the
              // first token after the one starting the branch.  It
              // will either be the first token in the branch, or the
              // token that ends the branch.
              a = a.getNext();

              if (a.syntax.kind() == Kind.CONDITIONAL
                  && (a.syntax.toConditional().tag() == ConditionalTag.NEXT
                      || a.syntax.toConditional().tag()
                      == ConditionalTag.END)) {
                // The branch is empty.  Accumulate its condition.
                Context or = emptyConditions.or(nestedPresenceCondition);

                emptyConditions.delRef();
                emptyConditions = or;
              } else {
                // We have the first token in a non-empty branch.
                tokenSet.add(new Lookahead(a,
                                           nestedPresenceCondition.addRef()));
              }
              nestedPresenceCondition.delRef();

              // Skip ahead to the next branch or stop when we find
              // the #endif.
              while (true) {
                // If we have reached the next branch or are at the
                // end of the conditional, then leave the loop.
                if (a.syntax.kind() == Kind.CONDITIONAL
                    && (a.syntax.toConditional().tag() == ConditionalTag.NEXT
                        || a.syntax.toConditional().tag()
                        == ConditionalTag.END)) {
                  break;
                }

                // a is now a language token or a start conditional.
                // Move to the next token.
                if (a.syntax.kind() == Kind.CONDITIONAL) {
                  assert a.syntax.toConditional().tag()
                    == ConditionalTag.START
                    || a.syntax.toConditional().tag() == ConditionalTag.NEXT;
                  a = skipConditional(a);
                } else {
                  a = a.getNext();
                }
              }

              // We are now on a next branch or an endif.
              if (a.syntax.kind() == Kind.CONDITIONAL
                  && a.syntax.toConditional().tag() == ConditionalTag.END) {
                // On an #endif we are done.
                break;
              } else {
                // It must be a NEXT conditional.
                assert a.syntax.toConditional().tag() == ConditionalTag.NEXT;

                // Update the union of branch presence conditions.
                Context newUnion = union.or(a.syntax.toConditional().context);

                union.delRef();
                union = newUnion;
              }
            }

            // Discover whether there is an implicit-else or not.  To
            // do this, we check the union of all branches' conditions
            // against the presence condition (presenceCondition) of
            // the current subparser.
            //
            // union.not() is the condition of the implicit else.  If
            // it is always false under the current presence
            // condition, then there is no implicit else.
            Context elseBranch = subparser.context.andNot(union);

            union.delRef();

            if (! elseBranch.isFalse()) {
              // Save the presence condition of the empty, implicit
              // else.
              Context or = emptyConditions.or(elseBranch);

              emptyConditions.delRef();
              emptyConditions = or;
            }
            elseBranch.delRef();

            if (! emptyConditions.isFalse()) {
              // We need to fork a subparser that sits on #endif,
              // because there were empty branches.
              tokenSet.add(new Lookahead(a, emptyConditions));
            } else {
              emptyConditions.delRef();
            }

            // Fork and replace the subparser with newly-forked
            // subparsers, one for each token in the follow set.
            processedParsers.addAll(fork(subparser, tokenSet));

            // Clean up subparser scope and context.  It is removed
            // from the set of subparsers.
            subparser.a.c.delRef();
            subparser.context.delRef();
            subparser.scope.free();
            break;

          case NEXT:
            // Move on and reschedule the subparser.
            subparser.a.t = skipConditional(subparser.a.t);
            processedParsers.add(subparser);
            break;

          case END:
            // Move on and reschedule the subparser.
            subparser.a.t = subparser.a.t.getNext();
            processedParsers.add(subparser);
            break;

          default:
            // Can only be #if or #endif.
            throw new RuntimeException();
          }
        } else {
          // The subparser can only be on LANGUAGE, EOF, and
          // CONDITIONAL tokens.
          throw new RuntimeException();
        }
      }

      // Add the updated subparsers into the priority queue of
      // subparsers.
      for (Subparser subparser : processedParsers) {
        subparsers.add(subparser);
      }

      if (subparsers.size() == 0) {
        // Done
        break;
      }

      // Merge subparsers.  We only need to check the earliest
      // subparsers for a merge.
      merge(subparsers);
    }

    if (parserStatistics) {
      int max = 0;

      runtime.errConsole().pln(String.format("iterations %d", iterations));
      for (Integer size : nsubparsers.keySet()) {
        if (size > max) max = size;
        runtime.errConsole().pln(String.format("subparsers %d %d", size,
                                               nsubparsers.get(size)));
      }
      runtime.errConsole().pln(String.format("max_subparsers %d", max));
      runtime.errConsole().pln(String.format("follow %d", nfollow));
      runtime.errConsole().flush();
    }

    if (actions.hasContext()) {
      actions.getInitialContext().free();
    }

    return root;
  }

  /**
   * Find the FOLLOW set of a given token.  When the given token is an
   * ordinary token, the set is just the token alone.  But when the
   * token is a conditional, this method returns the set of ordinary
   * tokens reachable from this conditional in all configurations.
   *
   * @param a The token to find the follow set of.
   * @param presenceCondition The presence condition of a. This
   * presence condition will be freed, so pass a reference.
   * @return The follow set of the given token.
   */
  public Map<Integer, Lookahead> follow(OrderedSyntax a,
                                        Context presenceCondition)
    throws IOException {
    Map<Integer, Lookahead> result = new HashMap<Integer, Lookahead>();

    // Refcount convention: this method owns one reference to
    // presenceCondition for the duration of the loop and releases it
    // before every return.
    presenceCondition.addRef();

    if (parserStatistics) {
      nfollow++;
    }

    while (true) {
      // Get the first token of a.  first() adds any reachable
      // ordinary tokens to result and returns the presence condition
      // of the empty branches (false if there were none).
      Context emptyCondition = first(result, a, presenceCondition);

      // Update the presence condition to be the condition of the
      // empty branches.
      presenceCondition.delRef();
      presenceCondition = emptyCondition;

      // If there are no empty branches, we need not continue.  We are
      // done finding the follow set.
      if (presenceCondition.isFalse()) {
        presenceCondition.delRef();

        return result;
      }

      // Get next token after stepping out of conditionals.
      do {
        switch (a.syntax.kind()) {
        case LANGUAGE:
          // Get the next token from the input.
          a = a.getNext();
          break;

        case CONDITIONAL:
          // Get the next token after the conditional.
          Conditional conditional = a.syntax.toConditional();

          switch (conditional.tag()) {
          case START:
            a = skipConditional(a);
            break;

          case NEXT:
            a = skipConditional(a);
            break;

          case END:
            a = a.getNext();
            break;

          default:
            throw new UnsupportedOperationException();
          }
          break;

        default:
          throw new RuntimeException("FMLR only takes language "
                                     + "and conditional tokens.");
        }
        // Until a does not end a branch.
      } while (a.syntax.kind() == Kind.CONDITIONAL
               && (a.syntax.toConditional().tag() == ConditionalTag.NEXT
                   || a.syntax.toConditional().tag() == ConditionalTag.END));
    }
  }

  /**
   * Find the FIRST set of a given token.  An ordinary token has
   * itself and only itself in its first set.  For a conditional, the
   * first set has the first token in each branch.  If there are empty
   * branches, it returns the presence condition of the empty
   * branches, which indicates that the follow set computation needs
   * to continue populating the follow set.
   *
   * This is a helper routine for follow.  It assumes conditionals are
   * well-formed.
   *
   * @param result The follow-set found so far.
   * @param a The token for which to find the first-set.
   * @param presenceCondition The presence condition of a.
   * @return The presence condition of empty branches.  It is
   * non-false if the follow method should continue looking for
   * tokens.
   */
  private Context first(Map<Integer, Lookahead> result, OrderedSyntax a,
                        Context presenceCondition) throws IOException {
    // Hold a reference for the lifetime of this call; released on
    // every return path that does not hand the condition back to the
    // caller.
    presenceCondition.addRef();

    while (true) {
      switch (a.syntax.kind()) {
      case EOF:
        // Fall through
      case LANGUAGE:
        // Add the token to the follow set.  If it is already there,
        // just update the presence condition.
        if (! result.containsKey(a.getSequenceNumber())) {
          // Add a new token to the follow-set.  The Lookahead takes
          // ownership of a fresh reference to the condition.
          result.put(a.getSequenceNumber(),
                     new Lookahead(a, presenceCondition.addRef()));
        } else {
          // Update the presence condition.
          Lookahead n = result.get(a.getSequenceNumber());
          Context union = n.c.or(presenceCondition);

          n.c.delRef();
          n.c = union;
        }
        presenceCondition.delRef();

        // An ordinary token has no empty branches: return false.
        return contextManager.new Context(false);

      case CONDITIONAL:
        Conditional conditional = a.syntax.toConditional();

        switch (conditional.tag()) {
        case NEXT:
          // Fall through
        case END:
          // If the token ends a branch, i.e. NEXT or END, then we
          // need to keep looking for a first token after the end of
          // the conditional.  The caller inherits our reference.
          return presenceCondition;

        case START:
          // Save the presence conditions of all empty branches.
          // This is necessary to attribute the correct presence
          // condition to the first token following a conditional
          // with empty branches.
          //
          // This is equivalent to the "cont" variable in the
          // FOLLOW/FIRST algorithm description.
          Context emptyConditions = contextManager.new Context(false);

          // The union of the branches conditions.  Used to determine
          // whether there is an implicit else branch or not.
          Context union = a.syntax.toConditional().context.addRef();

          // Loop through each branch of the conditional, find the
          // first token of each.
          while (! (a.syntax.kind() == Kind.CONDITIONAL
                    && a.syntax.toConditional().tag()
                       == ConditionalTag.END)) {
            // Get the presence condition of the branch.  It is
            // conjoined with the presence condition in which it is
            // nested.
            Context nestedPresenceCondition
              = presenceCondition.and(a.syntax.toConditional().context);

            // a is on the conditional starting the branch.  Get the
            // first the token after the one starting the branch.  It
            // will either be the first token in the branch, or the
            // token that ends the branch.
            a = a.getNext();

            if (nestedPresenceCondition.isFalse()) {
              // If the nestedPresenceCondition is false, the branch is
              // an infeasible path, so skip the branch.
            } else {
              // Get the first token in the branch if there is one.  If
              // not, then the branch is empty, and we need to find the
              // first token after the conditional.  Note that first()
              // recurses here for nested conditionals.
              Context emptyBranch = first(result, a,
                                          nestedPresenceCondition);

              if (! emptyBranch.isFalse()) {
                // Save the presence condition of the empty branch.
                Context or = emptyConditions.or(emptyBranch);

                emptyConditions.delRef();
                emptyConditions = or;
              }
              emptyBranch.delRef();
            }
            nestedPresenceCondition.delRef();

            // Skip ahead to the next branch or stop when we find the
            // #endif
            while (true) {
              // If we have reached the next branch or are at the end
              // of the conditional, then leave the loop.
              if (a.syntax.kind() == Kind.CONDITIONAL
                  && (a.syntax.toConditional().tag() == ConditionalTag.NEXT
                      || a.syntax.toConditional().tag()
                         == ConditionalTag.END)) {
                break;
              }

              // a is now a language token or a start conditional.
              // Move to the next token.
              if (a.syntax.kind() == Kind.CONDITIONAL) {
                assert a.syntax.toConditional().tag() == ConditionalTag.START
                  || a.syntax.toConditional().tag() == ConditionalTag.NEXT;
                a = skipConditional(a);
              } else {
                a = a.getNext();
              }
            }

            // Move to the next branch or #endif.
            if (a.syntax.kind() == Kind.CONDITIONAL
                && a.syntax.toConditional().tag() == ConditionalTag.END) {
              break;
            } else {
              // It must be a NEXT conditional.
              assert a.syntax.toConditional().tag() == ConditionalTag.NEXT;

              // Update the union of branch presence conditions.
              Context newUnion = union.or(a.syntax.toConditional().context);

              union.delRef();
              union = newUnion;
            }
          }

          // Discover whether there is an implicit-else or not.  To do
          // this, we check the union of all branches conditions
          // against the presence condition (presenceCondition) of the
          // current subparser.
          //
          // union.not() is the condition of the implicit else.  If it
          // is always false under the current presence condition,
          // then there is no implicit else.
          Context elseBranch = presenceCondition.andNot(union);

          union.delRef();

          if (! elseBranch.isFalse()) {
            // Save the presence condition of the empty, implicit
            // else.
            Context or = emptyConditions.or(elseBranch);

            emptyConditions.delRef();
            emptyConditions = or;
          }
          elseBranch.delRef();

          // Record whether the conditional is empty.
          if (parserStatistics) {
            // Count empty conditionals.  The logic below ensures that
            // if there is at least one time when a conditional is
            // non-empty, the conditional is not marked as empty.
            // This is a more conservative way to count empty
            // conditionals.
            if (emptyConditions.isFalse()) {
              // Not empty.
              emptyConditionals.put(a.getSequenceNumber(), false);
            } else {
              // Empty.
              if (emptyConditionals.containsKey(a.getSequenceNumber())) {
                emptyConditionals.put(a.getSequenceNumber(),
                                      true && emptyConditionals
                                      .get(a.getSequenceNumber()));
              } else {
                emptyConditionals.put(a.getSequenceNumber(), true);
              }
            }
          }

          if (emptyConditions.isFalse()) {
            // No branch is empty.  Return the (false) condition; the
            // caller will stop extending the follow set.
            presenceCondition.delRef();

            return emptyConditions;
          }

          // Set the presence condition to be that of the empty
          // branches.
          presenceCondition.delRef();
          presenceCondition = emptyConditions;

          // Move a to the next token after the conditional's #endif.
          a = a.getNext();
          break;

        default:
          throw new UnsupportedOperationException();
        }
        break;

      default:
        throw new UnsupportedOperationException();
      }
    }
  }

  /**
   * Test whether the conditional token already has a follow-set
   * computed.
   *
   * @param t The conditional.
   * @return true if the follow-set is cached.
   */
  public boolean hasCachedSet(OrderedSyntax t) {
    return followCache.containsKey(t.getSequenceNumber());
  }

  /**
   * Get the cached follow-set for a conditional token.
   *
   * @param t The conditional.
   * @return The follow-set.
   */
  public Collection<Lookahead> getCachedSet(OrderedSyntax t) {
    return followCache.get(t.getSequenceNumber());
  }

  /**
   * Cache the conditional token's follow-set.
   *
   * @param t The conditional.
   * @param follow The follow-set to cache.
   */
  public void setCachedSet(OrderedSyntax t, Collection<Lookahead> follow) {
    followCache.put(t.getSequenceNumber(), follow);
  }

  /** Merge subparsers.  The list of subparsers should be ordered.
   * That way merge only needs to check the earliest subparsers at the
   * beginning of the list instead of checking all subparsers for a
   * merge.
   *
   * @param subparsers The ordered list of subparsers to check for and
   * perform merges
   */
  private void merge(PriorityQueue<Subparser> subparsers) {
    if (subparsers.size() <= 1) return;

    // The list of earliest subparsers.  These are the parsers that we
    // compare pair-wise for mergeability.
    xtc.util.LinkedList<Subparser> subset
      = new xtc.util.LinkedList<Subparser>();

    // Get the earliest token.
    OrderedSyntax earliestToken = subparsers.peek().a.t;

    // Pull the earliest subparsers off of the main set of subparsers.
    while (subparsers.size() > 0
           && subparsers.peek().a.t.compare(earliestToken) == 0) {
      subset.add(subparsers.poll());
    }

    // Move through the list of elements (not an iterator, avoiding
    // concurrent modification), but use an iterator for the inner
    // loop.
    xtc.util.LinkedList<Subparser>.Element parserElement = subset.getFirst();

    // Check each pair of the earliest subparsers for mergeability.
    while (true) {
      Subparser subparser = parserElement.data();

      // The list of subparsers to merge into the current
      // parserElement subparser.  It is "null" if there are no
      // parsers to merge.
      LinkedList<Subparser> mergedParsers = null;

      assert subparser.a.t.same(earliestToken);

      // Check the subparser against all other earliest subparsers.
      ListIterator<Subparser> iterator = subset.listIterator(0);

      while (iterator.hasNext()) {
        Subparser compareParser = iterator.next();

        // Can't merge with self.
        if (subparser == compareParser) continue;

        assert earliestToken.same(compareParser.a.t);
        assert subparser.a.t.same(compareParser.a.t);

        // See if the subparsers can merge.  Two subparsers can merge
        // if the following hold: (1) they are on the same token (we
        // already know this since only checking earliest subparsers),
        // (2), the token is classified the same way under the parsing
        // context, (3) they have the same parsing state, (4) they
        // have mergeable parsing contexts, and (5) they aren't
        // pointing to the exact same stack frame, because this means
        // the subparsers were just forked.
        boolean sameTokenType
          = (subparser.a.t.syntax.kind() == Kind.LANGUAGE
             && subparser.a.t.syntax.toLanguage().tag()
                == compareParser.a.t.syntax.toLanguage().tag())
          || subparser.a.t.syntax.kind() != Kind.LANGUAGE;

        if (sameTokenType
            && subparser.scope.mayMerge(compareParser.scope)
            && subparser.s.isMergeable(compareParser.s)
            && subparser.s != compareParser.s) {
          // Save the subparser for later merging.
          if (null == mergedParsers) {
            mergedParsers = new LinkedList<Subparser>();
          }
          mergedParsers.addLast(compareParser);

          // Remove the merged parser from the set of active
          // subparsers.
          iterator.remove();
        }
      }

      if (null != mergedParsers) {
        // Merge the subparsers.  Create a merged subparser with a new
        // (1) semantic value, (2) presence condition, and (3) parsing
        // context.  The construction of each follows below.  As an
        // optimization, the current parserElement subparser is
        // replaced in-memory with the new subparser.

        // (1) Combine their semantic values.  This is achieved by
        // creating a new stack fragment that is as deep as the split
        // in the stack.  Each frame in the new stack fragment has a
        // semantic value that is a conditional containing the
        // semantic values from each of the merged subparsers' stacks.

        // Find the distance down the GSS of the highest common
        // descendent of all subparsers.  The GSS is an up-tree
        // instead of a DAG, which is why the following algorithm will
        // work.
        //
        // NOTE(review): `s` is not reset to subparser.s for each
        // merged parser; after the first iteration it continues
        // walking from the previous meeting point.  Verify this is
        // intentional (it appears to rely on the common descendent
        // only getting deeper as more parsers are considered).
        int maxDist = 0;
        StateStack s = subparser.s;

        for (Subparser mergedParser : mergedParsers) {
          int dist = 0;
          StateStack t = mergedParser.s;

          while (s != null && s != t) {
            s = s.next;
            t = t.next;
            dist++;
          }

          if (dist > maxDist) {
            maxDist = dist;
          }
        }

        // Create the new AST CHOICE_NODE_NAME nodes to store the
        // combined semantic values and their presence conditions.
        // This creates a new stack fragment that duplicates the
        // current parserElement's stack down maxDist stack element.
        // Each new stack element contains a CHOICE_NODE_NAME node with
        // the original semantic value and the subparser's presence
        // condition.

        // Duplicate the stack fragment down to maxDist.  Replace the
        // top stack frame for the merged subparser.
        subparser.s = new StateStack(subparser.s.state,
                                     subparser.s.value,
                                     subparser.s.next);

        // Duplicate the rest of the stack.
        StateStack u = subparser.s;

        for (int i = 0; i < maxDist - 1; i++) {
          u.next = new StateStack(u.next.state, u.next.value, u.next.next);
          u = u.next;
        }

        // Combine all the merged parser's semantic values.
        for (Subparser mergedParser : mergedParsers) {
          subparser.s.merge(subparser.context, mergedParser.s,
                            mergedParser.context, maxDist);
        }

        // (2) Find the logical disjunction of all the merged
        // subparsers' presence conditions.
        Context disjunction = subparser.context;

        for (Subparser mergedParser : mergedParsers) {
          Context or = disjunction.or(mergedParser.context);

          disjunction.delRef();
          disjunction = or;
        }

        // Update the subparser with the new, combined presence
        // condition.
        subparser.context = disjunction;

        subparser.a.c.delRef();
        if (! subparser.a.isSet()) {
          subparser.a.c = disjunction.addRef();
        } else {
          // Replace a lookahead set with just a regular lookahead.
          // If one or more subparsers already contains a set (due to
          // shared reductions or lazy forking) then the set contains
          // the follow-set with the presence conditions only for the
          // current subparser.  We avoid having incorrect presence
          // conditions by replacing the lookahead with a new one that
          // isn't a set.  Then, the follow-set for the token will be
          // computed as normal
          ((LookaheadSet) subparser.a).free();
          subparser.a = new Lookahead(subparser.a.t, disjunction.addRef());
        }

        // (3) Use the parsing contexts merge method to construct the
        // new parsing context.
        for (Subparser mergedParser : mergedParsers) {
          subparser.scope.merge(mergedParser.scope);
        }

        // Lastly, clean up the merged subparsers memory use.
        for (Subparser mergedParser : mergedParsers) {
          mergedParser.a.c.delRef();
          if (mergedParser.a.isSet()) ((LookaheadSet) mergedParser.a).free();
          mergedParser.context.delRef();
          mergedParser.scope.free();
        }
      }

      if (parserElement.isLast()) break;

      parserElement = parserElement.next();
    }

    // Add the processed and merged subparsers back to the main set of
    // subparsers.
    for (Subparser subparser : subset) {
      subparsers.add(subparser);
    }
  }

  /**
   * Fork subparser on a set of tokens.
   *
   * @param tokenSet The set of tokens.
   * @return A collection of the forked subparsers.
   */
  private Collection<Subparser> fork(Subparser subparser,
                                     Collection<Lookahead> tokenSet) {
    LinkedList<Subparser> processedParsers = new LinkedList<Subparser>();

    // One new subparser per lookahead.  Each fork shares the parent's
    // stack, takes a fresh reference to the lookahead's condition,
    // and forks the parsing-context scope.
    for (Lookahead n : tokenSet) {
      processedParsers
        .addLast(new Subparser(n, subparser.s, n.c.addRef(),
                               subparser.scope.fork()));
    }

    return processedParsers;
  }

  /**
   * Partition the follow-set by shared reductions and create new
   * subparsers for the tokens and token-sets resulting from the
   * partition.
   *
   * @param tokenSet The set of tokens.
   * @param subparser The subparser that is being forked.
   * @return The partitioned set of next tokens.
   */
  private Collection<Lookahead> partition(Collection<Lookahead> tokenSet,
                                          Subparser subparser) {
    Collection<Lookahead> partition = new LinkedList<Lookahead>();

    // Partition the reduces into sets based on what production they
    // are reducing, i.e. shared reductions.
    HashMap<Integer, Lookahead> sharedReductions = null;

    // Partition the shifts into a single set for later lazy forking.
    Lookahead shifts = null;

    // Partition the token set.
    for (Lookahead n : tokenSet) {
      getAction(n, subparser.s);

      if (optimizeSharedReductions && ParsingAction.REDUCE == n.getAction()) {
        // Partition ordinary tokens in the follow set by the
        // production they are reducing.
        if (null == sharedReductions) {
          sharedReductions = new HashMap<Integer, Lookahead>();
        }

        if (sharedReductions.containsKey(n.getActionData())) {
          Lookahead element = sharedReductions.get(n.getActionData());
          LookaheadSet set;

          if (element.isSet()) {
            // There is already a set for this reduction.
            set = (LookaheadSet) element;
          } else {
            // There was only one token with this reduction so far;
            // make a new lookahead set housing it.
            set = new LookaheadSet(subparser.a.t, element.c.addRef(),
                                   element.getAction(),
                                   element.getActionData());
            set.add(element);

            // Add the set of lookaheads to the shared reductions.
            sharedReductions.put(n.getActionData(), set);
          }

          // Add the new token to the shared reduction set.
          set.add(n);

          // Update the set's presence condition.
          Context union = set.c.or(n.c);

          set.c.delRef();
          set.c = union;
        } else {
          // This is the first token that is reducing this production.
          sharedReductions.put(n.getActionData(), n);
        }
      } else if (optimizeLazyForking
                 && ParsingAction.SHIFT == n.getAction()) {
        if (null == shifts) {
          // No tokens have been added yet.
          shifts = n;
        } else {
          LookaheadSet set;

          if (shifts.isSet()) {
            // We already have a set.
            set = (LookaheadSet) shifts;
          } else {
            // There is more than one shifting token, but we have only
            // seen one.  Create a new set to house both of them.
            Lookahead element = shifts;

            // Create the lookahead set.
            set = new LookaheadSet(subparser.a.t, element.c.addRef(),
                                   element.getAction(),
                                   element.getActionData());
            set.add(element);
            shifts = set;
          }

          // Add the new token to the set of shifting tokens.
          set.add(n);

          // Update the set's presence condition.
          Context union = set.c.or(n.c);

          set.c.delRef();
          set.c = union;
        }
      } else {
        // Add the ordinary token by itself to the partition.  For
        // naive FMLR, no partitioning is really done; all tokens are
        // partitioned into their own trivial subsets.
        partition.add(n);
      }
    }

    // Add the shared reductions to the partition.
    if (null != sharedReductions) {
      partition.addAll(sharedReductions.values());
    }

    // Add the shifts to the partition.
    if (null != shifts) {
      partition.add(shifts);
    }

    // Return the partition.
    return partition;
  }

  /**
   * Lazily fork the set of shifts.  Partition the set of next tokens
   * into individual tokens for the current conditional and a single
   * token for the ordinary token or start conditional that follows
   * the current conditional.
   *
   * @param tokenSet The set that is being lazily forked.
   * @return The partitioned set of tokens.
   * @throws IOException because it may read tokens from input.
   * @throws ClassCastException if subparser.a is not a LookaheadSet
   * object.
   * @throws IllegalStateException if any next token does not cause a
   * shift action.
It also is thrown if the lazy forking optimization * is turned off but this method is called. Additionally it is * thrown if tokenSet.t.syntax is not a START conditional. */ private Collection<Lookahead> lazyFork(LookaheadSet tokenSet) throws IOException { if (tokenSet.t.syntax.kind() != Kind.CONDITIONAL || tokenSet.t.syntax.toConditional().tag() != ConditionalTag.START || ! optimizeLazyForking) { throw new IllegalStateException(); } // Get the sequence numbers for the START conditional and the // first token after the END of this conditional. Only the tokens // within this ranged will be forked. The range includes the // START but is exclusive of the after token, i.e. in range // notation: [min, maxExclusive). // FIXME: optimize this by attaching the END sequence number to // the START conditional during the FOLLOW set computation. int min = tokenSet.t.getSequenceNumber(); OrderedSyntax after = skipConditional(tokenSet.t); int maxExclusive = after.getSequenceNumber(); // Move to the next ordinary token or start conditional // (#if). If the next token ends a branch (#elif or // #endif), move to the next ordinary token or start // conditional after the conditional. while (after.syntax.kind() == Kind.CONDITIONAL && after.syntax.toConditional().tag != ConditionalTag.START) { Conditional conditional = after.syntax.toConditional(); switch (conditional.tag()) { case START: // No need to move. break; case NEXT: after = skipConditional(after); break; case END: after = after.getNext(); break; default: // No such conditional tag. throw new UnsupportedOperationException(); } } // Partition the set into two: (1) the _set_ of tokens (even if // there's only one, to ensure it can merge with other subparsers // at that conditional) that fall inside the current conditional // [min, maxExclusive) and (2) the _token_ (ordinary or START // conditional. Make sure that (2), the token, has a presence // condition that is the union of all the remaining tokens for // correctness. 
Collection<Lookahead> forkedSet = new LinkedList<Lookahead>(); Lookahead remainder = null; for (Lookahead n : tokenSet.set) { if (min < n.t.getSequenceNumber() && n.t.getSequenceNumber() < maxExclusive) { // The token is inside the current conditional. Add to the // set of return tokens. forkedSet.add(n); } else { // The token is not inside the current conditional. Create // the remainder token and union the presence conditions of // the tokens outside the conditional. if (null == remainder) { // This is the first token of the remainder. remainder = new Lookahead(after, n.c.addRef()); } else { // There is more than one token that is not in the current // conditional. Update the remainder's presence condition. Context union = remainder.c.or(n.c); remainder.c.delRef(); remainder.c = union; } // Clean up these tokens. The follow set will be called on // the remainder again anyway. n.c.delRef(); } } // Add the remainder token to the set of return tokens. if (null != remainder) { forkedSet.add(remainder); } // Collect statistics on empty branches. if (parserStatistics) { lazyForks++; if (null != remainder) { lazyForksEmptyBranches++; } } return forkedSet; } /** * Skip an entire conditional block. The given token must be a * conditional start or next token. * * @param a The conditional start or next token. * @return The first token after the end of the conditional. */ private OrderedSyntax skipConditional(OrderedSyntax a) throws IOException { if (a.syntax.toConditional().tag() != ConditionalTag.START && a.syntax.toConditional().tag() != ConditionalTag.NEXT) { throw new RuntimeException("skipConditional must take a " + "start or next conditional token."); } // Check the cache to see if we already found the next token for // this start or next conditional. if (skipConditionalCache.containsKey(a.getSequenceNumber())) { return skipConditionalCache.get(a.getSequenceNumber()); } // Save the input's sequence number for later caching. 
int sequenceNumber = a.getSequenceNumber(); // Move to the first token after the start of the conditional and // carefully past it, matching #ifs and #endif and keeping track // of other nested conditionals. int nesting = 1; do { a = a.getNext(); if (a.syntax.kind() == Kind.CONDITIONAL) { switch (a.syntax.toConditional().tag()) { case START: nesting++; break; case END: nesting--; break; } } } while (nesting > 0); // Now we are on the #endif. Return the next token after the end // of the conditional. OrderedSyntax returnToken = a.getNext(); // Cache the token. skipConditionalCache.put(sequenceNumber, returnToken); return returnToken; } /** * Shift the subparser. * * @param subparser The subparser to shift. */ private void shift(Subparser subparser) { Lookahead x = subparser.a; Language<? extends LanguageTag> token = x.t.syntax.toLanguage(); int yystate = x.getActionData(); Node node; OrderedSyntax next; if (showActions) { runtime.errConsole().pln("shifting " + token.tag() + "(" + token.getTokenText() + ")").flush(); } // Layout terminals, e.g. punctuation, have no semantic value. // TODO Not implementing layout token hiding yet, because we need // to be able to test using SuperC -printSource. if (Actions.ValueType.LAYOUT == actions.getValueType(token.tag().getID()) && ! runtime.test("printSource")) { // If we are printing the AST as source code, we must retain the // LAYOUT tokens. token = null; } // Push the new state onto the stack. subparser.s = new StateStack(yystate, token, subparser.s); } /** * Reduce the subparser. * * @param subparser The subparser to reduce. */ private void reduce(Subparser subparser) { int production = subparser.a.getActionData(); int yylen = ForkMergeParserTables.yyr2.table[production]; int symbol = ForkMergeParserTables.yyr1.table[production]; String nodeName = ForkMergeParserTables.yytname.table[symbol];; if (showActions) { runtime.errConsole().pln("reducing " + nodeName).flush(); } // Get the semantic values from the stack. 
StateStack topState = subparser.s; Pair<Object> values = Pair.<Object>empty(); for (int i = 0; i < yylen; i++) { // Don't bother adding null semantic values as children. if (null != topState.value) { values = new Pair<Object>(topState.value, values); } topState = topState.next; } // Get the goto parsing state. int yystate; yystate = ForkMergeParserTables .yypgoto.table[symbol - ForkMergeParserTables.YYNTOKENS] + topState.state; if (0 <= yystate && yystate <= ForkMergeParserTables.YYLAST && ForkMergeParserTables.yycheck.table[yystate] == topState.state) { yystate = ForkMergeParserTables.yytable.table[yystate]; } else { yystate= ForkMergeParserTables .yydefgoto.table[symbol - ForkMergeParserTables.YYNTOKENS]; } // Construct the new semantic value. Actions.ValueType valueType = actions.getValueType(symbol); Object value; switch (valueType) { case ACTION: // Semantic action nonterminals have no semantic value and // should be empty. Not until new implementation of semantic // actions. if (Pair.<Object>empty() == values) { value = null; break; } else { throw new UnsupportedOperationException("semantic actions " + "nonterminals should " + "have no semantic value"); } case LAYOUT: // Layout nonterminals have no semantic values. value = null; break; case PASS_THROUGH: if (Pair.<Object>empty() == values) { value = null; } else if (values.tail() == Pair.<Object>empty()) { // If there is one child, then pass-through. value = values.head(); break; } else { // Pass-through nonterminals only get passed-through when they // have one child. // Fall through to the default case. 
} case NODE: value = actions.getValue(symbol, nodeName, values); break; case LIST: if (values == Pair.<Object>empty()) { value = null; } else { if (nodeName.equals(((Node) values.head()).getName())) { value = ((Node) values.head()); values = values.tail(); } else { value = GNode.create(nodeName); } for (Object o : values) { GNode conditionalNode = GNode.create(CHOICE_NODE_NAME); conditionalNode.add(subparser.context.addRef()); conditionalNode.add(o); ((Node) value).add(conditionalNode); } } break; default: throw new UnsupportedOperationException("unsupported node type"); } // Push the new state onto the stack. subparser.s = new StateStack(yystate, value, topState); // Dispatch the semantic action if there is one. actions.dispatch(symbol, subparser); } /** * Get the parsing action for a token. This method is adopted from * Bison's parsing algorithm. * * @param x The token to find the parsing action for. * @param s The state to use to find the action. */ private void getAction(Lookahead x, StateStack s) { if (x.t.syntax.kind() == Kind.LANGUAGE || x.t.syntax.kind() == Kind.EOF) { int yyn; int yystate; yystate = s.state; yyn = ForkMergeParserTables.yypact.table[yystate]; if (ForkMergeParserTables.YYPACT_NINF == yyn) { // Decide to reduce without looking at the next token. This // is a Bison thing. yyn = ForkMergeParserTables.yydefact.table[yystate]; if (0 == yyn) { x.setAction(ParsingAction.ERROR, NODEFAULT); } else { x.setAction(ParsingAction.REDUCE, yyn); } } else { // Find the parsing action for the next token. // Get the token's Bison symbol number. int yytoken; if (x.t.syntax.kind() == Kind.EOF) { yytoken = ForkMergeParserTables.YYEOF; } else if (x.t.syntax.kind() == Kind.LANGUAGE) { Language<? extends LanguageTag> token = x.t.syntax.toLanguage(); String str = token.getTokenText(); LanguageTag tokentype = token.tag(); yytoken = token.tag().getID(); } else { yytoken = -1; } // Index into action table, state row + token column. 
yyn += yytoken; // Lookup the parsing action. if (yyn < 0 || ForkMergeParserTables.YYLAST < yyn || ForkMergeParserTables.yycheck.table[yyn] != yytoken) { yyn = ForkMergeParserTables.yydefact.table[yystate]; if (0 == yyn) { x.setAction(ParsingAction.ERROR, NODEFAULT); } else { x.setAction(ParsingAction.REDUCE, yyn); } } else { yyn = ForkMergeParserTables.yytable.table[yyn]; if (yyn <= 0) { if (0 == yyn || ForkMergeParserTables.YYTABLE_NINF == yyn) { x.setAction(ParsingAction.ERROR, INVALID); } else { yyn = -yyn; x.setAction(ParsingAction.REDUCE, yyn); } } else { yystate = yyn; if (ForkMergeParserTables.YYFINAL == yystate) { x.setAction(ParsingAction.ACCEPT, -1); x.action = ParsingAction.ACCEPT; } else { x.setAction(ParsingAction.SHIFT, yystate); } } } } } else { throw new UnsupportedOperationException("parser does not handle " + "any other tokens besides " + "ordinary and conditional."); } } /** A subparser. */ public static class Subparser { /** The lookahead symbol, either a token or a conditional. **/ public Lookahead a; /** The state stack. */ public StateStack s; /** The presence condition. */ public Context context; /** The C typedef/var symbol table. */ public Actions.Context scope; /** * Create a new subparser. * * @param a The next token. * @param s The active state stack element. * @param context The presence condition. * @param scope The parsing context. */ public Subparser(Lookahead a, StateStack s, Context context, Actions.Context scope) { this.a = a; this.s = s; this.context = context; this.scope = scope; } /** * Get the presence condition of the subparser. * * @return The presence condition. */ public Context getContext() { return context; } } /** A lookahead token. */ public static class Lookahead { /** The token. */ public OrderedSyntax t; /** The context. */ public Context c; /** The parsing action. */ private ParsingAction action; /** The parsing action data. */ private int actionData; /** * Create a new instance. * * @param t The token. 
     * @param c The presence condition.  The lookahead takes ownership
     * of this reference.
     */
    public Lookahead(OrderedSyntax t, Context c) {
      this.t = t;
      this.c = c;
      this.clearAction();
    }

    /**
     * Set the parsing action for this next token.
     *
     * @param action The parsing action.
     * @param actionData The parsing action data, i.e. the shift
     * state, reduced production, or the error id.
     */
    public void setAction(ParsingAction action, int actionData) {
      this.action = action;
      this.actionData = actionData;
    }

    /**
     * Clear the parsing action after taking the action.  Resets the
     * action to NONE and the action data to -1.
     */
    public void clearAction() {
      action = ParsingAction.NONE;
      actionData = -1;
    }

    /**
     * Copy the parsing action from another next token.
     *
     * @param n The other next token.
     */
    public void copyAction(Lookahead n) {
      this.setAction(n.getAction(), n.getActionData());
    }

    /**
     * Get the parsing action.  Note that despite the declared
     * exception, this accessor does not currently validate the
     * action; if no action has been set it returns NONE (see
     * clearAction).
     *
     * @return The parsing action.
     */
    public ParsingAction getAction() throws IllegalStateException {
      return action;
    }

    /**
     * Get the parsing action data if there is any.  For actions
     * without data, namely ACCEPT, the behavior of this method is
     * undefined.  Note that no validation is currently performed; if
     * no action has been set this returns -1 (see clearAction).
     *
     * @return The parsing action data.
     */
    public int getActionData() throws IllegalStateException {
      return actionData;
    }

    /**
     * Create a string representation.
     *
     * @return The string representation.
     */
    public String toString() {
      return "(" + t.syntax.toString() + ", " + action + ", " + actionData
        + ", " + t.getParentConditional() + ", " + c + ")";
    }

    /**
     * Whether the lookahead is a single token or a set of lookahead
     * token.  Lookahead sets are used to implement shared reductions
     * and lazy forking.
     *
     * @return true if it is a set.
*/ public boolean isSet() { return false; } } private class LookaheadSet extends Lookahead { public LinkedList<Lookahead> set; public LookaheadSet(OrderedSyntax t, Context c, ParsingAction action, int actionData) { super(t, c); this.setAction(action, actionData); this.set = new LinkedList<Lookahead>(); } /** * Add a new lookahead to this set. * * @param l The new lookahead. */ public void add(Lookahead l) { set.add(l); } public boolean isSet() { return true; } /** Free the BDDs in the set. */ public void free() { for (Lookahead l : set) { l.c.delRef(); } } /** * Create a string representation. * * @return The string representation. */ public String toString() { StringBuilder sb = new StringBuilder(); sb.append(super.toString()); sb.append(":"); sb.append(set); return sb.toString(); } } /** * A syntax object and it's sequence number, buffered in a * linked-list so that multiple subparsers can read tokens from the * same stream. It references a stack of nested conditions in order * to store the sequence number of the conditional in which each * token lies. */ public class OrderedSyntax { /** The syntax */ public final Syntax syntax; /** The stream from which to pull syntax */ private final Stream stream; /** The ordered sequence number */ private int order; /** The next ordered token */ private OrderedSyntax _next; /** * The sequence number of the conditional in which this token * lies. */ private int parentConditional; /** * Create a new ordered syntax object. A call to getNext() on an * OrderedSyntax object created with this constructor will return * the first token from the stream. * * @param stream The stream from which to pull new tokens. */ public OrderedSyntax(Stream stream) { this(null, 0, 0, stream); nestedConditionals.push(0); } /** * Create a new ordered syntax object. * * @param stream The stream from which to pull new tokens. * @param syntax The token. * @param order The sequence number of the token. 
*/ private OrderedSyntax(Syntax syntax, int order, int parentConditional, Stream stream) { this.syntax = syntax; this.order = order; this.parentConditional = parentConditional; this.stream = stream; } /** * Create a copy of an ordered syntax object using a different * syntax object. This is useful for reclassifying tokens due to * parsing context. * * @param newSyntax The new syntax object. * @param old The old OrderedSyntax object to duplicate sequence * and next token information from. * @throws IOException This constructor must call getNext(), which * throws IOException. */ public OrderedSyntax(Syntax newSyntax, OrderedSyntax old) throws IOException { this.stream = old.stream; this.syntax = newSyntax; this.parentConditional = parentConditional; this.order = old.order; // Prime the pump, by reading the next token. This is // necessary, because if both the original and the copy of the // token have "null" as their next token, they will both call // stream.scan(), which will advance the read two tokens ahead. // Really they both want the same next token. old.getNext(); this._next = old._next; } /** * Create a copy of this object using a new Syntax object. * * @param syntax The new syntax object. * @return A new OrderedSyntax object. * @throws IOException because it calls getNext(). */ public OrderedSyntax copy(Syntax syntax) throws IOException { return new OrderedSyntax(syntax, this); } /** * Get the next token from the stream and assign it the next * sequence number. * * @return The next token. 
*/
public OrderedSyntax getNext() throws IOException {
  if (null == this._next) {
    // Scan a fresh token; it is tagged with the sequence number of
    // the conditional currently on top of the nesting stack.
    this._next = new OrderedSyntax(this.stream.scan(), this.order + 1,
                                   nestedConditionals.peek(),
                                   this.stream);
    if (this._next.syntax.kind() == Kind.CONDITIONAL) {
      switch (this._next.syntax.toConditional().tag()) {
      case START:
        // A conditional start is its own parent; its sequence number
        // becomes the parent for the tokens nested inside it.
        nestedConditionals.push(this._next.order);
        this._next.parentConditional = this._next.order;
        break;

      case NEXT:
        // Stay at the same nesting depth.
        break;

      case END:
        nestedConditionals.pop();
        break;
      }
    }
  }
  return _next;
}

/**
 * Get the sequence number of this token.  The token sequence
 * numbers are strictly monotonically increasing from the start of
 * the file to the end.
 *
 * @return The sequence number.
 */
public int getSequenceNumber() {
  return order;
}

/**
 * Get the sequence number of this token's parent conditional.
 * This method is undefined when the token is a conditional itself.
 *
 * @return The sequence number of this token's parent conditional.
 */
public int getParentConditional() {
  return parentConditional;
}

/**
 * Compare another ordered token.
 *
 * @param orderedSyntax The other token to compare to.
 * @return -1 if this token is earlier than the given token, 1 if
 * this token is later, 0 if this token is neither earlier nor
 * later.
 */
public int compare(OrderedSyntax orderedSyntax) {
  if (this.order < orderedSyntax.order) return -1;
  else if (this.order > orderedSyntax.order) return 1;
  else /* if (this.order == orderedSyntax.order) */ return 0;
}

/**
 * Test whether another token is the same as this one.
 *
 * @param ordered The other token.
 * @return true if they have the same order number.
 */
public boolean same(OrderedSyntax ordered) {
  return this.order == ordered.order;
}

/**
 * Get the string representation.
 *
 * @return The string representation.
 */
public String toString() {
  return this.order + ":" + this.syntax.toString() + this.syntax.getClass();
}
}

/** A frame of the parsing state stack. */
public static class StateStack {
/** The state number. */
public int state;

/** The semantic value.
*/
public Object value;

/** The next state in the stack. */
public StateStack next;

/** The height of the stack.  Maintained internally. */
private int height;

/**
 * Make a new state.
 *
 * @param state The state number.
 * @param value The semantic value.
 * @param next The next state stack element in the stack.
 */
public StateStack(int state, Object value, StateStack next) {
  this.state = state;
  this.value = value;
  this.next = next;
  if (null == next) {
    height = 1;
  } else {
    height = next.height + 1;
  }
}

/**
 * Get the ith state down the stack, "1" returning this state.
 *
 * NOTE(review): performs no bounds checking; the caller must pass
 * 1 <= i <= getHeight().
 *
 * @param i The state to return.
 * @return The ith state down the stack.
 */
public StateStack get(int i) {
  StateStack state;

  state = this;
  while (i > 1) {
    state = state.next;
    i--;
  }

  return state;
}

/**
 * Get the height of the stack.
 *
 * @return The height.
 */
public int getHeight() {
  // Cheap cross-check of the cached height when assertions are on.
  assert checkHeight() == height;
  return height;
}

/**
 * Check the height of the stack by following the links.
 *
 * @return The height.
 */
private int checkHeight() {
  int h = 0;
  StateStack s = this;

  while (null != s) {
    h++;
    s = s.next;
  }

  return h;
}

/**
 * Recursively merge the semantic values from the given state
 * stack into this state stack.  The number of elements to merge
 * is controlled by the dist parameter.  This method assumes that
 * this stack's semantic values are already CHOICE_NODE_NAME nodes.
 *
 * This version is inefficient in that shared semantic values
 * from shared stack elements get referenced twice.
 *
 * @param thisContext The context of this state stack's semantic
 * value.
 * @param other The other state stack.
 * @param otherContext the context of the other state stack's
 * semantic value.
 * @param dist The distance down the stack to merge.
 */
public void merge(Context thisContext, StateStack other,
                  Context otherContext, int dist) {
  if (dist == 0) return;

  // Bit 0 set: this.value is non-null; bit 1 set: other.value is
  // non-null.
  int flags = (null != this.value ? 1 : 0) | (null != other.value ? 2 : 0);

  switch (flags) {
  case 0:
    // Both are null.  Do nothing.
    break;

  case 1:
    // other.value is null, but this.value is not.  There is
    // nothing to add to this semantic value, so do nothing.
    break;

  case 2:
    // this.value is null, but other.value is not.  Create a new
    // conditional to store the non-null other.value and its
    // presence condition.
    GNode conditionalNode = GNode.create(CHOICE_NODE_NAME);

    conditionalNode.add(otherContext.addRef());
    conditionalNode.add(other.value);
    this.value = conditionalNode;
    break;

  case 3:
    // Neither are null.  Add other.value and its presence
    // condition.
    if (this.value == other.value) {
      // Both are already pointing to the same list node.  Don't
      // create a conditional.
    } else if (! ((Node) this.value).getName().equals(CHOICE_NODE_NAME)) {
      // Combine the two values, this and other, into a choice
      // node.
      GNode cnode = GNode.create(CHOICE_NODE_NAME);

      cnode.add(thisContext.addRef());
      cnode.add(this.value);
      cnode.add(otherContext.addRef());
      cnode.add(other.value);
      this.value = cnode;
    } else {
      // This value is already a conditional, so just add the
      // other value.
      ((Node) this.value).add(otherContext.addRef());
      ((Node) this.value).add(other.value);
    }
    break;
  }

  if (this.next != null) {
    this.next.merge(thisContext, other.next, otherContext, dist - 1);
  }
}

/**
 * Get the string representation.
 *
 * @return The string representation.
 */
public String toString() {
  return value + ":" + next;
}

/**
 * Check whether this parsing state can merge with another.
 *
 * @param other The other parsing state.
 * @return true if it can merge with this state.
 */
public boolean isMergeable(StateStack other) {
  return isMergeable(this, other);
}

/**
 * Check whether two parsing states can merge.  They can merge
 * when they have the same sequence of state numbers.
 *
 * @param s The first parsing state.
 * @param t The second parsing state.
 * @return true if they can merge.
 */
private static boolean isMergeable(StateStack s, StateStack t) {
  if (s == t) {
    return true;
  } else if (s == null || t == null) {
    return false;
  } else if (s.state != t.state) {
    return false;
  } else {
    return isMergeable(s.next, t.next);
  }
}
}

/**
 * Determine whether a token is parseable.  The only parseable
 * tokens are Language tokens and EOF.
 *
 * @param syntax The syntax to test for parseability.
 * @return true if the syntax is a Language or EOF token.
 */
public static boolean isParseable(Syntax syntax) {
  return syntax.kind() == Kind.LANGUAGE || syntax.kind() == Kind.EOF;
}
}