/* * Copyright (c) 2012-2017 The ANTLR Project. All rights reserved. * Use of this file is governed by the BSD 3-clause license that * can be found in the LICENSE.txt file in the project root. */ package org.antlr.v4.automata; import org.antlr.v4.runtime.atn.ATN; import org.antlr.v4.runtime.atn.ATNState; import org.antlr.v4.tool.ast.ActionAST; import org.antlr.v4.tool.ast.BlockAST; import org.antlr.v4.tool.ast.GrammarAST; import org.antlr.v4.tool.ast.PredAST; import org.antlr.v4.tool.ast.TerminalAST; import java.util.List; public interface ATNFactory { /** A pair of states pointing to the left/right (start and end) states of a * state submachine. Used to build ATNs. */ public static class Handle { public ATNState left; public ATNState right; public Handle(ATNState left, ATNState right) { this.left = left; this.right = right; } @Override public String toString() { return "("+left+","+right+")"; } } ATN createATN(); void setCurrentRuleName(String name); void setCurrentOuterAlt(int alt); Handle rule(GrammarAST ruleAST, String name, Handle blk); ATNState newState(); Handle label(Handle t); Handle listLabel(Handle t); Handle tokenRef(TerminalAST node); Handle set(GrammarAST associatedAST, List<GrammarAST> alts, boolean invert); Handle charSetLiteral(GrammarAST charSetAST); Handle range(GrammarAST a, GrammarAST b); /** For a non-lexer, just build a simple token reference atom. * For a lexer, a string is a sequence of char to match. That is, * "fog" is treated as 'f' 'o' 'g' not as a single transition in * the DFA. Machine== o-'f'->o-'o'->o-'g'->o and has n+1 states * for n characters. */ Handle stringLiteral(TerminalAST stringLiteralAST); /** For reference to rule r, build * * o-e->(r) o * * where (r) is the start of rule r and the trailing o is not linked * to from rule ref state directly (it's done thru the transition(0) * RuleClosureTransition. * * If the rule r is just a list of tokens, it's block will be just * a set on an edge o->o->o-set->o->o->o, could inline it rather than doing * the rule reference, but i'm not doing this yet as I'm not sure * it would help much in the ATN->DFA construction. * * TODO add to codegen: collapse alt blks that are sets into single matchSet * @param node */ Handle ruleRef(GrammarAST node); /** From an empty alternative build Grip o-e->o */ Handle epsilon(GrammarAST node); /** Build what amounts to an epsilon transition with a semantic * predicate action. The pred is a pointer into the AST of * the SEMPRED token. */ Handle sempred(PredAST pred); /** Build what amounts to an epsilon transition with an action. * The action goes into ATN though it is ignored during analysis. */ Handle action(ActionAST action); Handle action(String action); Handle alt(List<Handle> els); /** From A|B|..|Z alternative block build * * o->o-A->o->o (last ATNState is blockEndATNState pointed to by all alts) * | ^ * o->o-B->o--| * | | * ... | * | | * o->o-Z->o--| * * So every alternative gets begin ATNState connected by epsilon * and every alt right side points at a block end ATNState. There is a * new ATNState in the ATNState in the Grip for each alt plus one for the * end ATNState. * * Special case: only one alternative: don't make a block with alt * begin/end. * * Special case: if just a list of tokens/chars/sets, then collapse * to a single edge'd o-set->o graph. * * Set alt number (1..n) in the left-Transition ATNState. */ Handle block(BlockAST blockAST, GrammarAST ebnfRoot, List<Handle> alternativeGrips); // Handle notBlock(GrammarAST blockAST, Handle set); /** From (A)? build either: * * o--A->o * | ^ * o---->| * * or, if A is a block, just add an empty alt to the end of the block */ Handle optional(GrammarAST optAST, Handle blk); /** From (A)+ build * * |---| (Transition 2 from A.right points at alt 1) * v | (follow of loop is Transition 1) * o->o-A-o->o * * Meaning that the last ATNState in A points back to A's left Transition ATNState * and we add a new begin/end ATNState. A can be single alternative or * multiple. * * During analysis we'll call the follow link (transition 1) alt n+1 for * an n-alt A block. */ Handle plus(GrammarAST plusAST, Handle blk); /** From (A)* build * * |---| * v | * o->o-A-o--o (Transition 2 from block end points at alt 1; follow is Transition 1) * | ^ * o---------| (optional branch is 2nd alt of optional block containing A+) * * Meaning that the last (end) ATNState in A points back to A's * left side ATNState and we add 3 new ATNStates (the * optional branch is built just like an optional subrule). * See the Aplus() method for more on the loop back Transition. * The new node on right edge is set to RIGHT_EDGE_OF_CLOSURE so we * can detect nested (A*)* loops and insert an extra node. Previously, * two blocks shared same EOB node. * * There are 2 or 3 decision points in a A*. If A is not a block (i.e., * it only has one alt), then there are two decisions: the optional bypass * and then loopback. If A is a block of alts, then there are three * decisions: bypass, loopback, and A's decision point. * * Note that the optional bypass must be outside the loop as (A|B)* is * not the same thing as (A|B|)+. * * This is an accurate ATN representation of the meaning of (A)*, but * for generating code, I don't need a DFA for the optional branch by * virtue of how I generate code. The exit-loopback-branch decision * is sufficient to let me make an appropriate enter, exit, loop * determination. See codegen.g */ Handle star(GrammarAST starAST, Handle blk); /** Build an atom with all possible values in its label */ Handle wildcard(GrammarAST associatedAST); Handle lexerAltCommands(Handle alt, Handle cmds); Handle lexerCallCommand(GrammarAST ID, GrammarAST arg); Handle lexerCommand(GrammarAST ID); }