package grammar.java;
import static grammar.GrammarDSL.*;
import grammar.Expression;
/**
* This class defines the lexical structure of the Java language, i.e. how to
* form the tokens that would be used in a context-free grammar (CFG). In a PEG
* grammar, the rules for token formation can be included in the grammar itself.
*
* Normally Java parses a source file as follows:
*
* <pre>
* - conversion of unicode escape (\\uXXXX) into unicode characters
* - stripping of white space and comments
* - tokenization (using longest match)
* </pre>
*
* Currently, unicode escapes are not recognized.
*
* This class contains both grammar rules and helper functions, that notably
* help with whitespace handling.
*
* @see JavaGrammar
*/
public class _A_Lexical
{
  //----------------------------------------------------------------------------
  // Helpers
  //----------------------------------------------------------------------------
  // NOTE: instance field initializers run in textual order, and the helpers
  // below read the 'spacing' and 'letterOrDigit' fields. Any field whose
  // initializer calls one of these helpers must therefore be declared after
  // the field(s) that the helper reads. Do not reorder fields carelessly.
  //----------------------------------------------------------------------------

  /**
   * Builds an expression matching {@code expr} followed by {@link #spacing}.
   *
   * @param expr the expression after which spacing is allowed
   * @return the sequence of {@code expr} and {@code spacing}
   */
  protected Expression spaced(Expression expr)
  {
    return seq(expr, spacing);
  }

  /**
   * Builds a literal token: {@code expr} wrapped with {@code atomic}, followed
   * by {@link #spacing}.
   *
   * @param expr the expression describing the literal's characters
   * @return the spaced, atomic version of {@code expr}
   */
  protected Expression literal(Expression expr)
  {
    return spaced(atomic(expr));
  }

  /**
   * Builds a rule for an operator (or separator) made of the exact string
   * {@code str}, followed by {@link #spacing}.
   *
   * @param str the operator's text
   * @return the operator rule
   */
  protected Expression operator(String str)
  {
    return rule(spaced(str(str)));
  }

  /**
   * Builds a rule for an operator made of the exact string {@code str}, which
   * only matches if the next character is not one of {@code notChars}. This is
   * how an operator is kept from matching the beginning of a longer operator
   * (e.g. "+" versus "+=" and "++").
   *
   * @param str the operator's text
   * @param notChars the characters that may not directly follow {@code str}
   * @return the operator rule
   */
  protected Expression operator(String str, String notChars)
  {
    return rule(spaced(atomic(seq(
      str(str), not(chars(notChars))))));
  }

  /**
   * Builds a rule for a reserved word: {@code expr} not directly followed by a
   * letter or digit (so that "classFoo" is not read as the keyword "class"),
   * followed by {@link #spacing}.
   *
   * @param expr the expression matching the word(s)
   * @return the keyword rule
   */
  protected Expression keyword(Expression expr)
  {
    return rule(spaced(atomic(
      seq(expr, not(letterOrDigit)))));
  }

  /**
   * Same as {@link #keyword(Expression)}, for a single word given as a string.
   *
   * @param keyword the reserved word
   * @return the keyword rule
   */
  protected Expression keyword(String keyword)
  {
    return keyword(str(keyword));
  }

  //----------------------------------------------------------------------------
  // Commonly Used Subexpressions
  //----------------------------------------------------------------------------

  public final Expression underscore  = chars("_");
  public final Expression floatSuffix = chars("fFdD");
  public final Expression zeroSeven   = range('0', '7');
  public final Expression zeroNine    = range('0', '9');

  //----------------------------------------------------------------------------
  // JLS 3.3 (Unicode Escapes)
  //----------------------------------------------------------------------------

  /** A single hexadecimal digit (either case). */
  public final Expression hexDigit = rule(
    range('a', 'f'),
    range('A', 'F'),
    zeroNine
  );

  //----------------------------------------------------------------------------
  // Building blocks for spacing (declared before 'spacing', which uses them)
  //----------------------------------------------------------------------------

  /** One or more white space characters (space, tab, CR, LF, form feed). */
  public final Expression whiteSpace = rule(plus(chars(" \t\r\n\f")));

  // A traditional (slash-star) comment, up to its closing delimiter.
  public final Expression multiLineComment = rule_seq(
    str("/*"),
    until(any, str("*/"))
  );

  // An end-of-line (double slash) comment, up to a CR or LF.
  // NOTE(review): this presumably requires a line terminator to be present,
  // in which case such a comment on the very last line of a file that does not
  // end with a newline would not match - confirm the semantics of until().
  public final Expression singleLineComment = rule_seq(
    str("//"),
    until(any, chars("\r\n"))
  );

  //----------------------------------------------------------------------------
  // JLS 3.6 (Spacing) & JLS 3.7 (Comments)
  //----------------------------------------------------------------------------

  /**
   * Optional spacing. Spacing is never mandatory in java, but some tokens
   * cannot be parsed if not separated by spacing (e.g. classSpacing vs class
   * Spacing).
   */
  public final Expression spacing = rule(star(
    choice(atomic(plus(whiteSpace)), multiLineComment, singleLineComment)));

  /**
   * Atomic spacing.
   */
  public final Expression aspacing = atomic(rule(spacing));

  /**
   * Mandatory spacing (for use in macro's syntactic specification).
   */
  public final Expression fspacing = atomic(rule(plus(
    choice(whiteSpace, multiLineComment, singleLineComment))));

  /**
   * Atomic mandatory spacing.
   */
  public final Expression afspacing = atomic(rule(fspacing));

  //----------------------------------------------------------------------------
  // JLS 3.8 (Identifiers) (1: letters and digits)
  //----------------------------------------------------------------------------
  /* These are traditional definitions of letters and digits. JLS defines
   * letters and digits as Unicode characters recognized as such by special Java
   * procedures, which is difficult to express in terms of Parsing Expressions. */
  //----------------------------------------------------------------------------

  /** An ASCII letter, an underscore or a dollar sign. */
  public final Expression letter = rule(
    range('a', 'z'),
    range('A', 'Z'),
    chars("_$")
  );

  /** A {@link #letter} or an ASCII decimal digit. */
  public final Expression letterOrDigit = rule(letter, range('0', '9'));

  //----------------------------------------------------------------------------
  // JLS 3.10.3 (Boolean Literals)
  //----------------------------------------------------------------------------

  public final Expression _true  = keyword("true");
  public final Expression _false = keyword("false");
  public final Expression booleanLiteral = rule(_true, _false);

  //----------------------------------------------------------------------------
  // JLS 3.10.7 (The Null Literal)
  //----------------------------------------------------------------------------

  public final Expression _null = keyword("null");

  //----------------------------------------------------------------------------
  // JLS 3.9 (Keywords)
  //----------------------------------------------------------------------------
  /* More precisely: reserved words. According to JLS, "true", "false", and
   * "null" are technically not keywords - but still must not appear as
   * identifiers. Keywords "const" and "goto" are not used; JLS explains the
   * reason. We distinguish primitive types from other keywords. */
  //----------------------------------------------------------------------------

  public final Expression _boolean = keyword("boolean");
  public final Expression _byte    = keyword("byte");
  public final Expression _char    = keyword("char");
  public final Expression _double  = keyword("double");
  public final Expression _float   = keyword("float");
  public final Expression _int     = keyword("int");
  public final Expression _long    = keyword("long");
  // "short" is a primitive type name and a reserved word like the others; it
  // must be listed here so that it cannot be parsed as an identifier.
  public final Expression _short   = keyword("short");

  /** Any of the primitive type names. */
  public final Expression primitiveType = rule(
    _boolean, _byte, _char, _double, _float, _int, _long, _short);

  /**
   * Java keywords which are not primitive types, to be reused in other rules.
   * To use keyword in grammar rules, use the fields of this class named
   * "_<keyword_name>" (e.g. "_public").
   *
   * goto and const are not used by Java but are reserved for the compiler to
   * produce better error message if those C/C++ keywords are used.
   *
   * When a word is a prefix of another one, the longer word is listed first
   * ("finally" before "final", "throws" before "throw"); keep it that way, as
   * the alternatives are presumably tried in order.
   */
  public final Expression lexNonTypeKeyword = keyword(choice(
    str("abstract"),    str("assert"),
    str("break"),       str("case"),
    str("catch"),       str("class"),
    str("const"),       str("continue"),
    str("default"),     str("do"),
    str("else"),        str("enum"),
    str("extends"),     str("finally"),
    str("final"),       str("for"),
    str("goto"),        str("if"),
    str("implements"),  str("import"),
    str("interface"),   str("instanceof"),
    str("native"),      str("new"),
    str("package"),     str("private"),
    str("protected"),   str("public"),
    str("return"),      str("static"),
    str("strictfp"),    str("super"),
    str("switch"),      str("synchronized"),
    str("this"),        str("throws"),
    str("throw"),       str("transient"),
    str("try"),         str("void"),
    str("volatile"),    str("while")
  ));

  /**
   * Words that represent primitive values, to be reused in other rules.
   */
  public final Expression lexLiteralWord = rule(_true, _false, _null);

  /**
   * Reserved words comprise keywords (primitive type names, other keywords) and
   * literal words. A Java identifier cannot be the same as a reserved word.
   */
  public final Expression keyword = atomic(rule(
    primitiveType,
    lexNonTypeKeyword,
    lexLiteralWord
  ));

  //-------------------------------------------------------------------------
  // JLS 3.8 (Identifiers) (2: identifiers)
  //-------------------------------------------------------------------------

  /** A letter followed by letters or digits, which is not a reserved word. */
  public final Expression identifier = rule(literal(seq(
    not(keyword),
    letter,
    star(letterOrDigit)
  )));

  //-------------------------------------------------------------------------
  // JLS 3.10.1 (Integer Literals)
  //-------------------------------------------------------------------------

  /** Hexadecimal digits, separated by any number of underscores. */
  public final Expression hexDigits = rule(list(star(underscore), hexDigit));

  public final Expression hexNumeral = rule_seq(
    choice(str("0x"), str("0X")), hexDigits);

  // The separator is star(underscore), like in 'hexDigits' and 'digits':
  // adjacent bits need no underscore between them, and several underscores in
  // a row are allowed (e.g. 0b101, 0b1__0).
  public final Expression binaryNumeral = rule_seq(
    choice(str("0b"), str("0B")),
    list(star(underscore), chars("01"))
  );

  public final Expression octalNumeral = rule_seq(
    str("0"), plus(star(underscore), zeroSeven));

  public final Expression decimalNumeral = rule(
    str("0"),
    seq(
      range('1', '9'),
      star(star(underscore), zeroNine)
    )
  );

  public final Expression integerLiteral = rule(literal(seq(
    choice(
      hexNumeral,
      binaryNumeral,
      octalNumeral,
      decimalNumeral // May be a prefix of all above ("0")
    ),
    opt(chars("lL"))
  )));

  //----------------------------------------------------------------------------
  // Integer parsable by {@link Integer#parseInt(String, int)}.
  // /!\ This does NOT include trailing whitespace.
  //----------------------------------------------------------------------------
  // The sign is referenced by name, as the "plus" and "minus" operator fields
  // are declared further down and are not yet initialized at this point.
  // NOTE(review): those operator rules are built with spaced(), so spacing
  // between the sign and the digits appears to be accepted here, which
  // Integer.parseInt would reject - confirm whether this is intended.
  //----------------------------------------------------------------------------
  public final Expression parseIntNumber = rule(seq(
    opt(choice(ref("plus"), ref("minus"))),
    plus(zeroNine)
  ));

  //----------------------------------------------------------------------------
  // JLS 3.10.2 (Floating-Point Literals)
  //----------------------------------------------------------------------------

  public final Expression hexSignificand = rule(
    seq(
      choice(str("0x"), str("0X")),
      opt(hexDigits),
      str("."),
      hexDigits
    ),
    seq(hexNumeral, opt(str("."))) // May be a prefix of above
  );

  /** Decimal digits, separated by any number of underscores. */
  public final Expression digits = rule(list(star(underscore), zeroNine));

  // The optional sign is '+' or '-' only, exactly like in 'exponent' below.
  public final Expression binaryExponent = rule_seq(
    chars("pP"), opt(chars("+-")), digits);

  public final Expression hexFloat = rule_seq(
    hexSignificand, binaryExponent, opt(floatSuffix));

  public final Expression exponent = rule_seq(
    chars("eE"), opt(chars("+-")), digits);

  public final Expression decimalFloat = rule(
    seq(digits, str("."), opt(digits), opt(exponent), opt(floatSuffix)),
    seq(str("."), digits, opt(exponent), opt(floatSuffix)),
    seq(digits, exponent, opt(floatSuffix)),
    seq(digits, opt(exponent), floatSuffix)
  );

  public final Expression floatLiteral = rule(
    literal(choice(hexFloat, decimalFloat)));

  //----------------------------------------------------------------------------
  // JLS 3.10.6 (Escape Sequences for Character and String Literals)
  //----------------------------------------------------------------------------

  /** One to three octal digits; the first is at most 3 when there are three. */
  public final Expression octalEscape = rule(
    seq(range('0', '3'), zeroSeven, zeroSeven),
    seq(zeroSeven, zeroSeven),
    zeroSeven
  );

  /** A backslash followed by an escape character or an octal escape. */
  public final Expression escape = rule_seq(
    str("\\"),
    choice(chars("btnfr\"'\\"), octalEscape)
  );

  //----------------------------------------------------------------------------
  // JLS 3.10.4 (Character Literals)
  //----------------------------------------------------------------------------

  /** An escape, or any character but a single quote, backslash, LF or CR. */
  public final Expression charLiteralNoQuotes = rule(
    escape,
    seq(not(chars("'\\\n\r")), any)
  );

  public final Expression charLiteral = rule(literal(seq(
    str("'"), charLiteralNoQuotes, str("'"))));

  //----------------------------------------------------------------------------
  // JLS 3.10.5 (String Literals)
  //----------------------------------------------------------------------------

  /** Escapes and characters other than double quote, backslash, LF and CR. */
  public final Expression stringLiteralContent = atomic(rule(star(choice(
    escape,
    seq(not(chars("\"\\\n\r")), any)
  ))));

  public final Expression stringLiteral = rule(literal(seq(
    str("\""), stringLiteralContent, str("\""))));

  //----------------------------------------------------------------------------
  // JLS 3.10.0 (Literals)
  //----------------------------------------------------------------------------

  // floatLiteral is tried before integerLiteral: an integer literal can be a
  // prefix of a floating-point literal (e.g. "1" in "1.5").
  public final Expression literal = rule(
    charLiteral,
    floatLiteral,
    integerLiteral,
    stringLiteral,
    booleanLiteral,
    _null
  );

  //----------------------------------------------------------------------------
  // JLS 3.11 (Separators)
  //----------------------------------------------------------------------------
  // lAnBra/rAnBra means left/right angle bracket
  // lCuBra/rCuBra means left/right curly brace
  // lPar/rPar     means left/right parenthesis
  // lSqBra/rSqBra means left/right square bracket
  //----------------------------------------------------------------------------

  public final Expression lAnBra = operator("<");
  public final Expression lCuBra = operator("{");
  public final Expression lPar   = operator("(");
  public final Expression lSqBra = operator("[");
  public final Expression rAnBra = operator(">");
  public final Expression rCuBra = operator("}");
  public final Expression rPar   = operator(")");
  public final Expression rSqBra = operator("]");

  //----------------------------------------------------------------------------
  // JLS 3.12 (Operators) (1: Arithmetic Operators)
  //----------------------------------------------------------------------------

  public final Expression plus       = operator("+", "=+");
  public final Expression minus      = operator("-", "=-");
  public final Expression star       = operator("*", "=");
  public final Expression slash      = operator("/", "=");
  public final Expression percent    = operator("%", "=");
  public final Expression plusEq     = operator("+=");
  public final Expression minusEq    = operator("-=");
  public final Expression starEq     = operator("*=");
  public final Expression slashEq    = operator("/=");
  public final Expression modEq      = operator("%=");
  public final Expression plusPlus   = operator("++");
  public final Expression minusMinus = operator("--");

  //----------------------------------------------------------------------------
  // JLS 3.12 (Operators) (2: Binary Operators)
  //----------------------------------------------------------------------------
  /* sl, sr and bsr mean "shift left", "shift right" and "binary shift right" */
  //----------------------------------------------------------------------------

  public final Expression pipe   = operator("|", "=|");
  public final Expression and    = operator("&", "=&");
  public final Expression hat    = operator("^", "=");
  public final Expression hatEq  = operator("^=");
  public final Expression sl     = operator("<<", "=");
  public final Expression sr     = operator(">>", "=>");
  public final Expression bsr    = operator(">>>", "=");
  public final Expression pipeEq = operator("|=");
  public final Expression andEq  = operator("&=");
  public final Expression slEq   = operator("<<=");
  public final Expression srEq   = operator(">>=");
  public final Expression bsrEq  = operator(">>>=");
  public final Expression tilde  = operator("~");

  //----------------------------------------------------------------------------
  // JLS 3.12 (Operators) (3: Logic Operators)
  //----------------------------------------------------------------------------

  public final Expression bang   = operator("!", "=");
  public final Expression andAnd = operator("&&");
  public final Expression orOr   = operator("||");

  //----------------------------------------------------------------------------
  // JLS 3.12 (Operators) (4: Comparison Operators)
  //----------------------------------------------------------------------------
  /* ge, gt, le, lt mean greater or equal, greater than, lower or equal, .. */
  //----------------------------------------------------------------------------

  public final Expression eqEq  = operator("==");
  public final Expression notEq = operator("!=");
  public final Expression ge    = operator(">=");
  public final Expression gt    = operator(">", "=>");
  public final Expression le    = operator("<=");
  // Mirrors 'gt': "<" must not be followed by '=' or '<', so that it does not
  // match the beginning of "<=", "<<" or "<<=".
  public final Expression lt    = operator("<", "=<");

  //----------------------------------------------------------------------------
  // JLS 3.12 (Operators) (5: Other Operators)
  //----------------------------------------------------------------------------

  public final Expression at       = operator("@");
  public final Expression colon    = operator(":");
  public final Expression comma    = operator(",");
  public final Expression dot      = operator(".");
  public final Expression ellipsis = operator("...");
  public final Expression eq       = operator("=", "=");
  public final Expression qMark    = operator("?");
  public final Expression semi     = operator(";");

  //----------------------------------------------------------------------------
  // Keywords
  //----------------------------------------------------------------------------
  /* goto and const are not used by Java but are reserved for the compiler to
   * produce better error message if those C/C++ keywords are used. */
  //----------------------------------------------------------------------------

  public final Expression _abstract     = keyword("abstract");
  public final Expression _assert       = keyword("assert");
  public final Expression _break        = keyword("break");
  public final Expression _case         = keyword("case");
  public final Expression _catch        = keyword("catch");
  public final Expression _class        = keyword("class");
  public final Expression _continue     = keyword("continue");
  public final Expression _default      = keyword("default");
  public final Expression _do           = keyword("do");
  public final Expression _else         = keyword("else");
  public final Expression _enum         = keyword("enum");
  public final Expression _extends      = keyword("extends");
  public final Expression _finally      = keyword("finally");
  public final Expression _final        = keyword("final");
  public final Expression _for          = keyword("for");
  public final Expression _if           = keyword("if");
  public final Expression _implements   = keyword("implements");
  public final Expression _import       = keyword("import");
  public final Expression _interface    = keyword("interface");
  public final Expression _instanceof   = keyword("instanceof");
  public final Expression _native       = keyword("native");
  public final Expression _new          = keyword("new");
  public final Expression _package      = keyword("package");
  public final Expression _private      = keyword("private");
  public final Expression _protected    = keyword("protected");
  public final Expression _public       = keyword("public");
  public final Expression _return       = keyword("return");
  public final Expression _static       = keyword("static");
  public final Expression _strictfp     = keyword("strictfp");
  public final Expression _super        = keyword("super");
  public final Expression _switch       = keyword("switch");
  public final Expression _synchronized = keyword("synchronized");
  public final Expression _this         = keyword("this");
  public final Expression _throw        = keyword("throw");
  public final Expression _throws       = keyword("throws");
  public final Expression _transient    = keyword("transient");
  public final Expression _try          = keyword("try");
  public final Expression _void         = keyword("void");
  public final Expression _volatile     = keyword("volatile");
  public final Expression _while        = keyword("while");
}