package scotch.compiler.parser; import static org.antlr.v4.runtime.Recognizer.EOF; import static org.hamcrest.Matchers.is; import static org.junit.Assert.assertThat; import static scotch.compiler.parser.ScotchLexer.ARROW; import static scotch.compiler.parser.ScotchLexer.AS; import static scotch.compiler.parser.ScotchLexer.BACKTICK; import static scotch.compiler.parser.ScotchLexer.BACKWARDS_ARROW; import static scotch.compiler.parser.ScotchLexer.BIGINT_LITERAL; import static scotch.compiler.parser.ScotchLexer.BOOL_LITERAL; import static scotch.compiler.parser.ScotchLexer.CHARACTER_LITERAL; import static scotch.compiler.parser.ScotchLexer.CLASS; import static scotch.compiler.parser.ScotchLexer.CLOSE_CURLY; import static scotch.compiler.parser.ScotchLexer.CLOSE_PAREN; import static scotch.compiler.parser.ScotchLexer.CLOSE_SQUARE; import static scotch.compiler.parser.ScotchLexer.COMMA; import static scotch.compiler.parser.ScotchLexer.DATA; import static scotch.compiler.parser.ScotchLexer.DO; import static scotch.compiler.parser.ScotchLexer.DOT; import static scotch.compiler.parser.ScotchLexer.DOUBLE_ARROW; import static scotch.compiler.parser.ScotchLexer.DOUBLE_COLON; import static scotch.compiler.parser.ScotchLexer.DOUBLE_LITERAL; import static scotch.compiler.parser.ScotchLexer.ELSE; import static scotch.compiler.parser.ScotchLexer.EQUALS; import static scotch.compiler.parser.ScotchLexer.HEX_LITERAL; import static scotch.compiler.parser.ScotchLexer.ID_TYPE; import static scotch.compiler.parser.ScotchLexer.ID_VAR; import static scotch.compiler.parser.ScotchLexer.IF; import static scotch.compiler.parser.ScotchLexer.INFIX; import static scotch.compiler.parser.ScotchLexer.INSTANCE; import static scotch.compiler.parser.ScotchLexer.INT_LITERAL; import static scotch.compiler.parser.ScotchLexer.LAMBDA; import static scotch.compiler.parser.ScotchLexer.LEFT; import static scotch.compiler.parser.ScotchLexer.MODULE; import static scotch.compiler.parser.ScotchLexer.OCT_LITERAL; import static scotch.compiler.parser.ScotchLexer.OPEN_CURLY; import static scotch.compiler.parser.ScotchLexer.OPEN_PAREN; import static scotch.compiler.parser.ScotchLexer.OPEN_SQUARE; import static scotch.compiler.parser.ScotchLexer.OPERATOR; import static scotch.compiler.parser.ScotchLexer.PIPE; import static scotch.compiler.parser.ScotchLexer.PREFIX; import static scotch.compiler.parser.ScotchLexer.RIGHT; import static scotch.compiler.parser.ScotchLexer.SEMICOLON; import static scotch.compiler.parser.ScotchLexer.STRING_LITERAL; import static scotch.compiler.parser.ScotchLexer.THEN; import static scotch.compiler.parser.ScotchLexer.UNIT; import static scotch.compiler.parser.ScotchLexer.VOCABULARY; import static scotch.compiler.parser.ScotchLexer.WHERE; import static scotch.compiler.parser.ScotchLexer.WILDCARD; import static scotch.util.Pair.pair; import java.util.ArrayList; import java.util.List; import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.BaseErrorListener; import org.antlr.v4.runtime.RecognitionException; import org.antlr.v4.runtime.Recognizer; import org.antlr.v4.runtime.Token; import org.junit.Rule; import org.junit.Test; import org.junit.rules.ExpectedException; import scotch.util.Pair; public class ScotchLexerTest { @Rule public final ExpectedException exception = ExpectedException.none(); @Test public void shouldGetInteger() { shouldLex("1234 bananas!", INT_LITERAL, "1234"); } @Test public void shouldGetEof() { shouldLexAt(2, "1234", EOF, "<EOF>"); } @Test public void shouldGetHexadecimalWithLowerCase() { shouldLex("0xabcdef", HEX_LITERAL); } @Test public void shouldGetHexadecimalWithUpperCase() { shouldLex("0xABCDEF", HEX_LITERAL); } @Test public void shouldGetHexadecimalWithNumbersAndLetters() { shouldLex("0x1a3BEf4", HEX_LITERAL); } @Test public void shouldGetOctal() { shouldLex("07677", OCT_LITERAL); } @Test public void zeroShouldBeIntLiteral() { shouldLex("0 ", INT_LITERAL, "0"); } @Test public void shouldGetIdentifierEqualsIdentifier() { shouldLex("fruit=bananas", new ArrayList<Pair<Integer, String>>() {{ add(pair(ID_VAR, "fruit")); add(pair(EQUALS, "=")); add(pair(ID_VAR, "bananas")); }}); } @Test public void shouldGetArrowWithTailAsOperator() { shouldLex(">->", OPERATOR); } @Test public void shouldGetArrow() { shouldLex("->", ARROW); } @Test public void shouldGetBindOperator() { shouldLex(">>=", OPERATOR); } @Test public void shouldGetDoubleArrow() { shouldLex("=>", DOUBLE_ARROW); } @Test public void shouldGetDoubleArrowWithTailAsOperator() { shouldLex(">=>", OPERATOR); } @Test public void shouldGetRangeOperator() { shouldLex("..", OPERATOR); } @Test public void shouldGetDotOperator() { shouldLex(" . ", OPERATOR, "."); } @Test public void shouldGetDotOperatorWithTabs() { shouldLex("\t. ", OPERATOR, "."); } @Test public void shouldGetSemicolon() { shouldLex(";", SEMICOLON); } @Test public void shouldGetDot() { shouldLex(".", DOT); } @Test public void shouldGetIdVar() { shouldLex("a", ID_VAR); } @Test public void shouldGetIdType() { shouldLex("Map", ID_TYPE); } @Test public void shouldGetUnicodeDoubleArrow() { shouldLex("⇒", DOUBLE_ARROW); } @Test public void shouldGetUnicodeArrow() { shouldLex("→", ARROW); } @Test public void shouldGetDoubleColon() { shouldLex("::", DOUBLE_COLON); } @Test public void shouldGetUnicodeDoubleColon() { shouldLex("∷", DOUBLE_COLON); } @Test public void shouldGetLambdaSlash() { shouldLex("\\", LAMBDA); } @Test public void shouldGetUnicodeLambda() { shouldLex("λ", LAMBDA); } @Test public void shouldGetBackwardsArrow() { shouldLex("<-", BACKWARDS_ARROW); } @Test public void shouldGetUnicodeBackwardsArrow() { shouldLex("←", BACKWARDS_ARROW); } @Test public void shouldGetWhere() { shouldLex("where", WHERE); } @Test public void shouldGetDo() { shouldLex("do", DO); } @Test public void shouldGetOpenParen() { shouldLex("(", OPEN_PAREN); } @Test public void shouldGetCloseParen() { shouldLex(")", CLOSE_PAREN); } @Test public void shouldGetOpenSquare() { shouldLex("[", OPEN_SQUARE); } @Test public void shouldGetCloseSquare() { shouldLex("]", CLOSE_SQUARE); } @Test public void shouldGetOpenCurly() { shouldLex("{", OPEN_CURLY); } @Test public void shouldGetCloseCurly() { shouldLex("}", CLOSE_CURLY); } @Test public void shouldGetComma() { shouldLex(",", COMMA); } @Test public void shouldGetWildcard() { shouldLex("_ ", WILDCARD, "_"); } @Test public void shouldGetAs() { shouldLex("as bananas", AS, "as"); } @Test public void shouldGetIdVarWithTrailingBang() { shouldLex("var!", ID_VAR); } @Test public void shouldGetIdVarWithTrailingWat() { shouldLex("var?", ID_VAR); } @Test public void shouldGetIdVarWithTrailingBangAndWat() { shouldLex("var!?", ID_VAR); } @Test public void shouldGetIdVarWithTrailingQuote() { shouldLex("var'", ID_VAR); } @Test public void shouldGetIdVarWithTrailingBangAndQuote() { shouldLex("var!'", ID_VAR); } @Test public void shouldGetModule() { shouldLex("module", MODULE); } @Test public void shouldGetLeft() { shouldLex("left infix", LEFT, "left"); } @Test public void shouldGetRight() { shouldLex("right infix", RIGHT, "right"); } @Test public void shouldGetInfix() { shouldLex("infix 4", INFIX, "infix"); } @Test public void shouldGetPrefix() { shouldLex("prefix 3", PREFIX, "prefix"); } @Test public void shouldGetBacktick() { shouldLex("`back", BACKTICK, "`"); } @Test public void shouldGetIf() { shouldLex("if True", IF, "if"); } @Test public void shouldGetThen() { shouldLex("then branch", THEN, "then"); } @Test public void shouldGetElse() { shouldLex("else branch", ELSE, "else"); } @Test public void shouldGetString() { shouldLex("\"toast\" <= string", STRING_LITERAL, "\"toast\""); } @Test public void shouldGetStringWithEscape() { shouldLex("\"toast\\n\" <= string with escape", STRING_LITERAL, "\"toast\\n\""); } @Test public void shouldGetStringWithSingleQuotes() { shouldLex("\"'toast'\" <= single quoted toast", STRING_LITERAL, "\"'toast'\""); } @Test public void shouldGetStringWithOctalEscape() { shouldLex("\"\\033\" <= octal escape", STRING_LITERAL, "\"\\033\""); } @Test public void shouldGetStringWithUnicodeEscape() { shouldLex("\"\\u00FA\" <= unicode escape", STRING_LITERAL, "\"\\u00FA\""); } @Test public void shouldGetCharacter() { shouldLex("'a' <= char", CHARACTER_LITERAL, "'a'"); } @Test public void shouldGetCharacterWithUnicodeEscape() { shouldLex("'\\uFA00' <= unicode escape", CHARACTER_LITERAL, "'\\uFA00'"); } @Test public void shouldGetCharacterWithEscape() { shouldLex("'\\t' <= escape", CHARACTER_LITERAL, "'\\t'"); } @Test public void shouldGetCharacterWithOctalEscape() { shouldLex("'\\045' <= octal escape", CHARACTER_LITERAL, "'\\045\'"); } @Test public void unterminatedStringShouldCauseError() { exception.expect(LexTestException.class); exception.expectMessage("token recognition error at: '\"oops\\n'"); lex("\"oops\n").nextToken(); } @Test public void unterminatedCharacterShouldCauseError() { exception.expect(LexTestException.class); exception.expectMessage("token recognition error at: ''a\\n'"); lex("'a\n").nextToken(); } @Test public void overlongCharacterShouldCauseError() { exception.expect(LexTestException.class); exception.expectMessage("token recognition error at: ''ab'"); lex("'ab\n").nextToken(); } @Test public void shouldGetPipe() { shouldLex("| pipe", PIPE, "|"); } @Test public void shouldGetNilList() { shouldLex("[] <= nil list", ID_TYPE, "[]"); } @Test public void shouldGetUnit() { shouldLex("() <= unit", UNIT, "()"); } @Test public void shouldGetClass() { shouldLex("class <= keyword", CLASS, "class"); } @Test public void shouldGetInstance() { shouldLex("instance <= keyword", INSTANCE, "instance"); } @Test public void shouldGetSingleColonAsType() { shouldLex(": <= cons list", ID_TYPE, ":"); } @Test public void shouldGetData() { shouldLex("data <= keyword", DATA, "data"); } @Test public void shouldGetTrue() { shouldLex("True <= boolean", BOOL_LITERAL, "True"); } @Test public void shouldGetFalse() { shouldLex("False <= boolean", BOOL_LITERAL, "False"); } @Test public void shouldGetDoubleLiteral() { shouldLex("1.2 <= double", DOUBLE_LITERAL, "1.2"); } @Test public void shouldGetDoubleLiteralWithoutLeadingDigits() { shouldLexAt(1, "id .2 <= double", DOUBLE_LITERAL, ".2"); } @Test public void shouldGetBigIntLiteral() { shouldLex("123B <= bigInt", BIGINT_LITERAL, "123B"); } private ScotchLexer lex(String... input) { ScotchLexer lexer = new ScotchLexer(new ANTLRInputStream(String.join("\n", input))); lexer.removeErrorListeners(); lexer.addErrorListener(new BaseErrorListener() { @Override public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) { throw new LexTestException(msg); } }); return lexer; } private void shouldHaveToken(Token token, int type, String text) { assertThat(VOCABULARY.getSymbolicName(token.getType()), is(VOCABULARY.getSymbolicName(type))); assertThat(token.getText(), is(text)); } private void shouldLex(String text, int type) { shouldLex(text, type, text); } private void shouldLex(String input, List<Pair<Integer, String>> tokens) { ScotchLexer lexer = lex(input); tokens.forEach(pair -> pair.into((type, text) -> { Token token = lexer.nextToken(); shouldHaveToken(token, type, text); return null; })); } private void shouldLex(String input, int type, String text) { shouldLexAt(0, input, type, text); } private void shouldLexAt(int offset, String input, int type, String text) { ScotchLexer lexer = lex(input); for (int i = 0; i < offset; i++) { lexer.nextToken(); } Token token = lexer.nextToken(); shouldHaveToken(token, type, text); } private static final class LexTestException extends RuntimeException { public LexTestException(String message) { super(message); } } }