package scotch.compiler.parser;
import static org.antlr.v4.runtime.Recognizer.EOF;
import static org.hamcrest.Matchers.is;
import static org.junit.Assert.assertThat;
import static scotch.compiler.parser.ScotchLexer.ARROW;
import static scotch.compiler.parser.ScotchLexer.AS;
import static scotch.compiler.parser.ScotchLexer.BACKTICK;
import static scotch.compiler.parser.ScotchLexer.BACKWARDS_ARROW;
import static scotch.compiler.parser.ScotchLexer.BIGINT_LITERAL;
import static scotch.compiler.parser.ScotchLexer.BOOL_LITERAL;
import static scotch.compiler.parser.ScotchLexer.CHARACTER_LITERAL;
import static scotch.compiler.parser.ScotchLexer.CLASS;
import static scotch.compiler.parser.ScotchLexer.CLOSE_CURLY;
import static scotch.compiler.parser.ScotchLexer.CLOSE_PAREN;
import static scotch.compiler.parser.ScotchLexer.CLOSE_SQUARE;
import static scotch.compiler.parser.ScotchLexer.COMMA;
import static scotch.compiler.parser.ScotchLexer.DATA;
import static scotch.compiler.parser.ScotchLexer.DO;
import static scotch.compiler.parser.ScotchLexer.DOT;
import static scotch.compiler.parser.ScotchLexer.DOUBLE_ARROW;
import static scotch.compiler.parser.ScotchLexer.DOUBLE_COLON;
import static scotch.compiler.parser.ScotchLexer.DOUBLE_LITERAL;
import static scotch.compiler.parser.ScotchLexer.ELSE;
import static scotch.compiler.parser.ScotchLexer.EQUALS;
import static scotch.compiler.parser.ScotchLexer.HEX_LITERAL;
import static scotch.compiler.parser.ScotchLexer.ID_TYPE;
import static scotch.compiler.parser.ScotchLexer.ID_VAR;
import static scotch.compiler.parser.ScotchLexer.IF;
import static scotch.compiler.parser.ScotchLexer.INFIX;
import static scotch.compiler.parser.ScotchLexer.INSTANCE;
import static scotch.compiler.parser.ScotchLexer.INT_LITERAL;
import static scotch.compiler.parser.ScotchLexer.LAMBDA;
import static scotch.compiler.parser.ScotchLexer.LEFT;
import static scotch.compiler.parser.ScotchLexer.MODULE;
import static scotch.compiler.parser.ScotchLexer.OCT_LITERAL;
import static scotch.compiler.parser.ScotchLexer.OPEN_CURLY;
import static scotch.compiler.parser.ScotchLexer.OPEN_PAREN;
import static scotch.compiler.parser.ScotchLexer.OPEN_SQUARE;
import static scotch.compiler.parser.ScotchLexer.OPERATOR;
import static scotch.compiler.parser.ScotchLexer.PIPE;
import static scotch.compiler.parser.ScotchLexer.PREFIX;
import static scotch.compiler.parser.ScotchLexer.RIGHT;
import static scotch.compiler.parser.ScotchLexer.SEMICOLON;
import static scotch.compiler.parser.ScotchLexer.STRING_LITERAL;
import static scotch.compiler.parser.ScotchLexer.THEN;
import static scotch.compiler.parser.ScotchLexer.UNIT;
import static scotch.compiler.parser.ScotchLexer.VOCABULARY;
import static scotch.compiler.parser.ScotchLexer.WHERE;
import static scotch.compiler.parser.ScotchLexer.WILDCARD;
import static scotch.util.Pair.pair;
import java.util.ArrayList;
import java.util.List;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.Token;
import org.junit.Rule;
import org.junit.Test;
import org.junit.rules.ExpectedException;
import scotch.util.Pair;
/**
 * Token-level tests for {@link ScotchLexer}.
 *
 * <p>Each test feeds a small source snippet to the lexer and checks the type and text of a
 * single token (or a short sequence of tokens). Lexer errors are surfaced as
 * {@code LexTestException} by the listener installed in {@link #lex(String...)}.
 */
public class ScotchLexerTest {

    @Rule
    public final ExpectedException exception = ExpectedException.none();

    // ---------------------------------------------------------------- numeric literals

    @Test
    public void shouldGetInteger() {
        shouldLex("1234 bananas!", INT_LITERAL, "1234");
    }

    @Test
    public void shouldGetEof() {
        // Two tokens are discarded first; the third token requested must be EOF.
        shouldLexAt(2, "1234", EOF, "<EOF>");
    }

    @Test
    public void shouldGetHexadecimalWithLowerCase() {
        shouldLex("0xabcdef", HEX_LITERAL);
    }

    @Test
    public void shouldGetHexadecimalWithUpperCase() {
        shouldLex("0xABCDEF", HEX_LITERAL);
    }

    @Test
    public void shouldGetHexadecimalWithNumbersAndLetters() {
        shouldLex("0x1a3BEf4", HEX_LITERAL);
    }

    @Test
    public void shouldGetOctal() {
        shouldLex("07677", OCT_LITERAL);
    }

    @Test
    public void zeroShouldBeIntLiteral() {
        shouldLex("0 ", INT_LITERAL, "0");
    }

    // ---------------------------------------------------------------- identifiers, operators, punctuation

    @Test
    public void shouldGetIdentifierEqualsIdentifier() {
        // Plain list construction; avoids the anonymous ArrayList subclass that
        // double-brace initialization would create.
        List<Pair<Integer, String>> expectedTokens = new ArrayList<>();
        expectedTokens.add(pair(ID_VAR, "fruit"));
        expectedTokens.add(pair(EQUALS, "="));
        expectedTokens.add(pair(ID_VAR, "bananas"));
        shouldLex("fruit=bananas", expectedTokens);
    }

    @Test
    public void shouldGetArrowWithTailAsOperator() {
        shouldLex(">->", OPERATOR);
    }

    @Test
    public void shouldGetArrow() {
        shouldLex("->", ARROW);
    }

    @Test
    public void shouldGetBindOperator() {
        shouldLex(">>=", OPERATOR);
    }

    @Test
    public void shouldGetDoubleArrow() {
        shouldLex("=>", DOUBLE_ARROW);
    }

    @Test
    public void shouldGetDoubleArrowWithTailAsOperator() {
        shouldLex(">=>", OPERATOR);
    }

    @Test
    public void shouldGetRangeOperator() {
        shouldLex("..", OPERATOR);
    }

    @Test
    public void shouldGetDotOperator() {
        shouldLex(" . ", OPERATOR, ".");
    }

    @Test
    public void shouldGetDotOperatorWithTabs() {
        shouldLex("\t. ", OPERATOR, ".");
    }

    @Test
    public void shouldGetSemicolon() {
        shouldLex(";", SEMICOLON);
    }

    @Test
    public void shouldGetDot() {
        shouldLex(".", DOT);
    }

    @Test
    public void shouldGetIdVar() {
        shouldLex("a", ID_VAR);
    }

    @Test
    public void shouldGetIdType() {
        shouldLex("Map", ID_TYPE);
    }

    @Test
    public void shouldGetUnicodeDoubleArrow() {
        shouldLex("⇒", DOUBLE_ARROW);
    }

    @Test
    public void shouldGetUnicodeArrow() {
        shouldLex("→", ARROW);
    }

    @Test
    public void shouldGetDoubleColon() {
        shouldLex("::", DOUBLE_COLON);
    }

    @Test
    public void shouldGetUnicodeDoubleColon() {
        shouldLex("∷", DOUBLE_COLON);
    }

    @Test
    public void shouldGetLambdaSlash() {
        shouldLex("\\", LAMBDA);
    }

    @Test
    public void shouldGetUnicodeLambda() {
        shouldLex("λ", LAMBDA);
    }

    @Test
    public void shouldGetBackwardsArrow() {
        shouldLex("<-", BACKWARDS_ARROW);
    }

    @Test
    public void shouldGetUnicodeBackwardsArrow() {
        shouldLex("←", BACKWARDS_ARROW);
    }

    @Test
    public void shouldGetWhere() {
        shouldLex("where", WHERE);
    }

    @Test
    public void shouldGetDo() {
        shouldLex("do", DO);
    }

    @Test
    public void shouldGetOpenParen() {
        shouldLex("(", OPEN_PAREN);
    }

    @Test
    public void shouldGetCloseParen() {
        shouldLex(")", CLOSE_PAREN);
    }

    @Test
    public void shouldGetOpenSquare() {
        shouldLex("[", OPEN_SQUARE);
    }

    @Test
    public void shouldGetCloseSquare() {
        shouldLex("]", CLOSE_SQUARE);
    }

    @Test
    public void shouldGetOpenCurly() {
        shouldLex("{", OPEN_CURLY);
    }

    @Test
    public void shouldGetCloseCurly() {
        shouldLex("}", CLOSE_CURLY);
    }

    @Test
    public void shouldGetComma() {
        shouldLex(",", COMMA);
    }

    @Test
    public void shouldGetWildcard() {
        shouldLex("_ ", WILDCARD, "_");
    }

    @Test
    public void shouldGetAs() {
        shouldLex("as bananas", AS, "as");
    }

    @Test
    public void shouldGetIdVarWithTrailingBang() {
        shouldLex("var!", ID_VAR);
    }

    @Test
    public void shouldGetIdVarWithTrailingWat() {
        shouldLex("var?", ID_VAR);
    }

    @Test
    public void shouldGetIdVarWithTrailingBangAndWat() {
        shouldLex("var!?", ID_VAR);
    }

    @Test
    public void shouldGetIdVarWithTrailingQuote() {
        shouldLex("var'", ID_VAR);
    }

    @Test
    public void shouldGetIdVarWithTrailingBangAndQuote() {
        shouldLex("var!'", ID_VAR);
    }

    // ---------------------------------------------------------------- keywords

    @Test
    public void shouldGetModule() {
        shouldLex("module", MODULE);
    }

    @Test
    public void shouldGetLeft() {
        shouldLex("left infix", LEFT, "left");
    }

    @Test
    public void shouldGetRight() {
        shouldLex("right infix", RIGHT, "right");
    }

    @Test
    public void shouldGetInfix() {
        shouldLex("infix 4", INFIX, "infix");
    }

    @Test
    public void shouldGetPrefix() {
        shouldLex("prefix 3", PREFIX, "prefix");
    }

    @Test
    public void shouldGetBacktick() {
        shouldLex("`back", BACKTICK, "`");
    }

    @Test
    public void shouldGetIf() {
        shouldLex("if True", IF, "if");
    }

    @Test
    public void shouldGetThen() {
        shouldLex("then branch", THEN, "then");
    }

    @Test
    public void shouldGetElse() {
        shouldLex("else branch", ELSE, "else");
    }

    // ---------------------------------------------------------------- string and character literals

    @Test
    public void shouldGetString() {
        shouldLex("\"toast\" <= string", STRING_LITERAL, "\"toast\"");
    }

    @Test
    public void shouldGetStringWithEscape() {
        shouldLex("\"toast\\n\" <= string with escape", STRING_LITERAL, "\"toast\\n\"");
    }

    @Test
    public void shouldGetStringWithSingleQuotes() {
        shouldLex("\"'toast'\" <= single quoted toast", STRING_LITERAL, "\"'toast'\"");
    }

    @Test
    public void shouldGetStringWithOctalEscape() {
        shouldLex("\"\\033\" <= octal escape", STRING_LITERAL, "\"\\033\"");
    }

    @Test
    public void shouldGetStringWithUnicodeEscape() {
        shouldLex("\"\\u00FA\" <= unicode escape", STRING_LITERAL, "\"\\u00FA\"");
    }

    @Test
    public void shouldGetCharacter() {
        shouldLex("'a' <= char", CHARACTER_LITERAL, "'a'");
    }

    @Test
    public void shouldGetCharacterWithUnicodeEscape() {
        shouldLex("'\\uFA00' <= unicode escape", CHARACTER_LITERAL, "'\\uFA00'");
    }

    @Test
    public void shouldGetCharacterWithEscape() {
        shouldLex("'\\t' <= escape", CHARACTER_LITERAL, "'\\t'");
    }

    @Test
    public void shouldGetCharacterWithOctalEscape() {
        shouldLex("'\\045' <= octal escape", CHARACTER_LITERAL, "'\\045'");
    }

    // ---------------------------------------------------------------- lexer errors

    @Test
    public void unterminatedStringShouldCauseError() {
        exception.expect(LexTestException.class);
        exception.expectMessage("token recognition error at: '\"oops\\n'");
        lex("\"oops\n").nextToken();
    }

    @Test
    public void unterminatedCharacterShouldCauseError() {
        exception.expect(LexTestException.class);
        exception.expectMessage("token recognition error at: ''a\\n'");
        lex("'a\n").nextToken();
    }

    @Test
    public void overlongCharacterShouldCauseError() {
        exception.expect(LexTestException.class);
        exception.expectMessage("token recognition error at: ''ab'");
        lex("'ab\n").nextToken();
    }

    // ---------------------------------------------------------------- remaining tokens

    @Test
    public void shouldGetPipe() {
        shouldLex("| pipe", PIPE, "|");
    }

    @Test
    public void shouldGetNilList() {
        shouldLex("[] <= nil list", ID_TYPE, "[]");
    }

    @Test
    public void shouldGetUnit() {
        shouldLex("() <= unit", UNIT, "()");
    }

    @Test
    public void shouldGetClass() {
        shouldLex("class <= keyword", CLASS, "class");
    }

    @Test
    public void shouldGetInstance() {
        shouldLex("instance <= keyword", INSTANCE, "instance");
    }

    @Test
    public void shouldGetSingleColonAsType() {
        shouldLex(": <= cons list", ID_TYPE, ":");
    }

    @Test
    public void shouldGetData() {
        shouldLex("data <= keyword", DATA, "data");
    }

    @Test
    public void shouldGetTrue() {
        shouldLex("True <= boolean", BOOL_LITERAL, "True");
    }

    @Test
    public void shouldGetFalse() {
        shouldLex("False <= boolean", BOOL_LITERAL, "False");
    }

    @Test
    public void shouldGetDoubleLiteral() {
        shouldLex("1.2 <= double", DOUBLE_LITERAL, "1.2");
    }

    @Test
    public void shouldGetDoubleLiteralWithoutLeadingDigits() {
        shouldLexAt(1, "id .2 <= double", DOUBLE_LITERAL, ".2");
    }

    @Test
    public void shouldGetBigIntLiteral() {
        shouldLex("123B <= bigInt", BIGINT_LITERAL, "123B");
    }

    // ---------------------------------------------------------------- helpers

    /**
     * Creates a lexer over the given lines, joined with {@code "\n"}.
     *
     * <p>The lexer's registered error listeners are removed and replaced by one that throws a
     * {@code LexTestException} carrying the reported message, so error tests can assert on it.
     *
     * @param input source lines to lex
     * @return a lexer positioned at the start of the joined input
     */
    private ScotchLexer lex(String... input) {
        String source = String.join("\n", input);
        ScotchLexer lexer = new ScotchLexer(new ANTLRInputStream(source));
        lexer.removeErrorListeners();
        lexer.addErrorListener(new BaseErrorListener() {
            @Override
            public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, int line, int charPositionInLine, String msg, RecognitionException e) {
                throw new LexTestException(msg);
            }
        });
        return lexer;
    }

    /**
     * Asserts that {@code token} has the expected type and text.
     *
     * @param token the token produced by the lexer
     * @param type  the expected token type constant
     * @param text  the expected token text
     */
    private void shouldHaveToken(Token token, int type, String text) {
        // Compare symbolic names first so a mismatch reports readable token names.
        assertThat(VOCABULARY.getSymbolicName(token.getType()), is(VOCABULARY.getSymbolicName(type)));
        // Also compare the numeric types: equal names alone would not distinguish
        // two different types that both lack a symbolic name.
        assertThat(token.getType(), is(type));
        assertThat(token.getText(), is(text));
    }

    /**
     * Asserts that the first token of {@code text} has the given type and that its text is the
     * whole input.
     */
    private void shouldLex(String text, int type) {
        shouldLex(text, type, text);
    }

    /**
     * Asserts that lexing {@code input} yields the given (type, text) pairs, in order, starting
     * from the first token.
     */
    private void shouldLex(String input, List<Pair<Integer, String>> tokens) {
        ScotchLexer lexer = lex(input);
        tokens.forEach(expected -> expected.into((expectedType, expectedText) -> {
            shouldHaveToken(lexer.nextToken(), expectedType, expectedText);
            return null;
        }));
    }

    /**
     * Asserts that the first token of {@code input} has the given type and text.
     */
    private void shouldLex(String input, int type, String text) {
        shouldLexAt(0, input, type, text);
    }

    /**
     * Asserts on the token found after discarding {@code offset} tokens from the start of
     * {@code input}.
     *
     * @param offset number of leading tokens to skip
     * @param input  source text to lex
     * @param type   expected type of the token at {@code offset}
     * @param text   expected text of the token at {@code offset}
     */
    private void shouldLexAt(int offset, String input, int type, String text) {
        ScotchLexer lexer = lex(input);
        int skipped = 0;
        while (skipped < offset) {
            lexer.nextToken();
            skipped++;
        }
        shouldHaveToken(lexer.nextToken(), type, text);
    }

    /** Thrown by the test error listener so lexer errors fail fast with the lexer's message. */
    private static final class LexTestException extends RuntimeException {
        public LexTestException(String message) {
            super(message);
        }
    }
}