package me.tomassetti.parser.antlr;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.atn.ATNConfigSet;
import org.antlr.v4.runtime.dfa.DFA;
import org.junit.Test;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.List;
import java.util.stream.Collectors;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
public class TurinLexerTest {
private void verifyError(String code) throws IOException {
InputStream stream = new ByteArrayInputStream(code.getBytes(StandardCharsets.UTF_8));
final List<Integer> errors = new ArrayList<>();
CharStream charStream = new ANTLRInputStream(stream);
TurinLexer turinLexer = new TurinLexer(charStream);
turinLexer.removeErrorListeners();
turinLexer.addErrorListener(new ANTLRErrorListener() {
@Override
public void syntaxError(Recognizer<?, ?> recognizer, Object o, int i, int i1, String s, RecognitionException e) {
errors.add(i);
}
@Override
public void reportAmbiguity(Parser parser, DFA dfa, int i, int i1, boolean b, BitSet bitSet, ATNConfigSet atnConfigSet) {
}
@Override
public void reportAttemptingFullContext(Parser parser, DFA dfa, int i, int i1, BitSet bitSet, ATNConfigSet atnConfigSet) {
}
@Override
public void reportContextSensitivity(Parser parser, DFA dfa, int i, int i1, int i2, ATNConfigSet atnConfigSet) {
}
});
List<Token> tokens = new ArrayList<>();
Token token;
while ((token = turinLexer.nextToken()).getType() != -1) {
if (token.getType() == TurinLexer.UNEXPECTED_CHAR) {
errors.add(TurinLexer.UNEXPECTED_CHAR);
}
tokens.add(token);
}
assertTrue(errors.size() > 0);
}
private void verifyModeIsNotInitial(String code) throws IOException {
InputStream stream = new ByteArrayInputStream(code.getBytes(StandardCharsets.UTF_8));
CharStream charStream = new ANTLRInputStream(stream);
TurinLexer turinLexer = new TurinLexer(charStream);
turinLexer.removeErrorListeners();
turinLexer.addErrorListener(new ANTLRErrorListener() {
@Override
public void syntaxError(Recognizer<?, ?> recognizer, Object o, int i, int i1, String s, RecognitionException e) {
throw new RuntimeException("Syntax error found "+ i + " " + i1 + " "+s+" "+e.getMessage());
}
@Override
public void reportAmbiguity(Parser parser, DFA dfa, int i, int i1, boolean b, BitSet bitSet, ATNConfigSet atnConfigSet) {
}
@Override
public void reportAttemptingFullContext(Parser parser, DFA dfa, int i, int i1, BitSet bitSet, ATNConfigSet atnConfigSet) {
}
@Override
public void reportContextSensitivity(Parser parser, DFA dfa, int i, int i1, int i2, ATNConfigSet atnConfigSet) {
}
});
List<Token> tokens = new ArrayList<>();
Token token;
while ((token = turinLexer.nextToken()).getType() != -1) {
tokens.add(token);
}
assertTrue(turinLexer._mode != 0);
}
private List<Token> parseCode(String code) throws IOException {
InputStream stream = new ByteArrayInputStream(code.getBytes(StandardCharsets.UTF_8));
CharStream charStream = new ANTLRInputStream(stream);
TurinLexer turinLexer = new TurinLexer(charStream);
turinLexer.removeErrorListeners();
turinLexer.addErrorListener(new ANTLRErrorListener() {
@Override
public void syntaxError(Recognizer<?, ?> recognizer, Object o, int i, int i1, String s, RecognitionException e) {
throw new RuntimeException("Syntax error found "+ i + " " + i1 + " "+s+" "+e.getMessage());
}
@Override
public void reportAmbiguity(Parser parser, DFA dfa, int i, int i1, boolean b, BitSet bitSet, ATNConfigSet atnConfigSet) {
}
@Override
public void reportAttemptingFullContext(Parser parser, DFA dfa, int i, int i1, BitSet bitSet, ATNConfigSet atnConfigSet) {
}
@Override
public void reportContextSensitivity(Parser parser, DFA dfa, int i, int i1, int i2, ATNConfigSet atnConfigSet) {
}
});
List<Token> tokens = new ArrayList<>();
Token token;
while ((token = turinLexer.nextToken()).getType() != -1) {
if (token.getChannel() == 0) {
tokens.add(token);
}
}
return tokens;
}
private List<Integer> getTokenTypes(String code) throws IOException {
return parseCode(code).stream().map((t)->t.getType()).collect(Collectors.toList());
}
private void verify(String code, Integer... tokenTypes) throws IOException {
assertEquals(Arrays.asList(tokenTypes), getTokenTypes(code));
}
@Test
public void parseLineComment() throws IOException {
String code = " // hi! \n an_id";
verify(code, TurinLexer.NL, TurinLexer.VALUE_ID);
}
@Test
public void parsePlaceholder() throws IOException {
String code = "_";
verify(code, TurinLexer.PLACEHOLDER);
}
@Test
public void parseNamePlaceholder() throws IOException {
String code = "_name";
verify(code, TurinLexer.NAME_PLACEHOLDER);
}
@Test
public void parsePlaceholderInStringInterpolation() throws IOException {
String code = "\"#{_}\"";
verify(code, TurinLexer.STRING_START, TurinLexer.INTERPOLATION_START, TurinLexer.PLACEHOLDER, TurinLexer.INTERPOLATION_END, TurinLexer.STRING_STOP);
}
@Test
public void parseNamePlaceholderInStringInterpolation() throws IOException {
String code = "\"#{_name}\"";
verify(code, TurinLexer.STRING_START, TurinLexer.INTERPOLATION_START, TurinLexer.NAME_PLACEHOLDER, TurinLexer.INTERPOLATION_END, TurinLexer.STRING_STOP);
}
@Test
public void parseBasicKeywords() throws IOException {
String code = "namespace property val has type program";
verify(code, TurinLexer.NAMESPACE_KW, TurinLexer.PROPERTY_KW, TurinLexer.VAL_KW, TurinLexer.HAS_KW, TurinLexer.TYPE_KW, TurinLexer.PROGRAM_KW);
}
@Test
public void parseModifierKeywords() throws IOException {
String code = "abstract shared";
verify(code, TurinLexer.ABSTRACT_KW, TurinLexer.SHARED_KW);
}
@Test
public void parseIDs() throws IOException {
String code = "foo f122___ a___FOO";
verify(code, TurinLexer.VALUE_ID, TurinLexer.VALUE_ID, TurinLexer.VALUE_ID);
}
@Test
public void parseTIDs() throws IOException {
String code = "Foo F122___ A___foo";
verify(code, TurinLexer.TYPE_ID, TurinLexer.TYPE_ID, TurinLexer.TYPE_ID);
}
@Test
public void parseIDsAndTIDsStartingWithUnderscore() throws IOException {
String code = "_a __B";
verify(code, TurinLexer.VALUE_ID, TurinLexer.TYPE_ID);
}
@Test
public void parseSimpleString() throws IOException {
String code = "\"Hello!\"";
verify(code, TurinLexer.STRING_START, TurinLexer.STRING_CONTENT, TurinLexer.STRING_STOP);
}
@Test
public void parseStringWithStringEndEscape() throws IOException {
String code = "\"Hel\\\"lo!\"";
verify(code, TurinLexer.STRING_START, TurinLexer.STRING_CONTENT, TurinLexer.ESCAPE_SEQUENCE,
TurinLexer.STRING_CONTENT, TurinLexer.STRING_STOP);
}
@Test
public void parseStringWithUnclosedInterpolationInTheMiddleWithErrors() throws IOException {
String code = "\"Hel#{lo!\"";
verifyError(code);
}
@Test
public void parseStringWithUnclosedInterpolationAtTheEndWithErrors() throws IOException {
String code = "\"Hello!#{\"";
verifyModeIsNotInitial(code);
}
@Test
public void parseStringWithUnknownEscapeSequenceWithErrors() throws IOException {
String code = "\"\\z\"";
verifyError(code);
}
@Test
public void parseStringWithEmptyInterpolation() throws IOException {
String code = "\"Hel#{}lo!\"";
verify(code, TurinLexer.STRING_START, TurinLexer.STRING_CONTENT, TurinLexer.INTERPOLATION_START, TurinLexer.INTERPOLATION_END, TurinLexer.STRING_CONTENT, TurinLexer.STRING_STOP);
}
@Test
public void parseStringWithInterpolationContainingID() throws IOException {
String code = "\"Hel#{foo}lo!\"";
verify(code, TurinLexer.STRING_START, TurinLexer.STRING_CONTENT, TurinLexer.INTERPOLATION_START,
TurinLexer.VALUE_ID,
TurinLexer.INTERPOLATION_END, TurinLexer.STRING_CONTENT, TurinLexer.STRING_STOP);
}
@Test
public void parseStringWithSharpSymbol() throws IOException {
String code = "\"Hel#lo!\"";
verify(code, TurinLexer.STRING_START, TurinLexer.STRING_CONTENT, TurinLexer.STRING_STOP);
}
@Test
public void parseMethodDefinitionWithExpressionBody() throws IOException {
String code = "void toString() = \"foo\"";
verify(code, TurinLexer.VOID_KW, TurinLexer.VALUE_ID, TurinLexer.LPAREN, TurinLexer.RPAREN, TurinLexer.ASSIGNMENT, TurinLexer.STRING_START, TurinLexer.STRING_CONTENT, TurinLexer.STRING_STOP);
}
@Test
public void parseZero() throws IOException {
String code = "0";
verify(code, TurinLexer.INT);
}
@Test
public void parseMinusZero() throws IOException {
String code = "-0";
verify(code, TurinLexer.INT);
}
@Test
public void parsePositive() throws IOException {
String code = "38";
verify(code, TurinLexer.INT);
}
@Test
public void parseNegative() throws IOException {
String code = "-19";
verify(code, TurinLexer.INT);
}
@Test
public void parseEscapeSequence() throws IOException {
String code = "\"\\n\"";
verify(code, TurinLexer.STRING_START, TurinLexer.ESCAPE_SEQUENCE, TurinLexer.STRING_STOP);
}
@Test
public void parseSeveralEscapeSequences() throws IOException {
String code = "\"class A\\n{\\n}\\n\"";
verify(code, TurinLexer.STRING_START, TurinLexer.STRING_CONTENT,
TurinLexer.ESCAPE_SEQUENCE, TurinLexer.STRING_CONTENT,
TurinLexer.ESCAPE_SEQUENCE, TurinLexer.STRING_CONTENT,
TurinLexer.ESCAPE_SEQUENCE,
TurinLexer.STRING_STOP);
}
}