package org.fenixedu.bennu.core.groups;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.IntStream;
import org.fenixedu.bennu.core.domain.exceptions.BennuCoreDomainException;
import org.fenixedu.bennu.core.domain.exceptions.DomainException;
/**
 * Hand-written recursive-descent parser that turns a textual group expression into a {@link Group}.
 *
 * Every grammar rule is implemented by one private method; the comment above each of those methods shows the grammar
 * fragment it implements.
 */
final class GroupParser {

    /**
     * Builds the {@link Group} described by the given textual expression.
     *
     * @param expression
     *            The textual expression to be parsed
     * @return The group described by the expression
     * @throws DomainException
     *             If the expression is not valid
     * @throws NullPointerException
     *             If the expression is {@code null}.
     */
    public static Group parse(String expression) {
        GroupParser parser = new GroupParser(expression.toCharArray());
        return parser.toGroup();
    }

    /*
     * Internal implementation
     *
     * The methods below replace the constructs of the old ANTLR grammar; each one is annotated with the grammar
     * fragment it stands for.
     *
     * A single cursor (currentPos) into the expression is shared by all methods and marks where the next token is
     * looked for.
     */

    // Characters of the expression being parsed
    private final char[] chars;

    // Index of the next character to be examined
    private int currentPos = 0;

    private GroupParser(char[] chars) {
        this.chars = chars;
    }

    /*
     * parse: expression EOF
     *
     * Anything left over after the expression is reported as an error.
     */
    private Group toGroup() {
        Group parsed = expression();
        if (eof()) {
            return parsed;
        }
        String leftover = new String(chars, currentPos, chars.length - currentPos);
        throw groupParsingException("<EOF>", leftover);
    }

    /*
     * expression: minus
     */
    private Group expression() {
        return minus();
    }

    /*
     * minus: or ('-' or)*
     */
    private Group minus() {
        Group result = or();
        while (consumeIfMatches('-')) {
            Group excluded = or();
            result = result.minus(excluded);
        }
        return result;
    }

    /*
     * or: and ('|' and)*
     */
    private Group or() {
        Group result = and();
        while (consumeIfMatches('|')) {
            Group alternative = and();
            result = result.or(alternative);
        }
        return result;
    }

    /*
     * and: not ('&' not)*
     */
    private Group and() {
        Group result = not();
        while (consumeIfMatches('&')) {
            Group other = not();
            result = result.and(other);
        }
        return result;
    }

    /*
     * not: '!' atom | atom
     */
    private Group not() {
        boolean negated = consumeIfMatches('!');
        Group operand = atom();
        return negated ? operand.not() : operand;
    }

    /*
     * atom: '#' IDENTIFIER | '(' expression ')' | function
     *
     * Three alternatives: a dynamic group reference (prefixed by '#'), a parenthesised expression, or a function.
     */
    private Group atom() {
        if (consumeIfMatches('#')) {
            String dynamicName = identifier();
            return Group.dynamic(dynamicName);
        }
        if (consumeIfMatches('(')) {
            Group nested = expression();
            consume(')');
            return nested;
        }
        return function();
    }

    /*
     * function: IDENTIFIER ('(' argument (',' argument)* ')')?
     *
     * The argument map handed to the registry stays null when no parenthesis follows the function name.
     */
    private Group function() {
        String name = identifier();
        Map<String, List<String>> arguments = null;
        if (consumeIfMatches('(')) {
            arguments = new HashMap<>();
            // At least one argument is required once the parenthesis has been opened
            do {
                argument(arguments);
            } while (consumeIfMatches(','));
            consume(')');
        }
        return CustomGroupRegistry.parse(name, arguments);
    }

    /*
     * argument: (IDENTIFIER '=')? (value | '[' (value (',' value)*)? ']')
     *
     * Arguments without an explicit name are stored under the empty string.
     */
    private void argument(Map<String, List<String>> arguments) {
        final String name;
        if (hasArgumentName()) {
            name = identifier();
            consume('=');
        } else {
            name = "";
        }
        if (consumeIfMatches('[')) {
            argumentList(name, arguments);
            return;
        }
        appendValue(name, arguments);
    }

    /*
     * argumentList: (value (',' value)*)?
     *
     * Expects the opening '[' to have been consumed by the caller.
     */
    private void argumentList(String name, Map<String, List<String>> arguments) {
        // An immediately closed list is empty: nothing is recorded for the name
        if (consumeIfMatches(']')) {
            return;
        }
        do {
            appendValue(name, arguments);
        } while (consumeIfMatches(','));
        consume(']');
    }

    /*
     * Reads one value and appends it to the list stored under the given argument name, creating that list on first use.
     */
    private void appendValue(String name, Map<String, List<String>> arguments) {
        List<String> values = arguments.computeIfAbsent(name, key -> new ArrayList<>());
        values.add(value());
    }

    /*
     * value: IDENTIFIER | STRING
     */
    private String value() {
        return consumeIfMatches('\'') ? string() : identifier();
    }

    /*
     * Terminal Operators
     */

    /*
     * IDENTIFIER: ('a'..'z'|'A'..'Z'|'_'|'0'..'9')+
     *
     * Reads identifier characters starting at the cursor, stopping at the first character that is not one, or at EOF.
     *
     * Throws an exception when not even a single identifier character could be read.
     */
    private String identifier() {
        int start = currentPos;
        int end = endOfIdentifier(start);
        if (end == start) {
            String found = eof() ? "<EOF>" : String.valueOf(chars[start]);
            throw groupParsingException("<IDENTIFIER>", found);
        }
        currentPos = end;
        return new String(chars, start, end - start);
    }

    /*
     * Returns the index just past the run of identifier characters that begins at the given position.
     *
     * Does not move the cursor.
     */
    private int endOfIdentifier(int from) {
        int index = from;
        while (!eof(index) && isIdentifierChar(chars[index])) {
            index++;
        }
        return index;
    }

    /*
     * A string is any sequence of characters terminated by a single quote. The sequence "\'" yields a single quote
     * inside the string; a backslash that is not followed by a single quote is kept as is.
     *
     * Throws an exception if EOF is reached before the closing quote.
     *
     * Expects the opening '\'' to have been consumed by the caller.
     */
    private String string() {
        StringBuilder text = new StringBuilder();
        while (!eof()) {
            char current = chars[currentPos++];
            if (current == '\'') {
                // Closing quote: the string is complete
                return text.toString();
            }
            boolean escapedQuote = current == '\\' && !eof() && chars[currentPos] == '\'';
            if (escapedQuote) {
                text.append('\'');
                currentPos++;
            } else {
                text.append(current);
            }
        }
        throw groupParsingException("'", "<EOF>");
    }

    /*
     * Looks ahead to tell whether an argument name follows, i.e. an identifier, optional whitespace, and then '='.
     *
     * Leading whitespace is consumed; nothing else is.
     */
    private boolean hasArgumentName() {
        consumeWhitespace();
        // Skip over the candidate identifier
        int index = endOfIdentifier(currentPos);
        // Skip over whitespace separating the identifier from the '='
        while (!eof(index) && isWhitespace(chars[index])) {
            index++;
        }
        return !eof(index) && chars[index] == '=';
    }

    /*
     * Helper methods
     */

    /*
     * Tells whether the given string is a non-empty sequence of identifier characters
     */
    static boolean isValidIdentifier(String value) {
        if (value.isEmpty()) {
            return false;
        }
        for (int i = 0; i < value.length(); i++) {
            if (!isIdentifierChar(value.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /*
     * Tells whether the given character may appear in an identifier: an ASCII letter, an ASCII digit or an underscore
     */
    private static boolean isIdentifierChar(char c) {
        boolean lowercase = 'a' <= c && c <= 'z';
        boolean uppercase = 'A' <= c && c <= 'Z';
        boolean digit = '0' <= c && c <= '9';
        return lowercase || uppercase || digit || c == '_';
    }

    /*
     * Tells whether the given character is a space, a tab, a line feed or a carriage return
     */
    private boolean isWhitespace(char c) {
        switch (c) {
        case ' ':
        case '\t':
        case '\n':
        case '\r':
            return true;
        default:
            return false;
        }
    }

    /*
     * Skips leading whitespace and then consumes the next character, but only if it is the given token.
     *
     * Returns whether the token was consumed. Returns false at EOF.
     */
    private boolean consumeIfMatches(char token) {
        consumeWhitespace();
        boolean matched = !eof() && chars[currentPos] == token;
        if (matched) {
            currentPos++;
        }
        return matched;
    }

    /*
     * Moves the cursor past any whitespace it is currently pointing at
     */
    private void consumeWhitespace() {
        while (!eof() && isWhitespace(chars[currentPos])) {
            currentPos++;
        }
    }

    /*
     * Skips leading whitespace and then consumes the given token, throwing an exception if something else (or EOF) is
     * found instead
     */
    private void consume(char token) {
        if (consumeIfMatches(token)) {
            return;
        }
        // Whitespace has already been skipped, so the cursor sits on the offending character (or at EOF)
        String found = eof() ? "<EOF>" : String.valueOf(chars[currentPos]);
        throw groupParsingException(String.valueOf(token), found);
    }

    /*
     * Tells whether the cursor has reached the end of the expression
     */
    private boolean eof() {
        return eof(currentPos);
    }

    /*
     * Tells whether the given position is the end of the expression
     */
    private boolean eof(int pos) {
        return pos == chars.length;
    }

    /*
     * Builds the exception reporting a parsing error: the message shows the whole expression, a caret placed under the
     * current cursor position, and what was expected versus what was found.
     */
    private DomainException groupParsingException(String expected, String given) {
        StringBuilder pointer = new StringBuilder(currentPos + 1);
        IntStream.range(0, currentPos).forEach(column -> pointer.append(' '));
        pointer.append('^');
        String message = "\n" + new String(chars) + "\n" + pointer + "\nExpected: " + expected + ", got " + given;
        return BennuCoreDomainException.groupParsingError(message);
    }
}