package grammar;
import grammar.Expression.And;
import grammar.Expression.Any;
import grammar.Expression.Capture;
import grammar.Expression.CharClass;
import grammar.Expression.Choice;
import grammar.Expression.Not;
import grammar.Expression.Optional;
import grammar.Expression.Plus;
import grammar.Expression.Range;
import grammar.Expression.Reference;
import grammar.Expression.Rule;
import grammar.Expression.Sequence;
import grammar.Expression.Star;
import grammar.Expression.StringLiteral;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import util.StringUtils;
/**
* Performs a pass over the expression tree that resolves reference expressions
* (turning our tree into a graph), computes the textual representations of
* expressions, and compacts the graph so as to avoid duplicate expressions. It
* also sets a reference to the grammar for each expression.
*
* Obtaining a graph without duplicate expressions serves to make memoization
* slightly more efficient.
*
* Despite the fact that the expression graph can contain loops, the textual
* representation of an expression is always bounded due to the use of rule
* names whenever possible.
*
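* The cleaner always returns an expression of the same type it receives,
* except when the expression is of type Reference, in which case it returns
* the referenced rule, and when the expression is a Sequence or Choice with a
* single child, in which case it returns that child.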
*/
public class ExpressionTreeCleaner implements ExpressionVisitor
{
  /*****************************************************************************
   * The grammar from which the expression tree originates. It is used to
   * resolve references and is assigned to every expression that gets cleaned.
   */
  private final Grammar grammar;

  /*****************************************************************************
   * Unique ID to assign to the next rule.
   */
  private int ruleCounter = 0;

  /*****************************************************************************
   * A map from textual representation of expressions to the canonical
   * expression for that representation. This is used in order to remove
   * duplicate expressions in the expression graph.
   */
  private final Map<String, Expression> canonicals = new HashMap<>();

  /*****************************************************************************
   * Creates a cleaner for expressions that belong to the given grammar.
   */
  ExpressionTreeCleaner(Grammar grammar)
  {
    this.grammar = grammar;
  }

  /*****************************************************************************
   * Cleans the given expression and, recursively, all of its children:
   * references are replaced by the rule they designate, rules receive an ID,
   * the grammar and the textual representation of each expression are set, and
   * the result is passed through {@link #standardize(Expression)}.
   *
   * The list returned by expr.children() is updated in place with the cleaned
   * children (NOTE(review): this assumes children() exposes the live, mutable
   * child list -- confirm against Expression).
   *
   * @param expr the expression to clean
   * @return the expression to use in place of expr
   * @throws Error if a rule has no name
   * @throws RuntimeException if a reference designates a rule that the grammar
   *   does not know
   * @see ExpressionTreeCleaner
   */
  public Expression clean(Expression expr)
  {
    if (expr instanceof Reference) {
      expr = resolveRef((Reference) expr);
    }

    if (expr.repr != null)
    {
      /* clean() has already been called on the expression. This can happen if
       * there are common subtrees (e.g. two rules reference the same rule), or
       * with recursive rules. */
      return standardize(expr);
    }

    if (expr instanceof Rule)
    {
      /* The representation of a rule (its name) needs to be set before its
       * children are visited, in order to be able to compute the
       * representation of child expressions that recurse. It is also what
       * makes the "already cleaned" check above stop the recursion. */
      Rule rule = (Rule) expr;
      expr.repr = rule.name;

      if (rule.name == null) {
        throw new Error("Rule with name not set.");
      }

      // Assign a unique ID to each rule.
      rule.id = ruleCounter++;
    }

    expr.grammar = grammar;

    List<Expression> children = expr.children();

    for (int i = 0 ; i < children.size() ; ++i)
    {
      children.set(i, clean(children.get(i)));
    }

    // Compute the textual representation of the expression (no-op for rules).
    expr.accept(this);

    return standardize(expr);
  }

  /*****************************************************************************
   * Resolves the reference specified by the given expression.
   *
   * @param ref the reference to resolve
   * @return what the grammar returns for the referenced rule name
   * @throws RuntimeException if the grammar returns null for that name
   */
  Expression resolveRef(Reference ref)
  {
    String referencedRule = ref.referencedRule;
    Expression referenced = grammar.maybeRule(referencedRule);

    if (referenced == null)
    {
      throw new RuntimeException("Reference to non-existant rule "
        + referencedRule);
    }

    return referenced;
  }

  /*****************************************************************************
   * Standardize the expression: avoids the presence of duplicate non-rule
   * expressions in the tree. The textual representation of expr needs to have
   * been computed beforehand.
   *
   * A sequence or choice with exactly one child is replaced by that child.
   * Otherwise, the first expression seen for a given textual representation
   * becomes the canonical one and is returned for every later expression with
   * the same representation.
   *
   * We don't standardize rules for two reasons. First, this allows macros
   * defined in different files to have the same name. Second, there is not much
   * to gain from it anyway. Rules can never be created from within an
   * expression, and there is already a rule repository in {@link Grammar},
   * which reference resolution uses.
   *
   * @param expr the expression to standardize
   * @return the expression to use in place of expr
   */
  Expression standardize(Expression expr)
  {
    if (expr instanceof Rule) {
      return expr;
    }

    // Eliminate unnecessary nodes.
    boolean container = expr instanceof Sequence || expr instanceof Choice;

    if (container && expr.children().size() == 1) {
      return expr.child();
    }

    Expression canonical = canonicals.get(expr.repr);

    if (canonical == null) {
      canonical = expr;
      canonicals.put(expr.repr, expr);
    }

    return canonical;
  }

  /*****************************************************************************
   * Potentially wraps (with parens) the textual representation of expr, to make
   * it suitable for use in a parent with given precedence. Parens are added
   * only when the precedence of expr is strictly lower than the given one.
   */
  static String wrap(int precedence, Expression expr)
  {
    if (expr.precedence >= precedence) {
      return expr.repr;
    }

    return "(" + expr.repr + ")";
  }

  //----------------------------------------------------------------------------
  // VISITOR
  //----------------------------------------------------------------------------
  /* Each visit method computes the textual representation of the expression
   * assuming the textual representation of its children has already been
   * computed. */
  //----------------------------------------------------------------------------

  /*****************************************************************************
   * Alternatives separated by " | ", or "<empty>" if there are none.
   */
  @Override public void visit(Choice expr)
  {
    StringBuilder str = new StringBuilder();
    boolean first = true;

    for (Expression e : expr.children())
    {
      if (!first) {
        str.append(" | ");
      }

      /* Use the computed representation explicitly, like every other visit
       * method, rather than relying on Expression.toString(). */
      str.append(e.repr);
      first = false;
    }

    if (first) {
      str.append("<empty>");
    }

    expr.repr = str.toString();
  }

  /*****************************************************************************
   * Items each followed by a space, or "<empty>" if there are none.
   */
  @Override public void visit(Sequence expr)
  {
    StringBuilder str = new StringBuilder();

    for (Expression e : expr.children()) {
      str.append(wrap(expr.precedence, e));
      str.append(" ");
    }

    /* The trailing space is important: it differentiates the string for a
     * sequence containing a single element from the string for the element by
     * itself. */

    if (str.length() == 0) {
      str.append("<empty>");
    }

    expr.repr = str.toString();
  }

  /*****************************************************************************
   * "!" followed by the (possibly parenthesized) operand.
   */
  @Override public void visit(Not expr)
  {
    expr.repr = "!" + wrap(expr.precedence, expr.child());
  }

  /*****************************************************************************
   * "&" followed by the (possibly parenthesized) operand.
   */
  @Override public void visit(And expr)
  {
    expr.repr = "&" + wrap(expr.precedence, expr.child());
  }

  /*****************************************************************************
   * The (possibly parenthesized) operand followed by "*".
   */
  @Override public void visit(Star expr)
  {
    expr.repr = wrap(expr.precedence, expr.child()) + "*";
  }

  /*****************************************************************************
   * The (possibly parenthesized) operand followed by "+".
   */
  @Override public void visit(Plus expr)
  {
    expr.repr = wrap(expr.precedence, expr.child()) + "+";
  }

  /*****************************************************************************
   * The (possibly parenthesized) operand followed by "?".
   */
  @Override public void visit(Optional expr)
  {
    expr.repr = wrap(expr.precedence, expr.child()) + "?";
  }

  /*****************************************************************************
   * "[first-last]" with both bounds escaped, prefixed by "^" if negated.
   */
  @Override public void visit(Range expr)
  {
    String open = expr.negated ? "^[" : "[";

    expr.repr = open + StringUtils.escape(expr.first) + "-"
      + StringUtils.escape(expr.last) + "]";
  }

  /*****************************************************************************
   * "[chars]" with the characters escaped, prefixed by "^" if negated.
   */
  @Override public void visit(CharClass expr)
  {
    String open = expr.negated ? "^[" : "[";

    expr.repr = open + StringUtils.escape(expr.chars) + "]";
  }

  /*****************************************************************************
   * The escaped string between double quotes.
   */
  @Override public void visit(StringLiteral expr)
  {
    expr.repr = "\"" + StringUtils.escape(expr.string) + "\"";
  }

  /*****************************************************************************
   * Always "_".
   */
  @Override public void visit(Any expr)
  {
    expr.repr = "_";
  }

  /*****************************************************************************
   * The capture name, a colon, then the (possibly parenthesized) operand.
   */
  @Override public void visit(Capture expr)
  {
    expr.repr = expr.captureName + ":"
      + wrap(expr.precedence, expr.child());
  }

  /*****************************************************************************
   * Nothing to do: the representation of a rule is its name.
   */
  @Override public void visit(Rule expr)
  {
    /* Done in clean(). Not done in the Rule constructor because the rule name
     * can be bound late by the Grammar class. */
  }
}