/*******************************************************************************
* Copyright (c) 2004, 2008 John Krasnay and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* John Krasnay - initial API and implementation
*******************************************************************************/
package net.sf.vex.dom;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
/**
* Tools for building a deterministic finite automaton (DFA)
* recognizer for regular expression-like languages.
*/
public class DFABuilder {
/**
 * A node of the abstract syntax tree from which the DFA is derived.
 * Trees are assembled with the static createXxx factory methods of
 * this class and then handed to createDFA.
 */
public interface Node {

    /**
     * Walks this node with the given visitor. The implementations in
     * this file visit any children first and then call the visitXxx
     * method matching their own type.
     */
    void accept(NodeVisitor visitor);

    /**
     * Returns a copy of this node. The implementations in this file
     * build a new node of the same type from clones of their children.
     */
    Object clone();

    /**
     * Returns the set of symbol nodes that can match the first symbol
     * of a sequence recognized by this node.
     */
    Set getFirstPos();

    /**
     * Returns the set of symbol nodes that can match the last symbol
     * of a sequence recognized by this node.
     */
    Set getLastPos();

    /**
     * Returns true if this node can match the empty sequence.
     */
    boolean isNullable();
}
/**
 * Builds a node that matches either one of two alternatives.
 *
 * @param child1 first alternative
 * @param child2 second alternative
 * @return a new node representing the choice between the two children
 */
public static Node createChoiceNode(Node child1, Node child2) {
    Node choice = new OrNode(child1, child2);
    return choice;
}
/**
 * Create a DFA given the root node of the syntax tree.
 *
 * A sentinel symbol node is appended to the tree first. Each DFA state
 * corresponds to a set of symbol nodes, starting with the firstPos set
 * of the extended tree; a state is accepting when its set contains the
 * sentinel node. For every symbol, the target of a transition is the
 * union of the followPos sets of those nodes in the current set that
 * carry the symbol. Symbols with an empty target set get no transition.
 *
 * @param root Root node of the syntax tree.
 * @return Initial state of the resulting DFA.
 */
public static DFAState createDFA(Node root) {

    // Append a sentinel to indicate accepting states.
    SymbolNode sentinelNode = new SymbolNode(Sentinel.getInstance());
    Node fakeRoot = new CatNode(root, sentinelNode);

    // calculate followPos and symbolMap
    FollowPosBuilder fpb = new FollowPosBuilder();
    fakeRoot.accept(fpb);

    // map symbol node => set of symbol nodes that follow it
    Map followPos = fpb.getFollowPos();

    // map symbol => set of symbol nodes that represent it
    Map symbolMap = fpb.getSymbolMap();

    // map symbol node set => state in the new DFA. A set is a key here
    // exactly when it has been queued at some point, so this map also
    // answers "have we seen this set before?" with a single hash lookup
    // (no linear scan of the work stack, no separate "marked" set).
    Map stateMap = new HashMap();

    // stack of symbol node sets whose outgoing transitions are still
    // to be computed
    Stack unmarked = new Stack();

    Set startSet = fakeRoot.getFirstPos();
    DFAState startState = new DFAState();
    if (startSet.contains(sentinelNode)) {
        startState.setAccepting(true);
    }
    stateMap.put(startSet, startState);
    unmarked.push(startSet);

    while (!unmarked.isEmpty()) {

        Set nodeSet = (Set) unmarked.pop();

        // Never null: every set is registered in stateMap at the
        // moment it is pushed.
        DFAState state = (DFAState) stateMap.get(nodeSet);

        Iterator iterSymbols = symbolMap.keySet().iterator();
        while (iterSymbols.hasNext()) {
            Object symbol = iterSymbols.next();

            // Union of followPos over the nodes in this state that
            // represent the current symbol.
            Set targetSet = new HashSet();
            Iterator iterNodes = nodeSet.iterator();
            while (iterNodes.hasNext()) {
                SymbolNode node = (SymbolNode) iterNodes.next();
                if (node.getSymbol().equals(symbol)) {
                    targetSet.addAll((Set) followPos.get(node));
                }
            }

            if (targetSet.isEmpty()) {
                // no transition on this symbol from this state
                continue;
            }

            DFAState targetState = (DFAState) stateMap.get(targetSet);
            if (targetState == null) {
                // First time we reach this set: create its state and
                // queue the set for processing.
                targetState = new DFAState();
                if (targetSet.contains(sentinelNode)) {
                    targetState.setAccepting(true);
                }
                stateMap.put(targetSet, targetState);
                unmarked.push(targetSet);
            }

            state.addTransition(symbol, targetState);
        }
    }

    return startState;
}
/**
 * Builds a node that matches either the given child or nothing at all.
 * It is expressed as a choice between the child and a null node.
 *
 * @param child Node that is optional.
 * @return a new node representing the optional child
 */
public static Node createOptionalNode(Node child) {
    Node nothing = new NullNode();
    return new OrNode(child, nothing);
}
/**
 * Builds a node that matches the given child repeated at least
 * minRepeat times. The result is a star node over the child itself,
 * followed in sequence by minRepeat clones of the child. A minRepeat
 * of zero or less yields just the star node.
 *
 * @param child Node that can be repeated.
 * @param minRepeat minimum number of times the node can be repeated.
 * @return a new node representing the repetition
 */
public static Node createRepeatingNode(Node child, int minRepeat) {
    Node result = new StarNode(child);
    int remaining = minRepeat;
    while (remaining > 0) {
        // each mandatory occurrence needs its own copy of the child
        Node copy = (Node) child.clone();
        result = new CatNode(result, copy);
        remaining--;
    }
    return result;
}
/**
 * Builds a node that matches one node immediately followed by another.
 *
 * @param child1 first node in the sequence.
 * @param child2 second node in the sequence.
 * @return a new node representing child1 followed by child2
 */
public static Node createSequenceNode(Node child1, Node child2) {
    Node sequence = new CatNode(child1, child2);
    return sequence;
}
/**
 * Builds a leaf node that matches a single symbol.
 *
 * @param symbol Symbol contained by the node.
 * @return a new symbol node wrapping the given symbol
 */
public static Node createSymbolNode(Object symbol) {
    Node leaf = new SymbolNode(symbol);
    return leaf;
}
//============================================================ PRIVATE
/**
 * Base class for the node implementations. It stores firstPos, lastPos
 * and nullable in fields that the subclass constructors fill in, and
 * serves them through the Node getters. The accept and clone methods
 * are left to the subclasses.
 */
private abstract static class AbstractNode implements Node {

    /** Symbol nodes that can match the first symbol of this node. */
    protected Set firstPos;

    /** Symbol nodes that can match the last symbol of this node. */
    protected Set lastPos;

    /** Whether this node can match the empty sequence. */
    protected boolean nullable;

    public abstract Object clone();

    public Set getFirstPos() {
        return firstPos;
    }

    public Set getLastPos() {
        return lastPos;
    }

    public boolean isNullable() {
        return nullable;
    }

    /**
     * Returns a new set holding every element of both arguments.
     * Neither argument is modified.
     */
    protected Set union(Set set1, Set set2) {
        Set combined = new HashSet();
        combined.addAll(set1);
        combined.addAll(set2);
        return combined;
    }
}
/**
 * Node matching its left child immediately followed by its right child.
 */
private static class CatNode extends AbstractNode {

    private Node leftChild;
    private Node rightChild;

    /**
     * Creates the sequence node and derives its firstPos, lastPos and
     * nullable values from those of the two children.
     */
    public CatNode(Node leftChild, Node rightChild) {
        this.leftChild = leftChild;
        this.rightChild = rightChild;

        boolean leftNullable = leftChild.isNullable();
        boolean rightNullable = rightChild.isNullable();

        // If the left side may be empty, the sequence can also start
        // with whatever starts the right side.
        this.firstPos = leftNullable
                ? union(leftChild.getFirstPos(), rightChild.getFirstPos())
                : leftChild.getFirstPos();

        // Likewise, if the right side may be empty, the sequence can
        // also end with whatever ends the left side.
        this.lastPos = rightNullable
                ? union(leftChild.getLastPos(), rightChild.getLastPos())
                : rightChild.getLastPos();

        // The sequence is empty only if both halves can be empty.
        this.nullable = leftNullable && rightNullable;
    }

    /** Visits the left child, then the right child, then this node. */
    public void accept(NodeVisitor visitor) {
        leftChild.accept(visitor);
        rightChild.accept(visitor);
        visitor.visitCatNode(this);
    }

    /** Returns a new CatNode built from clones of both children. */
    public Object clone() {
        Node leftCopy = (Node) leftChild.clone();
        Node rightCopy = (Node) rightChild.clone();
        return new CatNode(leftCopy, rightCopy);
    }

    public Node getLeftChild() {
        return leftChild;
    }

    public Node getRightChild() {
        return rightChild;
    }
}
/**
 * Visitor that computes the followPos function for a syntax tree. The
 * function is kept as a map from each symbol node to the set of symbol
 * nodes that can follow it. While walking the tree the visitor also
 * collects a map from each symbol to the set of symbol nodes that
 * represent it.
 */
private static class FollowPosBuilder implements NodeVisitor {

    /** symbol node => set of symbol nodes that can follow it */
    private Map followPos = new HashMap();

    /** symbol => set of symbol nodes carrying that symbol */
    private Map symbolMap = new HashMap();

    public Map getFollowPos() {
        return followPos;
    }

    public Map getSymbolMap() {
        return symbolMap;
    }

    /**
     * In a sequence, whatever starts the right child can follow
     * whatever ends the left child.
     */
    public void visitCatNode(CatNode node) {
        addFollowers(node.getLeftChild().getLastPos(),
                node.getRightChild().getFirstPos());
    }

    public void visitNullNode(NullNode node) {
        // contributes nothing to followPos
    }

    public void visitOrNode(OrNode node) {
        // contributes nothing to followPos
    }

    /**
     * In a repetition, whatever starts the child can follow whatever
     * ends the child.
     */
    public void visitStarNode(StarNode node) {
        Node repeated = node.getChild();
        addFollowers(repeated.getLastPos(), repeated.getFirstPos());
    }

    /**
     * Makes sure the node has a followPos entry (possibly empty) and
     * records it under its symbol in the symbol map.
     */
    public void visitSymbolNode(SymbolNode node) {
        // creates the followPos entry as a side effect if missing
        getFollowPos(node);

        Object symbol = node.getSymbol();
        Set nodesForSymbol = (Set) symbolMap.get(symbol);
        if (nodesForSymbol == null) {
            nodesForSymbol = new HashSet();
            symbolMap.put(symbol, nodesForSymbol);
        }
        nodesForSymbol.add(node);
    }

    /**
     * Adds all of followers to the followPos set of every symbol node
     * in predecessors.
     */
    private void addFollowers(Set predecessors, Set followers) {
        for (Iterator it = predecessors.iterator(); it.hasNext();) {
            SymbolNode predecessor = (SymbolNode) it.next();
            getFollowPos(predecessor).addAll(followers);
        }
    }

    /**
     * Returns the followPos set of the given node, creating and
     * registering an empty one if the node has none yet.
     */
    private Set getFollowPos(SymbolNode node) {
        Set followers = (Set) followPos.get(node);
        if (followers == null) {
            followers = new HashSet();
            followPos.put(node, followers);
        }
        return followers;
    }
}
/**
 * Callback interface for walking an AST. It declares one visit method
 * for each concrete node class; a node's accept method calls the one
 * matching its own type.
 */
private interface NodeVisitor {

    /** Called for a sequence node. */
    void visitCatNode(CatNode node);

    /** Called for a null (empty) node. */
    void visitNullNode(NullNode node);

    /** Called for a choice node. */
    void visitOrNode(OrNode node);

    /** Called for a repetition node. */
    void visitStarNode(StarNode node);

    /** Called for a symbol (leaf) node. */
    void visitSymbolNode(SymbolNode node);
}
/**
 * Node that matches nothing but the empty sequence. Combined with an
 * OrNode it expresses an optional entry.
 */
private static class NullNode extends AbstractNode {

    /**
     * Creates a nullable node whose firstPos and lastPos are both the
     * shared empty set.
     */
    public NullNode() {
        this.nullable = true;
        this.firstPos = Collections.EMPTY_SET;
        this.lastPos = Collections.EMPTY_SET;
    }

    public void accept(NodeVisitor visitor) {
        visitor.visitNullNode(this);
    }

    /** Returns a fresh NullNode. */
    public Object clone() {
        return new NullNode();
    }
}
/**
 * Node matching either its left child or its right child.
 */
private static class OrNode extends AbstractNode {

    private Node leftChild;
    private Node rightChild;

    /**
     * Creates the choice node. Its firstPos and lastPos are the unions
     * of the children's sets, and it is nullable if either child is.
     */
    public OrNode(Node leftChild, Node rightChild) {
        this.leftChild = leftChild;
        this.rightChild = rightChild;

        Set leftFirst = leftChild.getFirstPos();
        Set rightFirst = rightChild.getFirstPos();
        this.firstPos = union(leftFirst, rightFirst);

        Set leftLast = leftChild.getLastPos();
        Set rightLast = rightChild.getLastPos();
        this.lastPos = union(leftLast, rightLast);

        this.nullable = leftChild.isNullable() || rightChild.isNullable();
    }

    /** Visits the left child, then the right child, then this node. */
    public void accept(NodeVisitor visitor) {
        leftChild.accept(visitor);
        rightChild.accept(visitor);
        visitor.visitOrNode(this);
    }

    /** Returns a new OrNode built from clones of both children. */
    public Object clone() {
        Node leftCopy = (Node) leftChild.clone();
        Node rightCopy = (Node) rightChild.clone();
        return new OrNode(leftCopy, rightCopy);
    }

    public Node getLeftChild() {
        return leftChild;
    }

    public Node getRightChild() {
        return rightChild;
    }
}
/**
 * Singleton symbol that createDFA appends to the AST so that accepting
 * states can be recognized.
 */
private static class Sentinel {

    /** The one and only instance. */
    private static final Sentinel INSTANCE = new Sentinel();

    private Sentinel() {
        // not instantiable from outside; use getInstance()
    }

    /** Returns the shared sentinel instance. */
    public static Sentinel getInstance() {
        return INSTANCE;
    }

    public String toString() {
        return "#";
    }
}
/**
 * Node matching zero or more repetitions of its child.
 */
private static class StarNode extends AbstractNode {

    private Node child;

    /**
     * Creates the repetition node. It reuses the child's firstPos and
     * lastPos sets as its own and is always nullable.
     */
    public StarNode(Node child) {
        this.child = child;
        this.nullable = true;
        this.firstPos = child.getFirstPos();
        this.lastPos = child.getLastPos();
    }

    /** Visits the child, then this node. */
    public void accept(NodeVisitor visitor) {
        child.accept(visitor);
        visitor.visitStarNode(this);
    }

    /** Returns a new StarNode built from a clone of the child. */
    public Object clone() {
        Node childCopy = (Node) child.clone();
        return new StarNode(childCopy);
    }

    public Node getChild() {
        return child;
    }
}
/**
 * Leaf node matching exactly one symbol. Equality is not overridden,
 * so two nodes carrying the same symbol remain distinct elements of
 * the position sets.
 */
private static class SymbolNode extends AbstractNode {

    /** Number handed to the next SymbolNode that gets created. */
    private static int nextPos = 1;

    /** Creation-order number of this node. */
    private int myPos;

    private Object symbol;

    /**
     * Creates a non-nullable leaf whose firstPos and lastPos each
     * contain only the node itself.
     */
    public SymbolNode(Object symbol) {
        this.symbol = symbol;
        this.nullable = false;
        this.firstPos = Collections.singleton(this);
        this.lastPos = Collections.singleton(this);
        this.myPos = nextPos++;
    }

    public void accept(NodeVisitor visitor) {
        visitor.visitSymbolNode(this);
    }

    /** Returns a new SymbolNode for the same symbol. */
    public Object clone() {
        return new SymbolNode(symbol);
    }

    public int getMyPos() {
        return myPos;
    }

    public Object getSymbol() {
        return symbol;
    }
}
/*
private static String snSetToString(Set set) {
StringBuffer sb = new StringBuffer();
sb.append("{ ");
Iterator i2 = set.iterator();
while (i2.hasNext()) {
SymbolNode sn2 = (SymbolNode) i2.next();
sb.append(sn2.getMyPos());
sb.append(" ");
}
sb.append("}");
return sb.toString();
}
*/
}