package org.kohsuke.bali.automaton.builder;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import java.util.Stack;
import org.kohsuke.bali.automaton.*;
import org.kohsuke.bali.datatype.DatatypeImpl;
import org.relaxng.datatype.Datatype;
import com.sun.msv.datatype.xsd.StringType;
import com.sun.msv.datatype.xsd.TokenType;
import com.sun.msv.grammar.*;
import com.sun.msv.grammar.util.NameClassCollisionChecker;
import com.sun.msv.grammar.util.NameClassSimplifier;
/**
* Builds a BinaryTreeAutomaton object from a Grammar object.
*
* @author Kohsuke Kawaguchi (kk@kohsuke.org)
*/
public final class TreeAutomatonBuilder {
/**
 * Builds a binary tree automaton from a grammar.
 *
 * This is the only public entry point: it creates a one-shot builder
 * configured with the given options and runs it.
 *
 * @param grammar                    the grammar to compile
 * @param optimizeIgnorableAttribute optimizes ignorable attributes if true
 * @param useEpsilonTransition       reduces the number of transitions by
 *                                   sharing transitions across states if true
 * @param optimizeMixed              optimizes &lt;mixed&gt; if true
 * @return the automaton produced by the builder
 * @throws TooComplicatedException propagated from the builder
 */
public static TreeAutomaton build(
        Grammar grammar,
        boolean optimizeIgnorableAttribute,
        boolean useEpsilonTransition,
        boolean optimizeMixed) throws TooComplicatedException {
    TreeAutomatonBuilder builder = new TreeAutomatonBuilder(
            grammar,
            optimizeIgnorableAttribute,
            useEpsilonTransition,
            optimizeMixed);
    return builder.doBuild();
}
/**
 * Runs the construction: seeds the work list with the top-level pattern and
 * then keeps building transitions until no unprocessed expression is left.
 *
 * @return the automaton that was populated along the way
 */
private TreeAutomaton doBuild() throws TooComplicatedException {
    // Requesting the state of the (expanded) top-level pattern registers it
    // and puts it on the work list as the first job.
    getState(grammar.getTopLevel().getExpandedExp(pool));

    while (!queue.isEmpty()) {
        Expression pending = (Expression) queue.pop();
        // getState() registers the state at the same time it enqueues the
        // expression, so the lookup refers to an already-created state.
        State source = (State) states.get(pending);

        // Visiting the expression adds all transitions that leave "source";
        // this may create further states and thereby enqueue more work.
        pending.visit(new TransitionBuilder(source));
    }

    return result;
}
/**
 * Stores the options and prepares the helpers shared by the whole build.
 *
 * The assignment order matters: the result automaton and the "*:*"
 * signature both need the name class encoder, so it is created first.
 *
 * @throws TooComplicatedException declared by this constructor; presumably
 *         raised while encoding the name classes of the grammar (TODO confirm)
 */
private TreeAutomatonBuilder(
        Grammar grammar,
        boolean optimizeIgnorableAttribute,
        boolean useEpsilonTransition,
        boolean optimizeMixed) throws TooComplicatedException {

    // plain configuration
    this.grammar = grammar;
    this.optimizeIgnorableAttribute = optimizeIgnorableAttribute;
    this.useEpsilonTransition = useEpsilonTransition;
    this.optimizeMixed = optimizeMixed;

    // helpers derived from the grammar
    this.pool = grammar.getPool();
    this.nameClassEncoder = NameClassEncoder.build(grammar);
    this.result = new TreeAutomaton(this.nameClassEncoder.literals);
    this.nullChecker = new NullabilityChecker(optimizeIgnorableAttribute);
    this.allNegative = new NameSignature(
            AnyNameClass.theInstance, 0, 0, this.nameClassEncoder);
}
/** The grammar object from which we are building a binary tree automaton. */
private final Grammar grammar;
/**
 * Expression pool obtained from the grammar in the constructor.
 * All expressions derived during the build (sequences, optionals,
 * interleaves, ...) are created through this pool.
 */
private final ExpressionPool pool;
/**
 * Optimizes ignorable attribute if true.
 * The flag is handed to the NullabilityChecker and consulted when
 * a choice is visited.
 */
private final boolean optimizeIgnorableAttribute;
/**
 * Reduce the number of transitions by sharing transitions across states.
 * When set, a sequence whose first child is nullable gets a state that
 * carries a reference to the state of the rest of the sequence.
 */
private final boolean useEpsilonTransition;
/** Optimize <mixed> instead of rewriting it into an interleave with text. */
private final boolean optimizeMixed;
/** The result automaton. All states are created through it. */
private final TreeAutomaton result;
/**
 * Map from Expression to State.
 * Filled by getState; an expression has at most one state.
 */
private final HashMap states = new HashMap();
/** NameClass Encoder, built from the grammar; supplies the name signatures. */
private final NameClassEncoder nameClassEncoder;
/**
 * Map from a State to a State.
 * Used for non-existent attribute transition from the head of a content model.
 *
 * The key is the state of an element's content model; the value is an extra
 * state created in getContentModelHeadState whose single transition (a
 * NonExistentAttributeAlphabet) leads to the key state.
 */
private final HashMap headStates = new HashMap();
/**
 * Visitor that decides whether an expression is nullable. Its result is
 * recorded on every state that is created and is used by isSplittable.
 */
private final NullabilityChecker nullChecker;
/**
 * Tells whether the state for the given expression may use an epsilon
 * transition, i.e. whether the expression is a sequence whose first
 * child is nullable.
 *
 * Always false when the epsilon-transition optimization is switched off.
 */
private boolean isSplittable( Expression e ) {
    if (!useEpsilonTransition) {
        // the optimization is disabled altogether
        return false;
    }
    if (!(e instanceof SequenceExp)) {
        return false;
    }
    Expression head = ((SequenceExp) e).getChildren()[0];
    return head.visit(nullChecker);
}
/**
 * Rebuilds the given sequence without its first child.
 *
 * The remaining children are chained together, in order, starting
 * from epsilon.
 */
private Expression getSequenceTail( SequenceExp e ) {
    Expression[] children = e.getChildren();
    Expression rest = Expression.epsilon;

    int index = 1;  // child 0 is the head and is deliberately skipped
    while (index < children.length) {
        rest = pool.createSequence(rest, children[index]);
        index++;
    }
    return rest;
}
/**
 * Returns the State registered for the given expression, creating and
 * registering it first if necessary.
 *
 * A newly created state is also pushed on the work list so that its
 * outgoing transitions get built later. For a splittable sequence the
 * state of the sequence tail is obtained (recursively) and handed to the
 * new state; otherwise that slot is null.
 *
 * @throws NullPointerException if the expression is null
 */
private State getState( Expression e ) {
    if (e == null) {
        throw new NullPointerException();
    }

    State known = (State) states.get(e);
    if (known != null) {
        return known;
    }

    // Keep this order: splittability, nullability, then the recursive
    // lookup of the tail state, which may itself create states.
    boolean splittable = isSplittable(e);
    boolean nullable = e.visit(nullChecker);
    State next = null;
    if (splittable) {
        next = getState(getSequenceTail((SequenceExp) e));
    }

    State created = result.createState(e, nullable, next);
    states.put(e, created);
    queue.push(e);
    return created;
}
/**
 * Checks whether the state currently registered for the given expression
 * is the very same object as the given state. An expression that has no
 * state yet only matches a null state.
 */
private boolean isEqual( State s, Expression e ) {
    Object registered = states.get(e);
    return registered == s;
}
/**
 * Special NameSignature that stands for "*:*".
 * It is a single signature (not an array) built over AnyNameClass in the
 * constructor, and it is the initial negative set when the head state of a
 * content model is computed.
 */
private final NameSignature allNegative;
/**
 * Used by refine to test whether a positive name class overlaps with the
 * combined negative name classes.
 */
private final NameClassCollisionChecker collisionChecker = new NameClassCollisionChecker();
/**
 * Combines all the name classes in the given collection.
 *
 * @param col a collection of NameSignature objects
 * @return the simplified choice of all their name classes; for an empty
 *         collection this is the simplified form of not(anyName)
 */
private NameClass combine( Collection col ) {
    // not(anyName) serves as the starting point of the union
    NameClass union = new NotNameClass(AnyNameClass.theInstance);

    Iterator signatures = col.iterator();
    while (signatures.hasNext()) {
        NameSignature signature = (NameSignature) signatures.next();
        union = new ChoiceNameClass(union, signature.nameClass);
    }

    return NameClassSimplifier.simplify(union);
}
/**
 * Removes, in place, the signatures that do not contribute to the outcome.
 *
 * Both sets hold NameSignature objects and are modified by this method:
 * first the negatives are pruned against the positives, then the positives
 * are pruned against the negatives that survived.
 */
private void refine( Set negatives, Set positives ) {
    // --- pass 1: prune the negatives -------------------------------------
    // union of all positive names
    NameClass positiveUnion = combine(positives);

    ArrayList coveredNegatives = new ArrayList();
    Iterator negativeItr = negatives.iterator();
    while (negativeItr.hasNext()) {
        NameSignature candidate = (NameSignature) negativeItr.next();
        DifferenceNameClass leftover =
                new DifferenceNameClass(candidate.nameClass, positiveUnion);
        // nothing is left once the positives are taken away: the negative is
        // entirely included in the positives and therefore has no effect.
        if (leftover.isNull()) {
            coveredNegatives.add(candidate);
        }
    }
    negatives.removeAll(coveredNegatives);

    // --- pass 2: prune the positives -------------------------------------
    // union of all the negative names that remain
    NameClass negativeUnion = combine(negatives);

    ArrayList disjointPositives = new ArrayList();
    Iterator positiveItr = positives.iterator();
    while (positiveItr.hasNext()) {
        NameSignature candidate = (NameSignature) positiveItr.next();
        // a positive that shares no name with the negatives has no effect.
        boolean overlaps = collisionChecker.check(candidate.nameClass, negativeUnion);
        if (!overlaps) {
            disjointPositives.add(candidate);
        }
    }
    positives.removeAll(disjointPositives);
}
/**
 * Computes the initial state for the content model of a given ElementExp.
 *
 * If, after refinement against the attributes declared in the content
 * model, some negative name signature remains, an extra head state is put
 * in front of the content model. Its only transition is a non-existent
 * attribute transition that leads to the real top state. Head states are
 * cached per top state.
 *
 * @return the top state of the content model, or the head state in front of it
 */
private State getContentModelHeadState( ElementExp exp ) {
    State contentModelTop = getState(exp.contentModel.getExpandedExp(pool));

    // start from "*:*" and remove what the declared attributes cover
    Set negatives = new HashSet();
    negatives.add(allNegative);
    Set positives = AttNameSigCollector.collect(exp.contentModel, nameClassEncoder);
    refine(negatives, positives);

    if (negatives.isEmpty()) {
        // no extra transition necessary
        return contentModelTop;
    }

    State head = (State) headStates.get(contentModelTop);
    if (head != null) {
        return head;
    }

    // This state is created directly and is neither registered in "states"
    // nor enqueued: its single transition is added right here.
    head = result.createState(exp.contentModel.getExpandedExp(pool), false, null);
    headStates.put(contentModelTop, head);
    head.addTransition(
            new NonExistentAttributeAlphabet(negatives, positives),
            null,
            contentModelTop);
    return head;
}
/**
 * List of unprocessed expressions.
 * getState pushes an expression when it creates the state for it; doBuild
 * pops entries (last in, first out) until the stack is empty.
 */
private final Stack queue = new Stack();
/**
 * Alphabet shared by all text transitions (used when visiting <text/> and
 * the optimized <mixed>). Wraps a DatatypeImpl named "string" that is
 * backed by StringType.theInstance.
 */
private static final DataAlphabet anyStringAlphabet =
new DataAlphabet(new DatatypeImpl("","string",StringType.theInstance));
/**
 * Visits an expression (a state) and builds all transitions from it
 * by computing derivatives.
 *
 * One instance serves exactly one source state. While descending into an
 * expression the visitor keeps, in "tail", what still has to be matched
 * after the part currently being visited.
 */
private final class TransitionBuilder implements ExpressionVisitorVoid {

    /** The state whose outgoing transitions are being added. */
    private final State state;

    /** What remains to be matched after the expression being visited. */
    private Expression tail = Expression.epsilon;

    /** True while we are inside an optimized <mixed>. */
    private boolean inMixed;

    TransitionBuilder( State st ) {
        this.state = st;
    }

    /**
     * Same as the outer getState, except that inside an optimized <mixed>
     * the expression is wrapped into <mixed> first.
     */
    private State getState( Expression e ) {
        Expression key = e;
        if (inMixed) {
            key = pool.createMixed(key);
        }
        return TreeAutomatonBuilder.this.getState(key);
    }

    public void onAttribute(AttributeExp exp) {
        state.addTransition(
                new AttributeAlphabet(
                        nameClassEncoder.getSignature(exp.nameClass), false),
                getState(exp.exp.getExpandedExp(pool)),
                getState(tail));
    }

    public void onChoice(ChoiceExp exp) {
        final Expression savedTail = tail;

        // With the optimization switched on, a pattern of the form
        //   <choice>
        //     <empty/>
        //     <attribute>
        //       <text/>
        //     </attribute>
        //   </choice>
        // does not produce any transition.
        if (optimizeIgnorableAttribute && Util.isIgnorableOptionalAttribute(exp)) {
            return;
        }

        // Attributes declared in each branch. refine() modifies its
        // arguments, hence the copies for the mirrored second pass.
        Set firstAtts = AttNameSigCollector.collect(exp.exp1, nameClassEncoder);
        Set secondAtts = AttNameSigCollector.collect(exp.exp2, nameClassEncoder);
        Set firstAttsCopy = new HashSet(firstAtts);
        Set secondAttsCopy = new HashSet(secondAtts);

        // second branch, guarded by the absence of attributes of the first
        refine(firstAtts, secondAtts);
        if (!firstAtts.isEmpty()) {
            state.addTransition(
                    new NonExistentAttributeAlphabet(firstAtts, secondAtts),
                    null,
                    getState(pool.createSequence(exp.exp2, savedTail)));
        } else {
            tail = savedTail;
            exp.exp2.visit(this);
        }

        // first branch, guarded by the absence of attributes of the second
        refine(secondAttsCopy, firstAttsCopy);
        if (!secondAttsCopy.isEmpty()) {
            state.addTransition(
                    new NonExistentAttributeAlphabet(secondAttsCopy, firstAttsCopy),
                    null,
                    getState(pool.createSequence(exp.exp1, savedTail)));
        } else {
            // the visit above may have changed the tail; restore it first
            tail = savedTail;
            exp.exp1.visit(this);
        }
    }

    public void onElement(ElementExp exp) {
        state.addTransition(
                new ElementAlphabet(
                        nameClassEncoder.getSignature(exp.getNameClass())),
                getContentModelHeadState(exp),
                getState(tail));
    }

    public void onOneOrMore(OneOrMoreExp exp) {
        // look for an attribute declaration in the repeated pattern
        AttributeExp attDecl = findAttDecl(exp.exp);

        if (attDecl == null) {
            // after one occurrence, further occurrences are optional
            tail = pool.createSequence(pool.createOptional(exp), tail);
            exp.exp.visit(this);
            return;
        }

        // this is equivalent to deriv(exp,@attDecl);
        Expression optionalRepeat = pool.createOptional(exp);
        NameSignature signature =
                (NameSignature) nameClassEncoder.getSignature(attDecl.nameClass);

        // what is left of each pattern once the attribute can no longer occur
        Expression afterAttribute = pool.createSequence(
                replace(optionalRepeat, attDecl, Expression.nullSet), tail);
        Expression withoutAttribute = pool.createSequence(
                replace(exp, attDecl, Expression.nullSet), tail);

        if (afterAttribute != Expression.nullSet) {
            state.addTransition(
                    new AttributeAlphabet(signature, true),
                    getState(attDecl.exp),
                    getState(afterAttribute));
        }
        if (withoutAttribute != Expression.nullSet) {
            state.addTransition(
                    new NonExistentAttributeAlphabet(
                            new NameSignature[]{signature}, new NameSignature[0]),
                    null,
                    getState(withoutAttribute));
        }
    }

    public void onMixed(MixedExp exp) {
        if (!optimizeMixed) {
            // normal mode: treat <mixed> as an interleave with text
            pool.createInterleave(exp.exp, Expression.anyString).visit(this);
            return;
        }

        // optimized mode: eagerly extend <interleave>
        final boolean wasInMixed = inMixed;
        inMixed = true;

        // text loops back to this very state
        state.addTransition(
                anyStringAlphabet,
                getState(Expression.nullSet),
                state);

        exp.exp.visit(this);
        inMixed = wasInMixed;
    }

    public void onList(ListExp exp) {
        state.addTransition(
                ListAlphabet.theInstance,
                getState(exp.exp),
                getState(tail));
    }

    public void onRef(ReferenceExp exp) {
        // references are transparent
        exp.exp.visit(this);
    }

    public void onOther(OtherExp exp) {
        // so are "other" wrappers
        exp.exp.visit(this);
    }

    public void onEpsilon() {
        // nothing to add for epsilon
    }

    public void onNullSet() {
        // no transition from nullSet
    }

    public void onAnyString() {
        Expression continuation;
        if (isEqual(state, tail)) {
            // optimization.
            // if d(P) = <text/>,P then
            // d(<text/>,P) \supseteq d(P)
            continuation = tail;
        } else {
            // normal case -- derivative of <text/> is still <text/>
            continuation = pool.createSequence(Expression.anyString, tail);
        }
        State target = getState(continuation);

        state.addTransition(
                anyStringAlphabet,
                getState(Expression.nullSet),
                target);
    }

    public void onSequence(SequenceExp exp) {
        final Expression savedTail = tail;
        final Expression rest = getSequenceTail(exp);

        // Does this state already point at the state of the sequence tail?
        boolean restIsNextState = useEpsilonTransition
                && state.nextState != null
                && states.get(rest) == state.nextState;

        if (restIsNextState) {
            // don't visit the tail of the sequence.
            tail = pool.createSequence(rest, savedTail);
            exp.getChildren()[0].visit(this);
            return;
        }

        // A nullable first part can be skipped, so the second part
        // contributes transitions as well.
        if (exp.exp1.visit(nullChecker)) {
            exp.exp2.visit(this);
        }
        tail = pool.createSequence(exp.exp2, savedTail);
        exp.exp1.visit(this);
    }

    public void onData(DataExp exp) {
        Expression except = (exp.except != null) ? exp.except : Expression.nullSet;
        state.addTransition(
                new DataAlphabet(exp.dt),
                getState(except),
                getState(tail));
    }

    public void onValue(ValueExp exp) {
        // MSV uses TokenType if <value> doesn't have @type.
        // replace it with our DatatypeImpl.
        Datatype datatype = exp.dt;
        if (datatype instanceof TokenType) {
            datatype = new DatatypeImpl("","token",TokenType.theInstance);
        }
        state.addTransition(
                new ValueAlphabet(datatype, exp.value),
                null,
                getState(tail));
    }

    public void onConcur(ConcurExp exp) {
        throw new UnsupportedOperationException("<concur> is not supported");
    }

    public void onInterleave(InterleaveExp exp) {
        state.addTransition(
                new InterleaveAlphabet(
                        getState(tail),
                        AttNameCombiner.collect(exp.exp1),
                        AttNameCombiner.collect(exp.exp2),
                        TextFinder.find(exp.exp1)),
                getState(exp.exp1),
                getState(exp.exp2));
    }
}
/**
 * Finds one attribute declaration in the given expression, or null.
 *
 * The search looks through choices (first branch before second),
 * oneOrMore, mixed, references and "other" wrappers. It does not descend
 * into elements, lists, sequences or interleaves, and it returns null for
 * all leaf patterns other than an attribute.
 *
 * @param exp the expression to search; callers pass the body of a oneOrMore
 * @return the first attribute declaration found, or null if there is none
 * @throws UnsupportedOperationException if a &lt;concur&gt; is reached,
 *         which is the same failure the transition builder reports for it
 */
private AttributeExp findAttDecl( Expression exp ) {
    return (AttributeExp)exp.visit(new ExpressionVisitorExpression() {
        public Expression onAttribute(AttributeExp exp) { return exp; }

        public Expression onChoice(ChoiceExp exp) {
            Expression found = exp.exp1.visit(this);
            if (found != null) {
                return found;
            }
            return exp.exp2.visit(this);
        }

        // transparent wrappers: keep looking inside
        public Expression onOneOrMore(OneOrMoreExp exp) { return exp.exp.visit(this); }
        public Expression onMixed(MixedExp exp) { return exp.exp.visit(this); }
        public Expression onRef(ReferenceExp exp) { return exp.exp.visit(this); }
        public Expression onOther(OtherExp exp) { return exp.exp.visit(this); }

        // patterns that are not searched
        public Expression onElement(ElementExp exp) { return null; }
        public Expression onList(ListExp exp) { return null; }
        public Expression onEpsilon() { return null; }
        public Expression onNullSet() { return null; }
        public Expression onAnyString() { return null; }
        public Expression onData(DataExp exp) { return null; }
        public Expression onValue(ValueExp exp) { return null; }

        public Expression onSequence(SequenceExp exp) {
            // since this method can be only used inside <oneOrMore>, if we see
            // <group> then there's no chance that we'll see <attribute>.
            return null;
        }

        public Expression onInterleave(InterleaveExp exp) {
            return null; // the same reason as the onSequence method.
        }

        public Expression onConcur(ConcurExp exp) {
            // Report the unsupported pattern the same way the transition
            // builder does, instead of a message-less InternalError (a type
            // meant for faults of the Java Virtual Machine itself).
            throw new UnsupportedOperationException("<concur> is not supported");
        }
    });
}
/**
 * Replaces the occurrence of "from" inside "exp" with "to".
 *
 * References and "other" wrappers are looked through (the result contains
 * the rewritten body in their place). Elements are returned as they are, so
 * their content is never rewritten. Attributes are compared by identity.
 */
private Expression replace( Expression exp, final AttributeExp from, final Expression to ) {
    ExpressionCloner substitution = new ExpressionCloner(pool) {
        public Expression onRef( ReferenceExp exp ) {
            return exp.exp.visit(this);
        }

        public Expression onOther( OtherExp exp ) {
            return exp.exp.visit(this);
        }

        public Expression onAttribute( AttributeExp exp ) {
            if (exp != from) {
                return exp;
            }
            return to;
        }

        public Expression onElement( ElementExp exp ) {
            return exp;
        }
    };
    return exp.visit(substitution);
}
}