/* * @(#)$Id: Expression.java,v 1.14 2001/11/03 01:20:28 kk122374 Exp $ * * Copyright 2001 Sun Microsystems, Inc. All Rights Reserved. * * This software is the proprietary information of Sun Microsystems, Inc. * Use is subject to license terms. * */ package com.sun.msv.grammar; import org.xml.sax.*; import java.util.Collection; import com.sun.msv.grammar.util.RefExpRemover; /** * Primitive of the tree regular expression. * * most of the derived class is immutable (except ReferenceExp, ElementExp and OtherExp). * * <p> * By making it immutable, it becomes possible to share subexpressions among expressions. * This is very important for regular-expression-derivation based validation algorithm, * as well as for smaller memory footprint. * This sharing is automatically achieved by ExpressionPool. * * <p> * ReferebceExp, ElementExp, and OtherExp are also placed in the pool, * but these are not unified. Since they are not unified, * application can derive classes from these expressions * and mix them into AGM. This technique is heavily used to introduce schema language * specific primitives into AGM. See various sub-packages of this package for examples. * * <p> * The equals method must be implemented by the derived type. equals method will be * used to unify the expressions. equals method can safely assume that its children * are already unified (therefore == can be used to test the equality, rather than * equals method). * * <p> * To achieve unification, we overload the equals method so that * <code>o1.equals(o2)</code> is true if o1 and o2 are identical. * There, those two objects must return the same hash code. For this purpose, * the hash code is calculated statically and cached internally. * * * @author <a href="mailto:kohsuke.kawaguchi@eng.sun.com">Kohsuke KAWAGUCHI</a> */ public abstract class Expression implements java.io.Serializable { /** cached value of epsilon reducibility. * * Epsilon reducibility can only be calculated after parsing the entire expression, * because of forward reference to other pattern. */ private Boolean epsilonReducibility; /** returns true if this expression accepts empty sequence. * * <p> * If this method is called while creating Expressions, then this method * may return approximated value. When this method is used while validation, * this method is guaranteed to return the correct value. */ public boolean isEpsilonReducible() { // epsilon reducibility is cached internally. if( epsilonReducibility==null ) epsilonReducibility = calcEpsilonReducibility()?Boolean.TRUE:Boolean.FALSE; return epsilonReducibility==Boolean.TRUE; } /** computes epsilon reducibility */ protected abstract boolean calcEpsilonReducibility(); /** * Cached value of the expression after ReferenceExps are removed. * This value is computed on demand. */ private Expression expandedExp = null; /** * Gets the expression after removing all ReferenceExps, until child * AttributeExp or ElementExp. */ public Expression getExpandedExp( ExpressionPool pool ) { if(expandedExp==null) { // this part of the code may be called by the multiple threads // even if that happens, there is no consistency problem // because two thread will compute the same value. expandedExp = this.visit(new RefExpRemover(pool,false)); } return expandedExp; } /** * Peels the occurence expressions from this expression. * <p> * In AGM, 'X?','X+' and 'X*' are represented by using * other primitives. This method returns the 'X' part by * removing occurence related expressions. */ public final Expression peelOccurence() { // 'X?' is represented as 'choice(X,epsilon)'. if( this instanceof ChoiceExp ) { ChoiceExp cexp = (ChoiceExp)this; if(cexp.exp1==Expression.epsilon) return cexp.exp2.peelOccurence(); if(cexp.exp2==Expression.epsilon) return cexp.exp1.peelOccurence(); // note that epsilon may be in some branch deep under the tree. // for example, when the expression is ((A|epsilon)|B) // the above code won't be able to peel the epsilon in it. // but this is OK, since this method still returns ChoiceExp, // and the type of the expression is what matters. } // 'X+' is represented as 'oneOrMore(X)' if( this instanceof OneOrMoreExp ) return ((OneOrMoreExp)this).exp.peelOccurence(); // 'X*' is represented as '(X+)?' // therefore it is important to recursively process it. // otherwise we've finished. return this; } protected Expression( int hashCode ) { this.cachedHashCode = hashCode; } /** * this constructor can be used for the ununified expressions. * the only reason there are two parameters is to prevent unintentional * use of the default constructor. */ protected Expression( Object foolProof1, Object foolProof2 ) { this.cachedHashCode = System.identityHashCode(this); } /** * this field can be used by Verifier implementation to speed up * validation. Due to its nature, this field is not serialized. * * TODO: revisit this decision of not to serialize this field. */ public transient Object verifierTag = null; public abstract Object visit( ExpressionVisitor visitor ); public abstract Expression visit( ExpressionVisitorExpression visitor ); public abstract boolean visit( ExpressionVisitorBoolean visitor ); public abstract void visit( ExpressionVisitorVoid visitor ); // if you don't need RELAX capability at all, cut these lines public Object visit( com.sun.msv.grammar.relax.RELAXExpressionVisitor visitor ) { return visit((ExpressionVisitor)visitor); } public Expression visit( com.sun.msv.grammar.relax.RELAXExpressionVisitorExpression visitor ) { return visit((ExpressionVisitorExpression)visitor); } public boolean visit( com.sun.msv.grammar.relax.RELAXExpressionVisitorBoolean visitor ) { return visit((ExpressionVisitorBoolean)visitor); } public void visit( com.sun.msv.grammar.relax.RELAXExpressionVisitorVoid visitor ) { visit((ExpressionVisitorVoid)visitor); } // until here /** hash code of this object. * * To memorize every sub expression, hash code is frequently used. * And computation of the hash code requires full-traversal of * the expression. Therefore, hash code is computed when the object * is constructed, and kept cached thereafter. */ private final int cachedHashCode; public final int hashCode() { return cachedHashCode; } public abstract boolean equals( Object o ); static protected int hashCode( Object o1, Object o2, int hashKey ) { // TODO: more efficient hashing algorithm return o1.hashCode()+o2.hashCode()+hashKey; } static protected int hashCode( Object o, int hashKey ) { // TODO: more efficient hashing algorithm return o.hashCode()+hashKey; } static final int HASHCODE_ATTRIBUTE = 1; static final int HASHCODE_CHOICE = 2; static final int HASHCODE_ONE_OR_MORE = 3; static final int HASHCODE_REF = 4; static final int HASHCODE_SEQUENCE = 5; static final int HASHCODE_DATA = 6; static final int HASHCODE_VALUE = 7; static final int HASHCODE_ANYSTRING = 8; static final int HASHCODE_EPSILON = 9; static final int HASHCODE_NULLSET = 10; static final int HASHCODE_ELEMENT = 11; static final int HASHCODE_MIXED = 12; static final int HASHCODE_CONCUR = 20; static final int HASHCODE_INTERLEAVE = 21; static final int HASHCODE_LIST = 22; static final int HASHCODE_KEY = 23; private static class EpsilonExpression extends Expression { EpsilonExpression() { super(Expression.HASHCODE_EPSILON); } public Object visit( ExpressionVisitor visitor ) { return visitor.onEpsilon(); } public Expression visit( ExpressionVisitorExpression visitor ) { return visitor.onEpsilon(); } public boolean visit( ExpressionVisitorBoolean visitor ) { return visitor.onEpsilon(); } public void visit( ExpressionVisitorVoid visitor ) { visitor.onEpsilon(); } protected boolean calcEpsilonReducibility() { return true; } public boolean equals( Object o ) { return this==o; } // this class is used as singleton. private Object readResolve() { return this.epsilon; } }; /** * Special expression object that represents epsilon (ε). * This expression matches to "empty". * Epsilon can be thought as an empty sequence. */ public static final Expression epsilon = new EpsilonExpression(); private static class NullSetExpression extends Expression { NullSetExpression() { super(Expression.HASHCODE_NULLSET); } public Object visit( ExpressionVisitor visitor ) { return visitor.onNullSet(); } public Expression visit( ExpressionVisitorExpression visitor ) { return visitor.onNullSet(); } public boolean visit( ExpressionVisitorBoolean visitor ) { return visitor.onNullSet(); } public void visit( ExpressionVisitorVoid visitor ) { visitor.onNullSet(); } protected boolean calcEpsilonReducibility() { return false; } public boolean equals( Object o ) { return this==o; } // this class is used as singleton. private Object readResolve() { return this.nullSet; } }; /** * special expression object that represents the empty set (Φ). * This expression doesn't match to anything. * NullSet can be thought as an empty choice. */ public static final Expression nullSet = new NullSetExpression(); private static class AnyStringExpression extends Expression { AnyStringExpression() { super(Expression.HASHCODE_ANYSTRING); } public Object visit( ExpressionVisitor visitor ) { return visitor.onAnyString(); } public Expression visit( ExpressionVisitorExpression visitor ) { return visitor.onAnyString(); } public boolean visit( ExpressionVisitorBoolean visitor ) { return visitor.onAnyString(); } public void visit( ExpressionVisitorVoid visitor ) { visitor.onAnyString(); } // anyString is consider to be epsilon reducible. // In other words, one can always ignore anyString. // // Instead, anyString will remain in the expression even after // consuming some StringToken. // That is, residual of anyString by StringToken is not the epsilon but an anyString. protected boolean calcEpsilonReducibility() { return true; } public boolean equals( Object o ) { return this==o; } // this class is used as singleton. private Object readResolve() { return this.anyString; } }; /** * special expression object that represents "any string". * It is close to xsd:string datatype, but they have different semantics * in several things. * * <p> * This object is used as <anyString/> pattern of TREX and * <text/> pattern of RELAX NG. */ public static final Expression anyString = new AnyStringExpression(); }