/* * @(#)$Id: ExpressionPool.java,v 1.21 2002/08/01 22:10:43 kk122374 Exp $ * * Copyright 2001 Sun Microsystems, Inc. All Rights Reserved. * * This software is the proprietary information of Sun Microsystems, Inc. * Use is subject to license terms. * */ package com.sun.msv.grammar; import java.util.Hashtable; import org.relaxng.datatype.Datatype; import com.sun.msv.datatype.xsd.XSDatatype; import com.sun.msv.util.StringPair; /** * Creates a new Expression by combining existing expressions. * * all expressions are memorized and unified so that every subexpression * will be shared and reused. Optimization will be also done transparently. * For example, createChoice(P,P) will result in P. createSequence(P,nullSet) * will result in nullSet. * * Furthermore, associative operators are grouped to the left. * createChoice( (P|Q), (R|S) ) will be ((P|Q)|R)|S. * * <P> * Although this unification is essential, this is also the performance * bottle neck. In particular, createChoice and createSequence are two most * commonly called methods. * * <p> * For example, when validating a DocBook XML (150KB) twice against * DocBook.trex(237KB), createChoice is called 63000 times and createSequence * called 23000 times. (the third is the createOptional method and only 1560 times.) * And they took more than 10% of validation time, which is the worst * time-consuming method. * * <p> * Therefore, please beware that this class includes several ugly code optimization. * * @author <a href="mailto:kohsuke.kawaguchi@eng.sun.com">Kohsuke KAWAGUCHI</a> */ public class ExpressionPool implements java.io.Serializable { public final Expression createAttribute( NameClass nameClass ) { return unify(new AttributeExp(nameClass,Expression.anyString)); } public final Expression createAttribute( NameClass nameClass, Expression content ) { if(content==Expression.nullSet) return content; return unify(new AttributeExp(nameClass,content)); } public final Expression createEpsilon() { return Expression.epsilon; } public final Expression createNullSet() { return Expression.nullSet; } public final Expression createAnyString() { return Expression.anyString; } public final Expression createChoice( Expression left, Expression right ) { if( left==Expression.nullSet ) return right; if( right==Expression.nullSet ) return left; if( left==Expression.epsilon && right.isEpsilonReducible() ) return right; if( right==Expression.epsilon && left.isEpsilonReducible() ) return left; // TODO: should we re-order choice in a consistent manner? // associative operators are grouped to the left if( right instanceof ChoiceExp ) { final ChoiceExp c = (ChoiceExp)right; return createChoice( createChoice(left,c.exp1), c.exp2 ); } // eliminate duplicate choice items by checking that the right // is already included in the left. Expression next = left; while( true ) { if( next==right ) return left; // left is already in the choice if(!(next instanceof ChoiceExp)) break; ChoiceExp cp = (ChoiceExp)next; if( cp.exp2==right ) return left; next = cp.exp1; } // special (optimized) unification. // this will prevent unnecessary ChoiceExp instanciation. Expression o = expTable.get( Expression.hashCode(left,right,Expression.HASHCODE_CHOICE), left, right, ChoiceExp.class ); if(o==null) // different thread may possibly be doing the same thing at the same time. // so we have to call unify method, too synchronize update. return unify( new ChoiceExp(left,right) ); else return o; } public final Expression createOneOrMore( Expression child ) { if( child == Expression.epsilon || child == Expression.anyString || child == Expression.nullSet || child instanceof OneOrMoreExp ) return child; return unify(new OneOrMoreExp(child)); } public final Expression createZeroOrMore( Expression child ) { return createOptional(createOneOrMore(child)); } public final Expression createOptional( Expression child ) { // optimization will be done in createChoice method. return createChoice(child,Expression.epsilon); } public final Expression createData( XSDatatype dt ) { String ns = dt.getNamespaceUri(); if(ns==null) ns="\u0000"; // use something that doesn't collide with others. return createData( dt, new StringPair(ns,dt.displayName()) ); } public final Expression createData( Datatype dt, StringPair typeName ) { return createData( dt, typeName, Expression.nullSet ); } public final Expression createData( Datatype dt, StringPair typeName, Expression except ) { return unify( new DataExp(dt,typeName,except) ); } public final Expression createValue( XSDatatype dt, Object value ) { return createValue( dt, new StringPair("",dt.displayName()), value ); } public final Expression createValue( Datatype dt, StringPair typeName, Object value ) { return unify( new ValueExp(dt,typeName,value) ); } public final Expression createList( Expression exp ) { if(exp==Expression.nullSet) return exp; return unify( new ListExp(exp) ); } public final Expression createMixed( Expression body ) { if( body==Expression.nullSet ) return Expression.nullSet; if( body==Expression.epsilon ) return Expression.anyString; return unify( new MixedExp(body) ); } public final Expression createSequence( Expression left, Expression right ) { if( left ==Expression.nullSet || right==Expression.nullSet ) return Expression.nullSet; if( left ==Expression.epsilon ) return right; if( right==Expression.epsilon ) return left; // associative operators are grouped to the left if( right instanceof SequenceExp ) { final SequenceExp s = (SequenceExp)right; return createSequence( createSequence(left,s.exp1), s.exp2 ); } // special (optimized) unification. Expression o = expTable.get( Expression.hashCode(left,right,Expression.HASHCODE_SEQUENCE), left, right, SequenceExp.class ); if(o==null) return unify( new SequenceExp(left,right) ); else return o; } public final Expression createConcur( Expression left, Expression right ) { if( left==Expression.nullSet || right==Expression.nullSet ) return Expression.nullSet; if( left==Expression.epsilon ) { if( right.isEpsilonReducible() ) return Expression.epsilon; else return Expression.nullSet; } if( right==Expression.epsilon ) { if( left.isEpsilonReducible() ) return Expression.epsilon; else return Expression.nullSet; } // associative operators are grouped to the left if( right instanceof ConcurExp ) { final ConcurExp c = (ConcurExp)right; return createConcur( createConcur(left, c.exp1), c.exp2 ); } return unify(new ConcurExp(left,right)); } public final Expression createInterleave( Expression left, Expression right ) { if( left == Expression.epsilon ) return right; if( right== Expression.epsilon ) return left; if( left == Expression.nullSet || right== Expression.nullSet ) return Expression.nullSet; // associative operators are grouped to the left if( right instanceof InterleaveExp ) { final InterleaveExp i = (InterleaveExp)right; return createInterleave( createInterleave(left, i.exp1), i.exp2 ); } return unify(new InterleaveExp(left,right)); } /** hash table that contains all expressions currently known to this table. */ private final ClosedHash expTable; /** * creates new expression pool as a child pool of the given parent pool. * * <P> * Every expression memorized in the parent pool can be retrieved, but update * operations are only performed upon the child pool. * In this way, the parent pool can be shared among the multiple threads without * interfering performance. * * <P> * Furthermore, you can throw away a child pool after a certain time period to * prevent it from eating up memory. */ public ExpressionPool( ExpressionPool parent ) { expTable = new ClosedHash(parent.expTable); } public ExpressionPool() { expTable = new ClosedHash(); } /** * unifies expressions. * * If the equivalent expression is already registered in the table, * destroy newly created one (so that no two objects represents * same expression structure). * * If it's not registered, then register it and return it. */ protected final Expression unify( Expression exp ) { // call of get method need not be synchronized. // the implementation guarantee that simulatenous calls to get & put // will work correctly. Object o = expTable.get(exp); if(o==null) { // expression may not be registered. So try it again with lock synchronized(expTable) { o = expTable.get(exp); if(o==null) { // this check prevents two same expressions to be added simultaneously. // expression is not registered. expTable.put( exp ); return exp; } } } // expression is already registered. return (Expression)o; } /** * expression cache by closed hash. * * Special care has to be taken wrt threading. * This implementation allows get and put method to be called simulatenously. */ public final static class ClosedHash implements java.io.Serializable { /** The hash table data. */ private Expression table[]; /** The total number of mappings in the hash table. */ private int count; /** * The table is rehashed when its size exceeds this threshold. (The * value of this field is (int)(capacity * loadFactor).) */ private int threshold; /** The load factor for the hashtable. */ private static final float loadFactor = 0.3f; private static final int initialCapacity = 191; /** the parent hash table. * can be null. items in the parent hash table will be returned by * get method. */ private final ClosedHash parent; public ClosedHash() { this(null); } public ClosedHash( ClosedHash parent ) { table = new Expression[initialCapacity]; threshold = (int)(initialCapacity * loadFactor); this.parent = parent; } public Expression get( int hash, Expression left, Expression right, Class type ) { if( parent!=null ) { Expression e = parent.get(hash,left,right,type); if(e!=null) return e; } Expression tab[] = table; int index = (hash & 0x7FFFFFFF) % tab.length; while(true) { final Expression e = tab[index]; if( e==null ) return null; if( e.hashCode()==hash && e.getClass()==type ) { BinaryExp be = (BinaryExp)e; if( be.exp1==left && be.exp2==right ) return be; } index = (index+1)%tab.length; } } public Expression get( int hash, Expression child, Class type ) { if( parent!=null ) { Expression e = parent.get(hash,child,type); if(e!=null) return e; } Expression tab[] = table; int index = (hash & 0x7FFFFFFF) % tab.length; while(true) { final Expression e = tab[index]; if( e==null ) return null; if( e.hashCode()==hash && e.getClass()==type ) { UnaryExp ue = (UnaryExp)e; if( ue.exp==child ) return ue; } index = (index+1)%tab.length; } } public Expression get( Expression key ) { if( parent!=null ) { Expression e = parent.get(key); if(e!=null) return e; } Expression tab[] = table; int index = (key.hashCode() & 0x7FFFFFFF) % tab.length; while(true) { final Expression e = tab[index]; if( e==null ) return null; if( e.equals(key) ) return e; index = (index+1)%tab.length; } } /** * rehash. * * It is possible for one thread to call get method * while another thread is performing rehash. * Keep this in mind. */ private void rehash() { // create a new table first. // meanwhile, other threads can safely access get method. int oldCapacity = table.length; Expression oldMap[] = table; int newCapacity = oldCapacity * 2 + 1; Expression newMap[] = new Expression[newCapacity]; for (int i = oldCapacity ; i-- > 0 ;) if( oldMap[i]!=null ) { int index = (oldMap[i].hashCode() & 0x7FFFFFFF) % newMap.length; while(newMap[index]!=null) index=(index+1)%newMap.length; newMap[index] = oldMap[i]; } // threshold is not accessed by get method. threshold = (int)(newCapacity * loadFactor); // switch! table = newMap; } /** * put method. No two threads can call this method simulatenously, * and it's the caller's responsibility to enforce it. */ public void put(Expression newExp) { if (count >= threshold) rehash(); Expression tab[] = table; int index = (newExp.hashCode() & 0x7FFFFFFF) % tab.length; while(tab[index]!=null) index=(index+1)%tab.length; tab[index] = newExp; count++; } } }