package org.kohsuke.bali.automaton.builder; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.Set; import org.kohsuke.bali.automaton.NameSignature; import com.sun.msv.grammar.AnyNameClass; import com.sun.msv.grammar.AttributeExp; import com.sun.msv.grammar.ChoiceNameClass; import com.sun.msv.grammar.ElementExp; import com.sun.msv.grammar.Grammar; import com.sun.msv.grammar.ReferenceExp; import com.sun.msv.grammar.NameClass; import com.sun.msv.grammar.NamespaceNameClass; import com.sun.msv.grammar.NotNameClass; import com.sun.msv.grammar.SimpleNameClass; import com.sun.msv.grammar.util.ExpressionWalker; import com.sun.msv.grammar.util.NameClassCollisionChecker; import com.sun.msv.util.StringPair; /** * Assigns bit representation to each name * and signature to each name class. * * <p> * NameClasses need to be unified before they are passed to this class. * * @author Kohsuke Kawaguchi (kk@kohsuke.org) */ public class NameClassEncoder { /** Computes name class encoding from a set of NameClasses. */ public static NameClassEncoder build( NameClass[] names ) throws TooComplicatedException { NameClassEncoder nce = new NameClassEncoder(names); nce.doBuild(); return nce; } /** Computes name class encoding from a set of NameClasses. */ public static NameClassEncoder build( Collection names ) throws TooComplicatedException { NameClassEncoder nce = new NameClassEncoder(names); nce.doBuild(); return nce; } /** Computes name class encoding from a grammar. */ public static NameClassEncoder build( Grammar g ) throws TooComplicatedException { final Set names = new HashSet(); // collect all name classes g.getTopLevel().visit(new ExpressionWalker() { public void onAttribute(AttributeExp exp) { if(visitedExps.add(exp)) { names.add(exp.nameClass); exp.exp.visit(this); } } public void onElement(ElementExp exp) { if(visitedExps.add(exp)) { names.add(exp.getNameClass()); exp.contentModel.visit(this); } } public void onRef(ReferenceExp exp) { if(visitedExps.add(exp)) { exp.exp.visit(this); } } private final Set visitedExps = new HashSet(); }); return build(names); } /** Map from NameClass to its Signature. */ public final Map signatures = new HashMap(); /** Name code for literals. Map from StringPair to Integer. */ public final Map literals = new HashMap(); /** number of bits that are used. */ private int usedBits=0; public int getUsedBits() { return usedBits; } /** computed Clusters. */ private Cluster[] clusters; /** used to build encoding but once it's built, this field becomes empty. */ private final Set names = new HashSet(); public final NameClassUnifier unifier = new NameClassUnifier(); /** * A set of independet name classes that share the same bit mask. */ private class Cluster { /** name classes in the ascending order of the test bits. */ final NameClass[] nameClasses; /** True if the above set covers the possible names completely. */ final boolean isCover; /** Number of bits assigned for this cluster. */ final int bitLen; /** position of the least siginificant bit of this cluster in the whole bit masks. */ final int bitPos; Cluster( NameClass[] ncs, boolean cov, int bitLen, int bitPos ) { this.nameClasses = ncs; this.isCover = cov; this.bitLen = bitLen; this.bitPos = bitPos; } } private NameClassEncoder( Collection _names ) { for (Iterator itr = _names.iterator(); itr.hasNext();) { NameClass nc = (NameClass) itr.next(); names.add( unifier.unify(nc) ); } } private NameClassEncoder( NameClass[] _names ) { for( int i=0; i<_names.length; i++ ) names.add( unifier.unify(_names[i]) ); // collision elimination } private void doBuild() throws TooComplicatedException { // collect probe points ProbePointsCollector ppc = new ProbePointsCollector(); for( Iterator itr=names.iterator(); itr.hasNext(); ) ((NameClass)itr.next()).visit(ppc); // list up simple names. List simpleNames = new ArrayList(); for( Iterator itr = names.iterator(); itr.hasNext();) { NameClass nc = (NameClass) itr.next(); if( nc instanceof SimpleNameClass ) simpleNames.add(nc); } ArrayList clusters = new ArrayList(); while( !names.isEmpty() ) { // find an independent set from names. ArrayList c = findIndependentSet(); int count = c.size(); boolean isCover = this.isCover(c); // if c is not a cover, we need one cluster that represents "others" if( !isCover ) count++; // compute the number of bits that need to represent this cluster. int nBits=log2(count); if( usedBits+nBits>32 ) throw new TooComplicatedException(); // we don't have that many bits int mask = createMask(usedBits,nBits); // assign signatures for( int i=0; i<c.size(); i++ ) { NameClass nc = (NameClass)c.get(i); signatures.put( nc, new NameSignature(nc,mask,i<<usedBits,this) ); } clusters.add(new Cluster( (NameClass[]) c.toArray(new NameClass[c.size()]), isCover, nBits, usedBits )); usedBits += nBits; // update names names.removeAll(c); // repeat this process } this.clusters = (Cluster[]) clusters.toArray(new Cluster[clusters.size()]); // determine name code for name literals. for( Iterator itr=ppc.probePoints.iterator(); itr.hasNext(); ) { StringPair literal = (StringPair)itr.next(); int code = getLiteralCode(literal); if(literal.localName!=IMPOSSIBLE) { if( getLiteralCode(new StringPair(literal.namespaceURI,IMPOSSIBLE))==code ) // this literal is of the form "foo:bar" and its encoded representation // is the same as "foo:*". Thus there's no point in having this literal continue; } else if(literal.namespaceURI!=IMPOSSIBLE) { if( getLiteralCode(new StringPair(IMPOSSIBLE,IMPOSSIBLE))==code ) // this literal is of the form "foo:*" and its encoded representation // is the same as "*:*". Thus there's no point in having this literal continue; } literals.put( literal, new Integer(code) ); } // then update the name signature again so that // simple name classes will have -1 as the mask. for( int i=0; i<simpleNames.size(); i++ ) { SimpleNameClass snc = (SimpleNameClass)simpleNames.get(i); int code = getLiteralCode(snc.toStringPair()); signatures.put( snc, new NameSignature(snc,-1,code,this) ); } } /** Obtains the signature computed for the given name class. */ public NameSignature getSignature( NameClass nc ) { NameSignature ns = (NameSignature)signatures.get(unifier.unify(nc)); if(ns==null) throw new InternalError(); // algorithmic error return ns; } /** * Computes the encoded representation of a string literal. * * A literal can contain wildcard in its parameter. */ private int getLiteralCode( StringPair literal ) { int code=0; for( int i=this.clusters.length-1; i>=0; i-- ) { Cluster c = this.clusters[i]; code <<= c.bitLen; int j=0; for( ; j<c.nameClasses.length; j++ ) if( c.nameClasses[j].accepts(literal) ) { code |= j; break; } if(j==c.nameClasses.length) // no match. fill those bits by one code |= (1<<c.bitLen)-1; } return code; } /** * Computes ceil(log2(n)). The number of bits necessary * to represent <i>n</i> distinct things. */ private int log2( int n ) { int nBits=0; n--; while(n!=0) { nBits++; n >>= 1; } return nBits; } private int createMask( int zeroLen, int oneLen ) { int mask=0; for( int i=0; i<oneLen; i++ ) mask = (mask<<1)|1; return mask<<zeroLen; } /** * Finds an independet set from the "names" field. * * Ideally, we'd like to compute the maximum independent set, * but that is a NP-complete problem. */ private ArrayList findIndependentSet() { return findIndependentSet( new ArrayList() ); } /** * Finds an independent set. * * This function computes an independent set by starting with * all the simple name classes. * * <p> * Since most of the name classes are {@link SimpleNameClass}, * and they are independent to each other, this is usually * a good way to get a large independent set. * * <p> * This algorithm can't be applied after the second iteration * because all the simple classes are wiped out in the first iteration. */ private ArrayList findIndependentSet2() { ArrayList result = new ArrayList(); for( Iterator itr = names.iterator(); itr.hasNext();) { NameClass nc = (NameClass)itr.next(); if( nc instanceof SimpleNameClass ) result.add(nc); } return findIndependentSet(result); } /** * Finds an independet set by extending the given set. * * This function simply increases the given set in a greedily manner. * * @param * partial independent set. */ private ArrayList findIndependentSet( ArrayList result ) { NameClassCollisionChecker checker = new NameClassCollisionChecker(); for( Iterator itr = names.iterator(); itr.hasNext();) { NameClass nc = (NameClass)itr.next(); // if a new name doesn't collide with existing ones, take it int i; for( i=0; i<result.size(); i++ ) if( checker.check(nc,(NameClass)result.get(i)) ) break; // collision if( i==result.size() ) result.add(nc); } return result; } /** * Returns true if the parameters together covers the entire * name class space. * * <p> * The formal definition is: * <pre> * isCover( nc1, nc2, ... nck ) * := ( choice(nc1,nc2,nck) == anyName ) * </pre> */ private boolean isCover( Collection nameClasses ) { if( nameClasses.size()==0 ) return false; // build the union of all the given name classes NameClass union=null; for( Iterator itr = nameClasses.iterator(); itr.hasNext(); ) { NameClass nc = (NameClass)itr.next(); if(union==null) union = nc; else union = new ChoiceNameClass(union,nc); } // see if it has any "hole" in it --- return new NotNameClass(union).isNull(); } // // // debug functions // // /** * Dumps all the computed signatures. */ public void dumpSigntures() { System.out.println("name classes"); for( Iterator itr = signatures.entrySet().iterator(); itr.hasNext(); ) { Map.Entry e = (Map.Entry) itr.next(); System.out.println( e.getValue() ); } System.out.println("\n\n"); System.out.println("name literals"); for( Iterator itr=literals.entrySet().iterator(); itr.hasNext(); ) { Map.Entry e = (Map.Entry) itr.next(); System.out.println( toBitString(((Integer)e.getValue()).intValue())+" : "+e.getKey() ); } } private String toBitString( int n ) { String s=""; for( int i=0; i<usedBits; i++ ) if( (n&(1<<i))!=0 ) s='1'+s; else s='0'+s; return s; } // a very simple test public static void main( String[] args ) throws Exception { NameClass ns1 = new NamespaceNameClass("foo"); NameClass ns2 = new NamespaceNameClass("bar"); NameClass ns3 = new NamespaceNameClass("zot"); NameClassEncoder e = NameClassEncoder.build( new NameClass[]{ AnyNameClass.theInstance, new ChoiceNameClass(ns1,ns2), new ChoiceNameClass(ns2,ns3), new ChoiceNameClass(ns3,ns1), ns1, new SimpleNameClass("foo","zot")}); e.dumpSigntures(); } /** Invalid name token constant. */ public static final String IMPOSSIBLE = "\u0000"; }