package org.kohsuke.bali; import java.io.ByteArrayInputStream; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileWriter; import java.io.IOException; import java.net.URL; import java.util.ArrayList; import java.util.HashSet; import java.util.Iterator; import java.util.Set; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParserFactory; import org.kohsuke.bali.automaton.TreeAutomaton; import org.kohsuke.bali.automaton.builder.*; import org.kohsuke.bali.datatype.DatatypeLibraryImpl; import org.kohsuke.bali.optimizer.AttributeReorder; import org.kohsuke.bali.optimizer.InterleaveStrengthReducer; import org.kohsuke.bali.optimizer.Unifier; import org.kohsuke.bali.optimizer.ZeroOrMoreAttributeExpander; import org.kohsuke.bali.writer.AutomatonDumper; import org.kohsuke.bali.writer.AutomatonVisualizer; import org.kohsuke.bali.writer.AutomatonWriter; import org.kohsuke.bali.writer.CSharpWriter; import org.kohsuke.bali.writer.Interpreter; import org.kohsuke.bali.writer.StringBasedEncoder; import org.kohsuke.bali.writer.JavaWriter; import org.kohsuke.bali.writer.MultiWriter; import org.kohsuke.bali.writer.NullWriter; import org.kohsuke.bali.writer.WinCppWriter; import org.relaxng.datatype.DatatypeLibrary; import org.xml.sax.InputSource; import org.xml.sax.SAXException; import org.xml.sax.XMLReader; import com.sun.msv.driver.textui.DebugController; import com.sun.msv.grammar.AttributeExp; import com.sun.msv.grammar.ElementExp; import com.sun.msv.grammar.ExpressionPool; import com.sun.msv.grammar.Grammar; import com.sun.msv.grammar.ReferenceExp; import com.sun.msv.grammar.util.ExpressionWalker; import com.sun.msv.reader.Controller; import com.sun.msv.reader.dtd.DTDReader; import com.sun.msv.reader.trex.ng.RELAXNGReader; import com.sun.msv.reader.util.GrammarLoader; import com.sun.msv.scanner.dtd.DTDParser; import com.sun.msv.verifier.jaxp.SAXParserFactoryImpl; import com.sun.msv.writer.relaxng.RELAXNGWriter; import runtime.ValidateletImpl; /** * * * @author Kohsuke Kawaguchi (kk@kohsuke.org) */ public class Driver { private static int usage( String msg ) { if(msg!=null) System.err.println(msg); System.err.println( "Usage: bali <schema file name> [<instance1> <instance2> ...]\n"+ "\n"+ "Input Options (by default file extension is used)\n"+ " -dtd: input schema file is DTD\n"+ " -rng: input schema file is RELAX NG\n"+ "Optimization Options (all turned on by default. append '-' to turn off (e.g., -u-) \n"+ " -u : compress the automaton by minimizing the input grammar\n"+ " -ir : compress the automaton by reducing <interleave>s to <group>s if possible\n"+ " -ia : compress ignorable attributes\n"+ " -ra : reorder attributes\n"+ " -ss : compress transition table by sharing them across states\n"+ " -mx : improve the runtim efficiency of <mixed>\n"+ " -za : massage zero-or-more attributes\n"+ "Output Options:\n"+ " -oa <format> :\n"+ " dump automaton into a graphic file (gif/ps/png)\n"+ " -oj <target dir> <fully qualified class name> :\n"+ " output Java validatelet\n"+ " -ocs <target dir> <fully qualified class name> :\n"+ " output .NET C# validatelet\n"+ " -ot :\n"+ " dump automaton to a text file\n"+ " -ow <target dir> <namespace> <class name>\n"+ " output Visual C++ validatelet for Win32\n"+ " <namespace> can be '-' to indicate no namespace\n"+ "Other Options:\n"+ " -ne : just encode names in the grammar and quit\n"+ " -v : be verbose\n"+ "\n"+ " instance documents are validated with the interpreter if specified\n"); return -1; } public static void main(String[] args) { System.exit(doMain(args)); } // schema language constants private static final int SCHEMALANGUAGE_AUTODETECT = 0; private static final int SCHEMALANGUAGE_RELAXNG = 1; private static final int SCHEMALANGUAGE_DTD = 2; /** * Executes the program and returns the exit code. * * This method will never call <code>System.exit</code>. */ public static int doMain(String[] args) { String grammarName = null; AutomatonWriter writer = new NullWriter(); // set to non-null if we need an interpreter. Interpreter interpreter = null; boolean unify = true; boolean interleaveReduction = true; boolean ignorableAttributeOptimization = true; boolean nameEncodingTest = false; boolean useEpsilonTransition = true; boolean optimizeMixed = true; boolean expandZeroOrMoreAtts = true; boolean reorderAttributes = true; boolean verbose = false; int schemaLanguage = SCHEMALANGUAGE_AUTODETECT; ArrayList instances = new ArrayList(); TreeAutomaton automaton; try { try { for( int i=0; i<args.length; i++ ) { String arg = args[i]; if("-/".indexOf(arg.charAt(0))!=-1) { String opt = arg.substring(1); if(opt.equals("oa")) { String type = args[++i]; writer = new MultiWriter( writer, new AutomatonVisualizer( type, System.out )); continue; } if(opt.equals("oj")) { File target = new File(args[++i]); if( !target.isDirectory() ) return usage("Cowardly refuse to output to a non-existent directory: "+target); String fullClassName = args[++i]; int idx = fullClassName.lastIndexOf('.'); if(idx==-1) writer = new MultiWriter( writer, new JavaWriter( "", fullClassName, target )); else writer = new MultiWriter( writer, new JavaWriter( fullClassName.substring(0,idx), fullClassName.substring(idx+1), target )); continue; } if(opt.equals("ocs")) { File target = new File(args[++i]); if( !target.isDirectory() ) return usage("Cowardly refuse to output to a non-existent directory: "+target); String fullClassName = args[++i]; int idx = fullClassName.lastIndexOf('.'); if(idx==-1) writer = new MultiWriter( writer, new CSharpWriter( null, fullClassName, target )); else writer = new MultiWriter( writer, new CSharpWriter( fullClassName.substring(0,idx), fullClassName.substring(idx+1), target )); continue; } if(opt.equals("ot")) { writer = new MultiWriter( writer, new AutomatonDumper(System.out)); continue; } if(opt.equals("ow")) { File target = new File(args[++i]); if( !target.isDirectory() ) return usage("Cowardly refuse to output to a non-existent directory: "+target); String namespace = args[++i]; String className = args[++i]; if(namespace.equals("-")) namespace=null; writer = new MultiWriter( writer, new WinCppWriter( namespace, className, target ) ); continue; } if(opt.equals("u")) { unify = true; continue; } if(opt.equals("u-")) { unify = false; continue; } if(opt.equals("ir")) { interleaveReduction = true; continue; } if(opt.equals("ir-")) { interleaveReduction = false; continue; } if(opt.equals("ia")) { ignorableAttributeOptimization = true; continue; } if(opt.equals("ia-")) { ignorableAttributeOptimization = false; continue; } if(opt.equals("ss")) { useEpsilonTransition = true; continue; } if(opt.equals("ss-")) { useEpsilonTransition = false; continue; } if(opt.equals("ra")) { reorderAttributes = true; continue; } if(opt.equals("ra-")) { reorderAttributes = false; continue; } if(opt.equals("mx")) { optimizeMixed = true; continue; } if(opt.equals("mx-")) { optimizeMixed = false; continue; } if(opt.equals("za")) { expandZeroOrMoreAtts = true; continue; } if(opt.equals("za-")) { expandZeroOrMoreAtts = false; continue; } if(opt.equals("ne")) { nameEncodingTest = true; continue; } if(opt.equals("v")) { verbose = true; continue; } if(opt.equals("dtd")) { schemaLanguage = SCHEMALANGUAGE_DTD; continue; } if(opt.equals("rng")) { schemaLanguage = SCHEMALANGUAGE_RELAXNG; continue; } return usage("Unknown option: "+arg); } if( grammarName!=null ) { if( interpreter==null ) { // make sure that the interpreter will run. interpreter = new Interpreter(); writer = new MultiWriter( writer, interpreter ); } instances.add(arg); continue; } grammarName = arg; } } catch( ArrayIndexOutOfBoundsException e ) { // incorrect command line parameters can cause this error return usage("incorrect parameter arity"); } if( grammarName==null ) return usage("no grammar is given"); if( schemaLanguage==SCHEMALANGUAGE_AUTODETECT ) { if( grammarName.substring(Math.max(0,grammarName.length()-4)).equals(".rng") ) schemaLanguage = SCHEMALANGUAGE_RELAXNG; else if( grammarName.substring(Math.max(0,grammarName.length()-4)).equals(".dtd") ) schemaLanguage = SCHEMALANGUAGE_DTD; } System.err.println("parsing the grammar"); Grammar grammar=null; URL grammarUrl = new File(grammarName).toURL(); try { switch( schemaLanguage ) { case SCHEMALANGUAGE_RELAXNG: grammar = loadRELAXNGGrammar(grammarUrl); break; case SCHEMALANGUAGE_DTD: grammar = loadDTDGrammar(grammarUrl); break; default: grammar = loadOtherGrammar(grammarUrl); break; } } catch( IOException e ) { e.printStackTrace(); } catch( SAXException e ) { ; // the error should have been reported already } catch( ParserConfigurationException e ) { e.printStackTrace(); } if(grammar==null) { System.err.println("failed to parse a grammar"); return -2; } if( unify ) { System.err.println("compacting the grammar"); grammar = Unifier.unify(grammar); } if( expandZeroOrMoreAtts ) { System.err.println("massaging zero or more attributes"); grammar = ZeroOrMoreAttributeExpander.optimize(grammar); } if( interleaveReduction ) { System.err.println("reducing <interleave>s"); grammar = InterleaveStrengthReducer.optimize(grammar); } if( reorderAttributes ) { System.err.println("reordering attributes"); grammar = AttributeReorder.optimize(grammar); } if(nameEncodingTest) { testNameEncoding(grammar); return 0; } System.err.println("building the automaton"); if( ignorableAttributeOptimization ) System.err.println(" + optimize ignorable optional attributes"); if( useEpsilonTransition ) System.err.println(" + compress transition table"); if( optimizeMixed ) System.err.println(" + optimize <mixed>"); automaton = TreeAutomatonBuilder.build( grammar, ignorableAttributeOptimization, useEpsilonTransition, optimizeMixed); System.err.println(automaton.countStates()+" states and "+automaton.countTransitions()+" transitions"); // convert this automaton into whatever form the user wants. writer.write(automaton); // } catch( SAXException e ) { // ; // should have been reported // e.printStackTrace(); // return -2; } catch( IOException e ) { printException(e,verbose); return -2; } catch( TooComplicatedException e ) { System.out.println("the grammar is too big"); return -2; } if( instances.isEmpty() ) return 0; // quit here // // run interpreter // try { // create a configured SAX parser factory SAXParserFactory spf = SAXParserFactory.newInstance(); spf.setNamespaceAware(true); spf.setValidating(false); XMLReader reader = spf.newSAXParser().getXMLReader(); ValidateletImpl v = interpreter.createValidatelet(); reader.setContentHandler(v); for (Iterator itr = instances.iterator(); itr.hasNext();) { String name = (String) itr.next(); System.out.println("validating "+name); reader.parse(new File(name).toURL().toExternalForm()); System.out.println("valid"); } } catch( SAXException e ) { printException(e,verbose); return 0; } catch( IOException e ) { printException(e,verbose); return 0; } catch( ParserConfigurationException e ) { printException(e,verbose); return 0; } return 0; } private static void printException( Exception e, boolean verbose ) { if( verbose ) e.printStackTrace(); else System.err.println("ERROR: "+e.getMessage()); } /** * Changes the file extension to a new one. * * @param newExt * something like "abc" without a dot. */ private static String replaceExtension( String pathName, String newExt ) { int idx1 = pathName.lastIndexOf(File.separatorChar); int idx2 = pathName.lastIndexOf('.'); if( idx1<idx2 ) return pathName.substring(0,idx2)+'.'+newExt; else return pathName+"."+newExt; } /** * Encode all names in the grammar and dumps it to the screen. */ public static void testNameEncoding( Grammar g ) throws TooComplicatedException { NameClassEncoder e = NameClassEncoder.build(g); e.dumpSigntures(); } /** * Loads a RELAX NG grammar. * * @return null * If failed to parse a schema */ public static Grammar loadRELAXNGGrammar( URL url ) { RELAXNGReader reader = createRELAXNGReader(); reader.parse( url.toExternalForm() ); return reader.getResult(); } public static RELAXNGReader createRELAXNGReader() { SAXParserFactory spf = new SAXParserFactoryImpl(RELAXNGReader.getRELAXNGSchema4Schema()); spf.setNamespaceAware(true); spf.setValidating(false); RELAXNGReader reader = new RELAXNGReader( new DebugController(true), spf, new RELAXNGReader.StateFactory() { // return our DatatypeLibrary implementation public DatatypeLibrary getDatatypeLibrary( String namespaceURI ) { return new DatatypeLibraryImpl(namespaceURI); } }, new ExpressionPool() ); return reader; } /** * Loads a DTD grammar * * @return null * If failed to parse a schema */ public static Grammar loadDTDGrammar( URL url ) throws IOException, SAXException { DebugController controller = new DebugController(true); DTDReader reader = new DTDReader(controller,new ExpressionPool()); reader.setDatatypeLibrary(new DatatypeLibraryImpl("http://www.w3.org/2001/XMLSchema-datatypes")); DTDParser parser = new DTDParser(); parser.setDtdHandler(reader); parser.setEntityResolver(controller); parser.parse(url.toExternalForm()); return reader.getResult(); } /** * Uses RELAXNGWriter to transform other grammars into RELAX NG. * * @return null * If failed to parse a schema */ public static Grammar loadOtherGrammar( URL url ) throws IOException, ParserConfigurationException, SAXException { // parse it once DebugController controller = new DebugController(true); GrammarLoader loader = new GrammarLoader(); loader.setController(controller); loader.setStrictCheck(true); Grammar grammar = loader.parse(url.toExternalForm()); if(grammar==null) return null; // failed to parse // then write it as RELAX NG, and re-parse it as RELAX NG ByteArrayOutputStream baos = new ByteArrayOutputStream(); RELAXNGWriter writer = new RELAXNGWriter(); // removing dependence on org.apache.xml.serialize.* throw new UnsupportedOperationException("loadOtherGrammar"); } }