package org.kohsuke.bali;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import org.kohsuke.bali.automaton.TreeAutomaton;
import org.kohsuke.bali.automaton.builder.*;
import org.kohsuke.bali.datatype.DatatypeLibraryImpl;
import org.kohsuke.bali.optimizer.AttributeReorder;
import org.kohsuke.bali.optimizer.InterleaveStrengthReducer;
import org.kohsuke.bali.optimizer.Unifier;
import org.kohsuke.bali.optimizer.ZeroOrMoreAttributeExpander;
import org.kohsuke.bali.writer.AutomatonDumper;
import org.kohsuke.bali.writer.AutomatonVisualizer;
import org.kohsuke.bali.writer.AutomatonWriter;
import org.kohsuke.bali.writer.CSharpWriter;
import org.kohsuke.bali.writer.Interpreter;
import org.kohsuke.bali.writer.StringBasedEncoder;
import org.kohsuke.bali.writer.JavaWriter;
import org.kohsuke.bali.writer.MultiWriter;
import org.kohsuke.bali.writer.NullWriter;
import org.kohsuke.bali.writer.WinCppWriter;
import org.relaxng.datatype.DatatypeLibrary;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import com.sun.msv.driver.textui.DebugController;
import com.sun.msv.grammar.AttributeExp;
import com.sun.msv.grammar.ElementExp;
import com.sun.msv.grammar.ExpressionPool;
import com.sun.msv.grammar.Grammar;
import com.sun.msv.grammar.ReferenceExp;
import com.sun.msv.grammar.util.ExpressionWalker;
import com.sun.msv.reader.Controller;
import com.sun.msv.reader.dtd.DTDReader;
import com.sun.msv.reader.trex.ng.RELAXNGReader;
import com.sun.msv.reader.util.GrammarLoader;
import com.sun.msv.scanner.dtd.DTDParser;
import com.sun.msv.verifier.jaxp.SAXParserFactoryImpl;
import com.sun.msv.writer.relaxng.RELAXNGWriter;
import runtime.ValidateletImpl;
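/**
 * Command line driver of bali.
 *
 * Parses a schema (RELAX NG or DTD), optimizes the grammar, builds a
 * tree automaton from it and writes the automaton in the requested
 * output formats. Instance documents given on the command line are
 * validated with the interpreter.
 *
 * @author Kohsuke Kawaguchi (kk@kohsuke.org)
 */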
public class Driver {
/**
 * Prints an optional error message followed by the command line usage
 * to <code>System.err</code>.
 *
 * @param msg
 *      error message printed before the usage text. Nothing is printed
 *      for it when null.
 * @return
 *      always -1, so that a caller can write
 *      <code>return usage(...)</code> to report a command line error.
 */
private static int usage( String msg ) {
    if(msg!=null)
        System.err.println(msg);

    System.err.println(
        "Usage: bali <schema file name> [<instance1> <instance2> ...]\n"+
        "\n"+
        "Input Options (by default file extension is used)\n"+
        " -dtd: input schema file is DTD\n"+
        " -rng: input schema file is RELAX NG\n"+
        "Optimization Options (all turned on by default. append '-' to turn off (e.g., -u-))\n"+
        " -u : compress the automaton by minimizing the input grammar\n"+
        " -ir : compress the automaton by reducing <interleave>s to <group>s if possible\n"+
        " -ia : compress ignorable attributes\n"+
        " -ra : reorder attributes\n"+
        " -ss : compress transition table by sharing them across states\n"+
        " -mx : improve the runtime efficiency of <mixed>\n"+
        " -za : massage zero-or-more attributes\n"+
        "Output Options:\n"+
        " -oa <format> :\n"+
        " dump automaton into a graphic file (gif/ps/png)\n"+
        " -oj <target dir> <fully qualified class name> :\n"+
        " output Java validatelet\n"+
        " -ocs <target dir> <fully qualified class name> :\n"+
        " output .NET C# validatelet\n"+
        " -ot :\n"+
        " dump automaton to a text file\n"+
        " -ow <target dir> <namespace> <class name>\n"+
        " output Visual C++ validatelet for Win32\n"+
        " <namespace> can be '-' to indicate no namespace\n"+
        "Other Options:\n"+
        " -ne : just encode names in the grammar and quit\n"+
        " -v : be verbose\n"+
        "\n"+
        " instance documents are validated with the interpreter if specified\n");

    return -1;
}
/**
 * Command line entry point.
 *
 * Runs {@link #doMain(String[])} and terminates the JVM with the exit
 * code it returns.
 *
 * @param args
 *      command line arguments, passed to doMain as they are.
 */
public static void main(String[] args) {
    final int exitCode = doMain(args);
    System.exit(exitCode);
}
// schema language constants.
// doMain keeps one of these values to decide which loader parses the schema.

// no language was forced on the command line. doMain then switches to
// RELAX NG or DTD if the schema file name ends with ".rng" or ".dtd",
// and otherwise hands the schema to loadOtherGrammar.
private static final int SCHEMALANGUAGE_AUTODETECT = 0;
// the schema is RELAX NG (the -rng option or a ".rng" file name)
private static final int SCHEMALANGUAGE_RELAXNG = 1;
// the schema is a DTD (the -dtd option or a ".dtd" file name)
private static final int SCHEMALANGUAGE_DTD = 2;
/**
 * Executes the program and returns the exit code.
 *
 * This method will never call <code>System.exit</code>.
 *
 * <p>
 * The steps are: parse the command line, load the schema, run the
 * enabled grammar optimizations, build the tree automaton, pass it to
 * the selected writers and, if instance documents were given, validate
 * them with the interpreter.
 *
 * <p>
 * An argument that starts with '-' or '/' is looked up as an option.
 * Because '/' is also the first character of an absolute path on Unix,
 * an unrecognized '/' argument is taken as a file name; only an
 * unrecognized '-' argument is reported as an unknown option.
 * The first file name is the schema, the remaining ones are instances.
 *
 * @param args
 *      command line arguments. The usage message lists the options.
 * @return
 *      -1 if the command line is incorrect; -2 if the grammar cannot
 *      be loaded, is too complicated, or an IOException occurs while
 *      it is processed; 0 otherwise. Note that 0 is currently also
 *      returned when the validation of an instance fails.
 */
public static int doMain(String[] args) {
    String grammarName = null;

    AutomatonWriter writer = new NullWriter();

    // set to non-null if we need an interpreter.
    Interpreter interpreter = null;

    // optimization switches. all of them are on by default and
    // are turned off by the corresponding "-xx-" option.
    boolean unify = true;
    boolean interleaveReduction = true;
    boolean ignorableAttributeOptimization = true;
    boolean useEpsilonTransition = true;
    boolean optimizeMixed = true;
    boolean expandZeroOrMoreAtts = true;
    boolean reorderAttributes = true;

    boolean nameEncodingTest = false;
    boolean verbose = false;

    int schemaLanguage = SCHEMALANGUAGE_AUTODETECT;

    // names of the instance documents to be validated
    ArrayList instances = new ArrayList();

    TreeAutomaton automaton;

    try {
        //
        // parse the command line
        //
        try {
            for( int i=0; i<args.length; i++ ) {
                String arg = args[i];

                // an empty argument is neither an option nor a usable file
                // name, and arg.charAt(0) below would throw on it.
                if( arg.length()==0 )
                    return usage("empty argument is not allowed");

                if("-/".indexOf(arg.charAt(0))!=-1) {
                    String opt = arg.substring(1);

                    if(opt.equals("oa")) {
                        String type = args[++i];
                        writer = new MultiWriter( writer,
                            new AutomatonVisualizer( type, System.out ));
                        continue;
                    }
                    if(opt.equals("oj")) {
                        File target = new File(args[++i]);
                        if( !target.isDirectory() )
                            return usage("Cowardly refuse to output to a non-existent directory: "+target);

                        // split the fully qualified name into package and class
                        String fullClassName = args[++i];
                        int idx = fullClassName.lastIndexOf('.');
                        if(idx==-1)
                            writer = new MultiWriter( writer,
                                new JavaWriter( "", fullClassName, target ));
                        else
                            writer = new MultiWriter( writer,
                                new JavaWriter(
                                    fullClassName.substring(0,idx),
                                    fullClassName.substring(idx+1),
                                    target ));
                        continue;
                    }
                    if(opt.equals("ocs")) {
                        File target = new File(args[++i]);
                        if( !target.isDirectory() )
                            return usage("Cowardly refuse to output to a non-existent directory: "+target);

                        // split the fully qualified name into namespace and class
                        String fullClassName = args[++i];
                        int idx = fullClassName.lastIndexOf('.');
                        if(idx==-1)
                            writer = new MultiWriter( writer,
                                new CSharpWriter( null, fullClassName, target ));
                        else
                            writer = new MultiWriter( writer,
                                new CSharpWriter(
                                    fullClassName.substring(0,idx),
                                    fullClassName.substring(idx+1),
                                    target ));
                        continue;
                    }
                    if(opt.equals("ot")) {
                        writer = new MultiWriter( writer,
                            new AutomatonDumper(System.out));
                        continue;
                    }
                    if(opt.equals("ow")) {
                        File target = new File(args[++i]);
                        if( !target.isDirectory() )
                            return usage("Cowardly refuse to output to a non-existent directory: "+target);

                        String namespace = args[++i];
                        String className = args[++i];
                        // "-" stands for "no namespace"
                        if(namespace.equals("-"))   namespace=null;

                        writer = new MultiWriter( writer,
                            new WinCppWriter( namespace, className, target ) );
                        continue;
                    }
                    if(opt.equals("u")) {
                        unify = true;
                        continue;
                    }
                    if(opt.equals("u-")) {
                        unify = false;
                        continue;
                    }
                    if(opt.equals("ir")) {
                        interleaveReduction = true;
                        continue;
                    }
                    if(opt.equals("ir-")) {
                        interleaveReduction = false;
                        continue;
                    }
                    if(opt.equals("ia")) {
                        ignorableAttributeOptimization = true;
                        continue;
                    }
                    if(opt.equals("ia-")) {
                        ignorableAttributeOptimization = false;
                        continue;
                    }
                    if(opt.equals("ss")) {
                        useEpsilonTransition = true;
                        continue;
                    }
                    if(opt.equals("ss-")) {
                        useEpsilonTransition = false;
                        continue;
                    }
                    if(opt.equals("ra")) {
                        reorderAttributes = true;
                        continue;
                    }
                    if(opt.equals("ra-")) {
                        reorderAttributes = false;
                        continue;
                    }
                    if(opt.equals("mx")) {
                        optimizeMixed = true;
                        continue;
                    }
                    if(opt.equals("mx-")) {
                        optimizeMixed = false;
                        continue;
                    }
                    if(opt.equals("za")) {
                        expandZeroOrMoreAtts = true;
                        continue;
                    }
                    if(opt.equals("za-")) {
                        expandZeroOrMoreAtts = false;
                        continue;
                    }
                    if(opt.equals("ne")) {
                        nameEncodingTest = true;
                        continue;
                    }
                    if(opt.equals("v")) {
                        verbose = true;
                        continue;
                    }
                    if(opt.equals("dtd")) {
                        schemaLanguage = SCHEMALANGUAGE_DTD;
                        continue;
                    }
                    if(opt.equals("rng")) {
                        schemaLanguage = SCHEMALANGUAGE_RELAXNG;
                        continue;
                    }

                    // '/' also starts an absolute path on Unix, so only a
                    // '-' argument is rejected as an unknown option. an
                    // unrecognized '/' argument falls through and is
                    // handled as a file name below.
                    if( arg.charAt(0)=='-' )
                        return usage("Unknown option: "+arg);
                }

                if( grammarName!=null ) {
                    // the schema is already known, so this is an instance.
                    if( interpreter==null ) {
                        // make sure that the interpreter will run.
                        interpreter = new Interpreter();
                        writer = new MultiWriter( writer, interpreter );
                    }
                    instances.add(arg);
                    continue;
                }

                grammarName = arg;
            }
        } catch( ArrayIndexOutOfBoundsException e ) {
            // an option that misses its parameters runs past the end
            // of args (see the args[++i] above) and ends up here.
            return usage("incorrect parameter arity");
        }

        if( grammarName==null )
            return usage("no grammar is given");

        if( schemaLanguage==SCHEMALANGUAGE_AUTODETECT ) {
            // pick the language from the file extension, if it is a known one
            if( grammarName.endsWith(".rng") )
                schemaLanguage = SCHEMALANGUAGE_RELAXNG;
            else
            if( grammarName.endsWith(".dtd") )
                schemaLanguage = SCHEMALANGUAGE_DTD;
        }

        //
        // load the grammar
        //
        System.err.println("parsing the grammar");
        Grammar grammar=null;
        URL grammarUrl = new File(grammarName).toURL();
        try {
            switch( schemaLanguage ) {
            case SCHEMALANGUAGE_RELAXNG:
                grammar = loadRELAXNGGrammar(grammarUrl);
                break;
            case SCHEMALANGUAGE_DTD:
                grammar = loadDTDGrammar(grammarUrl);
                break;
            default:
                grammar = loadOtherGrammar(grammarUrl);
                break;
            }
        } catch( IOException e ) {
            printException(e,verbose);
        } catch( SAXException e ) {
            ; // the error should have been reported already
        } catch( ParserConfigurationException e ) {
            printException(e,verbose);
        } catch( UnsupportedOperationException e ) {
            // loadOtherGrammar throws this for a grammar it cannot convert.
            // report it as a load failure instead of letting it escape.
            printException(e,verbose);
        }

        if(grammar==null) {
            System.err.println("failed to parse a grammar");
            return -2;
        }

        //
        // optimize the grammar
        //
        if( unify ) {
            System.err.println("compacting the grammar");
            grammar = Unifier.unify(grammar);
        }
        if( expandZeroOrMoreAtts ) {
            System.err.println("massaging zero or more attributes");
            grammar = ZeroOrMoreAttributeExpander.optimize(grammar);
        }
        if( interleaveReduction ) {
            System.err.println("reducing <interleave>s");
            grammar = InterleaveStrengthReducer.optimize(grammar);
        }
        if( reorderAttributes ) {
            System.err.println("reordering attributes");
            grammar = AttributeReorder.optimize(grammar);
        }

        if(nameEncodingTest) {
            // -ne : just encode the names and quit
            testNameEncoding(grammar);
            return 0;
        }

        //
        // build the automaton
        //
        System.err.println("building the automaton");
        if( ignorableAttributeOptimization )
            System.err.println(" + optimize ignorable optional attributes");
        if( useEpsilonTransition )
            System.err.println(" + compress transition table");
        if( optimizeMixed )
            System.err.println(" + optimize <mixed>");

        automaton = TreeAutomatonBuilder.build(
            grammar,
            ignorableAttributeOptimization,
            useEpsilonTransition,
            optimizeMixed);

        System.err.println(automaton.countStates()+" states and "+automaton.countTransitions()+" transitions");

        // convert this automaton into whatever form the user wants.
        writer.write(automaton);
    } catch( IOException e ) {
        printException(e,verbose);
        return -2;
    } catch( TooComplicatedException e ) {
        System.out.println("the grammar is too big");
        return -2;
    }

    if( instances.isEmpty() )
        return 0;   // quit here

    //
    // run interpreter
    //
    // NOTE(review): every failure below is reported but still ends with
    // the exit code 0, and the first failure stops the remaining
    // instances from being validated. confirm that this is intended.
    try {
        // create a configured SAX parser factory
        SAXParserFactory spf = SAXParserFactory.newInstance();
        spf.setNamespaceAware(true);
        spf.setValidating(false);
        XMLReader reader = spf.newSAXParser().getXMLReader();

        // interpreter is non-null here: it is created together with
        // the first entry of instances.
        ValidateletImpl v = interpreter.createValidatelet();
        reader.setContentHandler(v);

        for (Iterator itr = instances.iterator(); itr.hasNext();) {
            String name = (String) itr.next();
            System.out.println("validating "+name);
            reader.parse(new File(name).toURL().toExternalForm());
            System.out.println("valid");
        }
    } catch( SAXException e ) {
        printException(e,verbose);
        return 0;
    } catch( IOException e ) {
        printException(e,verbose);
        return 0;
    } catch( ParserConfigurationException e ) {
        printException(e,verbose);
        return 0;
    }

    return 0;
}
/**
 * Reports an exception to <code>System.err</code>.
 *
 * @param e
 *      the exception to be reported.
 * @param verbose
 *      if true, the whole stack trace is printed. Otherwise a single
 *      "ERROR: ..." line is printed.
 */
private static void printException( Exception e, boolean verbose ) {
    if( verbose ) {
        e.printStackTrace();
        return;
    }

    // getMessage() can be null. fall back to toString(), which
    // names the exception class, rather than printing "ERROR: null".
    String msg = e.getMessage();
    if( msg==null )
        msg = e.toString();
    System.err.println("ERROR: "+msg);
}
/**
 * Returns the given path with its file extension swapped for a new one.
 *
 * The extension is whatever follows the last '.' of the path, as long
 * as that dot is located after the last file separator. If there is no
 * such dot, the new extension is simply appended.
 *
 * @param pathName
 *      path whose extension is to be changed.
 * @param newExt
 *      something like "abc" without a dot.
 * @return
 *      the path that carries the new extension.
 */
private static String replaceExtension( String pathName, String newExt ) {
    final int lastSeparator = pathName.lastIndexOf(File.separatorChar);
    final int lastDot = pathName.lastIndexOf('.');

    // a dot in front of the last separator belongs to a directory name,
    // in which case the file name itself has no extension to cut off.
    final boolean hasExtension = lastDot>lastSeparator;
    final String stem = hasExtension ? pathName.substring(0,lastDot) : pathName;

    return stem+'.'+newExt;
}
/**
 * Builds a NameClassEncoder for the given grammar and lets it dump
 * the signatures it has computed.
 *
 * @param g
 *      grammar whose names are to be encoded.
 * @throws TooComplicatedException
 *      declared because the encoding of the grammar can fail with it.
 */
public static void testNameEncoding( Grammar g ) throws TooComplicatedException {
    NameClassEncoder.build(g).dumpSigntures();
}
/**
 * Parses the RELAX NG schema at the given location with a reader
 * obtained from {@link #createRELAXNGReader()}.
 *
 * @param url
 *      location of the schema.
 * @return null
 *      If failed to parse a schema
 */
public static Grammar loadRELAXNGGrammar( URL url ) {
    final RELAXNGReader rngReader = createRELAXNGReader();
    final String systemId = url.toExternalForm();
    rngReader.parse( systemId );
    return rngReader.getResult();
}
/**
 * Creates a RELAXNGReader that is set up for bali.
 *
 * The reader gets a namespace-aware, non-validating parser factory
 * built from the RELAX NG schema for schemas, a DebugController, a new
 * ExpressionPool, and a state factory that answers every datatype
 * library request with a DatatypeLibraryImpl.
 *
 * @return
 *      a newly created reader.
 */
public static RELAXNGReader createRELAXNGReader() {
    final SAXParserFactory parserFactory =
        new SAXParserFactoryImpl(RELAXNGReader.getRELAXNGSchema4Schema());
    parserFactory.setNamespaceAware(true);
    parserFactory.setValidating(false);

    final DebugController controller = new DebugController(true);

    final RELAXNGReader.StateFactory stateFactory = new RELAXNGReader.StateFactory() {
        // return our DatatypeLibrary implementation
        public DatatypeLibrary getDatatypeLibrary( String namespaceURI ) {
            return new DatatypeLibraryImpl(namespaceURI);
        }
    };

    final ExpressionPool pool = new ExpressionPool();

    return new RELAXNGReader( controller, parserFactory, stateFactory, pool );
}
/**
 * Parses the DTD at the given location.
 *
 * A DTDParser reads the DTD and feeds a DTDReader, which is given the
 * datatype library of the XML Schema datatypes namespace. The same
 * DebugController serves as the controller of the reader and as the
 * entity resolver of the parser.
 *
 * @param url
 *      location of the DTD.
 * @return null
 *      If failed to parse a schema
 */
public static Grammar loadDTDGrammar( URL url ) throws IOException, SAXException {
    final DebugController debugController = new DebugController(true);

    final DTDReader dtdReader = new DTDReader(debugController,new ExpressionPool());
    final String datatypeNamespace = "http://www.w3.org/2001/XMLSchema-datatypes";
    dtdReader.setDatatypeLibrary(new DatatypeLibraryImpl(datatypeNamespace));

    final DTDParser dtdParser = new DTDParser();
    dtdParser.setDtdHandler(dtdReader);
    dtdParser.setEntityResolver(debugController);

    final String systemId = url.toExternalForm();
    dtdParser.parse(systemId);

    return dtdReader.getResult();
}
/**
 * Loads a grammar that is written in neither RELAX NG nor DTD.
 *
 * The schema is parsed once with GrammarLoader. The plan was to write
 * the result as RELAX NG with RELAXNGWriter and to re-parse it as
 * RELAX NG, but that step went away when the dependence on
 * org.apache.xml.serialize.* was removed. As a result a schema that
 * parses successfully currently ends in an
 * UnsupportedOperationException.
 *
 * @param url
 *      location of the schema.
 * @return null
 *      If failed to parse a schema. There is no other way for this
 *      method to return at the moment.
 * @throws UnsupportedOperationException
 *      if the schema was parsed successfully, because the conversion
 *      to RELAX NG is not available.
 */
public static Grammar loadOtherGrammar( URL url ) throws IOException, ParserConfigurationException, SAXException {
    // parse it once
    DebugController controller = new DebugController(true);
    GrammarLoader loader = new GrammarLoader();
    loader.setController(controller);
    loader.setStrictCheck(true);
    Grammar grammar = loader.parse(url.toExternalForm());
    if(grammar==null)   return null;    // failed to parse

    // writing the grammar as RELAX NG and re-parsing it would come here.
    // removing dependence on org.apache.xml.serialize.*
    throw new UnsupportedOperationException("loadOtherGrammar");
}
}