/* * This file or a portion of this file is licensed under the terms of * the Globus Toolkit Public License, found in file ../GTPL, or at * http://www.globus.org/toolkit/download/license.html. This notice must * appear in redistributions of this file, with or without modification. * * Redistributions of this Software, with or without modification, must * reproduce the GTPL in: (1) the Software, or (2) the Documentation or * some other similar material which is provided with the Software (if * any). * * Copyright 1999-2004 University of Chicago and The University of * Southern California. All rights reserved. */ package org.griphyn.vdl.parser; import org.griphyn.vdl.classes.*; import org.griphyn.vdl.util.Logging; // Xerces import org.xml.sax.*; import javax.xml.parsers.*; import java.io.*; /** * This class uses the Xerces SAX2 parser to validate and parse an XML * document. The content handler <code>VDLContentHandler</code> and * error handler <code>VDLErrorHandler</code> are necessary to handle * various callbacks. * @author Jens-S. Vöckler * @author Yong Zhao * @version $Revision$ * @see VDLContentHandler * @see VDLErrorHandler */ public class VDLxParser { /** * Default parser is the Xerces parser. */ protected static final String vendorParserClass = "org.apache.xerces.parsers.SAXParser"; /** * Holds the instance of a {@link org.xml.sax.XMLReader} class. */ private XMLReader m_parser; /** * Handles the filling in of content, and callbacks to the * {@link DefinitionHandler} interface. */ private VDLContentHandler m_contentHandler; /** * Sets a feature while capturing failed features right here. * * @param uri is the feature's URI to modify * @param flag is the new value to set. * @return true, if the feature could be set, false for an exception */ private boolean set( String uri, boolean flag ) { boolean result = false; try { this.m_parser.setFeature( uri, flag ); result = true; } catch ( SAXException se ) { Logging.instance().log( "default", 0, "Could not set parser feature " + se.getMessage() ); } return result; } /** * The class constructor. This function initializes the Xerces parser * and the features that enable schema validation. * * @param schemaLocation is the default location of the XML Schema * which this parser is capable of parsing. It may be null to use * the defaults provided in the document. */ public VDLxParser( String schemaLocation ) { try { m_parser = (XMLReader) Class.forName(vendorParserClass).newInstance(); m_contentHandler = new VDLContentHandler(); m_parser.setContentHandler(m_contentHandler); m_parser.setErrorHandler(new VDLErrorHandler()); set( "http://xml.org/sax/features/validation", true ); set( "http://apache.org/xml/features/validation/dynamic", true ); set( "http://apache.org/xml/features/validation/schema", true ); // time+memory consuming, see http://xml.apache.org/xerces2-j/features.html // set( "http://apache.org/xml/features/validation/schema-full-checking", true ); // Send XML Schema element default values via characters(). set( "http://apache.org/xml/features/validation/schema/element-default", true ); set( "http://apache.org/xml/features/validation/warn-on-duplicate-attdef", true ); // mysteriously, this one fails with recent Xerces // set( "http://apache.org/xml/features/validation/warn-on-undeclared-elemdef", true ); set( "http://apache.org/xml/features/warn-on-duplicate-entitydef", true ); // set the schema default location. if ( schemaLocation != null ) { setSchemaLocations( Definitions.SCHEMA_NAMESPACE + ' ' + schemaLocation ); Logging.instance().log("parser", 0, "will use " + schemaLocation ); } else { Logging.instance().log("parser", 0, "will use document schema hint" ); } } catch (ClassNotFoundException e) { Logging.instance().log( "defaut", 0, "The SAXParser class was not found: " + e); } catch (InstantiationException e) { Logging.instance().log( "default", 0, "The SAXParser class could not be instantiated: " + e); } catch (IllegalAccessException e) { Logging.instance().log( "default", 0, "The SAXParser class could not be accessed: " + e); } } /** * Sets the list of external real locations where the XML schema may * be found. Since this list can be determined at run-time through * properties etc., we expect this function to be called between * instantiating the parser, and using the parser. * * @param list is a list of strings representing schema locations. The * content exists in pairs, one of the namespace URI, one of the * location URL. */ public void setSchemaLocations( String list ) { /* // default place to add list += "http://www.griphyn.org/working_groups/VDS/vdl-1.24.xsd " + "http://www.griphyn.org/working_groups/VDS/vdl-1.24.xsd"; */ // schema location handling try { m_parser.setProperty( "http://apache.org/xml/properties/schema/external-schemaLocation", list ); } catch ( SAXException se ) { Logging.instance().log( "default", 0, "The SAXParser reported an error: " + se ); } } /** * Sets the list of external real locations where the XML schema may * be found when no namespace is active. Only one location can be * specified. We expect this function to be called between * instantiating the parser, and using the parser. * * @param location is the location of the schema file (location URL). */ public void setDefaultSchemaLocation( String location ) { /* // default place to add list += "http://www.griphyn.org/working_groups/VDS/vdl-1.19.xsd " + "http://www.griphyn.org/working_groups/VDS/vdl-1.19.xsd"; */ // schema location handling try { m_parser.setProperty( "http://apache.org/xml/properties/schema/external-noNamespaceSchemaLocation", location ); } catch ( SAXException se ) { Logging.instance().log( "default", 0, "The SAXParser reported an error: " + se ); } } /** * This function parses a XML source from an InputStream source, and * creates java class instances that correspond to different elements * in the XML source. * * @param reader is a bytestream opened for reading. * @param definitions is a reference to the already known definitions in * the system. The definitions may be empty, but must not be null. * @param overwrite is a flag to indicate the insertion mode. If set to * <code>false</code>, an insert mode is assumed. Violations will be * returned as clashes. With value <code>true</code>, an update mode * is assumed. Old definitions of updates will be returned. * @param dontcare is a flag to minimize memory consumption. Clashes in * insert mode will be signalled with an Exception. Old values in update * mode will be ignored. Effectively, the resulting list is always empty * in dontcare mode. * * @return usually an empty list. If not empty, it contains clashes in * insert, or old definitions in update mode. Please note that each * element is a single Definition, either Transformation or * Derivation. It is not a Definitions object, since multiple old * versions may appear in update mode. Returns null on error! * * @see org.griphyn.vdl.classes.Definitions */ public java.util.List parse( java.io.InputStream reader, Definitions definitions, boolean overwrite, boolean dontcare ) { try { MemoryStorage database = new MemoryStorage( definitions, overwrite, dontcare ); m_contentHandler.setDefinitionHandler(database); m_parser.parse( new InputSource(reader) ); java.util.List result = database.getRejects(); Logging.instance().log( "parser", 1, "Now with " + definitions.getDefinitionCount() + " definitions, and " + result.size() + " rejects" ); return result; } catch (SAXException e) { Logging.instance().log( "default", 0, "SAX Error: " + e ); } catch (IOException e) { Logging.instance().log( "default", 0, "IO Error: " + e ); } return null; } /** * This function parses an XML source (could be a document, a stream, * etc.), and creates java class instances that correspond to * different elements in the XML source. * * @param reader is an XML input source, which may be a character stream, * byte stream, or even an URI. * @param callback is a handler for store callbacks that will take * one complete definition each time one is ready to be processed. * * @return true for successful parsing, false in case of error. * @see org.griphyn.vdl.classes.Definitions */ public boolean parse( InputSource reader, DefinitionHandler callback ) { try { m_contentHandler.setDefinitionHandler(callback); m_parser.parse(reader); return true; } catch (SAXException e) { Logging.instance().log( "default", 0, "SAX Error: " + e ); } catch (IOException e) { Logging.instance().log( "default", 0, "IO Error: " + e ); } return false; } /** * This function parses an XML source (could be a document, a stream, * etc.), and invokes a callback for the top-level element with the * corresponding Java class. Note: The finalizer cannot be called for * Definitions elements. This method should be used for "partial VDLx", * which contains XML for a Transformation or Derivation. * * @param reader is an XML input source, which may be a character stream, * byte stream, or even an URI. * @param callback is a handler for store callbacks that will take * one complete definition. * * @return true for successful parsing, false in case of error. * @see org.griphyn.vdl.classes.Definitions */ public boolean parse( InputSource reader, FinalizerHandler callback ) { try { m_contentHandler.setFinalizerHandler(callback); m_parser.parse(reader); return true; } catch (SAXException e) { Logging.instance().log( "default", 0, "SAX Error: " + e ); } catch (IOException e) { Logging.instance().log( "default", 0, "IO Error: " + e ); } return false; } // public Definitions parse(String xmlURI); // public Definitions parse(InputStream stream); // public Definitions parse(java.io.Reader reader); }