/**
* Copyright 2007-2008 University Of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.isi.pegasus.planner.client;
import java.lang.System;
import java.io.File;
import java.text.DecimalFormat;
import java.io.IOException;
import org.xml.sax.Attributes;
import org.xml.sax.XMLReader;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import org.apache.xerces.impl.Version;
/**
* This class reads and validates a DAX document. It requires at least
* Xerces-J 2.10.
*
* @author Jens-S. Vöckler
* @version $Id$
*/
public class DAXValidator extends DefaultHandler
{
/**
* Class name of the SAX parser implementation that the constructor asks
* {@link org.xml.sax.helpers.XMLReaderFactory} to instantiate. The default
* parser is the Xerces parser.
*/
protected static final String vendorParserClass =
"org.apache.xerces.parsers.SAXParser";
/**
* URI namespace for DAX schema. The constructor pairs it with the location
* of the local schema file when it sets the external schema location
* property.
*/
public static final String SCHEMA_NAMESPACE =
"http://pegasus.isi.edu/schema/DAX";
/**
* Name of the schema file. The constructor looks for a file of this name
* in the <code>etc</code> directory below <code>$PEGASUS_HOME</code>.
*/
private String m_schemafile = "dax-3.3.xsd";
/**
* Holds the instance of a {@link org.xml.sax.XMLReader} class that does
* the actual parsing.
*/
private XMLReader m_reader;
/**
* Keep the location within the document. The value is whatever was passed
* to {@link #setDocumentLocator(Locator)}.
*/
private Locator m_location;
/**
* How verbose should we be? If true, the content handler callbacks echo
* the parse events to stdout, and the constructor reports its set-up on
* stderr.
*/
protected boolean m_verbose;
/**
* Counts the number of warnings passed to {@link #warning}.
*/
protected int m_warnings;
/**
* Counts the number of errors passed to {@link #error}.
*/
protected int m_errors;
/**
* Counts the number of fatal errors passed to {@link #fatalError}.
*/
protected int m_fatals;
/**
 * Switches a single SAX feature of the reader on or off. Any exception
 * the reader raises is reported on stderr right here and not propagated.
 *
 * @param uri is the URI of the feature to modify
 * @param flag is the new state of the feature
 * @return true, if the reader accepted the feature, false if it raised
 * an exception
 */
private boolean set( String uri, boolean flag )
{
  try {
    m_reader.setFeature( uri, flag );
    return true;
  } catch ( SAXNotRecognizedException e ) {
    System.err.println( "Unrecognized feature " + uri + ": " + e );
  } catch ( SAXNotSupportedException e ) {
    System.err.println( "Unsupported feature " + uri + ": " + e );
  } catch ( SAXException e ) {
    System.err.println( "Parser feature error: " + e );
  }
  // only reached after one of the handlers above has reported a failure
  return false;
}
/**
 * Assigns a value to a single SAX property of the reader. Any exception
 * the reader raises is reported on stderr right here and not propagated.
 *
 * @param uri is the URI of the property to modify
 * @param value is the new value of the property
 * @return true, if the reader accepted the property, false if it raised
 * an exception
 */
private boolean prop( String uri, Object value )
{
  try {
    m_reader.setProperty( uri, value );
    return true;
  } catch ( SAXNotRecognizedException e ) {
    System.err.println( "Unrecognized property " + uri + ": " + e );
  } catch ( SAXNotSupportedException e ) {
    System.err.println( "Unsupported property " + uri + ": " + e );
  } catch ( SAXException e ) {
    System.err.println( "Parser property error: " + e );
  }
  // only reached after one of the handlers above has reported a failure
  return false;
}
/**
 * Creates the Xerces reader, registers this instance as its content and
 * error handler, and switches on schema validation together with almost
 * every feature that can safely be enabled. Features or properties that
 * the reader rejects are reported on stderr, but do not abort the
 * construction.
 *
 * If the environment variable <code>PEGASUS_HOME</code> is set and the
 * schema file in its <code>etc</code> directory is readable, that file
 * takes precedence over the schema location hint inside the document.
 *
 * @param verbose if true, reports the set-up on stderr and makes the
 * content handler callbacks echo the parse events to stdout.
 * @throws Exception if the reader cannot be created, e.g. the
 * {@link org.xml.sax.SAXException} from the reader factory.
 */
public DAXValidator( boolean verbose )
  throws Exception
{
  m_reader = XMLReaderFactory.createXMLReader(vendorParserClass);
  m_reader.setContentHandler(this);
  m_reader.setErrorHandler(this);
  m_verbose = verbose;
  m_warnings = m_errors = m_fatals = 0;
  if ( m_verbose ) {
    System.err.println( "# XMLReader is " +
                        org.apache.xerces.impl.Version.getVersion() );
  }

  //
  // turn on almost all features that we can safely turn on.
  // WARNING: The features below assume Xerces-J 2.10 or greater.
  //

  // Perform namespace processing: prefixes will be stripped off
  // element and attribute names and replaced with the corresponding
  // namespace URIs. By default, the two will simply be concatenated,
  // but the namespace-sep core property allows the application to
  // specify a delimiter string for separating the URI part and the
  // local part.
  set( "http://xml.org/sax/features/namespaces", true );

  // The methods of the org.xml.sax.ext.EntityResolver2 interface will
  // be used when an object implementing this interface is registered
  // with the parser using setEntityResolver.
  //
  // If the disallow DOCTYPE declaration feature is set to true
  // org.xml.sax.ext.EntityResolver2.getExternalSubset() will not be
  // called when the document contains no DOCTYPE declaration.
  set( "http://xml.org/sax/features/use-entity-resolver2", true );

  // Validate the document and report validity errors.
  //
  // If this feature is set to true, the document must specify a
  // grammar. By default, validation will occur against DTD. For more
  // information, please, refer to the FAQ. If this feature is set to
  // false, and document specifies a grammar that grammar might be
  // parsed but no validation of the document contents will be
  // performed.
  set( "http://xml.org/sax/features/validation", true );

  // true: The parser will validate the document only if a grammar is
  // specified.
  // false: Validation is determined by the state of the validation
  // feature.
  set( "http://apache.org/xml/features/validation/dynamic", false );

  // Turn on XML Schema validation by inserting an XML Schema
  // validator into the pipeline.
  //
  // Validation errors will only be reported if the validation feature
  // is set to true. For more information, please, refer to the FAQ.
  //
  // Checking of constraints on a schema grammar which are either
  // time-consuming or memory intensive such as unique particle
  // attribution will only occur if the schema full checking feature
  // is set to true.
  set( "http://apache.org/xml/features/validation/schema", true );

  // Enable full schema grammar constraint checking, including
  // checking which may be time-consuming or memory
  // intensive. Currently, unique particle attribution constraint
  // checking and particle derivation restriction checking are
  // controlled by this option.
  //
  // This feature checks the Schema grammar itself for additional
  // errors that are time-consuming or memory intensive. It does not
  // affect the level of checking performed on document instances that
  // use Schema grammars.
  set( "http://apache.org/xml/features/validation/schema-full-checking", true );

  // Expose via SAX and DOM XML Schema normalized values for
  // attributes and elements.
  //
  // XML Schema normalized values will be exposed only if both schema
  // validation and validation features are set to true.
  set( "http://apache.org/xml/features/validation/schema/normalized-value", true );

  // Send XML Schema element default values via characters().
  //
  // XML Schema default values will be sent via characters() if both
  // schema validation and validation features are set to true.
  set( "http://apache.org/xml/features/validation/schema/element-default", true );

  // Augment Post-Schema-Validation-Infoset.
  //
  // This feature can be turned off to improve parsing performance.
  set( "http://apache.org/xml/features/validation/schema/augment-psvi", true );

  // xsi:type attributes will be ignored until a global element
  // declaration has been found, at which point xsi:type attributes
  // will be processed on the element for which the global element
  // declaration was found as well as its descendants.
  set( "http://apache.org/xml/features/validation/schema/ignore-xsi-type-until-elemdecl", true );

  // Enable generation of synthetic annotations. A synthetic
  // annotation will be generated when a schema component has
  // non-schema attributes but no child annotation.
  set( "http://apache.org/xml/features/generate-synthetic-annotations", true );

  // Schema annotations will be laxly validated against available
  // schema components.
  set( "http://apache.org/xml/features/validate-annotations", true );

  // All schema location hints will be used to locate the components
  // for a given target namespace.
  set( "http://apache.org/xml/features/honour-all-schemaLocations", true );

  // Include external general entities.
  set( "http://xml.org/sax/features/external-general-entities", true );

  // Include external parameter entities and the external DTD subset.
  set( "http://xml.org/sax/features/external-parameter-entities", true );

  // Construct an optimal representation for DTD content models to
  // significantly reduce the likelihood a StackOverflowError will
  // occur when large content models are processed.
  //
  // Enabling this feature may cost your application some performance
  // when DTDs are processed so it is recommended that it only be
  // turned on when necessary.
  set( "http://apache.org/xml/features/validation/balance-syntax-trees", true );

  // Enable checking of ID/IDREF constraints.
  //
  // This feature only applies to schema validation.
  set( "http://apache.org/xml/features/validation/id-idref-checking", true );

  // Enable identity constraint checking.
  set( "http://apache.org/xml/features/validation/identity-constraint-checking", true );

  // Check that each value of type ENTITY matches the name of an
  // unparsed entity declared in the DTD.
  //
  // This feature only applies to schema validation.
  set( "http://apache.org/xml/features/validation/unparsed-entity-checking", true );

  // Report a warning when a duplicate attribute is re-declared.
  set( "http://apache.org/xml/features/validation/warn-on-duplicate-attdef", true );

  // Report a warning if an element referenced in a content model is
  // not declared.
  set( "http://apache.org/xml/features/validation/warn-on-undeclared-elemdef", true );

  // Report a warning for duplicate entity declaration.
  set( "http://apache.org/xml/features/warn-on-duplicate-entitydef", true );

  // Do not allow Java encoding names in XMLDecl and TextDecl line.
  //
  // A true value for this feature allows the encoding of the file to
  // be specified as a Java encoding name as well as the standard ISO
  // encoding name. Be aware that other parsers may not be able to use
  // Java encoding names. If this feature is set to false, an error
  // will be generated if Java encoding names are used.
  set( "http://apache.org/xml/features/allow-java-encodings", false );

  // Attempt to continue parsing after a fatal error.
  //
  // The behavior of the parser when this feature is set to true is
  // undetermined! Therefore use this feature with extreme caution
  // because the parser may get stuck in an infinite loop or worse.
  //
  // NOTE(review): together with disallow-doctype-decl below, a DOCTYPE
  // is reported as fatal but parsing presumably carries on, while
  // external entities stay enabled above -- confirm that this is
  // acceptable for untrusted input.
  set( "http://apache.org/xml/features/continue-after-fatal-error", true );

  // Load the DTD and use it to add default attributes and set
  // attribute types when parsing.
  //
  // This feature is always on when validation is on.
  set( "http://apache.org/xml/features/nonvalidating/load-dtd-grammar", true );

  // Load the external DTD.
  //
  // This feature is always on when validation is on.
  set( "http://apache.org/xml/features/nonvalidating/load-external-dtd", true );

  // Notifies the handler of character reference boundaries in the
  // document via the start/endEntity callbacks.
  set( "http://apache.org/xml/features/scanner/notify-char-refs", false );

  // Notifies the handler of built-in entity boundaries (e.g. &amp;) in
  // the document via the start/endEntity callbacks.
  set( "http://apache.org/xml/features/scanner/notify-builtin-refs", false );

  // A fatal error is thrown if the incoming document contains a
  // DOCTYPE declaration.
  set( "http://apache.org/xml/features/disallow-doctype-decl", true );

  // Requires that a URI has to be provided where a URI is expected.
  //
  // It's recommended to set this feature to true if you want your
  // application/documents to be truly portable across different XML
  // processors.
  set( "http://apache.org/xml/features/standard-uri-conformant", true );

  // Report the original prefixed names and attributes used for
  // namespace declarations.
  set( "http://xml.org/sax/features/namespace-prefixes", true );

  // All element names, prefixes, attribute names, namespace URIs, and
  // local names are internalized using the
  // java.lang.String#intern(String):String method.
  set( "http://xml.org/sax/features/string-interning", true );

  // Report the beginning and end of parameter entities to a
  // registered LexicalHandler.
  set( "http://xml.org/sax/features/lexical-handler/parameter-entities", true );

  // The following features are not touched, and thus remain at the
  // defaults of the parser:
  //
  // set( "http://apache.org/xml/features/xinclude", true );
  // set( "http://apache.org/xml/features/xinclude/fixup-base-uris", true );
  // set( "http://apache.org/xml/features/xinclude/fixup-language", true );
  //
  // set( "http://xml.org/sax/features/is-standalone", true );
  // set( "http://xml.org/sax/features/unicode-normalization-checking", true );
  // set( "http://xml.org/sax/features/use-attributes2", true );
  // set( "http://xml.org/sax/features/use-locator2", true );

  // The system identifiers passed to the notationDecl,
  // unparsedEntityDecl, and externalEntityDecl events will be
  // absolutized relative to their base URIs before reporting.
  //
  // This feature does not apply to EntityResolver.resolveEntity(),
  // which is not used to report declarations, or to
  // LexicalHandler.startDTD(), which already provides the
  // non-absolutized URI.
  set( "http://xml.org/sax/features/resolve-dtd-uris", true );

  // true: When the namespace-prefixes feature is set to true, namespace
  // declaration attributes will be reported as being in the
  // http://www.w3.org/2000/xmlns/ namespace.
  // false: Namespace declaration attributes are reported as having no
  // namespace.
  set( "http://xml.org/sax/features/xmlns-uris", true );

  // Prefer the schema file from the local installation, if there is one.
  String schemaLocation = null;
  String pegasus_home = System.getenv("PEGASUS_HOME");
  if ( pegasus_home != null ) {
    File sl = new File( new File( pegasus_home, "etc" ), m_schemafile );
    if ( sl.canRead() ) {
      // The property below takes a whitespace-separated list of
      // namespace and URI pairs, and strict URI conformance is switched
      // on above. A raw path containing blanks or platform-specific
      // separators would corrupt the list, hence pass a file: URI.
      schemaLocation = sl.toURI().toString();
    } else {
      System.err.println( "Warning: Unable to read " + sl );
    }
  }

  // The XML Schema Recommendation explicitly states that the
  // inclusion of schemaLocation/noNamespaceSchemaLocation attributes
  // is only a hint; it does not mandate that these attributes must be
  // used to locate schemas. Similar situation happens to <import>
  // element in schema documents. This property allows the user to
  // specify a list of schemas to use. If the targetNamespace of a
  // schema (specified using this property) matches the
  // targetNamespace of a schema occurring in the instance document in
  // schemaLocation attribute, or if the targetNamespace matches the
  // namespace attribute of <import> element, the schema specified by
  // the user using this property will be used (i.e., the
  // schemaLocation attribute in the instance document or on the
  // <import> element will be effectively ignored).
  //
  // The syntax is the same as for schemaLocation attributes in
  // instance documents: e.g, "http://www.example.com
  // file_name.xsd". The user can specify more than one XML Schema in
  // the list.
  if ( schemaLocation != null ) {
    prop( "http://apache.org/xml/properties/schema/external-schemaLocation",
          SCHEMA_NAMESPACE + " " + schemaLocation );
    if ( m_verbose ) System.err.println( "# will use " + schemaLocation );
  } else {
    if ( m_verbose ) System.err.println( "# will use document schema hint" );
  }

  // The size of the input buffer in the readers. This determines how
  // many bytes to read for each chunk. Some tests indicate that a
  // bigger buffer size can improve the parsing performance for
  // relatively large files. The default buffer size in Xerces is
  // 2K. This would give a good performance for small documents (less
  // than 10K). For documents larger than 10K, specifying the buffer
  // size to 4K or 8K will significantly improve the performance. But
  // it's not recommended to set it to a value larger than 16K. For
  // really tiny documents (1K, for example), you can also set it to a
  // value less than 2K, to get the best performance.
  prop( "http://apache.org/xml/properties/input-buffer-size", 16384 );
}
// --- ErrorHandler ---
/**
 * Counts a warning from the parser, and reports it with the current
 * document position on stderr.
 *
 * @param ex is the warning as reported by the parser.
 * @throws SAXException is declared by the interface, but not thrown here.
 */
public void warning(SAXParseException ex)
  throws SAXException
{
  ++m_warnings;
  final String position = full_where();
  System.err.println("WARNING in " + position + ": " + ex.getMessage());
}
/**
 * Counts a recoverable error from the parser, and reports it with the
 * current document position on stderr.
 *
 * @param ex is the error as reported by the parser.
 * @throws SAXException is declared by the interface, but not thrown here.
 */
public void error(SAXParseException ex)
  throws SAXException
{
  ++m_errors;
  final String position = full_where();
  System.err.println("ERROR in " + position + ": " + ex.getMessage());
}
/**
 * Counts a fatal error from the parser, and reports it with the current
 * document position on stderr. The exception is not re-thrown.
 *
 * @param ex is the fatal error as reported by the parser.
 * @throws SAXException is declared by the interface, but not thrown here.
 */
public void fatalError(SAXParseException ex)
  throws SAXException
{
  ++m_fatals;
  final String position = full_where();
  System.err.println("FATAL in " + position + ": " + ex.getMessage());
}
// --- ContentHandler ---
/**
 * Remembers the locator, so that later messages can show the line and
 * column of the document.
 *
 * @param locator is the locator to remember.
 */
public void setDocumentLocator( Locator locator )
{
  m_location = locator;
}
/**
 * Describes the current document position in the long format used by
 * the warning and error messages.
 *
 * SAX parsers are not required to supply a locator. Without one, the
 * position is shown as -1, which is the value a locator itself uses for
 * an unavailable line or column, instead of failing with a
 * NullPointerException while an error is being reported.
 *
 * @return the position as "line L, col C".
 */
private String full_where()
{
  int line = -1;
  int column = -1;
  if ( m_location != null ) {
    line = m_location.getLineNumber();
    column = m_location.getColumnNumber();
  }
  return ( "line " + line + ", col " + column );
}
/**
 * Describes the current document position in the short format used as
 * prefix of the verbose parse event output.
 *
 * SAX parsers are not required to supply a locator. Without one, the
 * position is shown as -1, which is the value a locator itself uses for
 * an unavailable line or column, instead of failing with a
 * NullPointerException.
 *
 * @return the position as "L:C".
 */
private String where()
{
  int line = -1;
  int column = -1;
  if ( m_location != null ) {
    line = m_location.getLineNumber();
    column = m_location.getColumnNumber();
  }
  return ( line + ":" + column );
}
/**
 * Marks the start of the document on stdout when in verbose mode.
 *
 * @throws SAXException is declared by the interface, but not thrown here.
 */
public void startDocument()
  throws SAXException
{
  if ( ! m_verbose ) {
    return;
  }
  System.out.println( where() + " *** start of document ***" );
}
/**
 * Marks the end of the document on stdout when in verbose mode.
 */
public void endDocument()
{
  if ( ! m_verbose ) {
    return;
  }
  System.out.println( where() + " *** end of document ***" );
}
/**
 * Echoes an opening tag with all of its attributes, prefixed by the
 * current document position, to stdout when in verbose mode.
 *
 * @param nsURI is the namespace URI of the element, unused here.
 * @param localName is the local name of the element, unused here.
 * @param qName is the qualified name of the element that is shown.
 * @param attrs are the attributes of the element.
 * @throws SAXException is declared by the interface, but not thrown here.
 */
public void startElement( String nsURI, String localName,
                          String qName, Attributes attrs )
  throws SAXException
{
  if ( ! m_verbose ) {
    return;
  }

  System.out.print( where() + " <" + qName );
  for ( int idx = 0, count = attrs.getLength(); idx < count; ++idx ) {
    String name = attrs.getQName(idx);
    String value = attrs.getValue(idx);
    System.out.print( " " + name + "=\"" + value + "\"" );
  }
  System.out.println(">");
}
/**
 * Echoes a closing tag, prefixed by the current document position, to
 * stdout when in verbose mode.
 *
 * @param nsURI is the namespace URI of the element, unused here.
 * @param localName is the local name of the element, unused here.
 * @param qName is the qualified name of the element that is shown.
 * @throws SAXException is declared by the interface, but not thrown here.
 */
public void endElement( String nsURI, String localName, String qName )
  throws SAXException
{
  if ( ! m_verbose ) {
    return;
  }
  System.out.println( where() + " </" + qName + ">" );
}
/**
 * Echoes character data in double quotes, prefixed by the current
 * document position, to stdout when in verbose mode. The text is
 * trimmed first, and chunks that are empty after trimming are skipped.
 *
 * @param ch is the buffer that contains the characters.
 * @param start is the index of the first character in the buffer.
 * @param length is the number of characters to use.
 * @throws SAXException is declared by the interface, but not thrown here.
 */
public void characters( char[] ch, int start, int length )
  throws SAXException
{
  if ( ! m_verbose ) {
    return;
  }

  String text = new String( ch, start, length ).trim();
  if ( text.length() == 0 ) {
    return;
  }
  System.out.println( where() + " \"" + text + "\"" );
}
/**
 * Does nothing: ignorable whitespace is not echoed, not even in
 * verbose mode.
 *
 * @param ch is the buffer that contains the whitespace.
 * @param start is the index of the first character in the buffer.
 * @param length is the number of characters to use.
 * @throws SAXException is declared by the interface, but not thrown here.
 */
public void ignorableWhitespace( char[] ch, int start, int length )
  throws SAXException
{
  // intentionally empty
}
/**
 * Parses and validates a single document. Problems found in the
 * document are reported through the error handler callbacks of this
 * class.
 *
 * The reader expects a system identifier, and the constructor switches
 * on strict URI conformance. An argument that names an existing local
 * file is therefore converted into a file: URI first, so that blanks or
 * platform-specific separators in the path are acceptable. Anything
 * else is handed to the reader unchanged.
 *
 * @param filename is the path to a local file, or a system identifier
 * (URI) of the document.
 * @throws Exception whatever the reader throws while parsing, e.g. an
 * IOException if the document cannot be read.
 */
public void parse( String filename )
  throws Exception
{
  String systemId = filename;
  if ( filename != null ) {
    File local = new File( filename );
    if ( local.isFile() ) {
      systemId = local.toURI().toString();
    }
  }
  m_reader.parse( systemId );
}
/**
 * Prints an empty line followed by a one-line summary of the number of
 * warnings, errors and fatal errors to stdout.
 *
 * @return true, if at least one error or fatal error was counted, and
 * we should thus transmit an error exit code.
 */
public boolean statistics()
{
  String summary = m_warnings + " warnings, "
                 + m_errors + " errors, and "
                 + m_fatals + " fatal errors detected.";
  System.out.println();
  System.out.println( summary );

  boolean clean = ( m_errors <= 0 && m_fatals <= 0 );
  return ! clean;
}
// --- main ---
/**
 * Validates the document named by the first command-line argument, and
 * prints the statistics. The mere presence of a second argument, of any
 * value, switches on the verbose mode.
 *
 * The JVM exits with status 1 if no argument was given, if reading or
 * parsing threw an IOException or SAXException, or if any error or
 * fatal error was counted.
 *
 * @param args are the command-line arguments.
 * @throws Exception for any other failure, e.g. while setting up the
 * validator.
 */
public static void main( String[] args )
  throws Exception
{
  boolean fail = true;
  if ( args.length > 0 ) {
    try {
      DAXValidator validator = new DAXValidator( args.length > 1 );
      validator.parse( args[0] );
      fail = validator.statistics();
    } catch ( IOException ioe ) {
      System.err.println( ioe );
    } catch ( SAXException spe ) {
      System.err.println( spe );
    }
  } else {
    // tell the user why we are about to fail instead of exiting silently
    System.err.println( "Usage: java " + DAXValidator.class.getName() +
                        " daxfile [verbose]" );
  }
  if ( fail ) System.exit(1);
}
}