/**
* Copyright 2007-2008 University Of Southern California
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package edu.isi.pegasus.planner.parser;
import edu.isi.pegasus.common.logging.LogManagerFactory;
import edu.isi.pegasus.planner.classes.PegasusBag;
import edu.isi.pegasus.common.logging.LogManager;
import edu.isi.pegasus.planner.common.PegasusProperties;
import org.xml.sax.Attributes;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;
import java.io.File;
import java.io.FileNotFoundException;
/**
* This is the base class which all the xml parsing classes extend.
* It initializes the xml parser namely Xerces, sets it's various features
* like turning on validation against schema etc, plus the namespace resolution.
*
* @author Karan Vahi
* @author Gaurang Mehta
*
* @version $Revision$
*
*/
public abstract class Parser extends DefaultHandler{
/**
* Default parser name. Using Xerces at present.
*/
protected final String DEFAULT_PARSER_NAME = "org.apache.xerces.parsers.SAXParser";
/**
* Locator object to determine on which line in the xml has the error
* occured.
*/
protected Locator mLocator;
/**
* Holds the text in an element (text between start and final tags if any).
* Used in case of elements of mixed type.
*/
protected StringBuffer mTextContent ;
/**
* The LogManager object which logs the Pegasus messages.
*/
protected LogManager mLogger;
/**
* The String which contains the messages to be logged.
*/
protected String mLogMsg;
/**
* The object which is used to parse the dax. This reads the XML document
* and sends it to the event handlers.
*/
protected XMLReader mParser = null;
/**
* The object holding all the properties pertaining to Pegasus.
*/
protected PegasusProperties mProps;
/**
* A String that holds the contents of data passed as text. The string
* should only be trimmed when the appropriate end tag of the element is
* invoked. At this point, a whitespace is added if there are whitespaces in
* at the ends.
*/
protected String mTextString;
/**
* Boolean flag to ensure that two adjacent filenames are separated by a
* whitespace.
*/
protected boolean mAdjFName;
/**
* Intialises the parser. Sets the various features. However the parsing is
* done in the implementing class, by call mParser.parse(filename).
*
* @param bag the bag of objects that is useful for initialization.
*/
public Parser( PegasusBag bag ) {
mTextContent = new StringBuffer();
mLogMsg = "";
mLogger = bag.getLogger();
mProps = bag.getPegasusProperties();
mTextString = "";
mAdjFName = false;
mTextContent.setLength(0);
createParserInstance();
}
/**
* Intialises the parser. Sets the various features. However the parsing is
* done in the implementing class, by call mParser.parse(filename).
*
* @param properties the properties passed at runtime.
*/
public Parser( PegasusProperties properties ) {
mTextContent = new StringBuffer();
mLogMsg = "";
mLogger = LogManagerFactory.loadSingletonInstance( properties );
mProps = properties;
mTextString = "";
mAdjFName = false;
mTextContent.setLength(0);
createParserInstance();
}
/**
* An empty implementation is provided by DefaultHandler of ContentHandler.
* This method receives the notification from the sacks parser when start
* tag of an element comes. Any parser class must implement this method.
*/
public abstract void startElement(String uri, String local, String raw,
Attributes attrs) throws SAXException ;
/**
* An empty implementation is provided by DefaultHandler class. This method
* is called automatically by the Sax parser when the end tag of an element
* comes in the xml file. Any parser class should implement this method
*/
public abstract void endElement(String uri,String localName,String qName) throws SAXException;
/**
* This is called automatically when the end of the XML file is reached.
*/
public abstract void endDocument();
/**
* Start the parser. This starts the parsing of the file by the parser.
*
* @param file the path to the XML file you want to parse.
*/
public abstract void startParser(String file);
/**
* Helps the load database to locate the XML schema, if available.
* Please note that the schema location URL in the instance document
* is only a hint, and may be overriden by the findings of this method.
*
* @return a location pointing to a definition document of the XML
* schema that can read VDLx. Result may be null, if such a document
* is unknown or unspecified.
*/
public abstract String getSchemaLocation();
/**
* Returns the XML schema namespace that a document being parsed conforms
* to.
*
* @return the schema namespace
*/
public abstract String getSchemaNamespace( );
/**
* Sets the list of external real locations where the XML schema may be found.
* Since this list can be determined at run-time through properties etc., we
* expect this function to be called between instantiating the parser, and
* using the parser
*
* @param list is a list of strings representing schema locations. The content
* exists in pairs, one of the namespace URI, one of the location URL.
*
*
*/
public void setSchemaLocations( String list ){
/*
// default place to add
list += "http://www.griphyn.org/working_groups/VDS/vdl-1.19.xsd " +
"http://www.griphyn.org/working_groups/VDS/vdl-1.19.xsd";
*/
// schema location handling
try {
mParser.setProperty(
"http://apache.org/xml/properties/schema/external-schemaLocation",
list);
}
catch (SAXException se) {
mLogger.log("The SAXParser reported an error: " + se.getMessage(),
LogManager.ERROR_MESSAGE_LEVEL);
}
}
/**
* This is used to store the character data that is in xml. An implementation
* of the interface for the Sacks parser.
*/
public void characters(char[] chars,int start,int length){
//appending the buffer with chars. We use this way bec sacks parser can
//parse internally the data any way they like
//Very IMPORTANT
String temp = new String(chars,start,length);
/*if(temp.trim().length() > 0){
mTextContent.append(temp);
}*/
temp = this.ignoreWhitespace(temp);
mTextContent.append(temp);
//set the adjacent flag to false
mAdjFName = false;
}
/**
* Our own implementation for ignorable whitespace. A String that holds the
* contents of data passed as text by the underlying parser. The whitespaces
* at the end are replaced by one whitespace.
*
* @param str The string that contains whitespaces.
*
* @return String corresponding to the trimmed version.
*
*/
public String ignoreWhitespace(String str){
return ignoreWhitespace( str, false );
}
/**
* Our own implementation for ignorable whitespace. A String that holds the
* contents of data passed as text by the underlying parser. The whitespaces
* at the end are replaced by one whitespace.
*
* @param str The string that contains whitespaces.
*
* @return String corresponding to the trimmed version.
*
*/
/* public String ignoreWhitespace(String str){
boolean st = false;
boolean end = false;
int length = str.length();
if(length > 0){
//check for whitespace in the
//starting
if(str.charAt(0) == ' ' || str.charAt(0) == '\t' || str.charAt(0) == '\n'){
st = true;
}
//check for whitespace in the end
if(str.length() > 1 &&
(str.charAt(length -1) == ' ' ||
str.charAt(length -1) == '\t' ||
str.charAt(length -1) == '\n')){
end = true;
}
//trim the string and add a single whitespace accordingly
str = str.trim();
str = st == true ? ' ' + str:str;
str = end == true ? str + ' ':str;
}
return str;
}
*/
/**
* Our own implementation for ignorable whitespace. A String that holds the
* contents of data passed as text by the underlying parser. The whitespaces
* at the end are replaced by one whitespace.
*
* @param str The string that contains whitespaces.
*
* @return String corresponding to the trimmed version.
*
*/
public String ignoreWhitespace(String str, boolean preserveLineBreak ){
boolean st = false;
boolean end = false;
int length = str.length();
boolean sN = false;//start with \n ;
boolean eN = false;//end with \n
if(length > 0){
sN = str.charAt(0) == '\n';
eN = str.charAt(length -1) == '\n';
//check for whitespace in the
//starting
if(str.charAt(0) == ' ' || str.charAt(0) == '\t' || str.charAt(0) == '\n'){
st = true;
}
//check for whitespace in the end
if(str.length() > 1 &&
(str.charAt(length -1) == ' ' ||
str.charAt(length -1) == '\t' ||
str.charAt(length -1) == '\n')){
end = true;
}
//trim the string and add a single whitespace accordingly
str = str.trim();
str = st == true ? ' ' + str:str;
str = end == true ? str + ' ':str;
if( preserveLineBreak ){
str = sN ? '\n' + str:str;
str = eN ? str + '\n':str;
}
}
return str;
}
/**
* Overrides the empty implementation provided by Default Handler and sets
* the locator variable for the locator.
*
* @param loc the Locator object which keeps the track as to the line
* numbers of the line being parsed.
*/
public void setDocumentLocator(Locator loc) {
this.mLocator = loc;
}
/**
* Tests whether the file exists or not.
*/
public void testForFile(String file) throws FileNotFoundException{
File f = new File(file);
if (!f.exists()){
mLogMsg = "The file (" + file +" ) specified does not exist";
throw new FileNotFoundException( mLogMsg );
}
}
/**
* Creates an instance of the parser, and sets the various options to it.
*/
private void createParserInstance(){
//creating a parser
try{
mParser = XMLReaderFactory.createXMLReader(DEFAULT_PARSER_NAME);
}
catch(Exception e){
throw new RuntimeException( "Unable to create XMLReader" + e.getMessage() , e );
}
//setting the handlers The class extend DefaultHandler which provides
//for a empty implemetnation of the four handlers
mParser.setContentHandler(this);
mParser.setErrorHandler(new XMLErrorHandler());
try{
//setting the feature that xml should be validated against the
//xml schema specified in it
setParserFeature("http://xml.org/sax/features/validation", true);
setParserFeature("http://apache.org/xml/features/validation/schema", true);
//should be set only for debugging purposes
//setParserFeature("http://apache.org/xml/features/validation/schema-full-checking", true);
setParserFeature("http://apache.org/xml/features/validation/dynamic", true);
setParserFeature("http://apache.org/xml/features/validation/warn-on-duplicate-attdef", true);
//fails with the new xerces
//setParserFeature("http://apache.org/xml/features/validation/warn-on-undeclared-elemdef", true);
setParserFeature("http://apache.org/xml/features/warn-on-duplicate-entitydef", true);
setParserFeature("http://apache.org/xml/features/validation/schema/element-default", true);
}
catch (Exception e){
//if a locator error then
if(mLocator != null){
String message = "Error in " + mLocator.getSystemId() + " at line " +
mLocator.getLineNumber() + " at column " +
mLocator.getColumnNumber() + e.getMessage();
mLogger.log(message,LogManager.ERROR_MESSAGE_LEVEL );
}
mLogger.log(e.getMessage(),LogManager.ERROR_MESSAGE_LEVEL);
}
}
/**
* Sets a parser feature, and fails here enabling us to set all the following
* features.
*
* @param uri is the feature's URI to modify
* @param flag is the new value to set.
* @return true if the feature could be set, else false for
* an exception.
*/
public boolean setParserFeature( String uri, boolean flag){
boolean result = false;
try{
this.mParser.setFeature(uri,flag);
result = true;
}
catch (SAXException se){
mLogger.log("Unable to set parser feature " + uri + " :" +
se.getMessage(),LogManager.ERROR_MESSAGE_LEVEL);
}
return result;
}
}