/*
* GeoTools - The Open Source Java GIS Toolkit
* http://geotools.org
*
* (C) 2002-2008, Open Source Geospatial Foundation (OSGeo)
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation;
* version 2.1 of the License.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*/
package org.geotools.xml;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Iterator;
import java.util.List;
import javax.xml.namespace.QName;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.stream.StreamResult;
import org.apache.xerces.parsers.SAXParser;
import org.eclipse.xsd.XSDSchema;
import org.geotools.xml.impl.ParserHandler;
import org.geotools.xs.XS;
import org.w3c.dom.Document;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.NamespaceSupport;
/**
 * GeoTools XML parser.
 * <p>
 * This parser uses a sax based driver to parse an input stream into a single object. For streaming
 * look at {@link StreamingParser}. If the source document being parsed has already been parsed into
 * a {@link Document} the {@link DOMParser} class may be used.
 * </p>
 * <h3>Schema Resolution</h3>
 * <p>
 * See {@link org.geotools.xml.Configuration} javadocs for instructions on how
 * to customize schema resolution. This is often desirable in the case that
 * the instance document being parsed contains invalid uri's in schema imports
 * and includes.
 * </p>
 * @author Justin Deoliveira, The Open Planning Project
 *
 *
 * @source $URL$
 */
public class Parser {
    /** sax handler which maintains the element stack; created once in the constructor */
    private final ParserHandler handler;

    /** the sax parser driving the handler, (re)created on every call to {@link #parse(InputSource)} */
    private SAXParser parser;

    /** the instance document being parsed, only set by the deprecated stream/uri constructors */
    private InputStream input;

    /**
     * Creates a new instance of the parser.
     *
     * @param configuration The parser configuration, bindings and context,
     * must never be <code>null</code>.
     *
     * @throws NullPointerException If <code>configuration</code> is <code>null</code>.
     */
    public Parser(Configuration configuration) {
        if (configuration == null) {
            throw new NullPointerException("configuration");
        }

        handler = new ParserHandler(configuration);
        // the handler must exist before the configuration gets a chance to customize this parser
        configuration.setupParser(this);
    }

    /**
     * Creates a new instance of the parser.
     *
     * @param configuration Object representing the configuration of the parser.
     * @param input A uri representing the instance document to be parsed, it is resolved
     * to a local file.
     *
     * @throws IOException If the file denoted by <code>input</code> can not be opened.
     * @throws URISyntaxException If <code>input</code> is not a valid uri.
     *
     * @deprecated use {@link #Parser(Configuration)} and {@link #parse(InputStream)}.
     */
    public Parser(Configuration configuration, String input)
        throws IOException, URISyntaxException {
        this(configuration, new BufferedInputStream(new FileInputStream(new File(new URI(input)))));
    }

    /**
     * Creates a new instance of the parser.
     *
     * @param configuration Object representing the configuration of the parser.
     * @param input The stream representing the instance document to be parsed.
     *
     * @deprecated use {@link #Parser(Configuration)} and {@link #parse(InputStream)}.
     */
    public Parser(Configuration configuration, InputStream input) {
        this(configuration);
        this.input = input;
    }

    /**
     * @return The underlying parser handler.
     */
    ParserHandler getParserHandler() {
        return handler;
    }

    /**
     * Signals the parser to parse the entire instance document. The object
     * returned from the parse is the object which has been bound to the root
     * element of the document. This method should only be called once for
     * a single instance document.
     * <p>
     * The document parsed is the stream handed to one of the deprecated constructors.
     * </p>
     *
     * @return The object representation of the root element of the document.
     *
     * @throws IOException
     * @throws SAXException
     * @throws ParserConfigurationException
     *
     * @deprecated use {@link #parse(InputStream)}
     */
    public Object parse() throws IOException, SAXException, ParserConfigurationException {
        return parse(input);
    }

    /**
     * Parses an instance document defined by an input stream.
     * <p>
     * The object returned from the parse is the object which has been bound to the root
     * element of the document. This method should only be called once for a single instance document.
     * </p>
     *
     * @param input The stream of the instance document.
     *
     * @return The object representation of the root element of the document.
     *
     * @throws IOException
     * @throws SAXException
     * @throws ParserConfigurationException
     */
    public Object parse(InputStream input)
        throws IOException, SAXException, ParserConfigurationException {
        return parse(new InputSource(input));
    }

    /**
     * Parses an instance document defined by a reader.
     * <p>
     * The object returned from the parse is the object which has been bound to the root
     * element of the document. This method should only be called once for a single instance document.
     * </p>
     *
     * @param reader The reader of the instance document.
     *
     * @return The object representation of the root element of the document.
     *
     * @throws IOException
     * @throws SAXException
     * @throws ParserConfigurationException
     */
    public Object parse(Reader reader)
        throws IOException, SAXException, ParserConfigurationException {
        return parse(new InputSource(reader));
    }

    /**
     * Parses an instance document defined by a transformer source.
     * <p>
     * Note: Currently this method reads the entire source into memory in order to validate
     * it. If large documents must be parsed one of {@link #parse(InputStream)},
     * {@link #parse(Reader)} or {@link #parse(InputSource)} should be used instead, as those
     * hand the input directly to the sax parser.
     * </p>
     * @param source The source of the instance document.
     *
     * @return The object representation of the root element of the document.
     *
     * @throws IOException
     * @throws SAXException
     * @throws ParserConfigurationException
     * @throws TransformerException
     *
     * @since 2.6
     */
    public Object parse(Source source)
        throws IOException, SAXException, ParserConfigurationException, TransformerException {
        //TODO: use SAXResult to stream, need to figure out how to enable
        // validation with transformer api
        //SAXResult result = new SAXResult( handler );

        // serialize the source into an in-memory buffer (identity transform) ...
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer tx = tf.newTransformer();
        tx.transform(source, new StreamResult(buffer));

        // ... and parse the buffered bytes as a regular stream
        return parse(new ByteArrayInputStream(buffer.toByteArray()));
    }

    /**
     * Parses an instance document defined by a sax input source.
     * <p>
     * The object returned from the parse is the object which has been bound to the root
     * element of the document. This method should only be called once for a single instance document.
     * </p>
     *
     * @param source The sax input source of the instance document.
     *
     * @return The object representation of the root element of the document.
     *
     * @throws IOException
     * @throws SAXException
     * @throws ParserConfigurationException
     */
    public Object parse(InputSource source)
        throws IOException, SAXException, ParserConfigurationException {
        // a fresh sax parser is configured for every parse, honouring the current validating flag
        parser = parser();
        parser.setContentHandler(handler);
        parser.setErrorHandler(handler);

        parser.parse(source);

        return handler.getValue();
    }

    /**
     * Sets the strict parsing flag.
     * <p>
     * When set to <code>true</code>, this will cause the parser to operate in
     * a strict mode, which means that xml being parsed must be exactly correct
     * with respect to the schema it references.
     * </p>
     * <p>
     * Some examples of cases in which the parser will throw an exception while
     * operating in strict mode:
     * <ul>
     *  <li>no 'schemaLocation' specified, or specified incorrectly
     *  <li>element found which is not declared in the schema
     * </ul>
     * </p>
     * @param strict The strict flag.
     */
    public void setStrict(boolean strict) {
        handler.setStrict(strict);
    }

    /**
     * Sets the flag controlling whether the parser should validate or not.
     *
     * @param validating Validation flag, <code>true</code> to validate, otherwise <code>false</code>
     */
    public void setValidating(boolean validating) {
        handler.setValidating(validating);
    }

    /**
     * @return Flag determining if the parser is validating or not.
     */
    public boolean isValidating() {
        return handler.isValidating();
    }

    /**
     * Sets the flag which controls how the parser handles validation errors.
     * <p>
     * When this flag is set, the parser will throw an exception when it encounters
     * a validation error. Otherwise the error will be stored, retrievable from
     * {@link #getValidationErrors()}.
     * </p>
     * <p>
     * The default behavior is to set this flag to <code>false</code>. So client
     * code should explicitly set this flag if it is desired that the exception
     * be thrown when the validation error occurs.
     * </p>
     * @param fail failure flag, <code>true</code> to fail, otherwise <code>false</code>
     */
    public void setFailOnValidationError(boolean fail) {
        handler.setFailOnValidationError(fail);
    }

    /**
     * @return The flag determining how the parser deals with validation errors.
     */
    public boolean isFailOnValidationError() {
        return handler.isFailOnValidationError();
    }

    /**
     * Sets flag that controls whether the parser will process mixed content in a way
     * that preserves order of child elements and text.
     *
     * @param handleMixedContent <code>true</code> to preserve the order of mixed content,
     * otherwise <code>false</code>
     *
     * @since 2.7
     */
    public void setHandleMixedContent(boolean handleMixedContent) {
        handler.setHandleMixedContent(handleMixedContent);
    }

    /**
     * Flag that controls whether the parser will process mixed content in a way
     * that preserves order of child elements and text.
     * <p>
     * By default the parser will simply concatenate blindly all child text and not preserve order
     * with respect to other elements within a mixed content type.
     * </p>
     *
     * @return The mixed content flag.
     *
     * @since 2.7
     */
    public boolean isHandleMixedContent() {
        return handler.isHandleMixedContent();
    }

    /**
     * Returns a list of any validation errors that occurred while parsing.
     *
     * @return A list of errors, or an empty list if none.
     */
    public List getValidationErrors() {
        return handler.getValidationErrors();
    }

    /**
     * Validates an instance document defined by an input stream.
     * <p>
     * Clients should call {@link #getValidationErrors()} after this method to
     * retrieve any validation errors that occurred. Clients do not need to call
     * {@link #setValidating(boolean)} when using this method to validate.
     * </p>
     * <p>
     * This method does not do any of the work done by {@link #parse(InputSource)}, it
     * only validates.
     * </p>
     *
     * @param in The stream of the instance document.
     *
     * @throws IOException
     * @throws SAXException
     * @throws ParserConfigurationException
     */
    public void validate(InputStream in)
        throws IOException, SAXException, ParserConfigurationException {
        validate(new InputSource(in));
    }

    /**
     * Validates an instance document defined by a reader.
     * <p>
     * Clients should call {@link #getValidationErrors()} after this method to
     * retrieve any validation errors that occurred. Clients do not need to call
     * {@link #setValidating(boolean)} when using this method to validate.
     * </p>
     * <p>
     * This method does not do any of the work done by {@link #parse(InputSource)}, it
     * only validates.
     * </p>
     *
     * @param reader The reader of the instance document.
     *
     * @throws IOException
     * @throws SAXException
     * @throws ParserConfigurationException
     */
    public void validate(Reader reader)
        throws IOException, SAXException, ParserConfigurationException {
        validate(new InputSource(reader));
    }

    /**
     * Validates an instance document defined by an input source.
     * <p>
     * Clients should call {@link #getValidationErrors()} after this method to
     * retrieve any validation errors that occurred. Clients do not need to call
     * {@link #setValidating(boolean)} when using this method to validate.
     * </p>
     * <p>
     * This method does not do any of the work done by {@link #parse(InputSource)}, it
     * only validates.
     * </p>
     *
     * @param source The sax input source of the instance document.
     *
     * @throws IOException
     * @throws SAXException
     * @throws ParserConfigurationException
     */
    public void validate(InputSource source)
        throws IOException, SAXException, ParserConfigurationException {
        // validation is forced on regardless of the validating flag; the sax parser is kept
        // local so that the parser used by parse(InputSource) is left untouched
        SAXParser validatingParser = parser(true);
        validatingParser.setContentHandler(handler.getValidator());
        validatingParser.setErrorHandler(handler.getValidator());
        validatingParser.parse(source);
    }

    /**
     * Returns the schema objects referenced by the instance document being
     * parsed, as reported by the underlying parser handler. This method can
     * only be called after a successful parse has begun.
     *
     * @return The schema objects used to parse the document; presumably null if parsing
     * has not commenced (depends on the handler, which is not visible here).
     */
    public XSDSchema[] getSchemas() {
        // the handler is created in the constructor and never replaced, so it is never null here
        return handler.getSchemas();
    }

    /**
     * Returns the namespace mappings maintained by the parser.
     * <p>
     * Clients may register additional namespace mappings. This is useful when
     * an application wishes to provide some "default" namespace mappings.
     * </p>
     * <p>
     * Clients should register namespace mappings in the current "context", ie
     * do not call {@link NamespaceSupport#pushContext()}. Example:
     * <pre>
     * Parser parser = new Parser( ... );
     * parser.getNamespaces().declarePrefix( "foo", "http://www.foo.com" );
     * ...
     * </pre>
     * </p>
     *
     * @return The namespace support containing prefix to uri mappings.
     * @since 2.4
     */
    public NamespaceSupport getNamespaces() {
        return handler.getNamespaceSupport();
    }

    /**
     * Creates a sax parser which validates only if {@link #isValidating()} is set.
     *
     * @return A newly configured sax parser.
     *
     * @throws ParserConfigurationException
     * @throws SAXException If a feature or property can not be set on the parser.
     */
    protected SAXParser parser() throws ParserConfigurationException, SAXException {
        return parser(isValidating());
    }

    /**
     * Creates and configures a new xerces sax parser.
     * <p>
     * The parser is namespace aware and has its external schema location property set to the
     * namespace / schema location pairs of the configuration and all of its dependencies.
     * </p>
     *
     * @param validate <code>true</code> to turn on schema validation features,
     * otherwise <code>false</code>
     *
     * @return A newly configured sax parser.
     *
     * @throws ParserConfigurationException
     * @throws SAXException If a feature or property can not be set on the parser.
     */
    protected SAXParser parser(boolean validate) throws ParserConfigurationException, SAXException {
        //JD: we use xerces directly here because jaxp does not seem to allow us to
        // override all the namespaces to validate against
        SAXParser parser = new SAXParser();

        // NOTE(review): no entity resolver is installed and external entity / DTD processing
        // is not restricted here; if untrusted documents are parsed this may need hardening
        // -- confirm whether Configuration.setupParser or the callers take care of it.

        //set the appropriate features
        parser.setFeature("http://xml.org/sax/features/namespaces", true);

        if (validate) {
            parser.setFeature("http://xml.org/sax/features/validation", true);
            parser.setFeature("http://apache.org/xml/features/validation/schema", true);
            parser.setFeature("http://apache.org/xml/features/validation/schema-full-checking", true);
        }

        //set the schema sources of this configuration, and all dependent ones,
        // as a space separated list of "namespace location" pairs
        StringBuffer schemaLocation = new StringBuffer();

        for (Iterator d = handler.getConfiguration().allDependencies().iterator(); d.hasNext();) {
            Configuration dependency = (Configuration) d.next();

            //ignore xs namespace
            if (XS.NAMESPACE.equals(dependency.getNamespaceURI())) {
                continue;
            }

            //separate entries by space
            if (schemaLocation.length() > 0) {
                schemaLocation.append(" ");
            }

            //add the entry
            schemaLocation.append(dependency.getNamespaceURI());
            schemaLocation.append(" ");
            schemaLocation.append(dependency.getSchemaFileURL());
        }

        //set the property to map namespaces to schema locations
        parser.setProperty("http://apache.org/xml/properties/schema/external-schemaLocation",
            schemaLocation.toString());

        return parser;
    }

    /**
     * Properties used to control the parser behaviour.
     * <p>
     * Parser properties are set in the configuration of a parser.
     * <pre>
     * Configuration configuration = new ....
     * configuration.getProperties().add( Parser.Properties.PARSE_UNKNOWN_ELEMENTS );
     * configuration.getProperties().add( Parser.Properties.PARSE_UNKNOWN_ATTRIBUTES );
     * </pre>
     * </p>
     * @author Justin Deoliveira, The Open Planning Project
     * @deprecated
     */
    public static interface Properties {
        /**
         * If set, the parser will continue to parse when it finds an element
         * and cannot determine its type.
         *
         * @deprecated use {@link Parser#setStrict(boolean)}
         */
        QName PARSE_UNKNOWN_ELEMENTS = new QName("http://www.geotools.org", "parseUnknownElements");

        /**
         * If set, the parser will continue to parse when it finds an attribute
         * and cannot determine its type.
         *
         * @deprecated use {@link Parser#setStrict(boolean)}
         */
        QName PARSE_UNKNOWN_ATTRIBUTES = new QName("http://www.geotools.org",
                "parseUnknownAttributes");

        /**
         * If set, the parser will ignore the schemaLocation attribute of an
         * instance document.
         *
         * @deprecated use {@link Parser#setStrict(boolean)}
         */
        QName IGNORE_SCHEMA_LOCATION = new QName("http://www.geotools.org", "ignoreSchemaLocation");
    }
}