/*******************************************************************************
* Copyright (c) 2008 Scott Stanchfield.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Based on the ANTLR parser generator by Terence Parr, http://antlr.org
* Ric Klaren <klaren@cs.utwente.nl>
* Scott Stanchfield - Modifications for XML Parsing
*******************************************************************************/
package com.javadude.antxr.scanner;
import java.io.Reader;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.lang.reflect.Modifier;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import com.javadude.antxr.Parser;
import com.javadude.antxr.Token;
import com.javadude.antxr.TokenStream;
import com.javadude.antxr.TokenStreamException;
/**
* A Simple version of an XML token stream that uses the Xerces SAX parser.
* This parser can validate your XML against a schema.
*
* You can use this version if:
* <ul>
* <li>You want to use Xerces to parse your XML</li>
* <li>You have Xerces in your classpath</li>
* <li>You only want to configure the following Xerces options:
* <ul>
* <li>namespace awareness (are there namespaces in the XML to parse?)</li>
* <li>validation (should the XML be validated while it is parsed?)</li>
* <li>you want to specify schema or non-schema validation</li>
* </ul>
* </li>
* </ul>
* @author scott
*
*/
public class BasicXercesXMLTokenStream implements TokenStream {
    /** Message used when a required parser member exists but is not static. */
    private static final String NOT_STATIC_MESSAGE =
        "Cannot access _tokenNames or getNamespaceMap() in the parser class (they should be static)";
    /** Message used when a required parser member cannot be accessed reflectively. */
    private static final String NOT_PUBLIC_MESSAGE =
        "Cannot access _tokenNames or getNamespaceMap() in the parser class (they should be public)";
    private static final Class<?>[] NO_PARAMETERS = new Class[] {};
    private static final Object[] NO_ARGUMENTS = new Object[] {};

    /** The scanner that every call on this class is delegated to. */
    private final XMLTokenStream xmlTokenStream;

    /**
     * Create the xml token stream without specifying queue sizes.
     * This simply delegates to the seven-argument constructor, passing
     * <code>-1</code> for both <code>maximumQueueSize</code> and
     * <code>resumeQueueSize</code>.
     * <i>Note that this can cause the entire XML to be read into memory!</i>
     * If you have a small XML document to parse, this is more efficient,
     * but large XML documents can cause memory problems.
     * @param xmlToParse The XML input to parse
     * @param parserClass Your parser class. The parser must have been generated
     *                    with the xmlMode=true option specified
     * @param namespaceAware true if the XML (and your grammar) uses namespaces
     * @param validating true if you want SAX to validate your XML
     * @param validateWithSchema true if you want to validate using an XML schema
     * @throws IllegalArgumentException if you pass in an invalid parser
     * @throws RuntimeException if the SAX parser cannot be configured or built,
     *                          or if getNamespaceMap() itself throws
     */
    public BasicXercesXMLTokenStream(Reader xmlToParse,
                                     Class<? extends Parser> parserClass,
                                     boolean namespaceAware,
                                     boolean validating,
                                     boolean validateWithSchema) {
        this(xmlToParse, parserClass, namespaceAware, validating, validateWithSchema, -1, -1);
    }

    /**
     * Create the xml token stream.
     * <p>
     * The constructor configures a Xerces SAX parser, reads the public static
     * <code>_tokenNames</code> field and calls the public static
     * <code>getNamespaceMap()</code> method of the parser class via
     * reflection, and wraps everything in an {@link XMLTokenStream}.
     * <p>
     * <b>Side effect:</b> this sets the JVM-wide system properties
     * <code>javax.xml.parsers.DocumentBuilderFactory</code> and
     * <code>javax.xml.parsers.SAXParserFactory</code> to the Xerces
     * implementations.
     * <p>
     * <b>Note:</b> <code>maximumQueueSize</code> and
     * <code>resumeQueueSize</code> are accepted for API compatibility but are
     * not read by this constructor; they are not passed on to the
     * underlying {@link XMLTokenStream}.
     * @param xmlToParse The XML input to parse
     * @param parserClass Your parser class. The parser must have been generated
     *                    with the xmlMode=true option specified
     * @param namespaceAware true if the XML (and your grammar) uses namespaces
     * @param validating true if you want SAX to validate your XML
     * @param validateWithSchema true if you want to validate using an XML schema
     * @param maximumQueueSize the maximum number of tokens you want to place
     *                         in the blocking queue ready for the ANTXR parser
     *                         to fetch (currently unused, see note above)
     * @param resumeQueueSize The number of buffered tokens at which you will
     *                        resume the SAX parse (currently unused, see note
     *                        above)
     * @throws IllegalArgumentException if you pass in an invalid parser, i.e.
     *                                  one whose <code>_tokenNames</code> or
     *                                  <code>getNamespaceMap()</code> is
     *                                  missing, not static or not accessible
     * @throws RuntimeException if the SAX parser cannot be configured or built,
     *                          or if getNamespaceMap() itself throws
     */
    public BasicXercesXMLTokenStream(Reader xmlToParse,
                                     Class<? extends Parser> parserClass,
                                     boolean namespaceAware,
                                     boolean validating,
                                     boolean validateWithSchema,
                                     int maximumQueueSize,
                                     int resumeQueueSize) {
        // The SAX parser is built before the parser class is inspected so that
        // the order of side effects and failures matches earlier releases.
        SAXParser parser = createSaxParser(namespaceAware, validating, validateWithSchema);
        String[] tokenNames = readTokenNames(parserClass);
        Map<String, String> namespaceMap = readNamespaceMap(parserClass);

        // Create our scanner (using the SAX parser)
        xmlTokenStream =
            new XMLTokenStream(tokenNames, namespaceMap,
                               new InputSource(xmlToParse), parser, null, null);
    }

    /**
     * Build the Xerces SAX parser (really part of the scanner).
     * @param namespaceAware true to make the parser namespace aware
     * @param validating true to make the parser validate the XML
     * @param validateWithSchema value for the Xerces schema-validation feature
     * @return the configured SAX parser
     * @throws RuntimeException wrapping any configuration or SAX failure
     */
    private static SAXParser createSaxParser(boolean namespaceAware,
                                             boolean validating,
                                             boolean validateWithSchema) {
        try {
            // Force JAXP to hand out Xerces factories. These properties are
            // global to the JVM.
            System.setProperty("javax.xml.parsers.DocumentBuilderFactory",
                               "org.apache.xerces.jaxp.DocumentBuilderFactoryImpl");
            System.setProperty("javax.xml.parsers.SAXParserFactory",
                               "org.apache.xerces.jaxp.SAXParserFactoryImpl");
            SAXParserFactory factory = SAXParserFactory.newInstance();
            factory.setNamespaceAware(namespaceAware);
            factory.setValidating(validating);
            factory.setFeature("http://apache.org/xml/features/validation/schema", validateWithSchema);
            return factory.newSAXParser();
        }
        catch (ParserConfigurationException e) {
            throw new RuntimeException("Cannot configure the SAX parser. See nested exception.", e);
        }
        catch (SAXException e) {
            throw new RuntimeException("Error building SAX parser. See nested exception.", e);
        }
    }

    /**
     * Read the static <code>_tokenNames</code> field of the parser class.
     * @param parserClass the generated parser class
     * @return the value of the field
     * @throws IllegalArgumentException if the field is missing, not static
     *                                  or not accessible
     */
    private static String[] readTokenNames(Class<? extends Parser> parserClass) {
        try {
            Field field = parserClass.getField("_tokenNames");
            // Reading an instance field with a null target would raise a
            // NullPointerException, so check up front and report it properly.
            if (!Modifier.isStatic(field.getModifiers())) {
                throw new IllegalArgumentException(NOT_STATIC_MESSAGE);
            }
            return (String[]) field.get(null);
        }
        catch (NoSuchFieldException e) {
            throw new IllegalArgumentException("Cannot find _tokenNames in the parser class -- is it an XML parser?", e);
        }
        catch (IllegalAccessException e) {
            throw new IllegalArgumentException(NOT_PUBLIC_MESSAGE, e);
        }
    }

    /**
     * Invoke the static <code>getNamespaceMap()</code> method of the parser
     * class.
     * @param parserClass the generated parser class
     * @return the map returned by the method
     * @throws IllegalArgumentException if the method is missing, not static
     *                                  or not accessible
     * @throws RuntimeException if the method itself throws an exception
     */
    private static Map<String, String> readNamespaceMap(Class<? extends Parser> parserClass) {
        try {
            Method method = parserClass.getMethod("getNamespaceMap", BasicXercesXMLTokenStream.NO_PARAMETERS);
            // Invoking an instance method with a null target would raise a
            // NullPointerException, so check up front and report it properly.
            if (!Modifier.isStatic(method.getModifiers())) {
                throw new IllegalArgumentException(NOT_STATIC_MESSAGE);
            }
            @SuppressWarnings("unchecked")
            Map<String, String> namespaceMap =
                (Map<String, String>) method.invoke(null, BasicXercesXMLTokenStream.NO_ARGUMENTS);
            return namespaceMap;
        }
        catch (NoSuchMethodException e) {
            throw new IllegalArgumentException("Cannot find getNamespaceMap() in the parser class -- is it an XML parser?", e);
        }
        catch (IllegalAccessException e) {
            throw new IllegalArgumentException(NOT_PUBLIC_MESSAGE, e);
        }
        catch (InvocationTargetException e) {
            throw new RuntimeException("Exception thrown when running getNamespaceMap(). See nested exception.", e);
        }
    }

    /**
     * State whether the given token is an XML start tag.
     * The question is passed straight to the wrapped {@link XMLTokenStream}.
     * @param token the token to check
     * @return true if it's a start tag, false otherwise
     */
    public boolean isStartTag(Token token) {
        return xmlTokenStream.isStartTag(token);
    }

    /** {@inheritDoc} */
    public Token nextToken() throws TokenStreamException {
        return xmlTokenStream.nextToken();
    }
}