/*******************************************************************************
* Copyright (c) 2008 Scott Stanchfield.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* Based on the ANTLR parser generator by Terence Parr, http://antlr.org
* Ric Klaren <klaren@cs.utwente.nl>
* Scott Stanchfield - Modifications for XML Parsing
*******************************************************************************/
package com.javadude.antxr.scanner;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import com.javadude.antxr.CommonToken;
import com.javadude.antxr.Token;
import com.javadude.antxr.TokenStream;
import com.javadude.antxr.TokenStreamException;
/**
* An XML token stream. You can pass any XmlPull parser, with whatever
* configuration you want for use as the scanner.
*/
public class XMLPullTokenStream implements TokenStream {
    /** Value held by {@link #pcdataNum} when the grammar defines no PCDATA token. */
    private static final int PCDATA_UNUSED = -99;

    /** Value held by {@link #otherTagValue} when the grammar defines no OTHER_TAG token. */
    private static final int NO_OTHER_TAG = -1;

    /** Key in the namespace map whose value is applied to tag tokens written without a prefix. */
    private static final String DEFAULT_NAMESPACE_KEY = "$DEFAULT";

    /**
     * Recognizes token names that denote XML tags, i.e. quoted names of the form
     * {@code "<tag>"} or {@code "<namespace:tag>"}. Because the first group is
     * greedy, the namespace part is everything before the <em>last</em> colon.
     */
    private static final Pattern TAG_TOKEN_PATTERN = Pattern.compile("\"<((.*):)?(.*)>\"");

    /** Indexed by token id; true for ids that were registered as XML start tags. */
    private boolean[] startTag;

    /** Namespace -> (tag name -> token id). The empty string stands for "no namespace". */
    private final Map<String, Map<String, Integer>> namespaces = new HashMap<String, Map<String, Integer>>();

    /** Token name -> token id for every token name that is not an XML tag. */
    private final Map<String, Integer> tokens = new HashMap<String, Integer>();

    /** Token id of PCDATA, or {@link #PCDATA_UNUSED} if the grammar has none. */
    private int pcdataNum;

    /** Token id of XML_END_TAG; stays 0 if the token names contain no XML_END_TAG entry. */
    private int endTagValue;

    /** Token id of OTHER_TAG, or {@link #NO_OTHER_TAG} if the grammar has none. */
    private int otherTagValue = NO_OTHER_TAG;

    /** The pull parser that supplies the XML events converted into tokens. */
    private final XmlPullParser parser;

    /**
     * Create the xml token stream.
     * @param tokenNames An array of token names for your parser. You can get
     *                   this by passing YourParser._tokenNames, where YourParser
     *                   is an XML parser generated by ANTXR
     * @param namespaceMap A map of namespace/prefix mappings. You can get this
     *                   by passing YourParser.getNamespaceMap(), where
     *                   YourParser is an XML parser generated by ANTXR. May be
     *                   null, in which case un-prefixed tags are registered
     *                   without a namespace
     * @param parser The XmlPull Parser that you want to use to scan (and possibly
     *                   validate) your XML. The input should already be set, but not read
     */
    public XMLPullTokenStream(String[] tokenNames, Map<String, String> namespaceMap, XmlPullParser parser) {
        readTokens(tokenNames, namespaceMap);
        Integer tokenNum = tokens.get("PCDATA");
        pcdataNum = (tokenNum == null) ? PCDATA_UNUSED : tokenNum.intValue();
        this.parser = parser;
    }

    /**
     * Set up the tokens to use when scanning. Names matching
     * {@link #TAG_TOKEN_PATTERN} are registered as XML start tags; every other
     * name is stored in the plain token table, and the ids of XML_END_TAG and
     * OTHER_TAG are remembered when present.
     * @param tokenNames The names of the tokens in the grammar; the array index is the token id
     * @param namespaceMap A mapping that includes prefixes; only its "$DEFAULT"
     *                     entry is consulted here. May be null
     */
    private void readTokens(String[] tokenNames, Map<String, String> namespaceMap) {
        startTag = new boolean[tokenNames.length];
        // A missing map simply means there is no default namespace.
        String defaultNamespace = (namespaceMap == null) ? null : namespaceMap.get(DEFAULT_NAMESPACE_KEY);
        for (int i = 0; i < tokenNames.length; i++) {
            String tokenName = tokenNames[i];
            Matcher matcher = TAG_TOKEN_PATTERN.matcher(tokenName);
            Integer integerValue = Integer.valueOf(i);
            if (matcher.matches()) {
                String namespace = matcher.group(2);
                String tag = matcher.group(3);
                if (namespace == null) {
                    namespace = defaultNamespace;
                }
                // NOTE(review): the namespace part of the token name is used verbatim as the
                // lookup key, while createXMLToken() looks tags up by parser.getNamespace();
                // presumably generated token names carry the namespace URI - confirm.
                addTag(namespace, tag, integerValue);
            } else {
                tokens.put(tokenName, integerValue);
                if ("XML_END_TAG".equals(tokenName)) {
                    endTagValue = i;
                }
                if ("OTHER_TAG".equals(tokenName)) {
                    otherTagValue = i;
                }
            }
        }
    }

    // TODO if only one namespace, optimize further (no hashmap lookup)
    /**
     * Get the numerical token number for an XML tag
     * @param namespace The tag's namespace (null is treated as the empty namespace)
     * @param tag The tag name
     * @return The tag's token id, or null if the tag is not registered in that namespace
     */
    private Integer getTokenValue(String namespace, String tag) {
        return getTags(namespace).get(tag);
    }

    /**
     * Add an XML tag to our mapping and mark its token id as a start tag
     * @param namespace The namespace the tag belongs to (null is treated as the empty namespace)
     * @param tag The xml tag to store
     * @param integerValue The integer value (token id) of the tag
     */
    private void addTag(String namespace, String tag, Integer integerValue) {
        // getTags() normalizes a null namespace to "".
        getTags(namespace).put(tag, integerValue);
        startTag[integerValue.intValue()] = true;
    }

    /**
     * State whether the given token is an XML start tag
     * @param token the token to check
     * @return true if it's a start tag, false otherwise (including when the
     *         token's type lies outside the range of known token ids)
     */
    public boolean isStartTag(Token token) {
        int type = token.getType();
        return type >= 0 && type < startTag.length && startTag[type];
    }

    /**
     * Get all the tags defined in the given namespace, creating an empty
     * table for the namespace on first use
     * @param namespace The namespace to check (null is treated as the empty namespace)
     * @return A map of tags to token ids; never null
     */
    private Map<String, Integer> getTags(String namespace) {
        String key = (namespace == null) ? "" : namespace;
        Map<String, Integer> tags = namespaces.get(key);
        if (tags == null) {
            tags = new HashMap<String, Integer>();
            namespaces.put(key, tags);
        }
        return tags;
    }

    /**
     * Convert the parser's current event into a token and move the parser on
     * to the following event. Start tags become XML tokens, end tags become
     * the XML_END_TAG token, non-whitespace text becomes PCDATA (only if the
     * grammar uses PCDATA), and the end of the document becomes EOF. All other
     * events are skipped. Once the end of the document has been reached, every
     * further call returns another EOF token.
     * @return the next token
     * @throws TokenStreamException if the underlying parser reports a parse or I/O error
     */
    public Token nextToken() throws TokenStreamException {
        try {
            // Whether the parser must be moved past the current event on exit.
            boolean advanceOnExit = true;
            try {
                while (true) {
                    switch (parser.getEventType()) {
                        case XmlPullParser.END_DOCUMENT:
                            // Stay on END_DOCUMENT: there is nothing after it to move to, a
                            // parser may reject next() at this point, and staying put lets
                            // repeated calls keep answering EOF.
                            advanceOnExit = false;
                            return createToken(Token.EOF_TYPE, "");
                        case XmlPullParser.START_TAG:
                            return createXMLToken();
                        case XmlPullParser.END_TAG:
                            return createToken(endTagValue, "");
                        case XmlPullParser.TEXT:
                            // if PCDATA isn't used in the parser, don't collect characters
                            if (pcdataNum == PCDATA_UNUSED || parser.isWhitespace()) {
                                parser.next(); // skip to next event
                                continue;
                            }
                            return createToken(pcdataNum, parser.getText());
                        default:
                            // START_DOCUMENT and any other event that has no token
                            // equivalent: skip it so the loop always makes progress.
                            parser.next();
                            continue;
                    }
                }
            } finally {
                // when we exit nextToken(), we want to move the XML parser forward
                if (advanceOnExit) {
                    parser.next();
                }
            }
        } catch (XmlPullParserException e) {
            throw new TokenStreamException(e);
        } catch (IOException e) {
            throw new TokenStreamException(e);
        }
    }

    /**
     * Create and return a token stamped with the parser's current position
     * @param tokenNum the token id
     * @param tokenText the token text
     * @return the created token
     */
    private Token createToken(int tokenNum, String tokenText) {
        Token token = new CommonToken(tokenNum, tokenText);
        token.setLine(parser.getLineNumber());
        token.setColumn(parser.getColumnNumber());
        return token;
    }

    /**
     * Create a token for the start tag the parser is currently positioned on.
     * The token id is the one registered for the tag's namespace and local
     * name; unregistered tags map to OTHER_TAG when the grammar defines it.
     * The token text is the local name, prefixed by the namespace and a colon
     * when the namespace is not blank, and the token carries a copy of the
     * tag's attributes.
     * @return the created XML token, stamped with the parser's current position
     * @throws RuntimeException if the tag is not registered and the grammar has no OTHER_TAG token
     */
    protected Token createXMLToken() {
        String localName = parser.getName();
        String uri = parser.getNamespace();
        Integer id = getTokenValue(uri, localName);

        String name = localName;
        if (uri != null && !"".equals(uri.trim())) {
            name = uri + ":" + localName;
        }

        int tokenValue;
        if (id != null) {
            tokenValue = id.intValue();
        } else if (otherTagValue != NO_OTHER_TAG) {
            tokenValue = otherTagValue;
        } else {
            throw new RuntimeException("Tag '" + name + "' not defined in parser grammar");
        }

        int attributeCount = parser.getAttributeCount();
        List<Attribute> attributeList;
        if (attributeCount == 0) {
            // Shared immutable list; nothing is added to it below.
            attributeList = Collections.emptyList();
        } else {
            attributeList = new ArrayList<Attribute>(attributeCount);
            for (int i = 0; i < attributeCount; i++) {
                attributeList.add(
                        new Attribute(parser.getAttributeNamespace(i),
                                parser.getAttributeName(i),
                                parser.getAttributeValue(i),
                                parser.getAttributeType(i)));
            }
        }

        XMLToken token = new XMLToken(tokenValue, name, attributeList);
        token.setLine(parser.getLineNumber());
        token.setColumn(parser.getColumnNumber());
        return token;
    }
}