/*******************************************************************************
 * Copyright (c) 2008 Scott Stanchfield.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *   Based on the ANTLR parser generator by Terence Parr, http://antlr.org
 *   Ric Klaren <klaren@cs.utwente.nl>
 *   Scott Stanchfield - Modifications for XML Parsing
 *******************************************************************************/
package com.javadude.antxr.scanner;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;

import com.javadude.antxr.CommonToken;
import com.javadude.antxr.Token;
import com.javadude.antxr.TokenStream;
import com.javadude.antxr.TokenStreamException;

/**
 * An XML token stream. You can pass any XmlPull parser, with whatever
 * configuration you want for use as the scanner.
 *
 * <p>Each call to {@link #nextToken()} converts the pull parser's current
 * event into a token and then moves the pull parser on to its next event.</p>
 */
public class XMLPullTokenStream implements TokenStream {
    /** Token names of the form {@code "<namespace:tag>"} or {@code "<tag>"} (quotes included). */
    private static final Pattern TAG_TOKEN_PATTERN = Pattern.compile("\"<((.*):)?(.*)>\"");

    /** Key in the namespace map that holds the namespace for unqualified tags. */
    private static final String DEFAULT_NAMESPACE_KEY = "$DEFAULT";

    /** Value of {@link #pcdataNum} when the grammar defines no PCDATA token. */
    private static final int NO_PCDATA = -99;

    /** Value of {@link #otherTagValue} when the grammar defines no OTHER_TAG token. */
    private static final int NO_OTHER_TAG = -1;

    /** Indexed by token id; true for every id that was registered as an XML tag. */
    private boolean[] startTag;

    /** Namespace -> (tag name -> token id). */
    private Map<String, Map<String, Integer>> namespaces = new HashMap<String, Map<String, Integer>>();

    /** Token name -> token id for every token name that is not an XML tag. */
    private Map<String, Integer> tokens = new HashMap<String, Integer>();

    private int pcdataNum;
    private int endTagValue;
    private int otherTagValue = NO_OTHER_TAG;
    private XmlPullParser parser;

    /**
     * Create the xml token stream.
     * @param tokenNames An array of token names for your parser. You can get
     *                   this by passing YourParser._tokenNames, where YourParser
     *                   is an XML parser generated by ANTXR
     * @param namespaceMap A map of namespace/prefix mappings. You can get this
     *                   by passing YourParser.getNamespaceMap(), where
     *                   YourParser is an XML parser generated by ANTXR. Only the
     *                   "$DEFAULT" entry is consulted by this class; a null map
     *                   is treated as having no default namespace
     * @param parser The XmlPull Parser that you want to use to scan (and possibly
     *                   validate) your XML. The input should already be set, but not read
     */
    public XMLPullTokenStream(String[] tokenNames, Map<String, String> namespaceMap, XmlPullParser parser) {
        readTokens(tokenNames, namespaceMap);

        // Character data is only reported if the grammar actually declares PCDATA.
        Integer pcdata = tokens.get("PCDATA");
        pcdataNum = (pcdata == null) ? NO_PCDATA : pcdata.intValue();

        this.parser = parser;
    }

    /**
     * Set up the tokens to use when scanning. Token names that look like
     * quoted XML tags are registered as tags under their namespace; all
     * other names are stored in the plain token table, and the ids of
     * XML_END_TAG and OTHER_TAG are remembered when present.
     * @param tokenNames The names of the tokens in the grammar
     * @param namespaceMap A mapping that includes prefixes; may be null
     */
    private void readTokens(String[] tokenNames, Map<String, String> namespaceMap) {
        startTag = new boolean[tokenNames.length];
        String defaultNamespace =
            (namespaceMap == null) ? null : namespaceMap.get(DEFAULT_NAMESPACE_KEY);

        for (int i = 0; i < tokenNames.length; i++) {
            String tokenName = tokenNames[i];
            Integer tokenId = Integer.valueOf(i);
            Matcher matcher = TAG_TOKEN_PATTERN.matcher(tokenName);

            if (matcher.matches()) {
                String namespace = matcher.group(2);
                String tag = matcher.group(3);
                if (namespace == null) {
                    namespace = defaultNamespace;
                }
                addTag(namespace, tag, tokenId);
                continue;
            }

            tokens.put(tokenName, tokenId);
            if ("XML_END_TAG".equals(tokenName)) {
                endTagValue = i;
            }
            if ("OTHER_TAG".equals(tokenName)) {
                otherTagValue = i;
            }
        }
    }

    // TODO if only one namespace, optimize further (no hashmap lookup)
    /**
     * Get the numerical token number for an XML tag
     * @param namespace The tag's namespace (null is treated as "")
     * @param tag The tag name
     * @return The tag's token id, or null if the tag is not registered
     */
    private Integer getTokenValue(String namespace, String tag) {
        return getTags(namespace).get(tag);
    }

    /**
     * Add an XML tag to our mapping and mark its token id as a start tag
     * @param namespace The namespace the tag belongs to (null is treated as "")
     * @param tag The xml tag to store
     * @param integerValue The integer value of the tag
     */
    private void addTag(String namespace, String tag, Integer integerValue) {
        getTags(namespace).put(tag, integerValue);
        startTag[integerValue.intValue()] = true;
    }

    /**
     * State whether the given token is an XML start tag
     * @param token the token to check
     * @return true if it's a start tag, false otherwise (including when the
     *         token's type lies outside the token-name table)
     */
    public boolean isStartTag(Token token) {
        int type = token.getType();
        // Guard the lookup so that types outside the table are simply "not a start tag".
        return type >= 0 && type < startTag.length && startTag[type];
    }

    /**
     * Get all the tags defined in the given namespace, creating an empty
     * entry for the namespace if none exists yet
     * @param namespace The namespace to check (null is treated as "")
     * @return A map of tags to token ids
     */
    private Map<String, Integer> getTags(String namespace) {
        String key = (namespace == null) ? "" : namespace;
        Map<String, Integer> tags = namespaces.get(key);
        if (tags == null) {
            tags = new HashMap<String, Integer>();
            namespaces.put(key, tags);
        }
        return tags;
    }

    /**
     * {@inheritDoc}
     *
     * <p>Start tags become XML tokens, end tags become the XML_END_TAG token,
     * non-whitespace text becomes a PCDATA token (only if the grammar defines
     * PCDATA), and the end of the document becomes an EOF token. All other
     * events are skipped. Once the end of the document has been reached the
     * pull parser is no longer advanced, so further calls keep returning EOF.</p>
     *
     * @throws TokenStreamException if the underlying pull parser reports a
     *         parse or I/O error
     */
    public Token nextToken() throws TokenStreamException {
        try {
            try {
                while (true) {
                    switch (parser.getEventType()) {
                        case XmlPullParser.END_DOCUMENT:
                            return createToken(Token.EOF_TYPE, "");

                        case XmlPullParser.START_TAG:
                            return createXMLToken();

                        case XmlPullParser.END_TAG:
                            return createToken(endTagValue, "");

                        case XmlPullParser.TEXT:
                            // if PCDATA isn't used in the parser, don't collect characters
                            if (pcdataNum != NO_PCDATA && !parser.isWhitespace()) {
                                return createToken(pcdataNum, parser.getText());
                            }
                            break;

                        default:
                            // START_DOCUMENT and any event we have no token for:
                            // nothing to report, fall through to the skip below
                            break;
                    }
                    // skip the uninteresting event and look at the next one
                    parser.next();
                }
            } finally {
                // When we exit nextToken(), we want to move the XML parser
                // forward -- except past the end of the document, where the
                // XmlPull API allows next() to throw.
                if (parser.getEventType() != XmlPullParser.END_DOCUMENT) {
                    parser.next();
                }
            }
        } catch (XmlPullParserException e) {
            throw new TokenStreamException(e);
        } catch (IOException e) {
            throw new TokenStreamException(e);
        }
    }

    /**
     * Create and return a token positioned at the parser's current line/column
     * @param tokenNum the token id
     * @param tokenText the token text
     * @return the created token
     */
    private Token createToken(int tokenNum, String tokenText) {
        Token token = new CommonToken(tokenNum, tokenText);
        token.setLine(parser.getLineNumber());
        token.setColumn(parser.getColumnNumber());
        return token;
    }

    /**
     * Create a token for the start tag the parser is currently positioned on.
     * The token text is the tag's local name, prefixed with
     * "namespace-uri:" when the tag has a non-blank namespace. Tags that are
     * not in the grammar map to OTHER_TAG when the grammar defines it.
     * @return the token for the current start tag, carrying its attributes
     * @throws RuntimeException if the tag is not defined in the grammar and
     *         the grammar has no OTHER_TAG token
     */
    protected Token createXMLToken() {
        String localName = parser.getName();
        String uri = parser.getNamespace();
        Integer id = getTokenValue(uri, localName);

        String name = "";
        if (uri != null && !"".equals(uri.trim())) {
            name += uri + ":";
        }
        name += localName;

        int tokenValue;
        if (id != null) {
            tokenValue = id.intValue();
        } else if (otherTagValue != NO_OTHER_TAG) {
            tokenValue = otherTagValue;
        } else {
            throw new RuntimeException("Tag '" + name + "' not defined in parser grammar");
        }

        int attributeCount = parser.getAttributeCount();
        List<Attribute> attributeList;
        if (attributeCount <= 0) {
            // also covers a negative count, which must not reach the ArrayList constructor
            attributeList = Collections.emptyList();
        } else {
            attributeList = new ArrayList<Attribute>(attributeCount);
            for (int i = 0; i < attributeCount; i++) {
                attributeList.add(
                    new Attribute(parser.getAttributeNamespace(i),
                                  parser.getAttributeName(i),
                                  parser.getAttributeValue(i),
                                  parser.getAttributeType(i)));
            }
        }

        XMLToken token = new XMLToken(tokenValue, name, attributeList);
        token.setLine(parser.getLineNumber());
        token.setColumn(parser.getColumnNumber());
        return token;
    }
}