XMLParser.java example

Explorer
AndroidRivers-master
- HoloEverywhere-master
- NewsEngine
  - src
    - com
      - silverkeytech
        news_engine
        DateHelper.java
        outlines
        Body.java
        Head.java
        Opml.java
        Outline.java
        riverjs
        River.java
        RiverEnclosure.java
        RiverImage.java
        RiverItem.java
        RiverOpml.java
        RiverOpmlBody.java
        RiverOpmlHead.java
        RiverOpmlOutline.java
        RiverSite.java
        RiverSiteCollection.java
        RiverSource.java
        syndications
        atom
        CategoryElement.java
        ContentElement.java
        Entry.java
        Feed.java
        LinkElement.java
        PersonElement.java
        Source.java
        TextElement.java
        rss
        Channel.java
        Cloud.java
        Enclosure.java
        Item.java
        Rss.java
      - thebuzzmedia
        sjxp
        XMLParser.java
        XMLParserException.java
        rule
        DefaultRule.java
        IRule.java
        ParsingMode.java
    - org
      - apache
        abdera
        model
        AtomDate.java
- NewsEngineTests
  - src
    - com
      - silverkeytech
        news_engine_tests
        HttpRequest.java
- gen
  - com
    - silverkeytech
      - android_rivers
        BuildConfig.java
        Manifest.java
        R.java
- src
  - com
    - github
      - kevinsawicki
        http
        HttpRequest.java
    - pl
      - polidea
        treeview
        AbstractTreeViewAdapter.java
        InMemoryTreeNode.java
        InMemoryTreeStateManager.java
        NodeAlreadyInTreeException.java
        NodeNotInTreeException.java
        TreeBuilder.java
        TreeConfigurationException.java
        TreeNodeInfo.java
        TreeStateManager.java
        TreeViewList.java
    - silverkeytech
      - android_rivers
        DateHelper.java
        TextLimiter.java
        creators
        AirportAutoComplete.java
        CityAutoComplete.java
  - go
    - goyalla
      - dict
        arabicDictionary
        file
        ArabicReshape.java
  - org
    - org
      - wordpress
        android
        util
        Base64.java
        TrustAllManager.java
        TrustAllSSLSocketFactory.java
    - xmlrpc
      - android
        ConnectionClient.java
        XMLRPCClient.java
        XMLRPCException.java
        XMLRPCFault.java
        XMLRPCSerializer.java
/**
 * Copyright 2011 The Buzz Media, LLC
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.thebuzzmedia.sjxp;

import com.thebuzzmedia.sjxp.rule.IRule;
import org.xmlpull.v1.XmlPullParser;
import org.xmlpull.v1.XmlPullParserException;
import org.xmlpull.v1.XmlPullParserFactory;

import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * Class used to define a parser that combines the performance of an XML Pull
 * Parser with the ease of XPath-like expressions.
 *
 * <h3>Thread Safety</h3> This class is not thread-safe, however instances of
 * {@link com.thebuzzmedia.sjxp.XMLParser} can safely be re-used to parse multiple files once the
 * previous parse operation is done.
 *
 * @param <T>
 *            The class type of any user-supplied object that the caller wishes
 *            to be passed through from one of the {@link com.thebuzzmedia.sjxp.XMLParser}'s
 *            <code>parse</code> methods directly to the handler when an
 *            {@link com.thebuzzmedia.sjxp.rule.IRule} matches. This is typically a data storage mechanism
 *            like a DAO or cache used to store the parsed value in some
 *            valuable way, but it can ultimately be anything. If you do not
 *            need to make use of the user object, there is no need to
 *            parameterize the class.
 *
 * @author Riyad Kalla (software@thebuzzmedia.com)
 */
public class XMLParser<T> {
    /**
     * Flag used to indicate if debugging output has been enabled by setting the
     * "sjxp.debug" system property to <code>true</code>. This value will be
     * <code>false</code> if the "sjxp.debug" system property is undefined or
     * set to <code>false</code>.
     * <p/>
     * This system property can be set on startup with:<br/>
     * <code>
     * -Dsjxp.debug=true
     * </code> or by calling {@link System#setProperty(String, String)} before
     * this class is loaded.
     * <p/>
     * This is <code>false</code> by default.
     */
    public static Boolean DEBUG = Boolean.getBoolean("sjxp.debug");

    /**
     * Flag used to indicate if this parser should be namespace-aware by setting
     * the "sjxp.namespaces" system property to <code>true</code>. This value
     * will be <code>true</code> if the "sjxp.namespaces" system property is
     * undefined. Namespace awareness can only be disabled by setting this
     * system property to <code>false</code>.
     * <p/>
     * <strong>NOTE</strong>: If you intentionally disable namespace awareness,
     * any {@link com.thebuzzmedia.sjxp.rule.IRule} you provide that uses namespace qualified values (e.g.
     * [http://w3.org/text]book) will fail to match as the parser can no longer
     * see namespace URIs.
     * <p/>
     * This system property can be set on startup with:<br/>
     * <code>
     * -Dsjxp.namespaces=true
     * </code> or by calling {@link System#setProperty(String, String)} before
     * this class is loaded.
     * <p/>
     * This is <code>true</code> by default.
     */
    public static final Boolean ENABLE_NAMESPACES = (System
            .getProperty("sjxp.namespaces") == null ? Boolean.TRUE : Boolean
            .getBoolean("sjxp.namespaces"));

    /**
     * Flag used to indicate if this parser should validate the parsed XML
     * against the referenced DTD or XML Schema by setting the "sjxp.validation"
     * system property to <code>true</code>. This value will be
     * <code>false</code> if the "sjxp.validation" system property is undefined
     * or set to <code>false</code>.
     * <p/>
     * This system property can be set on startup with:<br/>
     * <code>
     * -Dsjxp.validation=true
     * </code> or by calling {@link System#setProperty(String, String)} before
     * this class is loaded.
     * <p/>
     * This is <code>false</code> by default.
     */
    public static final Boolean ENABLE_VALIDATION = Boolean
            .getBoolean("sjxp.validation");

    /**
     * Prefix to every log message this library logs. Using a well-defined
     * prefix helps make it easier both visually and programmatically to scan
     * log files for messages produced by this library.
     * <p/>
     * The value is "[sjxp] " (including the space).
     */
    public static final String LOG_MESSAGE_PREFIX = "[sjxp] ";

    /**
     * Singleton {@link org.xmlpull.v1.XmlPullParserFactory} instance used to create new
     * underlying {@link org.xmlpull.v1.XmlPullParser} instances for each instance of
     * {@link com.thebuzzmedia.sjxp.XMLParser}.
     */
    public static final XmlPullParserFactory XPP_FACTORY;

    /**
     * Static initializer used to init the {@link org.xmlpull.v1.XmlPullParserFactory} with the
     * configured namespace and validation settings.
     */
    static {
        if (DEBUG) {
            log("Debug output ENABLED");
        }

        // Build and configure the factory through a local; the final field is
        // assigned exactly once, after configuration has succeeded.
        XmlPullParserFactory configuredFactory;

        try {
            configuredFactory = XmlPullParserFactory.newInstance();

            // Apply the namespace/validation switches read from the system
            // properties when this class was loaded.
            configuredFactory.setFeature(
                    XmlPullParser.FEATURE_PROCESS_NAMESPACES, ENABLE_NAMESPACES);
            configuredFactory.setFeature(
                    XmlPullParser.FEATURE_VALIDATION, ENABLE_VALIDATION);

            if (DEBUG) {
                log("XmlPullParserFactory configured [namespaces=%s, validation=%s]",
                        ENABLE_NAMESPACES, ENABLE_VALIDATION);
            }
        } catch (XmlPullParserException e) {
            // Without a working factory the class is unusable, so abort class
            // initialization and keep the original cause attached.
            throw new RuntimeException(
                    "An exception occurred while calling XmlPullParserFactory.newInstance(). A library providing the impl of the XML Pull Parser spec (e.g. XPP3 or Android SDK) must be available at runtime.",
                    e);
        }

        XPP_FACTORY = configuredFactory;
    }

    /**
     * Helper method used to ensure a message is loggable before it is logged
     * and then pre-pend a universal prefix to all log messages generated by
     * this library to make the log entries easy to parse visually or
     * programmatically.
     * <p/>
     * If a message cannot be logged (logging is disabled) then this method
     * returns immediately.
     * <p/>
     * <strong>NOTE</strong>: Because Java will auto-box primitive arguments
     * into Objects when building out the <code>params</code> array, care should
     * be taken not to call this method with primitive values unless
     * {@link #DEBUG} is <code>true</code>; otherwise the VM will be spending
     * time performing unnecessary auto-boxing calculations.
     *
     * @param message
     *            The log message in <a href=
     *            "http://download.oracle.com/javase/6/docs/api/java/util/Formatter.html#syntax"
     *            >format string syntax</a> that will be logged.
     * @param params
     *            The parameters that will be swapped into all the place holders
     *            in the original messages before being logged.
     *
     * @see #LOG_MESSAGE_PREFIX
     */
    protected static void log(String message, Object... params) {
        // Logging disabled: nothing to format, nothing to print.
        if (!DEBUG) {
            return;
        }

        // Every line gets the library prefix and a trailing newline.
        String format = LOG_MESSAGE_PREFIX + message + '\n';
        System.out.printf(format, params);
    }

    // Cached result of toString(); built lazily on the first call.
    private String toStringCache;
    // Loop flag read by doParse(): set to true when a parse begins and
    // cleared by stop() or when END_DOCUMENT is reached.
    private boolean continueParsing = true;

    // Current position of the parser inside the document; cleared when a
    // parse begins and pushed onto for every START_TAG.
    private Location location;
    // Underlying pull parser, created from XPP_FACTORY in the constructor.
    private XmlPullParser xpp;

    // Rules grouped by rule type. Each map is keyed by the hash code of a
    // rule's location path (populated in initRules) and maps to every rule
    // registered for that key.
    private Map<Integer, List<IRule<T>>> tagRuleMap;
    private Map<Integer, List<IRule<T>>> attrRuleMap;
    private Map<Integer, List<IRule<T>>> charRuleMap;

    /**
     * Create a new parser that uses the given {@link IRule}s when parsing any
     * XML content.
     *
     * @param rules
     *            The rules applied to any parsed content.
     *
     * @throws IllegalArgumentException
     *             if <code>rules</code> is <code>null</code> or empty.
     * @throws com.thebuzzmedia.sjxp.XMLParserException
     *             if the {@link #XPP_FACTORY} is unable to create a new
     *             {@link org.xmlpull.v1.XmlPullParser} instance and throws an exception.
     */
    public XMLParser(IRule<T>... rules) throws IllegalArgumentException,
            XMLParserException {
        boolean noRulesGiven = (rules == null || rules.length == 0);

        if (noRulesGiven) {
            throw new IllegalArgumentException(
                    "rules cannot be null or empty, you must provide at least 1 rule to execute otherwise parsing will do nothing.");
        }

        this.location = new Location();

        try {
            this.xpp = XPP_FACTORY.newPullParser();
        } catch (XmlPullParserException cause) {
            // Surface factory failures as the library's own exception type,
            // keeping the original cause attached.
            throw new XMLParserException(
                    "An exception occurred while trying to create a new XmlPullParser instance using the XmlPullParserFactory.",
                    cause);
        }

        // Sort the supplied rules into the per-type lookup maps.
        initRules(rules);
    }

    /**
     * Overridden to provide a nicely formatted representation of the parser for
     * easy debugging.
     * <p/>
     * As an added bonus, since {@link com.thebuzzmedia.sjxp.XMLParser}s are intended to be immutable,
     * the result of <code>toString</code> is cached on the first call and the
     * cache returned every time to avoid re-computing the completed
     * {@link String}.
     *
     * @return a nicely formatted representation of the parser for easy
     *         debugging.
     */
    @Override
    public synchronized String toString() {
        // Fast path: the description was already built by an earlier call.
        if (toStringCache != null) {
            return toStringCache;
        }

        StringBuilder description = new StringBuilder();
        description.append(this.getClass().getName());
        description.append("[attributeRules=").append(attrRuleMap);
        description.append(", characterRules=").append(charRuleMap);
        description.append("]");

        toStringCache = description.toString();
        return toStringCache;
    }

    /**
     * Used to indicate to the parser that you would like it to stop parsing.
     * <p/>
     * Internally the parser uses a simple <code>boolean</code> to indicate if
     * it should keep parsing. A call to this method sets the boolean value to
     * <code>false</code> which the parser checks at the next parse event and
     * then stops.
     * <p/>
     * This is a safe operation that simply flips a flag to tell the underlying
     * {@link org.xmlpull.v1.XmlPullParser} to stop working after it's done with its current
     * parse event and return from whichever <code>parse</code> method was
     * called.
     */
    public void stop() {
        // Only flips the flag; the doParse loop checks it before pulling the
        // next event from the underlying parser.
        this.continueParsing = false;
    }

    /**
     * Parse the XML out of the given stream matching the {@link IRule}s
     * provided when the {@link com.thebuzzmedia.sjxp.XMLParser} was instantiated.
     * <p/>
     * The underlying {@link org.xmlpull.v1.XmlPullParser} will attempt to determine the
     * stream's encoding based on the pull parser spec or fall back to a default
     * of UTF-8.
     * <p/>
     * This class will make no attempt at closing the given {@link java.io.InputStream},
     * the caller must take care to clean up that resource.
     * <h3>Stopping Parsing</h3>
     * Parsing can be safely stopped by calling {@link #stop()}. This allows
     * {@link IRule} implementations control over stopping parsing, for example,
     * if an arbitrary threshold is hit. A followup call to any of the
     * <code>parse</code> methods will reset the stopped state.
     *
     * @param source
     *            The stream that XML content will be read out of.
     *
     * @throws IllegalArgumentException
     *             if <code>source</code> is <code>null</code>.
     * @throws com.thebuzzmedia.sjxp.XMLParserException
     *             if any error occurs with the underlying stream during parsing
     *             or if the XML content itself is malformed and the underlying
     *             pull parser cannot parse it.
     */
    public void parse(InputStream source) throws IllegalArgumentException,
            XMLParserException {
        // No explicit encoding (the pull parser detects it) and no user object.
        final String autoDetectEncoding = null;
        final T noUserObject = null;

        try {
            parse(source, autoDetectEncoding, noUserObject);
        } catch (UnsupportedEncodingException ignored) {
            // Deliberately ignored: a null encoding skips the charset check
            // in the three-argument parse, so this is not expected to occur.
        }
    }

    /**
     * Parse the XML out of the given stream matching the {@link IRule}s
     * provided when the {@link com.thebuzzmedia.sjxp.XMLParser} was instantiated.
     * <p/>
     * The underlying {@link org.xmlpull.v1.XmlPullParser} will attempt to determine the
     * stream's encoding based on the pull parser spec or fall back to a default
     * of UTF-8.
     * <p/>
     * This class will make no attempt at closing the given {@link java.io.InputStream},
     * the caller must take care to clean up that resource.
     * <h3>Stopping Parsing</h3>
     * Parsing can be safely stopped by calling {@link #stop()}. This allows
     * {@link IRule} implementations control over stopping parsing, for example,
     * if an arbitrary threshold is hit. A followup call to any of the
     * <code>parse</code> methods will reset the stopped state.
     *
     * @param source
     *            The stream that XML content will be read out of.
     * @param userObject
     *            The user-supplied object passed through from this parse method
     *            to the matching {@link IRule}'s <code>handleXXX</code> method
     *            when a match is found, or <code>null</code> if no user object
     *            is needed. Passing through a user-object is just meant as a
     *            convenience for giving the handler methods on the
     *            {@link IRule}'s access to objects like DAOs that can be used
     *            to persist or process parsed data easily.
     *
     * @throws IllegalArgumentException
     *             if <code>source</code> is <code>null</code>.
     * @throws com.thebuzzmedia.sjxp.XMLParserException
     *             if any error occurs with the underlying stream during parsing
     *             or if the XML content itself is malformed and the underlying
     *             pull parser cannot parse it.
     */
    public void parse(InputStream source, T userObject)
            throws IllegalArgumentException, XMLParserException {
        // No explicit encoding: the pull parser is left to detect it.
        final String autoDetectEncoding = null;

        try {
            parse(source, autoDetectEncoding, userObject);
        } catch (UnsupportedEncodingException ignored) {
            // Deliberately ignored: a null encoding skips the charset check
            // in the three-argument parse, so this is not expected to occur.
        }
    }

    /**
     * Parse the XML out of the given stream (producing content matching the
     * given encoding) matching the {@link IRule}s provided when the
     * {@link com.thebuzzmedia.sjxp.XMLParser} was instantiated.
     * <p/>
     * This class will make no attempt at closing the given {@link java.io.InputStream},
     * the caller must take care to clean up that resource.
     * <h3>Stopping Parsing</h3>
     * Parsing can be safely stopped by calling {@link #stop()}. This allows
     * {@link IRule} implementations control over stopping parsing, for example,
     * if an arbitrary threshold is hit. A followup call to any of the
     * <code>parse</code> methods will reset the stopped state.
     *
     * @param source
     *            The stream that XML content will be read out of.
     * @param encoding
     *            The character encoding (e.g. "UTF-8") of the data from the
     *            given stream. If the encoding is not known, passing
     *            <code>null</code> or calling {@link #parse(java.io.InputStream)}
     *            instead will allow the underlying {@link org.xmlpull.v1.XmlPullParser} to try
     *            and automatically determine the encoding.
     *
     * @throws IllegalArgumentException
     *             if <code>source</code> is <code>null</code>.
     * @throws java.io.UnsupportedEncodingException
     *             if <code>encoding</code> represents an encoding name that is
     *             not recognized by {@link java.nio.charset.Charset#isSupported(String)}
     * @throws com.thebuzzmedia.sjxp.XMLParserException
     *             if any error occurs with the underlying stream during parsing
     *             or if the XML content itself is malformed and the underlying
     *             pull parser cannot parse it.
     */
    public void parse(InputStream source, String encoding)
            throws IllegalArgumentException, UnsupportedEncodingException,
            XMLParserException {
        // Same as the three-argument parse, without a user object.
        final T noUserObject = null;

        parse(source, encoding, noUserObject);
    }

    /**
     * Parse the XML out of the given stream (producing content matching the
     * given encoding) matching the {@link IRule}s provided when the
     * {@link com.thebuzzmedia.sjxp.XMLParser} was instantiated.
     * <p/>
     * This class will make no attempt at closing the given {@link java.io.InputStream},
     * the caller must take care to clean up that resource.
     * <h3>Stopping Parsing</h3>
     * Parsing can be safely stopped by calling {@link #stop()}. This allows
     * {@link IRule} implementations control over stopping parsing, for example,
     * if an arbitrary threshold is hit. A followup call to any of the
     * <code>parse</code> methods will reset the stopped state.
     *
     * @param source
     *            The stream that XML content will be read out of.
     * @param encoding
     *            The character encoding (e.g. "UTF-8") of the data from the
     *            given stream. If the encoding is not known, passing
     *            <code>null</code> or calling {@link #parse(java.io.InputStream)}
     *            instead will allow the underlying {@link org.xmlpull.v1.XmlPullParser} to try
     *            and automatically determine the encoding.
     * @param userObject
     *            The user-supplied object passed through from this parse method
     *            to the matching {@link IRule}'s <code>handleXXX</code> method
     *            when a match is found, or <code>null</code> if no user object
     *            is needed. Passing through a user-object is just meant as a
     *            convenience for giving the handler methods on the
     *            {@link IRule}'s access to objects like DAOs that can be used
     *            to persist or process parsed data easily.
     *
     * @throws IllegalArgumentException
     *             if <code>source</code> is <code>null</code>.
     * @throws java.io.UnsupportedEncodingException
     *             if <code>encoding</code> represents an encoding name that is
     *             not recognized by {@link java.nio.charset.Charset#isSupported(String)}
     * @throws com.thebuzzmedia.sjxp.XMLParserException
     *             if any error occurs with the underlying stream during parsing
     *             or if the XML content itself is malformed and the underlying
     *             pull parser cannot parse it.
     */
    public void parse(InputStream source, String encoding, T userObject)
            throws IllegalArgumentException, UnsupportedEncodingException,
            XMLParserException {
        if (source == null)
            throw new IllegalArgumentException("source cannot be null");

        if (encoding != null) {
            if (encoding.trim().length() == 0) {
                // Blank encoding: pass null so XPP gets the encoding from the
                // XML header instead.
                encoding = null;
            } else if (!Charset.isSupported(encoding)) {
                // Extra-safe, make sure the provided encoding is valid.
                // NOTE(review): Charset.isSupported itself throws
                // IllegalCharsetNameException (an IllegalArgumentException)
                // for syntactically illegal names, e.g. names containing
                // spaces; that exception propagates to the caller unchanged.
                throw new UnsupportedEncodingException(
                        "Encoding ["
                                + encoding
                                + "] is not a valid charset encoding in this runtime according to Charset.isSupported(encoding).");
            }
        }

        // Step 1: hand the stream to the underlying pull parser.
        try {
            xpp.setInput(source, encoding);

            if (DEBUG)
                log("Underlying XmlPullParser input set [type=InputStream, encoding=%s (null is OK), userObject=%s]",
                        xpp.getInputEncoding(), (userObject == null ? ""
                        : userObject));
        } catch (XmlPullParserException e) {
            throw new XMLParserException(
                    "Unable to set the given InputStream (with an optional encoding of '"
                            + encoding
                            + "') as input for the underlying XmlPullParser.",
                    e);
        }

        // Step 2: run the parse loop, translating low-level failures into
        // XMLParserException with the cause attached.
        try {
            doParse(userObject);
        } catch (IOException e) {
            // Reading from the stream failed.
            throw new XMLParserException(
                    "An error with the underlying data stream being parsed occurred.",
                    e);
        } catch (XmlPullParserException e) {
            // The pull parser rejected the content.
            throw new XMLParserException(
                    "An exception occurred while parsing the given source, the XML document may be malformed.",
                    e);
        }
    }

    /**
     * Sorts the given rules into the TAG, ATTRIBUTE and CHARACTER rule maps.
     * <p/>
     * Each map is keyed by the hash code of a rule's location path and holds
     * the list of every rule registered under that key, so several rules
     * targeting the same path are all kept, in the order they were given.
     * Any previously loaded rules are discarded.
     *
     * @param rules
     *            The rules to load; must not be <code>null</code> and must
     *            not contain <code>null</code> elements.
     *
     * @throws IllegalArgumentException
     *             if a rule reports a type other than TAG, ATTRIBUTE or
     *             CHARACTER.
     */
    protected void initRules(IRule<T>... rules) {
        // calculate a rough optimal size for the rule maps
        int optSize = (rules.length > 64 ? rules.length * 2 : 64);

        // init the rule maps
        tagRuleMap = new HashMap<Integer, List<IRule<T>>>(optSize);
        attrRuleMap = new HashMap<Integer, List<IRule<T>>>(optSize);
        charRuleMap = new HashMap<Integer, List<IRule<T>>>(optSize);

        for (int i = 0, length = rules.length; i < length; i++) {
            IRule<T> rule = rules[i];

            // Pick the map that matches the rule's type.
            Map<Integer, List<IRule<T>>> ruleMap;

            switch (rule.getType()) {
                case TAG:
                    ruleMap = tagRuleMap;
                    break;

                case ATTRIBUTE:
                    ruleMap = attrRuleMap;
                    break;

                case CHARACTER:
                    ruleMap = charRuleMap;
                    break;

                default:
                    // Fail fast rather than filing the rule under a list
                    // that belongs to some other rule.
                    throw new IllegalArgumentException(
                            "Unsupported rule type [" + rule.getType()
                                    + "] for IRule: " + rule);
            }

            /*
             * The maps are keyed by Integer, so the lookup has to use the
             * same hash-code key as the insert. Looking up with the location
             * path object itself never matches, which would make every rule
             * replace the list of an earlier rule on the same path.
             */
            Integer pathKey = Integer.valueOf(rule.getLocationPath().hashCode());
            List<IRule<T>> ruleList = ruleMap.get(pathKey);

            // If there wasn't already a rule list, create and add it
            if (ruleList == null) {
                ruleList = new ArrayList<IRule<T>>(3);
                ruleMap.put(pathKey, ruleList);
            }

            // Add the rule to the list for the given path
            ruleList.add(rule);
        }

        if (DEBUG)
            log("Initialized %d TAG rules, %d ATTRIBUTE rules and %d CHARACTER rules.",
                    tagRuleMap.size(), attrRuleMap.size(), charRuleMap.size());
    }

    /**
     * Uses the underlying {@link org.xmlpull.v1.XmlPullParser} to begin parsing through the
     * XML content from the given stream. This method's implementation is
     * simple, acting like a traffic-cop responding to
     * {@link org.xmlpull.v1.XmlPullParser#START_TAG}, {@link org.xmlpull.v1.XmlPullParser#TEXT},
     * {@link org.xmlpull.v1.XmlPullParser#END_TAG} and {@link org.xmlpull.v1.XmlPullParser#END_DOCUMENT}
     * events by calling the appropriate <code>doXXX</code> methods.
     * <p/>
     * Developers creating a subclass of {@link com.thebuzzmedia.sjxp.XMLParser} are meant to override
     * one of the {@link #doStartTag(Object)}, {@link #doText(Object)},
     * {@link #doEndTag(Object)} and {@link #doEndDocument(Object)} methods to
     * add custom behavior and not necessarily override this central method.
     * <h3>Stopping Parsing</h3>
     * Parsing can be safely stopped by calling {@link #stop()}. This allows
     * {@link IRule} implementations control over stopping parsing, for example,
     * if an arbitrary threshold is hit. A followup call to any of the
     * <code>parse</code> methods will reset the stopped state.
     *
     * @param userObject
     *            The user-supplied object passed through from this parse method
     *            to the matching {@link IRule}'s <code>handleXXX</code> method
     *            when a match is found, or <code>null</code> if no user object
     *            is needed. Passing through a user-object is just meant as a
     *            convenience for giving the handler methods on the
     *            {@link IRule}'s access to objects like DAOs that can be used
     *            to persist or process parsed data easily.
     *
     * @throws java.io.IOException
     *             if an error occurs with reading from the underlying
     *             {@link java.io.InputStream} given to one of the public
     *             <code>parse</code> methods.
     * @throws org.xmlpull.v1.XmlPullParserException
     *             if an error occurs while parsing the XML content from the
     *             underlying stream; typically resulting from malformed or
     *             invalid XML.
     */
    protected void doParse(T userObject) throws IOException,
            XmlPullParserException {
        // Reset per-parse state so a parser instance can be re-used.
        location.clear();
        continueParsing = true;

        if (DEBUG) {
            log("Parsing starting...");
        }

        final long startedAt = System.currentTimeMillis();

        while (continueParsing) {
            final int event = xpp.next();

            if (event == XmlPullParser.START_TAG) {
                doStartTag(userObject);
            } else if (event == XmlPullParser.TEXT) {
                doText(userObject);
            } else if (event == XmlPullParser.END_TAG) {
                doEndTag(userObject);
            } else if (event == XmlPullParser.END_DOCUMENT) {
                // Clear the flag first so the loop ends after the
                // end-of-document hook has run.
                continueParsing = false;
                doEndDocument(userObject);
            }
            // Any other event type is ignored.
        }

        if (DEBUG) {
            long elapsedMillis = System.currentTimeMillis() - startedAt;
            log("Parse COMPLETE, elapsed time: %dms (approx %f seconds)",
                    elapsedMillis, elapsedMillis / 1000.0d);
        }
    }

    /**
     * Used to process a {@link org.xmlpull.v1.XmlPullParser#START_TAG} event.
     * <p/>
     * By default this updates the internal location state of the parser,
     * processes all {@link IRule}s of type {@link Type#TAG} and processes all
     * {@link IRule}s of type {@link Type#ATTRIBUTE} that match the parser's
     * current location.
     *
     * @param userObject
     *            The user-supplied object passed through from this parse method
     *            to the matching {@link IRule}'s <code>handleXXX</code> method
     *            when a match is found, or <code>null</code> if no user object
     *            is needed. Passing through a user-object is just meant as a
     *            convenience for giving the handler methods on the
     *            {@link IRule}'s access to objects like DAOs that can be used
     *            to persist or process parsed data easily.
     */
    protected void doStartTag(T userObject) {
        // Update parser location
        location.push(xpp.getName(), xpp.getNamespace());

        if (DEBUG)
            log("START_TAG: %s %s:%s", location, xpp.getNamespace(), xpp.getName());

        // Both rule maps are keyed by the same location hash; compute it once.
        Integer locationKey = location.getCachedHashCode();

        // Get the rules for the current path
        List<IRule<T>> tagRuleList = tagRuleMap.get(locationKey);
        List<IRule<T>> attrRuleList = attrRuleMap.get(locationKey);

        // If there are no rules for the current path, then we are done.
        if ((tagRuleList == null || tagRuleList.isEmpty())
                && (attrRuleList == null || attrRuleList.isEmpty()))
            return;

        if (DEBUG)
            log("\t%d TAG rules and %d ATTR rules found for START_TAG...",
                    (tagRuleList == null ? 0 : tagRuleList.size()),
                    (attrRuleList == null ? 0 : attrRuleList.size()));

        // Process the TAG rules (isStartTag == true)
        if (tagRuleList != null) {
            for (int i = 0, size = tagRuleList.size(); i < size; i++) {
                IRule<T> rule = tagRuleList.get(i);

                if (DEBUG)
                    log("\t\tRunning TAG Rule: %s", rule);

                rule.handleTag(this, true, userObject);
            }
        }

        // Process the ATTR rules
        if (attrRuleList != null) {
            for (int i = 0, size = attrRuleList.size(); i < size; i++) {
                IRule<T> rule = attrRuleList.get(i);

                if (DEBUG)
                    log("\t\tRunning ATTR Rule: %s", rule);

                String[] attrNames = rule.getAttributeNames();

                // Be safe, jump to the next rule if this one has no name
                // entries
                if (attrNames == null || attrNames.length == 0)
                    continue;

                /*
                 * Each attribute name is either "localName" or
                 * "[namespaceURI]localName". The two parts are cut out with
                 * plain indexOf/substring calls rather than a regexp or
                 * split, which keeps the per-tag work small.
                 */
                for (int j = 0; j < attrNames.length; j++) {
                    String attrName = attrNames[j];

                    /*
                     * Fail fast with a descriptive exception instead of
                     * letting charAt(0) blow up on a null/empty entry.
                     */
                    if (attrName == null || attrName.length() == 0)
                        throw new XMLParserException(
                                "attribute name at index " + j
                                        + " is null or empty for IRule: "
                                        + rule);

                    String namespaceURI = null;
                    int startIndex = 0;

                    // Parse the namespaceURI out of the name if necessary
                    if (attrName.charAt(0) == '[') {
                        int endIndex = attrName.indexOf(']');

                        /*
                         * endIndex == -1 means the closing ']' is missing and
                         * endIndex == 1 means "[]" (empty URI). Anything from
                         * 2 upwards leaves at least one character between the
                         * brackets and is accepted.
                         */
                        if (endIndex <= 1)
                            throw new XMLParserException(
                                    "namespace URI for rule looks to be incomplete or empty for IRule: "
                                            + rule);

                        namespaceURI = attrName.substring(1, endIndex);

                        // The local name starts right after the ']'
                        startIndex = endIndex + 1;
                    }

                    /*
                     * The local name must contain at least one character;
                     * single-character names (e.g. "x") are valid and must
                     * not be rejected.
                     */
                    if (startIndex >= attrName.length())
                        throw new XMLParserException(
                                "local name for rule looks to be missing for IRule: "
                                        + rule);

                    // Parse the local name
                    String localName = attrName.substring(startIndex);

                    // Give the parsed attribute value to the matching rule
                    rule.handleParsedAttribute(this, j,
                            xpp.getAttributeValue(namespaceURI, localName),
                            userObject);
                }
            }
        }
    }

    /**
     * Used to process a {@link org.xmlpull.v1.XmlPullParser#TEXT} event.
     * <p/>
     * By default this processes all {@link IRule}s of type
     * {@link Type#CHARACTER} that match the parser's current location.
     *
     * @param userObject
     *            The user-supplied object passed through from this parse method
     *            to the matching {@link IRule}'s <code>handleXXX</code> method
     *            when a match is found, or <code>null</code> if no user object
     *            is needed. Passing through a user-object is just meant as a
     *            convenience for giving the handler methods on the
     *            {@link IRule}'s access to objects like DAOs that can be used
     *            to persist or process parsed data easily.
     */
    protected void doText(T userObject) {
        if (DEBUG) {
            log("TEXT: %s", location);
        }

        // CHARACTER rules registered for the path the parser is currently at.
        List<IRule<T>> matchingRules = charRuleMap.get(location.getCachedHashCode());
        boolean hasRules = (matchingRules != null && !matchingRules.isEmpty());

        if (hasRules) {
            if (DEBUG) {
                log("\t%d rules found for TEXT...", matchingRules.size());
            }

            // Read the text once; every matching rule receives the same value.
            String characters = xpp.getText();

            int ruleCount = matchingRules.size();
            int index = 0;

            while (index < ruleCount) {
                IRule<T> current = matchingRules.get(index);

                if (DEBUG) {
                    log("\t\tRunning Rule: %s", current);
                }

                current.handleParsedCharacters(this, characters, userObject);
                index++;
            }
        }
    }

    /**
     * Used to process a {@link org.xmlpull.v1.XmlPullParser#END_TAG} event.
     *
     * @param userObject
     *            The user-supplied object passed through from this parse method
     *            to the matching {@link IRule}'s <code>handleXXX</code> method
     *            when a match is found, or <code>null</code> if no user object
     *            is needed. Passing through a user-object is just meant as a
     *            convenience for giving the handler methods on the
     *            {@link IRule}'s access to objects like DAOs that can be used
     *            to persist or process parsed data easily.
     */
    protected void doEndTag(T userObject) {
        /*
         * The lookup has to happen before the location is popped, while the
         * path still identifies the tag that is being closed.
         */
        List<IRule<T>> closingRules = tagRuleMap.get(location.getCachedHashCode());
        boolean hasRules = (closingRules != null && !closingRules.isEmpty());

        if (hasRules) {
            if (DEBUG) {
                log("\t%d TAG rules found for END_TAG...", closingRules.size());
            }

            // Notify every TAG rule that its element is closing
            // (isStartTag == false).
            int ruleCount = closingRules.size();

            for (int r = 0; r < ruleCount; r++) {
                IRule<T> closingRule = closingRules.get(r);

                if (DEBUG) {
                    log("\t\tRunning TAG Rule: %s", closingRule);
                }

                closingRule.handleTag(this, false, userObject);
            }
        }

        // Whether or not any rule matched, step back out to the parent path.
        location.pop();

        if (DEBUG) {
            log("END_TAG: %s", location);
        }
    }

    /**
     * Used to process a {@link org.xmlpull.v1.XmlPullParser#END_DOCUMENT} event.
     * <p/>
     * By default this method simply logs a debug statement if debugging is
     * enabled, but this stub is provided to make overriding the default
     * behavior easier if desired.
     *
     * @param userObject
     *            The user-supplied object passed through from this parse method
     *            to the matching {@link IRule}'s <code>handleXXX</code> method
     *            when a match is found, or <code>null</code> if no user object
     *            is needed. Passing through a user-object is just meant as a
     *            convenience for giving the handler methods on the
     *            {@link IRule}'s access to objects like DAOs that can be used
     *            to persist or process parsed data easily.
     */
    protected void doEndDocument(T userObject) {
        // No default processing for this event; it is only traced when
        // debugging is enabled.
        if (!DEBUG) {
            return;
        }

        log("END_DOCUMENT, Parsing COMPLETE");
    }

    /**
     * Simple and fast class used to mock the behavior of a stack in the form of
     * a string for the purposes of "pushing" and "popping" the parser's current
     * location within an XML document as it processes START and END_TAG events.
     * <p/>
     * Performance is optimized by using a {@link StringBuilder} whose length is
     * chopped (which just adjusts an <code>int</code> value) to simulate a
     * "pop" off the top.
     * <h3>Performance</h3>
     * As of SJXP 2.0 String object creation and char[] duplication (e.g.
     * {@link System#arraycopy(Object, int, Object, int, int)}) has been
     * completely removed and replaced with using simple integer hash codes.
     * <p/>
     * The performance improvement is huge over the original toString-based
     * method of matching {@link IRule}'s <code>locationPath</code>s against the
     * parser's current location.
     *
     * @author Riyad Kalla (software@thebuzzmedia.com)
     */
    class Location {
        /** Number of slots in the {@link Integer} hash code cache. */
        private static final int HASH_CODE_CACHE_SIZE = 512;

        /** Lazily computed hash of {@link #path}; reset to 0 on every change. */
        private int hashCode;
        /** Boxed hash codes, indexed by <code>|hashCode % length|</code>. */
        private Integer[] hashCodeCache;

        /** The current location path, e.g. "/library/book/title". */
        private final StringBuilder path;
        /** Length of {@link #path} recorded before each push. */
        private final List<Integer> lengthList;

        /**
         * Creates a new empty location.
         */
        public Location() {
            hashCode = 0;
            hashCodeCache = new Integer[HASH_CODE_CACHE_SIZE];

            path = new StringBuilder(256);
            lengthList = new ArrayList<Integer>(16);
        }

        /**
         * Returns the current location path (e.g. "/library/book/title"), or
         * an empty string when nothing has been pushed.
         * <p/>
         * This is what makes a location readable when it is passed as a
         * format argument to a log statement.
         *
         * @return the current location path as a <code>String</code>.
         */
        @Override
        public String toString() {
            return path.toString();
        }

        /**
         * Overridden to calculate the hash code of this location using the
         * exact same hash code calculation that {@link String#hashCode()} uses.
         * A location representing "/library/book/title" therefore has the same
         * hash code as the <code>String</code> "/library/book/title".
         * <p/>
         * Note that <code>equals</code> is NOT overridden: a location is never
         * <code>equals</code> to a <code>String</code>, only the hash values
         * coincide.
         * <p/>
         * This method calculates the hash code and then caches it, followup
         * calls to {@link #push(String, String)} or {@link #pop()} invalidate
         * the cached hash code allowing it to be recalculated again on the next
         * call.
         *
         * @return the String-equivalent hash code of the current path, or 0
         *         for an empty path.
         */
        @Override
        public int hashCode() {
            /*
             * A stored value of 0 means "not computed yet" (or the path is
             * empty, in which case 0 is already the right answer). Only a
             * non-empty path needs the loop.
             */
            if (hashCode == 0 && path.length() > 0) {
                int hash = 0;

                for (int i = 0, length = path.length(); i < length; i++) {
                    hash = 31 * hash + path.charAt(i);
                }

                hashCode = hash;
            }

            return hashCode;
        }

        /**
         * Used to get a cached {@link Integer} version of the <code>int</code>
         * {@link #hashCode()} return value.
         * <p/>
         * To avoid unnecessary {@link Integer} allocations, this method keeps
         * a fixed-size table of {@link Integer} instances and re-uses the
         * stored instance whenever the same hash code value comes back up.
         * <p/>
         * If the slot for the current hash code is already occupied by a
         * different value, the table is left untouched and an {@link Integer}
         * obtained from {@link Integer#valueOf(int)} is returned instead.
         *
         * @return an {@link Integer} whose value equals {@link #hashCode()}.
         */
        public Integer getCachedHashCode() {
            // Make sure the hashCode field is up to date
            hashCode();

            // Figure out the index, in our cache, where this value WOULD be.
            int index = hashCode % hashCodeCache.length;

            // The remainder keeps the sign of hashCode; use its magnitude.
            if (index < 0)
                index = -index;

            // Get the Integer we think represents our value.
            Integer value = hashCodeCache[index];

            // If we haven't created an Integer for this value yet, do it now.
            if (value == null)
                hashCodeCache[index] = (value = Integer.valueOf(hashCode));
            /*
             * The slot is taken by a different hash code (a collision):
             * bypass the cache for this value and leave the slot as it is.
             */
            else if (hashCode != value.intValue())
                value = Integer.valueOf(hashCode);

            return value;
        }

        /**
         * Used to clear all the internal state of the location: the path, the
         * recorded lengths, the computed hash code and the {@link Integer}
         * cache.
         */
        public void clear() {
            hashCode = 0;
            hashCodeCache = new Integer[HASH_CODE_CACHE_SIZE];

            path.setLength(0);
            lengthList.clear();
        }

        /**
         * "Pushes" a new local name and optional namespace URI onto the "stack"
         * by appending it to the current location path that represents the
         * parser's location inside of the XML doc.
         * <p/>
         * The appended element has the form <code>/localName</code>, or
         * <code>/[namespaceURI]localName</code> when a non-empty namespace URI
         * is given.
         *
         * @param localName
         *            The local name of the tag (e.g. "title").
         * @param namespaceURI
         *            Optionally, the full qualifying namespace URI for this
         *            tag; <code>null</code> and "" are both treated as "no
         *            namespace".
         */
        public void push(String localName, String namespaceURI) {
            // The path is about to change; invalidate the computed hash code.
            hashCode = 0;

            // Remember the length before we inserted this last entry
            lengthList.add(path.length());

            // Add separator
            path.append('/');

            // Add the namespace URI if there is one.
            if (namespaceURI != null && namespaceURI.length() > 0)
                path.append('[').append(namespaceURI).append(']');

            // Append the local name
            path.append(localName);
        }

        /**
         * "Pops" the last pushed path element off the "stack" by re-adjusting
         * the {@link StringBuilder}'s length to what it was before the last
         * element was appended.
         * <p/>
         * Must only be called after a matching
         * {@link #push(String, String)}; there is no check for an empty stack.
         */
        public void pop() {
            // The path is about to change; invalidate the computed hash code.
            hashCode = 0;

            // Get the length before the last insertion
            Integer lastLength = lengthList.remove(lengthList.size() - 1);

            // 'Pop' the last insertion by cropping the length to exclude it.
            path.setLength(lastLength);
        }
    }
}