// HTMLParser Library $Name: v1_6_20060319 $ - A java-based parser for HTML // http://sourceforge.org/projects/htmlparser // Copyright (C) 2004 Somik Raha // // Revision Control Information // // $Source: /cvsroot/htmlparser/htmlparser/src/org/htmlparser/util/NodeTreeWalker.java,v $ // $Author: ian_macfarlane $ // $Date: 2006/02/13 14:50:35 $ // $Revision: 1.1 $ // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // package org.htmlparser.util; import org.htmlparser.Node; /** * A class for walking a tree of {@link Node} objects, in either a depth-first or breadth-first manner. * The following two diagrams show the represent tree traversal with the two different methods. * <table> * <tr> * <th>Depth-first traversal</th> * <th>Breadth-first traversal</th> * </tr> * <tr> * <img src="http://htmlparser.sourceforge.net/tree-traversal-depth-first.gif" alt="Diagram showing depth-first tree traversal" width="300" height="300" /> * </tr> * <tr> * <img src="http://htmlparser.sourceforge.net/tree-traversal-breadth-first.gif" alt="Diagram showing breadth-first tree traversal" width="300" height="300" /> * </tr> * </table> * @author ian_macfarlane */ public class NodeTreeWalker implements NodeIterator { /** * The root Node element which defines the scope of the current tree to walk. */ protected Node mRootNode; /** * The current Node element, which will be a child of the root Node, or null. */ protected Node mCurrentNode; /** * The next Node element after the current Node element. * Stored for internal use only. */ protected Node mNextNode; /** * The maximum depth (child-parent links) from which this NodeTreeWalker may be removed from the root Node. * A value of -1 indicates that there is no depth restriction. */ protected int mMaxDepth; /** * Whether the tree traversal method used is depth-first (default) or breadth-first. */ protected boolean mDepthFirst; /** * Creates a new instance of NodeTreeWalker using depth-first tree traversal, without limits on how deep it may traverse. * @param root Node The Node to set as the root of the tree. * @throws NullPointerException if root Node is null. */ public NodeTreeWalker(Node rootNode) { this(rootNode, true, -1); } /** * Creates a new instance of NodeTreeWalker using the specified type of tree traversal, without limits on how deep it may traverse. * @param rootNode The Node to set as the root of the tree. * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal. * @throws NullPointerException if rootNode is null. */ public NodeTreeWalker(Node rootNode, boolean depthFirst) { this(rootNode, depthFirst, -1); } /** * Creates a new instance of NodeTreeWalker using the specified type of tree traversal and maximum depth from the root Node to traverse. * @param rootNode The Node to set as the root of the tree. * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal. * @param maxDepth The maximum depth from the root Node that this NodeTreeWalker may traverse. This must be > 0 or equal to -1. * @throws NullPointerException if rootNode is null. * @throws IllegalArgumentException maxDepth is not > 0 or equal to -1. */ public NodeTreeWalker(Node rootNode, boolean depthFirst, int maxDepth) { //check maxDepth is valid if ( ! ((maxDepth >= 1) || (maxDepth == -1)))//if not one of these valid possibilities throw new IllegalArgumentException("Paramater maxDepth must be > 0 or equal to -1."); initRootNode(rootNode);//this method also checks if rootNode is valid this.mDepthFirst = depthFirst; this.mMaxDepth = maxDepth; } /** * Whether the NodeTreeWalker is currently set to use depth-first or breadth-first tree traversal. * @return True if depth-first tree-traversal is used, or false if breadth-first tree-traversal is being used. */ public boolean isDepthFirst() { return (this.mDepthFirst); } /** * Sets whether the NodeTreeWalker should use depth-first or breadth-first tree traversal. * @param depthFirst Whether to use depth-first (true) or breadth-first (false) tree traversal. */ public void setDepthFirst(boolean depthFirst) { if (this.mDepthFirst != depthFirst)//if we are changing search pattern this.mNextNode = null; this.mDepthFirst = depthFirst; } /** * The maximum depth (number of child-parent links) below the root Node that this NodeTreeWalker may traverse. * @return The maximum depth that this NodeTreeWalker can traverse to. */ public int getMaxDepth() { return (this.mMaxDepth); } /** * Removes any restrictions in place that prevent this NodeTreeWalker from traversing beyond a certain depth. */ public void removeMaxDepthRestriction() { this.mMaxDepth = -1; } /** * Get the root Node that defines the scope of the tree to traverse. * @return The root Node. */ public Node getRootNode() { return (this.mRootNode); } /** * Get the Node in the tree that the NodeTreeWalker is current at. * @return The current Node. */ public Node getCurrentNode() { return (this.mCurrentNode); } /** * Sets the current Node as the root Node. * Resets the current position in the tree. * @throws NullPointerException if the current Node is null (i.e. if the tree traversal has not yet begun). */ public void setCurrentNodeAsRootNode() throws NullPointerException { if (this.mCurrentNode == null) throw new NullPointerException("Current Node is null, cannot set as root Node."); initRootNode(this.mCurrentNode); } /** * Sets the specified Node as the root Node. * Resets the current position in the tree. * @param rootNode The Node to set as the root of the tree. * @throws NullPointerException if rootNode is null. */ public void setRootNode(Node rootNode) throws NullPointerException { initRootNode(rootNode); } /** * Resets the current position in the tree, * such that calling <code>nextNode()</code> will return the first Node again. */ public void reset() { this.mCurrentNode = null; this.mNextNode = null; } /** * Traverses to the next Node from the current Node, using either depth-first or breadth-first tree traversal as appropriate. * @return The next Node from the current Node. */ public Node nextNode() { if (this.mNextNode != null)//check if we've already found the next Node by calling hasMoreNodes() { this.mCurrentNode = this.mNextNode; this.mNextNode = null;//reset mNextNode } else { //Check if we have started traversing yet. If not, start with first child (for either traversal method). if (this.mCurrentNode == null) this.mCurrentNode = this.mRootNode.getFirstChild(); else { if (this.mDepthFirst) this.mCurrentNode = getNextNodeDepthFirst(); else this.mCurrentNode = getNextNodeBreadthFirst(); } } return (this.mCurrentNode); } /** * Get the number of places down that the current Node is from the root Node. * Returns 1 if current Node is a child of the root Node. * Returns 0 if this NodeTreeWalker has not yet traversed to any Nodes. * @return The depth the current Node is from the root Node. */ public int getCurrentNodeDepth() { int depth = 0; if (this.mCurrentNode != null)//if we are not at the root Node. { Node traverseNode = this.mCurrentNode; while (traverseNode != this.mRootNode) { ++depth; traverseNode = traverseNode.getParent(); } } return (depth); } /** * Returns whether or not there are more nodes available based on the current configuration of this NodeTreeWalker. * @return True if there are more Nodes available, based on the current configuration, or false otherwise. */ public boolean hasMoreNodes() { if (this.mNextNode == null)//if we've already generated mNextNode { if (this.mCurrentNode == null) this.mNextNode = this.mRootNode.getFirstChild(); else { if (this.mDepthFirst) this.mNextNode = getNextNodeDepthFirst(); else this.mNextNode = getNextNodeBreadthFirst(); } } return (this.mNextNode != null); } /** * Sets the root Node to be the given Node. * Resets the current position in the tree. * @param rootNode The Node to set as the root of the tree. * @throws NullPointerException if rootNode is null. */ protected void initRootNode(Node rootNode) throws NullPointerException { if (rootNode == null) throw new NullPointerException("Root Node cannot be null."); this.mRootNode = rootNode; this.mCurrentNode = null; this.mNextNode = null; } /** * Traverses to the next Node from the current Node using depth-first tree traversal * @return The next Node from the current Node using depth-first tree traversal. */ protected Node getNextNodeDepthFirst() { //loosely based on http://www.myarch.com/treeiter/traditways.jhtml int currentDepth = getCurrentNodeDepth(); Node traverseNode = null; if ((this.mMaxDepth == -1) || (currentDepth < this.mMaxDepth))//if it is less than max depth, then getting first child won't be more than max depth { traverseNode = this.mCurrentNode.getFirstChild(); if (traverseNode != null) return (traverseNode); } traverseNode = this.mCurrentNode; Node tempNextSibling = null;//keeping a reference to this this saves calling getNextSibling once later while ((traverseNode != this.mRootNode) && (tempNextSibling = traverseNode.getNextSibling()) == null)//CANNOT assign traverseNode as root Node traverseNode = traverseNode.getParent();// use child-parent link to get to the parent level return (tempNextSibling);//null if ran out of Node's } /** * Traverses to the next Node from the current Node using breadth-first tree traversal * @return The next Node from the current Node using breadth-first tree traversal. */ protected Node getNextNodeBreadthFirst() { Node traverseNode; //see if the mCurrentNode has a sibling after it traverseNode = this.mCurrentNode.getNextSibling(); if (traverseNode != null) return (traverseNode); int depth = getCurrentNodeDepth(); //try and find the next Node at the same depth that is not a sibling NodeList traverseNodeList; //step up to the parent Node to look through its children traverseNode = this.mCurrentNode.getParent(); int currentDepth = depth - 1; while(currentDepth > 0)//this is safe as we've tried getNextSibling already { Node tempNextSibling = null;//keeping a reference to this this saves calling getNextSibling once later //go to first parent with nextSibling, then to that sibling while(((tempNextSibling = traverseNode.getNextSibling()) == null) && (traverseNode != this.mRootNode))//CAN assign traverseNode as root Node { traverseNode = traverseNode.getParent(); --currentDepth; } //if have traversed back to the root Node, skip to next part where it finds the first Node at the next depth down if (traverseNode == this.mRootNode) break; traverseNode = tempNextSibling; if (traverseNode != null) { //go through children of that sibling traverseNodeList = traverseNode.getChildren(); while((traverseNodeList != null) && (traverseNodeList.size() != 0)) { traverseNode = traverseNode.getFirstChild(); ++currentDepth; if (currentDepth == depth) return (traverseNode);//found the next Node at the current depth else traverseNodeList = traverseNode.getChildren(); } // while((traverseNodeList != null) && (traverseNodeList.size() != 0)) } // if (traverseNode != null) } // while(currentDepth > 0) //step to the next depth down //check first whether we are about to go past max depth if (this.mMaxDepth != -1)//if -1, then there is no max depth restriction { if (depth >= this.mMaxDepth) return (null);//can't go past max depth } traverseNode = this.mRootNode.getFirstChild(); ++depth;//look for next depth currentDepth = 1; while(currentDepth > 0) { //go through children of that sibling traverseNodeList = traverseNode.getChildren(); while((traverseNodeList != null) && (traverseNodeList.size() != 0)) { traverseNode = traverseNode.getFirstChild(); ++currentDepth; if (currentDepth == depth) return (traverseNode);//found the next Node at the current depth else traverseNodeList = traverseNode.getChildren(); } // while((traverseNodeList != null) && (traverseNodeList.size() != 0)) //go to first parent with nextSibling, then to that sibling while((traverseNode.getNextSibling() == null) && (traverseNode != this.mRootNode)) { traverseNode = traverseNode.getParent(); --currentDepth; } traverseNode = traverseNode.getNextSibling(); if (traverseNode == null)//if null (i.e. reached end of tree), return null return (null); } // while(currentDepth > 0) //otherwise, finished searching, return null return (null); } // todo // previousNode() // getPreviousNodeDepthFirst() // getPreviousNodeBreadthFirst() // hasPreviousNodes() ? // these should be specificed in an interface - suggest something like ReversableNodeIterator (extends NodeIterator) // possible optimisations: when doing mNextNode, we should save mCurrentNode as previousNode, and vice versa }