NodeSetHelper.java example

Explorer
eXist-1.4.x-master
/*
 * eXist Open Source Native XML Database
 * Copyright (C) 2001-2007 The eXist Project
 * http://exist-db.org
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *  
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 * 
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software Foundation
 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 *  
 *  $Id$
 */
package org.exist.dom;

import org.exist.numbering.NodeId;
import org.exist.storage.DBBroker;
import org.exist.xquery.Constants;
import org.exist.xquery.Expression;
import org.exist.xquery.XPathException;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;

import java.util.Iterator;

/**
 * Collection of static methods operating on node sets.
 * 
 * @author wolf
 */
public class NodeSetHelper {

    /**
     * Filters a set of potential child nodes against a set of potential parent
     * nodes, keeping only the pairs for which the parent-child relationship
     * actually holds.
     * 
     * Which side of a matching pair is returned depends on the mode constant.
     * With {@link NodeSet#DESCENDANT} the nodes of <code>dl</code> that have a
     * parent in <code>al</code> are returned. With {@link NodeSet#ANCESTOR} the
     * nodes of <code>al</code> for which a child was found in <code>dl</code>
     * are returned, and each child is also registered on its parent through
     * addMatches.
     * 
     * @param dl
     *            a node set containing potential child nodes
     * @param al
     *            a node set containing potential parent nodes
     * @param mode
     *            selection mode, either {@link NodeSet#DESCENDANT} or
     *            {@link NodeSet#ANCESTOR}
     * @param contextId
     *            used to track context nodes when evaluating predicate
     *            expressions. If contextId != {@link Expression#NO_CONTEXT_ID},
     *            deepCopyContext is called with that id on each result;
     *            otherwise copyContext is used.
     * @return the selected nodes; sort() has been called on the set
     * @throws IllegalArgumentException
     *             if mode is neither of the two supported constants
     */
    public static NodeSet selectParentChild(NodeSet dl, NodeSet al, int mode,
                                            int contextId) {
        if (mode != NodeSet.DESCENDANT && mode != NodeSet.ANCESTOR) {
            throw new IllegalArgumentException("Bad 'mode' argument");
        }
        final boolean returnChildren = (mode == NodeSet.DESCENDANT);
        final boolean explicitContext = (Expression.NO_CONTEXT_ID != contextId);
        final ExtArrayNodeSet selected = new ExtArrayNodeSet();
        DocumentImpl currentDoc = null;

        final Iterator children = dl.iterator();
        while (children.hasNext()) {
            final NodeProxy child = (NodeProxy) children.next();
            // a real size hint is only fetched for the first node of each
            // newly encountered document
            int sizeHint = Constants.NO_SIZE_HINT;
            if (currentDoc == null || child.getDocument() != currentDoc) {
                currentDoc = child.getDocument();
                sizeHint = returnChildren ? dl.getSizeHint(currentDoc)
                                          : al.getSizeHint(currentDoc);
            }
            final NodeProxy parent = al.parentWithChild(child, true, false,
                                                        NodeProxy.UNKNOWN_NODE_LEVEL);
            if (parent == null) {
                continue;
            }
            if (returnChildren) {
                if (explicitContext) {
                    child.deepCopyContext(parent, contextId);
                } else {
                    child.copyContext(parent);
                }
                selected.add(child, sizeHint);
            } else {
                if (explicitContext) {
                    parent.deepCopyContext(child, contextId);
                } else {
                    parent.copyContext(child);
                }
                parent.addMatches(child);
                selected.add(parent, sizeHint);
            }
        }
        selected.sort();
        return selected;
    }

    /**
     * Filters a set of potential descendant nodes against a set of potential
     * ancestor nodes, keeping only the pairs for which the ancestor-descendant
     * relationship actually holds.
     * 
     * Which side of a matching pair is returned depends on the mode constant.
     * With {@link NodeSet#DESCENDANT} the nodes of <code>dl</code> that have an
     * ancestor in <code>al</code> are returned. With {@link NodeSet#ANCESTOR}
     * the nodes of <code>al</code> for which a descendant was found in
     * <code>dl</code> are returned.
     * 
     * @param dl
     *            a node set containing potential descendant nodes
     * @param al
     *            a node set containing potential ancestor nodes
     * @param mode
     *            selection mode, either {@link NodeSet#DESCENDANT} or
     *            {@link NodeSet#ANCESTOR}
     * @param includeSelf
     *            if true, a node also counts as its own ancestor
     *            (descendant-or-self axis); passed through to
     *            <code>al.parentWithChild</code>
     * @param contextId
     *            used to track context nodes when evaluating predicate
     *            expressions. If contextId != {@link Expression#NO_CONTEXT_ID},
     *            the other node of the pair is added as a context node under
     *            that id; otherwise copyContext is used.
     * @return the selected nodes, as collected (sort() is not called here)
     * @throws IllegalArgumentException
     *             if mode is neither of the two supported constants
     */
    public static NodeSet selectAncestorDescendant(NodeSet dl, NodeSet al,
                                                   int mode, boolean includeSelf, int contextId) {
        if (mode != NodeSet.DESCENDANT && mode != NodeSet.ANCESTOR) {
            throw new IllegalArgumentException("Bad 'mode' argument");
        }
        final boolean returnDescendants = (mode == NodeSet.DESCENDANT);
        final boolean explicitContext = (Expression.NO_CONTEXT_ID != contextId);
        final ExtArrayNodeSet selected = new ExtArrayNodeSet();
        DocumentImpl currentDoc = null;

        final Iterator descendants = dl.iterator();
        while (descendants.hasNext()) {
            final NodeProxy descendant = (NodeProxy) descendants.next();
            // get a size hint for every new document encountered
            int sizeHint = Constants.NO_SIZE_HINT;
            if (currentDoc == null || descendant.getDocument() != currentDoc) {
                currentDoc = descendant.getDocument();
                sizeHint = returnDescendants ? dl.getSizeHint(currentDoc)
                                             : al.getSizeHint(currentDoc);
            }
            final NodeProxy ancestor = al.parentWithChild(descendant.getDocument(),
                                                          descendant.getNodeId(),
                                                          false, includeSelf);
            if (ancestor == null) {
                continue;
            }
            // both modes do the same work with the two roles swapped
            final NodeProxy kept = returnDescendants ? descendant : ancestor;
            final NodeProxy other = returnDescendants ? ancestor : descendant;
            if (explicitContext) {
                kept.addContextNode(contextId, other);
            } else {
                kept.copyContext(other);
            }
            selected.add(kept, sizeHint);
        }
        return selected;
    }

    /**
     * For two sets of potential ancestor and descendant nodes, return all the
     * real ancestors having a descendant in the descendant set.
     * 
     * Each ancestor is added to the result only once. When it is first added,
     * the descendant is registered on it through addMatches. When it is found
     * again through another descendant, only a further context node may be
     * attached to the entry that is already in the result.
     * 
     * @param al
     *            node set containing potential ancestors
     * @param dl
     *            node set containing potential descendants
     * @param includeSelf
     *            if true, a descendant that is itself contained in
     *            <code>al</code> is also selected (ancestor-or-self axis)
     * @param contextId
     *            used to track context nodes when evaluating predicate
     *            expressions. If contextId != {@link Expression#NO_CONTEXT_ID},
     *            the current context will be added to each result of the
     *            selection. With {@link Expression#IGNORE_CONTEXT} no context
     *            is attached to a newly added ancestor.
     * @return a new node set holding the selected ancestors
     */
    public static NodeSet selectAncestors(NodeSet al, NodeSet dl,
                                          boolean includeSelf, int contextId) {
        final NodeSet selected = new NewArrayNodeSet();
        final Iterator descendants = dl.iterator();
        while (descendants.hasNext()) {
            final NodeProxy descendant = (NodeProxy) descendants.next();
            final Iterator matching =
                ancestorsForChild(al, descendant, false, includeSelf).iterator();
            while (matching.hasNext()) {
                final NodeProxy ancestor = (NodeProxy) matching.next();
                if (ancestor == null) {
                    continue;
                }
                final NodeProxy known = selected.get(ancestor);
                if (known != null) {
                    // already selected through an earlier descendant
                    if (Expression.NO_CONTEXT_ID != contextId) {
                        known.addContextNode(contextId, descendant);
                    }
                    continue;
                }
                if (Expression.IGNORE_CONTEXT != contextId) {
                    if (Expression.NO_CONTEXT_ID == contextId) {
                        ancestor.copyContext(descendant);
                    } else {
                        ancestor.addContextNode(contextId, descendant);
                    }
                }
                ancestor.addMatches(descendant);
                selected.add(ancestor);
            }
        }
        return selected;
    }

    /**
     * Return all nodes contained in the node set <code>ancestors</code> that
     * are ancestors of the node <code>child</code>.
     * 
     * The lookup climbs from the child's node id through getParentId() and
     * queries the set at every step, until the id is null or
     * {@link NodeId#DOCUMENT_NODE}.
     * 
     * @param ancestors
     *            the node set to search
     * @param child
     *            the node whose ancestors are looked up
     * @param directParent
     *            if true, stop climbing at the first level that is not
     *            contained in the set
     * @param includeSelf
     *            if true, the child itself is added when the set contains it
     * @return a new node set with the ancestors found, possibly empty
     */
    private static NodeSet ancestorsForChild(NodeSet ancestors,
                                             NodeProxy child, boolean directParent, boolean includeSelf) {
        final NodeSet found = new NewArrayNodeSet(5);
        NodeId currentId = child.getNodeId();
        final NodeProxy self = ancestors.get(child.getDocument(), currentId);
        if (self != null && includeSelf) {
            found.add(self);
        }
        while (currentId != null && currentId != NodeId.DOCUMENT_NODE) {
            currentId = currentId.getParentId();
            final NodeProxy match = ancestors.get(child.getDocument(), currentId);
            if (match != null) {
                found.add(match);
            } else if (directParent) {
                break;
            }
        }
        return found;
    }

    /**
     * Select all nodes from the passed set of potential siblings, which are
     * preceding siblings of the nodes in the other set.
     * 
     * Both sets are walked in lock-step with one iterator each. Depending on
     * how the current reference and candidate compare (document id, then
     * parent id and tree level, then node id) either the reference or the
     * candidate iterator is advanced. NOTE(review): this presumably relies on
     * both sets being iterated in document order; confirm against the
     * NodeSet implementations.
     * 
     * @param candidates
     *            the node set to check
     * @param references
     *            a node set containing potential siblings
     * @param contextId
     *            used to track context nodes when evaluating predicate
     *            expressions. If contextId != {@link Expression#NO_CONTEXT_ID},
     *            the current context will be added to each result of the
     *            selection. With {@link Expression#IGNORE_CONTEXT} no context
     *            is attached to a newly added candidate.
     * @return {@link NodeSet#EMPTY_SET} if either input set is empty,
     *         otherwise a new node set holding the selected candidates
     */
    public static NodeSet selectPrecedingSiblings(NodeSet candidates,
                                                  NodeSet references, int contextId) {
        if (references.isEmpty() || candidates.isEmpty())
            return NodeSet.EMPTY_SET;
        NodeSet result = new ExtArrayNodeSet();
        NodeSetIterator iReferences = references.iterator();
        NodeSetIterator iCandidates = candidates.iterator();
        NodeProxy reference = (NodeProxy) iReferences.next();
        NodeProxy candidate = (NodeProxy) iCandidates.next();
        // first candidate seen under the parent currently being processed;
        // used to rewind iCandidates when moving on to the next reference
        NodeProxy firstCandidate = null;
        while (true) {
            // first, try to find nodes belonging to the same doc
            if (reference.getDocument().getDocId() < candidate.getDocument()
                .getDocId()) {
                // reference lies in an earlier document: advance the references
                firstCandidate = null;
                if (iReferences.hasNext())
                    reference = (NodeProxy) iReferences.next();
                else
                    break;
            } else if (reference.getDocument().getDocId() > candidate
                       .getDocument().getDocId()) {
                // candidate lies in an earlier document: advance the candidates
                firstCandidate = null;
                if (iCandidates.hasNext())
                    candidate = (NodeProxy) iCandidates.next();
                else
                    break;
            } else {
                // same document: check if the nodes have the same parent
                int cmp = candidate.getNodeId().getParentId().compareTo(reference.getNodeId().getParentId());
                if (cmp > 0 && candidate.getNodeId().getTreeLevel() <= reference.getNodeId().getTreeLevel()) {
                    // wrong parent: the candidate's parent id compares greater
                    // and the candidate is not deeper than the reference, so
                    // proceed with the next reference
                    firstCandidate = null;
                    if (iReferences.hasNext())
                        reference = (NodeProxy) iReferences.next();
                    else
                        break;
                } else if (cmp < 0  || (cmp > 0 && candidate.getNodeId().getTreeLevel() >= reference.getNodeId().getTreeLevel())) {
                    // Original author's note on the '>=' tree-level test above:
                    // "Why did I have to invert the test ?"
                    // wrong parent: proceed with the next candidate.
                    // Together with the branch above this covers every
                    // cmp != 0 case, so the else below is only reached when
                    // both nodes have the same parent id.
                    firstCandidate = null;
                    if (iCandidates.hasNext())
                        candidate = (NodeProxy) iCandidates.next();
                    else
                        break;
                } else {
                    if (firstCandidate == null)
                        firstCandidate = candidate;

                    // found two nodes with the same parent
                    // now, compare the ids: a node is a preceding sibling
                    // if its id is less than the id of the other node
                    cmp = candidate.getNodeId().compareTo(reference.getNodeId());
                    if (cmp < 0) {
                        // found a preceding sibling
                        NodeProxy t = result.get(candidate);
                        if (t == null) {
                            // first hit for this candidate: attach the context
                            // (unless it is to be ignored) and add it
                            if (Expression.IGNORE_CONTEXT != contextId) {
                                if (Expression.NO_CONTEXT_ID == contextId) {
                                    candidate.copyContext(reference);
                                } else {
                                    candidate.addContextNode(contextId,
                                                             reference);
                                }
                            }
                            result.add(candidate);
                        } else if (contextId > Expression.NO_CONTEXT_ID){
                            // candidate already selected: only record the
                            // additional context node.
                            // NOTE(review): this guard uses '>' whereas the
                            // first-hit branch compares with ==/!=; confirm
                            // the intended handling of IGNORE_CONTEXT.
                            t.addContextNode(contextId, reference);
                        }
                        if (iCandidates.hasNext())
                            candidate = (NodeProxy) iCandidates.next();
                        else
                            break;
                    } else if (cmp > 0) {
                        // found a following sibling: skip it
                        if (iCandidates.hasNext())
                            // TODO : break ?
                            candidate = (NodeProxy) iCandidates.next();
                        else
                            break;
                    } else {
                        // equal nodes: proceed with the next reference and
                        // rewind the candidates to the first one recorded for
                        // this parent
                        if (iReferences.hasNext()) {
                            reference = (NodeProxy) iReferences.next();
                            iCandidates.setPosition(firstCandidate);
                            candidate = (NodeProxy) iCandidates.next();
                        } else
                            break;
                    }
                }
            }
        }
        return result;
    }

    /**
     * Select all nodes from the passed set of potential siblings, which are
     * following siblings of the nodes in the other set.
     * 
     * Both sets are walked in lock-step with one iterator each. Depending on
     * how the current reference and candidate compare (document id, then
     * parent id and tree level, then node id) either the reference or the
     * candidate iterator is advanced. NOTE(review): this presumably relies on
     * both sets being iterated in document order; confirm against the
     * NodeSet implementations.
     * 
     * @param candidates
     *            the node set to check
     * @param references
     *            a node set containing potential siblings
     * @param contextId
     *            used to track context nodes when evaluating predicate
     *            expressions. If contextId != {@link Expression#NO_CONTEXT_ID},
     *            the current context will be added to each result of the
     *            selection. With {@link Expression#IGNORE_CONTEXT} no context
     *            is attached to a newly added candidate.
     * @return {@link NodeSet#EMPTY_SET} if either input set is empty,
     *         otherwise a new node set holding the selected candidates
     */
    public static NodeSet selectFollowingSiblings(NodeSet candidates,
                                                  NodeSet references, int contextId) {
        if (references.isEmpty() || candidates.isEmpty())
            return NodeSet.EMPTY_SET;
        NodeSet result = new ExtArrayNodeSet();
        NodeSetIterator iReferences = references.iterator();
        NodeSetIterator iCandidates = candidates.iterator();
        NodeProxy reference = (NodeProxy) iReferences.next();
        NodeProxy candidate = (NodeProxy) iCandidates.next();
        // first candidate seen under the parent currently being processed;
        // used to rewind iCandidates when moving on to the next reference
        NodeProxy firstCandidate = null;
        // TODO : review : don't care about preceding siblings
        while (true) {
            // first, try to find nodes belonging to the same doc
            if (reference.getDocument().getDocId() < candidate.getDocument()
                .getDocId()) {
                // reference lies in an earlier document: advance the references
                firstCandidate = null;
                if (iReferences.hasNext())
                    reference = (NodeProxy) iReferences.next();
                else
                    break;
            } else if (reference.getDocument().getDocId() > candidate
                       .getDocument().getDocId()) {
                // candidate lies in an earlier document: advance the candidates
                firstCandidate = null;
                if (iCandidates.hasNext())
                    candidate = (NodeProxy) iCandidates.next();
                else
                    break;
            } else {
                // same document: check if the nodes have the same parent
                int cmp = candidate.getNodeId().getParentId().compareTo(reference.getNodeId().getParentId());
                if (cmp > 0 && candidate.getNodeId().getTreeLevel() <= reference.getNodeId().getTreeLevel()) {
                    //Do not proceed to the next "parent" if the candidate is a descendant  
                    // wrong parent: the candidate's parent id compares greater
                    // and the candidate is not deeper than the reference, so
                    // proceed with the next reference
                    firstCandidate = null;
                    if (iReferences.hasNext())
                        reference = (NodeProxy) iReferences.next();
                    else
                        break;
                } else if (cmp < 0  || (cmp > 0 && candidate.getNodeId().getTreeLevel() >= reference.getNodeId().getTreeLevel())) {
                    // Original author's note on the '>=' tree-level test above:
                    // "Why did I have to invert the test ?"
                    // wrong parent: proceed with the next candidate.
                    // Together with the branch above this covers every
                    // cmp != 0 case, so the else below is only reached when
                    // both nodes have the same parent id.
                    firstCandidate = null;
                    if (iCandidates.hasNext())
                        candidate = (NodeProxy) iCandidates.next();
                    else
                        break;
                } else {
                    if (firstCandidate == null)
                        firstCandidate = candidate;

                    cmp = candidate.getNodeId().compareTo(reference.getNodeId());

                    // found two nodes with the same parent
                    // now, compare the ids: a node is a following sibling
                    // if its id is greater than the id of the other node
                    if (cmp < 0) {
                        // found a preceding sibling: skip it
                        if (iCandidates.hasNext())
                            candidate = (NodeProxy) iCandidates.next();
                        else
                            break;
                    } else if (cmp > 0) {
                        // found a following sibling
                        NodeProxy t = result.get(candidate);
                        if (t == null) {
                            // first hit for this candidate: attach the context
                            // (unless it is to be ignored) and add it
                            if (Expression.IGNORE_CONTEXT != contextId) {
                                if (Expression.NO_CONTEXT_ID == contextId) {
                                    candidate.copyContext(reference);
                                } else {
                                    candidate.addContextNode(contextId,
                                                             reference);
                                }
                            }
                            result.add(candidate);
                        } else {
                            // candidate already selected: record the additional
                            // context node.
                            // NOTE(review): unlike the first-hit branch this is
                            // not guarded by any contextId check, so it also
                            // runs for NO_CONTEXT_ID and IGNORE_CONTEXT;
                            // confirm this is intended.
                            t.addContextNode(contextId, reference);
                        }
                        // NOTE(review): the candidate is added here regardless
                        // of the branch taken above, i.e. a second time when
                        // t == null. Presumably the result set tolerates or
                        // merges duplicates; confirm before removing.
                        result.add(candidate);
                        if (iCandidates.hasNext())
                            candidate = (NodeProxy) iCandidates.next();
                        else if (iReferences.hasNext()) {
                            // candidates exhausted: move on to the next
                            // reference and rewind the candidates to the first
                            // one recorded for this parent
                            reference = (NodeProxy) iReferences.next();
                            iCandidates.setPosition(firstCandidate);
                            candidate = (NodeProxy) iCandidates.next();
                        } 
                        else
                            break;
                    } else {
                        // equal nodes: proceed with next node
                        if (iCandidates.hasNext())
                            candidate = (NodeProxy) iCandidates.next();
                        else
                            break;
                    }
                }
            }
        }
        return result;
    }

    /**
     * Selects every candidate for which
     * <code>candidate.before(reference, true)</code> holds for at least one
     * node of the reference set. Each pair is tested, so the candidate set is
     * iterated once per reference. On every hit the reference is attached to
     * the candidate as a context node under
     * {@link Expression#NO_CONTEXT_ID}.
     * 
     * TODO: doesn't work!!! (flagged by the original author; the failure mode
     * is not documented)
     * 
     * @param references
     *            the nodes to compare against
     * @param candidates
     *            the nodes to select from
     * @return {@link NodeSet#EMPTY_SET} if either input set is empty,
     *         otherwise a new node set holding the selected candidates
     */
    public static NodeSet selectPreceding(NodeSet references, NodeSet candidates)
        throws XPathException {
        if (candidates.isEmpty() || references.isEmpty()) {
            return NodeSet.EMPTY_SET;
        }
        final NodeSet preceding = new NewArrayNodeSet();
        final Iterator refIter = references.iterator();
        while (refIter.hasNext()) {
            final NodeProxy reference = (NodeProxy) refIter.next();
            final Iterator candIter = candidates.iterator();
            while (candIter.hasNext()) {
                final NodeProxy candidate = (NodeProxy) candIter.next();
                if (!candidate.before(reference, true)) {
                    continue;
                }
                // TODO : add transverse context
                candidate.addContextNode(Expression.NO_CONTEXT_ID, reference);
                preceding.add(candidate);
            }
        }
        return preceding;
    }

    /**
     * Selects every candidate for which
     * <code>candidate.after(reference, true)</code> holds for at least one
     * node of the reference set. Each pair is tested, so the candidate set is
     * iterated once per reference. On every hit the reference is attached to
     * the candidate as a context node under
     * {@link Expression#NO_CONTEXT_ID}.
     * 
     * TODO: doesn't work!!! (flagged by the original author; the failure mode
     * is not documented)
     * 
     * @param references
     *            the nodes to compare against
     * @param candidates
     *            the nodes to select from
     * @return {@link NodeSet#EMPTY_SET} if either input set is empty,
     *         otherwise a new node set holding the selected candidates
     */
    public static NodeSet selectFollowing(NodeSet references, NodeSet candidates)
        throws XPathException {
        if (candidates.isEmpty() || references.isEmpty()) {
            return NodeSet.EMPTY_SET;
        }
        final NodeSet following = new ExtArrayNodeSet();
        final Iterator refIter = references.iterator();
        while (refIter.hasNext()) {
            final NodeProxy reference = (NodeProxy) refIter.next();
            final Iterator candIter = candidates.iterator();
            while (candIter.hasNext()) {
                final NodeProxy candidate = (NodeProxy) candIter.next();
                if (!candidate.after(reference, true)) {
                    continue;
                }
                // TODO : add transverse context
                candidate.addContextNode(Expression.NO_CONTEXT_ID, reference);
                following.add(candidate);
            }
        }
        return following;
    }

    /**
     * Calls {@link NodeProxy#directSelectAttribute} on every node of the
     * candidate set and merges the returned node sets into one result.
     * 
     * @param broker
     *            the broker handed on to each directSelectAttribute call
     * @param candidates
     *            the nodes to query
     * @param test
     *            the node test handed on to each directSelectAttribute call
     * @param contextId
     *            the context id handed on to each directSelectAttribute call
     * @return {@link NodeSet#EMPTY_SET} if the candidate set is empty,
     *         otherwise a new node set holding the merged results
     */
    public static NodeSet directSelectAttributes(DBBroker broker, NodeSet candidates,
                                                 org.exist.xquery.NodeTest test, int contextId) {
        if (candidates.isEmpty()) {
            return NodeSet.EMPTY_SET;
        }
        final NodeSet merged = new ExtArrayNodeSet();
        final Iterator nodes = candidates.iterator();
        while (nodes.hasNext()) {
            final NodeProxy owner = (NodeProxy) nodes.next();
            merged.addAll(owner.directSelectAttribute(broker, test, contextId));
        }
        return merged;
    }

    /**
     * Copies the children of <code>node</code> into <code>new_node</code>,
     * creating the copies through {@link #copyNode(Document, Node)}.
     * 
     * Element and text children are appended to <code>new_node</code>. A child
     * reported as an attribute node is set on <code>new_node</code> with
     * setAttributeNode, which requires <code>new_node</code> to be an
     * {@link Element}. Children of any other type, and null entries in the
     * child list, are skipped.
     * 
     * @param new_doc
     *            the document used to create the copies
     * @param node
     *            the node whose children are copied
     * @param new_node
     *            the node receiving the copies
     */
    public final static void copyChildren(Document new_doc, Node node,
                                          Node new_node) {
        final NodeList kids = node.getChildNodes();
        for (int idx = 0; idx < kids.getLength(); idx++) {
            final Node kid = kids.item(idx);
            if (kid == null) {
                continue;
            }
            final short type = kid.getNodeType();
            if (type == Node.ELEMENT_NODE || type == Node.TEXT_NODE) {
                final Node copy = copyNode(new_doc, kid);
                new_node.appendChild(copy);
            } else if (type == Node.ATTRIBUTE_NODE) {
                final Node copy = copyNode(new_doc, kid);
                ((Element) new_node).setAttributeNode((Attr) copy);
            }
            // TODO : error for any other one -pb
        }
    }

    /**
     * Creates a copy of <code>node</code> that is owned by
     * <code>new_doc</code>.
     * 
     * Elements are re-created with their namespace URI and node name, and
     * their children are copied through
     * {@link #copyChildren(Document, Node, Node)}. Text nodes are re-created
     * from their data. Attributes are re-created with their namespace URI,
     * node name and value.
     * 
     * @param new_doc
     *            the document used to create the copy
     * @param node
     *            the node to copy
     * @return the copy, or null if the node is not an element, text or
     *         attribute node
     */
    public final static Node copyNode(Document new_doc, Node node) {
        final short type = node.getNodeType();
        if (type == Node.ELEMENT_NODE) {
            final Node element =
                new_doc.createElementNS(node.getNamespaceURI(), node.getNodeName());
            copyChildren(new_doc, node, element);
            return element;
        }
        if (type == Node.TEXT_NODE) {
            return new_doc.createTextNode(((Text) node).getData());
        }
        if (type == Node.ATTRIBUTE_NODE) {
            final Attr attribute =
                new_doc.createAttributeNS(node.getNamespaceURI(), node.getNodeName());
            attribute.setValue(((Attr) node).getValue());
            return attribute;
        }
        // TODO : error ? -pb
        return null;
    }
}