/* * reserved comment block * DO NOT REMOVE OR ALTER! */ /* * Copyright 1999-2004 The Apache Software Foundation. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* * $Id: DOM2DTM.java,v 1.2.4.1 2005/09/15 08:15:10 suresh_emailid Exp $ */ package com.sun.org.apache.xml.internal.dtm.ref.dom2dtm; import java.util.Vector; import javax.xml.transform.SourceLocator; import javax.xml.transform.dom.DOMSource; import com.sun.org.apache.xml.internal.dtm.DTM; import com.sun.org.apache.xml.internal.dtm.DTMManager; import com.sun.org.apache.xml.internal.dtm.DTMWSFilter; import com.sun.org.apache.xml.internal.dtm.ref.DTMDefaultBaseIterators; import com.sun.org.apache.xml.internal.dtm.ref.DTMManagerDefault; import com.sun.org.apache.xml.internal.dtm.ref.ExpandedNameTable; import com.sun.org.apache.xml.internal.dtm.ref.IncrementalSAXSource; import com.sun.org.apache.xml.internal.res.XMLErrorResources; import com.sun.org.apache.xml.internal.res.XMLMessages; import com.sun.org.apache.xml.internal.utils.FastStringBuffer; import com.sun.org.apache.xml.internal.utils.QName; import com.sun.org.apache.xml.internal.utils.StringBufferPool; import com.sun.org.apache.xml.internal.utils.TreeWalker; import com.sun.org.apache.xml.internal.utils.XMLCharacterRecognizer; import com.sun.org.apache.xml.internal.utils.XMLString; import com.sun.org.apache.xml.internal.utils.XMLStringFactory; import org.w3c.dom.Attr; import org.w3c.dom.Document; import org.w3c.dom.DocumentType; import org.w3c.dom.Element; import org.w3c.dom.Entity; import org.w3c.dom.NamedNodeMap; import org.w3c.dom.Node; import org.xml.sax.ContentHandler; /** The <code>DOM2DTM</code> class serves up a DOM's contents via the * DTM API. * * Note that it doesn't necessarily represent a full Document * tree. You can wrap a DOM2DTM around a specific node and its subtree * and the right things should happen. (I don't _think_ we currently * support DocumentFrgment nodes as roots, though that might be worth * considering.) * * Note too that we do not currently attempt to track document * mutation. If you alter the DOM after wrapping DOM2DTM around it, * all bets are off. * */ public class DOM2DTM extends DTMDefaultBaseIterators { static final boolean JJK_DEBUG=false; static final boolean JJK_NEWCODE=true; /** Manefest constant */ static final String NAMESPACE_DECL_NS="http://www.w3.org/XML/1998/namespace"; /** The current position in the DOM tree. Last node examined for * possible copying to DTM. */ transient private Node m_pos; /** The current position in the DTM tree. Who children get appended to. */ private int m_last_parent=0; /** The current position in the DTM tree. Who children reference as their * previous sib. */ private int m_last_kid=NULL; /** The top of the subtree. * %REVIEW%: 'may not be the same as m_context if "//foo" pattern.' * */ transient private Node m_root; /** True iff the first element has been processed. This is used to control synthesis of the implied xml: namespace declaration node. */ boolean m_processedFirstElement=false; /** true if ALL the nodes in the m_root subtree have been processed; * false if our incremental build has not yet finished scanning the * DOM tree. */ transient private boolean m_nodesAreProcessed; /** The node objects. The instance part of the handle indexes * directly into this vector. Each DTM node may actually be * composed of several DOM nodes (for example, if logically-adjacent * Text/CDATASection nodes in the DOM have been coalesced into a * single DTM Text node); this table points only to the first in * that sequence. */ protected Vector m_nodes = new Vector(); /** * Construct a DOM2DTM object from a DOM node. * * @param mgr The DTMManager who owns this DTM. * @param domSource the DOM source that this DTM will wrap. * @param dtmIdentity The DTM identity ID for this DTM. * @param whiteSpaceFilter The white space filter for this DTM, which may * be null. * @param xstringfactory XMLString factory for creating character content. * @param doIndexing true if the caller considers it worth it to use * indexing schemes. */ public DOM2DTM(DTMManager mgr, DOMSource domSource, int dtmIdentity, DTMWSFilter whiteSpaceFilter, XMLStringFactory xstringfactory, boolean doIndexing) { super(mgr, domSource, dtmIdentity, whiteSpaceFilter, xstringfactory, doIndexing); // Initialize DOM navigation m_pos=m_root = domSource.getNode(); // Initialize DTM navigation m_last_parent=m_last_kid=NULL; m_last_kid=addNode(m_root, m_last_parent,m_last_kid, NULL); // Apparently the domSource root may not actually be the // Document node. If it's an Element node, we need to immediately // add its attributes. Adapted from nextNode(). // %REVIEW% Move this logic into addNode and recurse? Cleaner! // // (If it's an EntityReference node, we're probably scrod. For now // I'm just hoping nobody is ever quite that foolish... %REVIEW%) // // %ISSUE% What about inherited namespaces in this case? // Do we need to special-case initialize them into the DTM model? if(ELEMENT_NODE == m_root.getNodeType()) { NamedNodeMap attrs=m_root.getAttributes(); int attrsize=(attrs==null) ? 0 : attrs.getLength(); if(attrsize>0) { int attrIndex=NULL; // start with no previous sib for(int i=0;i<attrsize;++i) { // No need to force nodetype in this case; // addNode() will take care of switching it from // Attr to Namespace if necessary. attrIndex=addNode(attrs.item(i),0,attrIndex,NULL); m_firstch.setElementAt(DTM.NULL,attrIndex); } // Terminate list of attrs, and make sure they aren't // considered children of the element m_nextsib.setElementAt(DTM.NULL,attrIndex); // IMPORTANT: This does NOT change m_last_parent or m_last_kid! } // if attrs exist } //if(ELEMENT_NODE) // Initialize DTM-completed status m_nodesAreProcessed = false; } /** * Construct the node map from the node. * * @param node The node that is to be added to the DTM. * @param parentIndex The current parent index. * @param previousSibling The previous sibling index. * @param forceNodeType If not DTM.NULL, overrides the DOM node type. * Used to force nodes to Text rather than CDATASection when their * coalesced value includes ordinary Text nodes (current DTM behavior). * * @return The index identity of the node that was added. */ protected int addNode(Node node, int parentIndex, int previousSibling, int forceNodeType) { int nodeIndex = m_nodes.size(); // Have we overflowed a DTM Identity's addressing range? if(m_dtmIdent.size() == (nodeIndex>>>DTMManager.IDENT_DTM_NODE_BITS)) { try { if(m_mgr==null) throw new ClassCastException(); // Handle as Extended Addressing DTMManagerDefault mgrD=(DTMManagerDefault)m_mgr; int id=mgrD.getFirstFreeDTMID(); mgrD.addDTM(this,id,nodeIndex); m_dtmIdent.addElement(id<<DTMManager.IDENT_DTM_NODE_BITS); } catch(ClassCastException e) { // %REVIEW% Wrong error message, but I've been told we're trying // not to add messages right not for I18N reasons. // %REVIEW% Should this be a Fatal Error? error(XMLMessages.createXMLMessage(XMLErrorResources.ER_NO_DTMIDS_AVAIL, null));//"No more DTM IDs are available"; } } m_size++; // ensureSize(nodeIndex); int type; if(NULL==forceNodeType) type = node.getNodeType(); else type=forceNodeType; // %REVIEW% The Namespace Spec currently says that Namespaces are // processed in a non-namespace-aware manner, by matching the // QName, even though there is in fact a namespace assigned to // these nodes in the DOM. If and when that changes, we will have // to consider whether we check the namespace-for-namespaces // rather than the node name. // // %TBD% Note that the DOM does not necessarily explicitly declare // all the namespaces it uses. DOM Level 3 will introduce a // namespace-normalization operation which reconciles that, and we // can request that users invoke it or otherwise ensure that the // tree is namespace-well-formed before passing the DOM to Xalan. // But if they don't, what should we do about it? We probably // don't want to alter the source DOM (and may not be able to do // so if it's read-only). The best available answer might be to // synthesize additional DTM Namespace Nodes that don't correspond // to DOM Attr Nodes. if (Node.ATTRIBUTE_NODE == type) { String name = node.getNodeName(); if (name.startsWith("xmlns:") || name.equals("xmlns")) { type = DTM.NAMESPACE_NODE; } } m_nodes.addElement(node); m_firstch.setElementAt(NOTPROCESSED,nodeIndex); m_nextsib.setElementAt(NOTPROCESSED,nodeIndex); m_prevsib.setElementAt(previousSibling,nodeIndex); m_parent.setElementAt(parentIndex,nodeIndex); if(DTM.NULL != parentIndex && type != DTM.ATTRIBUTE_NODE && type != DTM.NAMESPACE_NODE) { // If the DTM parent had no children, this becomes its first child. if(NOTPROCESSED == m_firstch.elementAt(parentIndex)) m_firstch.setElementAt(nodeIndex,parentIndex); } String nsURI = node.getNamespaceURI(); // Deal with the difference between Namespace spec and XSLT // definitions of local name. (The former says PIs don't have // localnames; the latter says they do.) String localName = (type == Node.PROCESSING_INSTRUCTION_NODE) ? node.getNodeName() : node.getLocalName(); // Hack to make DOM1 sort of work... if(((type == Node.ELEMENT_NODE) || (type == Node.ATTRIBUTE_NODE)) && null == localName) localName = node.getNodeName(); // -sb ExpandedNameTable exnt = m_expandedNameTable; // %TBD% Nodes created with the old non-namespace-aware DOM // calls createElement() and createAttribute() will never have a // localname. That will cause their expandedNameID to be just the // nodeType... which will keep them from being matched // successfully by name. Since the DOM makes no promise that // those will participate in namespace processing, this is // officially accepted as Not Our Fault. But it might be nice to // issue a diagnostic message! if(node.getLocalName()==null && (type==Node.ELEMENT_NODE || type==Node.ATTRIBUTE_NODE)) { // warning("DOM 'level 1' node "+node.getNodeName()+" won't be mapped properly in DOM2DTM."); } int expandedNameID = (null != localName) ? exnt.getExpandedTypeID(nsURI, localName, type) : exnt.getExpandedTypeID(type); m_exptype.setElementAt(expandedNameID,nodeIndex); indexNode(expandedNameID, nodeIndex); if (DTM.NULL != previousSibling) m_nextsib.setElementAt(nodeIndex,previousSibling); // This should be done after m_exptype has been set, and probably should // always be the last thing we do if (type == DTM.NAMESPACE_NODE) declareNamespaceInContext(parentIndex,nodeIndex); return nodeIndex; } /** * Get the number of nodes that have been added. */ public int getNumberOfNodes() { return m_nodes.size(); } /** * This method iterates to the next node that will be added to the table. * Each call to this method adds a new node to the table, unless the end * is reached, in which case it returns null. * * @return The true if a next node is found or false if * there are no more nodes. */ protected boolean nextNode() { // Non-recursive one-fetch-at-a-time depth-first traversal with // attribute/namespace nodes and white-space stripping. // Navigating the DOM is simple, navigating the DTM is simple; // keeping track of both at once is a trifle baroque but at least // we've avoided most of the special cases. if (m_nodesAreProcessed) return false; // %REVIEW% Is this local copy Really Useful from a performance // point of view? Or is this a false microoptimization? Node pos=m_pos; Node next=null; int nexttype=NULL; // Navigate DOM tree do { // Look down to first child. if (pos.hasChildNodes()) { next = pos.getFirstChild(); // %REVIEW% There's probably a more elegant way to skip // the doctype. (Just let it go and Suppress it? if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType()) next=next.getNextSibling(); // Push DTM context -- except for children of Entity References, // which have no DTM equivalent and cause no DTM navigation. if(ENTITY_REFERENCE_NODE!=pos.getNodeType()) { m_last_parent=m_last_kid; m_last_kid=NULL; // Whitespace-handler context stacking if(null != m_wsfilter) { short wsv = m_wsfilter.getShouldStripSpace(makeNodeHandle(m_last_parent),this); boolean shouldStrip = (DTMWSFilter.INHERIT == wsv) ? getShouldStripWhitespace() : (DTMWSFilter.STRIP == wsv); pushShouldStripWhitespace(shouldStrip); } // if(m_wsfilter) } } // If that fails, look up and right (but not past root!) else { if(m_last_kid!=NULL) { // Last node posted at this level had no more children // If it has _no_ children, we need to record that. if(m_firstch.elementAt(m_last_kid)==NOTPROCESSED) m_firstch.setElementAt(NULL,m_last_kid); } while(m_last_parent != NULL) { // %REVIEW% There's probably a more elegant way to // skip the doctype. (Just let it go and Suppress it? next = pos.getNextSibling(); if(next!=null && DOCUMENT_TYPE_NODE==next.getNodeType()) next=next.getNextSibling(); if(next!=null) break; // Found it! // No next-sibling found. Pop the DOM. pos=pos.getParentNode(); if(pos==null) { // %TBD% Should never arise, but I want to be sure of that... if(JJK_DEBUG) { System.out.println("***** DOM2DTM Pop Control Flow problem"); for(;;); // Freeze right here! } } // The only parents in the DTM are Elements. However, // the DOM could contain EntityReferences. If we // encounter one, pop it _without_ popping DTM. if(pos!=null && ENTITY_REFERENCE_NODE == pos.getNodeType()) { // Nothing needs doing if(JJK_DEBUG) System.out.println("***** DOM2DTM popping EntRef"); } else { popShouldStripWhitespace(); // Fix and pop DTM if(m_last_kid==NULL) m_firstch.setElementAt(NULL,m_last_parent); // Popping from an element else m_nextsib.setElementAt(NULL,m_last_kid); // Popping from anything else m_last_parent=m_parent.elementAt(m_last_kid=m_last_parent); } } if(m_last_parent==NULL) next=null; } if(next!=null) nexttype=next.getNodeType(); // If it's an entity ref, advance past it. // // %REVIEW% Should we let this out the door and just suppress it? // More work, but simpler code, more likely to be correct, and // it doesn't happen very often. We'd get rid of the loop too. if (ENTITY_REFERENCE_NODE == nexttype) pos=next; } while (ENTITY_REFERENCE_NODE == nexttype); // Did we run out of the tree? if(next==null) { m_nextsib.setElementAt(NULL,0); m_nodesAreProcessed = true; m_pos=null; if(JJK_DEBUG) { System.out.println("***** DOM2DTM Crosscheck:"); for(int i=0;i<m_nodes.size();++i) System.out.println(i+":\t"+m_firstch.elementAt(i)+"\t"+m_nextsib.elementAt(i)); } return false; } // Text needs some special handling: // // DTM may skip whitespace. This is handled by the suppressNode flag, which // when true will keep the DTM node from being created. // // DTM only directly records the first DOM node of any logically-contiguous // sequence. The lastTextNode value will be set to the last node in the // contiguous sequence, and -- AFTER the DTM addNode -- can be used to // advance next over this whole block. Should be simpler than special-casing // the above loop for "Was the logically-preceeding sibling a text node". // // Finally, a DTM node should be considered a CDATASection only if all the // contiguous text it covers is CDATASections. The first Text should // force DTM to Text. boolean suppressNode=false; Node lastTextNode=null; nexttype=next.getNodeType(); // nexttype=pos.getNodeType(); if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) { // If filtering, initially assume we're going to suppress the node suppressNode=((null != m_wsfilter) && getShouldStripWhitespace()); // Scan logically contiguous text (siblings, plus "flattening" // of entity reference boundaries). Node n=next; while(n!=null) { lastTextNode=n; // Any Text node means DTM considers it all Text if(TEXT_NODE == n.getNodeType()) nexttype=TEXT_NODE; // Any non-whitespace in this sequence blocks whitespace // suppression suppressNode &= XMLCharacterRecognizer.isWhiteSpace(n.getNodeValue()); n=logicalNextDOMTextNode(n); } } // Special handling for PIs: Some DOMs represent the XML // Declaration as a PI. This is officially incorrect, per the DOM // spec, but is considered a "wrong but tolerable" temporary // workaround pending proper handling of these fields in DOM Level // 3. We want to recognize and reject that case. else if(PROCESSING_INSTRUCTION_NODE==nexttype) { suppressNode = (pos.getNodeName().toLowerCase().equals("xml")); } if(!suppressNode) { // Inserting next. NOTE that we force the node type; for // coalesced Text, this records CDATASections adjacent to // ordinary Text as Text. int nextindex=addNode(next,m_last_parent,m_last_kid, nexttype); m_last_kid=nextindex; if(ELEMENT_NODE == nexttype) { int attrIndex=NULL; // start with no previous sib // Process attributes _now_, rather than waiting. // Simpler control flow, makes NS cache available immediately. NamedNodeMap attrs=next.getAttributes(); int attrsize=(attrs==null) ? 0 : attrs.getLength(); if(attrsize>0) { for(int i=0;i<attrsize;++i) { // No need to force nodetype in this case; // addNode() will take care of switching it from // Attr to Namespace if necessary. attrIndex=addNode(attrs.item(i), nextindex,attrIndex,NULL); m_firstch.setElementAt(DTM.NULL,attrIndex); // If the xml: prefix is explicitly declared // we don't need to synthesize one. // // NOTE that XML Namespaces were not originally // defined as being namespace-aware (grrr), and // while the W3C is planning to fix this it's // safer for now to test the QName and trust the // parsers to prevent anyone from redefining the // reserved xmlns: prefix if(!m_processedFirstElement && "xmlns:xml".equals(attrs.item(i).getNodeName())) m_processedFirstElement=true; } // Terminate list of attrs, and make sure they aren't // considered children of the element } // if attrs exist if(!m_processedFirstElement) { // The DOM might not have an explicit declaration for the // implicit "xml:" prefix, but the XPath data model // requires that this appear as a Namespace Node so we // have to synthesize one. You can think of this as // being a default attribute defined by the XML // Namespaces spec rather than by the DTD. attrIndex=addNode(new DOM2DTMdefaultNamespaceDeclarationNode( (Element)next,"xml",NAMESPACE_DECL_NS, makeNodeHandle(((attrIndex==NULL)?nextindex:attrIndex)+1) ), nextindex,attrIndex,NULL); m_firstch.setElementAt(DTM.NULL,attrIndex); m_processedFirstElement=true; } if(attrIndex!=NULL) m_nextsib.setElementAt(DTM.NULL,attrIndex); } //if(ELEMENT_NODE) } // (if !suppressNode) // Text postprocessing: Act on values stored above if(TEXT_NODE == nexttype || CDATA_SECTION_NODE == nexttype) { // %TBD% If nexttype was forced to TEXT, patch the DTM node next=lastTextNode; // Advance the DOM cursor over contiguous text } // Remember where we left off. m_pos=next; return true; } /** * Return an DOM node for the given node. * * @param nodeHandle The node ID. * * @return A node representation of the DTM node. */ public Node getNode(int nodeHandle) { int identity = makeNodeIdentity(nodeHandle); return (Node) m_nodes.elementAt(identity); } /** * Get a Node from an identity index. * * NEEDSDOC @param nodeIdentity * * NEEDSDOC ($objectName$) @return */ protected Node lookupNode(int nodeIdentity) { return (Node) m_nodes.elementAt(nodeIdentity); } /** * Get the next node identity value in the list, and call the iterator * if it hasn't been added yet. * * @param identity The node identity (index). * @return identity+1, or DTM.NULL. */ protected int getNextNodeIdentity(int identity) { identity += 1; if (identity >= m_nodes.size()) { if (!nextNode()) identity = DTM.NULL; } return identity; } /** * Get the handle from a Node. * <p>%OPT% This will be pretty slow.</p> * * <p>%OPT% An XPath-like search (walk up DOM to root, tracking path; * walk down DTM reconstructing path) might be considerably faster * on later nodes in large documents. That might also imply improving * this call to handle nodes which would be in this DTM but * have not yet been built, which might or might not be a Good Thing.</p> * * %REVIEW% This relies on being able to test node-identity via * object-identity. DTM2DOM proxying is a great example of a case where * that doesn't work. DOM Level 3 will provide the isSameNode() method * to fix that, but until then this is going to be flaky. * * @param node A node, which may be null. * * @return The node handle or <code>DTM.NULL</code>. */ private int getHandleFromNode(Node node) { if (null != node) { int len = m_nodes.size(); boolean isMore; int i = 0; do { for (; i < len; i++) { if (m_nodes.elementAt(i) == node) return makeNodeHandle(i); } isMore = nextNode(); len = m_nodes.size(); } while(isMore || i < len); } return DTM.NULL; } /** Get the handle from a Node. This is a more robust version of * getHandleFromNode, intended to be usable by the public. * * <p>%OPT% This will be pretty slow.</p> * * %REVIEW% This relies on being able to test node-identity via * object-identity. DTM2DOM proxying is a great example of a case where * that doesn't work. DOM Level 3 will provide the isSameNode() method * to fix that, but until then this is going to be flaky. * * @param node A node, which may be null. * * @return The node handle or <code>DTM.NULL</code>. */ public int getHandleOfNode(Node node) { if (null != node) { // Is Node actually within the same document? If not, don't search! // This would be easier if m_root was always the Document node, but // we decided to allow wrapping a DTM around a subtree. if((m_root==node) || (m_root.getNodeType()==DOCUMENT_NODE && m_root==node.getOwnerDocument()) || (m_root.getNodeType()!=DOCUMENT_NODE && m_root.getOwnerDocument()==node.getOwnerDocument()) ) { // If node _is_ in m_root's tree, find its handle // // %OPT% This check may be improved significantly when DOM // Level 3 nodeKey and relative-order tests become // available! for(Node cursor=node; cursor!=null; cursor= (cursor.getNodeType()!=ATTRIBUTE_NODE) ? cursor.getParentNode() : ((org.w3c.dom.Attr)cursor).getOwnerElement()) { if(cursor==m_root) // We know this node; find its handle. return getHandleFromNode(node); } // for ancestors of node } // if node and m_root in same Document } // if node!=null return DTM.NULL; } /** * Retrieves an attribute node by by qualified name and namespace URI. * * @param nodeHandle int Handle of the node upon which to look up this attribute.. * @param namespaceURI The namespace URI of the attribute to * retrieve, or null. * @param name The local name of the attribute to * retrieve. * @return The attribute node handle with the specified name ( * <code>nodeName</code>) or <code>DTM.NULL</code> if there is no such * attribute. */ public int getAttributeNode(int nodeHandle, String namespaceURI, String name) { // %OPT% This is probably slower than it needs to be. if (null == namespaceURI) namespaceURI = ""; int type = getNodeType(nodeHandle); if (DTM.ELEMENT_NODE == type) { // Assume that attributes immediately follow the element. int identity = makeNodeIdentity(nodeHandle); while (DTM.NULL != (identity = getNextNodeIdentity(identity))) { // Assume this can not be null. type = _type(identity); // %REVIEW% // Should namespace nodes be retrievable DOM-style as attrs? // If not we need a separate function... which may be desirable // architecturally, but which is ugly from a code point of view. // (If we REALLY insist on it, this code should become a subroutine // of both -- retrieve the node, then test if the type matches // what you're looking for.) if (type == DTM.ATTRIBUTE_NODE || type==DTM.NAMESPACE_NODE) { Node node = lookupNode(identity); String nodeuri = node.getNamespaceURI(); if (null == nodeuri) nodeuri = ""; String nodelocalname = node.getLocalName(); if (nodeuri.equals(namespaceURI) && name.equals(nodelocalname)) return makeNodeHandle(identity); } else // if (DTM.NAMESPACE_NODE != type) { break; } } } return DTM.NULL; } /** * Get the string-value of a node as a String object * (see http://www.w3.org/TR/xpath#data-model * for the definition of a node's string-value). * * @param nodeHandle The node ID. * * @return A string object that represents the string-value of the given node. */ public XMLString getStringValue(int nodeHandle) { int type = getNodeType(nodeHandle); Node node = getNode(nodeHandle); // %TBD% If an element only has one text node, we should just use it // directly. if(DTM.ELEMENT_NODE == type || DTM.DOCUMENT_NODE == type || DTM.DOCUMENT_FRAGMENT_NODE == type) { FastStringBuffer buf = StringBufferPool.get(); String s; try { getNodeData(node, buf); s = (buf.length() > 0) ? buf.toString() : ""; } finally { StringBufferPool.free(buf); } return m_xstrf.newstr( s ); } else if(TEXT_NODE == type || CDATA_SECTION_NODE == type) { // If this is a DTM text node, it may be made of multiple DOM text // nodes -- including navigating into Entity References. DOM2DTM // records the first node in the sequence and requires that we // pick up the others when we retrieve the DTM node's value. // // %REVIEW% DOM Level 3 is expected to add a "whole text" // retrieval method which performs this function for us. FastStringBuffer buf = StringBufferPool.get(); while(node!=null) { buf.append(node.getNodeValue()); node=logicalNextDOMTextNode(node); } String s=(buf.length() > 0) ? buf.toString() : ""; StringBufferPool.free(buf); return m_xstrf.newstr( s ); } else return m_xstrf.newstr( node.getNodeValue() ); } /** * Determine if the string-value of a node is whitespace * * @param nodeHandle The node Handle. * * @return Return true if the given node is whitespace. */ public boolean isWhitespace(int nodeHandle) { int type = getNodeType(nodeHandle); Node node = getNode(nodeHandle); if(TEXT_NODE == type || CDATA_SECTION_NODE == type) { // If this is a DTM text node, it may be made of multiple DOM text // nodes -- including navigating into Entity References. DOM2DTM // records the first node in the sequence and requires that we // pick up the others when we retrieve the DTM node's value. // // %REVIEW% DOM Level 3 is expected to add a "whole text" // retrieval method which performs this function for us. FastStringBuffer buf = StringBufferPool.get(); while(node!=null) { buf.append(node.getNodeValue()); node=logicalNextDOMTextNode(node); } boolean b = buf.isWhitespace(0, buf.length()); StringBufferPool.free(buf); return b; } return false; } /** * Retrieve the text content of a DOM subtree, appending it into a * user-supplied FastStringBuffer object. Note that attributes are * not considered part of the content of an element. * <p> * There are open questions regarding whitespace stripping. * Currently we make no special effort in that regard, since the standard * DOM doesn't yet provide DTD-based information to distinguish * whitespace-in-element-context from genuine #PCDATA. Note that we * should probably also consider xml:space if/when we address this. * DOM Level 3 may solve the problem for us. * <p> * %REVIEW% Actually, since this method operates on the DOM side of the * fence rather than the DTM side, it SHOULDN'T do * any special handling. The DOM does what the DOM does; if you want * DTM-level abstractions, use DTM-level methods. * * @param node Node whose subtree is to be walked, gathering the * contents of all Text or CDATASection nodes. * @param buf FastStringBuffer into which the contents of the text * nodes are to be concatenated. */ protected static void getNodeData(Node node, FastStringBuffer buf) { switch (node.getNodeType()) { case Node.DOCUMENT_FRAGMENT_NODE : case Node.DOCUMENT_NODE : case Node.ELEMENT_NODE : { for (Node child = node.getFirstChild(); null != child; child = child.getNextSibling()) { getNodeData(child, buf); } } break; case Node.TEXT_NODE : case Node.CDATA_SECTION_NODE : case Node.ATTRIBUTE_NODE : // Never a child but might be our starting node buf.append(node.getNodeValue()); break; case Node.PROCESSING_INSTRUCTION_NODE : // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); break; default : // ignore break; } } /** * Given a node handle, return its DOM-style node name. This will * include names such as #text or #document. * * @param nodeHandle the id of the node. * @return String Name of this node, which may be an empty string. * %REVIEW% Document when empty string is possible... * %REVIEW-COMMENT% It should never be empty, should it? */ public String getNodeName(int nodeHandle) { Node node = getNode(nodeHandle); // Assume non-null. return node.getNodeName(); } /** * Given a node handle, return the XPath node name. This should be * the name as described by the XPath data model, NOT the DOM-style * name. * * @param nodeHandle the id of the node. * @return String Name of this node, which may be an empty string. */ public String getNodeNameX(int nodeHandle) { String name; short type = getNodeType(nodeHandle); switch (type) { case DTM.NAMESPACE_NODE : { Node node = getNode(nodeHandle); // assume not null. name = node.getNodeName(); if(name.startsWith("xmlns:")) { name = QName.getLocalPart(name); } else if(name.equals("xmlns")) { name = ""; } } break; case DTM.ATTRIBUTE_NODE : case DTM.ELEMENT_NODE : case DTM.ENTITY_REFERENCE_NODE : case DTM.PROCESSING_INSTRUCTION_NODE : { Node node = getNode(nodeHandle); // assume not null. name = node.getNodeName(); } break; default : name = ""; } return name; } /** * Given a node handle, return its XPath-style localname. * (As defined in Namespaces, this is the portion of the name after any * colon character). * * @param nodeHandle the id of the node. * @return String Local name of this node. */ public String getLocalName(int nodeHandle) { if(JJK_NEWCODE) { int id=makeNodeIdentity(nodeHandle); if(NULL==id) return null; Node newnode=(Node)m_nodes.elementAt(id); String newname=newnode.getLocalName(); if (null == newname) { // XSLT treats PIs, and possibly other things, as having QNames. String qname = newnode.getNodeName(); if('#'==qname.charAt(0)) { // Match old default for this function // This conversion may or may not be necessary newname=""; } else { int index = qname.indexOf(':'); newname = (index < 0) ? qname : qname.substring(index + 1); } } return newname; } else { String name; short type = getNodeType(nodeHandle); switch (type) { case DTM.ATTRIBUTE_NODE : case DTM.ELEMENT_NODE : case DTM.ENTITY_REFERENCE_NODE : case DTM.NAMESPACE_NODE : case DTM.PROCESSING_INSTRUCTION_NODE : { Node node = getNode(nodeHandle); // assume not null. name = node.getLocalName(); if (null == name) { String qname = node.getNodeName(); int index = qname.indexOf(':'); name = (index < 0) ? qname : qname.substring(index + 1); } } break; default : name = ""; } return name; } } /** * Given a namespace handle, return the prefix that the namespace decl is * mapping. * Given a node handle, return the prefix used to map to the namespace. * * <p> %REVIEW% Are you sure you want "" for no prefix? </p> * <p> %REVIEW-COMMENT% I think so... not totally sure. -sb </p> * * @param nodeHandle the id of the node. * @return String prefix of this node's name, or "" if no explicit * namespace prefix was given. */ public String getPrefix(int nodeHandle) { String prefix; short type = getNodeType(nodeHandle); switch (type) { case DTM.NAMESPACE_NODE : { Node node = getNode(nodeHandle); // assume not null. String qname = node.getNodeName(); int index = qname.indexOf(':'); prefix = (index < 0) ? "" : qname.substring(index + 1); } break; case DTM.ATTRIBUTE_NODE : case DTM.ELEMENT_NODE : { Node node = getNode(nodeHandle); // assume not null. String qname = node.getNodeName(); int index = qname.indexOf(':'); prefix = (index < 0) ? "" : qname.substring(0, index); } break; default : prefix = ""; } return prefix; } /** * Given a node handle, return its DOM-style namespace URI * (As defined in Namespaces, this is the declared URI which this node's * prefix -- or default in lieu thereof -- was mapped to.) * * <p>%REVIEW% Null or ""? -sb</p> * * @param nodeHandle the id of the node. * @return String URI value of this node's namespace, or null if no * namespace was resolved. */ public String getNamespaceURI(int nodeHandle) { if(JJK_NEWCODE) { int id=makeNodeIdentity(nodeHandle); if(id==NULL) return null; Node node=(Node)m_nodes.elementAt(id); return node.getNamespaceURI(); } else { String nsuri; short type = getNodeType(nodeHandle); switch (type) { case DTM.ATTRIBUTE_NODE : case DTM.ELEMENT_NODE : case DTM.ENTITY_REFERENCE_NODE : case DTM.NAMESPACE_NODE : case DTM.PROCESSING_INSTRUCTION_NODE : { Node node = getNode(nodeHandle); // assume not null. nsuri = node.getNamespaceURI(); // %TBD% Handle DOM1? } break; default : nsuri = null; } return nsuri; } } /** Utility function: Given a DOM Text node, determine whether it is * logically followed by another Text or CDATASection node. This may * involve traversing into Entity References. * * %REVIEW% DOM Level 3 is expected to add functionality which may * allow us to retire this. */ private Node logicalNextDOMTextNode(Node n) { Node p=n.getNextSibling(); if(p==null) { // Walk out of any EntityReferenceNodes that ended with text for(n=n.getParentNode(); n!=null && ENTITY_REFERENCE_NODE == n.getNodeType(); n=n.getParentNode()) { p=n.getNextSibling(); if(p!=null) break; } } n=p; while(n!=null && ENTITY_REFERENCE_NODE == n.getNodeType()) { // Walk into any EntityReferenceNodes that start with text if(n.hasChildNodes()) n=n.getFirstChild(); else n=n.getNextSibling(); } if(n!=null) { // Found a logical next sibling. Is it text? int ntype=n.getNodeType(); if(TEXT_NODE != ntype && CDATA_SECTION_NODE != ntype) n=null; } return n; } /** * Given a node handle, return its node value. This is mostly * as defined by the DOM, but may ignore some conveniences. * <p> * * @param nodeHandle The node id. * @return String Value of this node, or null if not * meaningful for this node type. */ public String getNodeValue(int nodeHandle) { // The _type(nodeHandle) call was taking the lion's share of our // time, and was wrong anyway since it wasn't coverting handle to // identity. Inlined it. int type = _exptype(makeNodeIdentity(nodeHandle)); type=(NULL != type) ? getNodeType(nodeHandle) : NULL; if(TEXT_NODE!=type && CDATA_SECTION_NODE!=type) return getNode(nodeHandle).getNodeValue(); // If this is a DTM text node, it may be made of multiple DOM text // nodes -- including navigating into Entity References. DOM2DTM // records the first node in the sequence and requires that we // pick up the others when we retrieve the DTM node's value. // // %REVIEW% DOM Level 3 is expected to add a "whole text" // retrieval method which performs this function for us. Node node = getNode(nodeHandle); Node n=logicalNextDOMTextNode(node); if(n==null) return node.getNodeValue(); FastStringBuffer buf = StringBufferPool.get(); buf.append(node.getNodeValue()); while(n!=null) { buf.append(n.getNodeValue()); n=logicalNextDOMTextNode(n); } String s = (buf.length() > 0) ? buf.toString() : ""; StringBufferPool.free(buf); return s; } /** * A document type declaration information item has the following properties: * * 1. [system identifier] The system identifier of the external subset, if * it exists. Otherwise this property has no value. * * @return the system identifier String object, or null if there is none. */ public String getDocumentTypeDeclarationSystemIdentifier() { Document doc; if (m_root.getNodeType() == Node.DOCUMENT_NODE) doc = (Document) m_root; else doc = m_root.getOwnerDocument(); if (null != doc) { DocumentType dtd = doc.getDoctype(); if (null != dtd) { return dtd.getSystemId(); } } return null; } /** * Return the public identifier of the external subset, * normalized as described in 4.2.2 External Entities [XML]. If there is * no external subset or if it has no public identifier, this property * has no value. * * @return the public identifier String object, or null if there is none. */ public String getDocumentTypeDeclarationPublicIdentifier() { Document doc; if (m_root.getNodeType() == Node.DOCUMENT_NODE) doc = (Document) m_root; else doc = m_root.getOwnerDocument(); if (null != doc) { DocumentType dtd = doc.getDoctype(); if (null != dtd) { return dtd.getPublicId(); } } return null; } /** * Returns the <code>Element</code> whose <code>ID</code> is given by * <code>elementId</code>. If no such element exists, returns * <code>DTM.NULL</code>. Behavior is not defined if more than one element * has this <code>ID</code>. Attributes (including those * with the name "ID") are not of type ID unless so defined by DTD/Schema * information available to the DTM implementation. * Implementations that do not know whether attributes are of type ID or * not are expected to return <code>DTM.NULL</code>. * * <p>%REVIEW% Presumably IDs are still scoped to a single document, * and this operation searches only within a single document, right? * Wouldn't want collisions between DTMs in the same process.</p> * * @param elementId The unique <code>id</code> value for an element. * @return The handle of the matching element. */ public int getElementById(String elementId) { Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) ? (Document) m_root : m_root.getOwnerDocument(); if(null != doc) { Node elem = doc.getElementById(elementId); if(null != elem) { int elemHandle = getHandleFromNode(elem); if(DTM.NULL == elemHandle) { int identity = m_nodes.size()-1; while (DTM.NULL != (identity = getNextNodeIdentity(identity))) { Node node = getNode(identity); if(node == elem) { elemHandle = getHandleFromNode(elem); break; } } } return elemHandle; } } return DTM.NULL; } /** * The getUnparsedEntityURI function returns the URI of the unparsed * entity with the specified name in the same document as the context * node (see [3.3 Unparsed Entities]). It returns the empty string if * there is no such entity. * <p> * XML processors may choose to use the System Identifier (if one * is provided) to resolve the entity, rather than the URI in the * Public Identifier. The details are dependent on the processor, and * we would have to support some form of plug-in resolver to handle * this properly. Currently, we simply return the System Identifier if * present, and hope that it a usable URI or that our caller can * map it to one. * TODO: Resolve Public Identifiers... or consider changing function name. * <p> * If we find a relative URI * reference, XML expects it to be resolved in terms of the base URI * of the document. The DOM doesn't do that for us, and it isn't * entirely clear whether that should be done here; currently that's * pushed up to a higher level of our application. (Note that DOM Level * 1 didn't store the document's base URI.) * TODO: Consider resolving Relative URIs. * <p> * (The DOM's statement that "An XML processor may choose to * completely expand entities before the structure model is passed * to the DOM" refers only to parsed entities, not unparsed, and hence * doesn't affect this function.) * * @param name A string containing the Entity Name of the unparsed * entity. * * @return String containing the URI of the Unparsed Entity, or an * empty string if no such entity exists. */ public String getUnparsedEntityURI(String name) { String url = ""; Document doc = (m_root.getNodeType() == Node.DOCUMENT_NODE) ? (Document) m_root : m_root.getOwnerDocument(); if (null != doc) { DocumentType doctype = doc.getDoctype(); if (null != doctype) { NamedNodeMap entities = doctype.getEntities(); if(null == entities) return url; Entity entity = (Entity) entities.getNamedItem(name); if(null == entity) return url; String notationName = entity.getNotationName(); if (null != notationName) // then it's unparsed { // The draft says: "The XSLT processor may use the public // identifier to generate a URI for the entity instead of the URI // specified in the system identifier. If the XSLT processor does // not use the public identifier to generate the URI, it must use // the system identifier; if the system identifier is a relative // URI, it must be resolved into an absolute URI using the URI of // the resource containing the entity declaration as the base // URI [RFC2396]." // So I'm falling a bit short here. url = entity.getSystemId(); if (null == url) { url = entity.getPublicId(); } else { // This should be resolved to an absolute URL, but that's hard // to do from here. } } } } return url; } /** * 5. [specified] A flag indicating whether this attribute was actually * specified in the start-tag of its element, or was defaulted from the * DTD. * * @param attributeHandle the attribute handle * @return <code>true</code> if the attribute was specified; * <code>false</code> if it was defaulted. */ public boolean isAttributeSpecified(int attributeHandle) { int type = getNodeType(attributeHandle); if (DTM.ATTRIBUTE_NODE == type) { Attr attr = (Attr)getNode(attributeHandle); return attr.getSpecified(); } return false; } /** Bind an IncrementalSAXSource to this DTM. NOT RELEVANT for DOM2DTM, since * we're wrapped around an existing DOM. * * @param source The IncrementalSAXSource that we want to recieve events from * on demand. */ public void setIncrementalSAXSource(IncrementalSAXSource source) { } /** getContentHandler returns "our SAX builder" -- the thing that * someone else should send SAX events to in order to extend this * DTM model. * * @return null if this model doesn't respond to SAX events, * "this" if the DTM object has a built-in SAX ContentHandler, * the IncrmentalSAXSource if we're bound to one and should receive * the SAX stream via it for incremental build purposes... * */ public org.xml.sax.ContentHandler getContentHandler() { return null; } /** * Return this DTM's lexical handler. * * %REVIEW% Should this return null if constrution already done/begun? * * @return null if this model doesn't respond to lexical SAX events, * "this" if the DTM object has a built-in SAX ContentHandler, * the IncrementalSAXSource if we're bound to one and should receive * the SAX stream via it for incremental build purposes... */ public org.xml.sax.ext.LexicalHandler getLexicalHandler() { return null; } /** * Return this DTM's EntityResolver. * * @return null if this model doesn't respond to SAX entity ref events. */ public org.xml.sax.EntityResolver getEntityResolver() { return null; } /** * Return this DTM's DTDHandler. * * @return null if this model doesn't respond to SAX dtd events. */ public org.xml.sax.DTDHandler getDTDHandler() { return null; } /** * Return this DTM's ErrorHandler. * * @return null if this model doesn't respond to SAX error events. */ public org.xml.sax.ErrorHandler getErrorHandler() { return null; } /** * Return this DTM's DeclHandler. * * @return null if this model doesn't respond to SAX Decl events. */ public org.xml.sax.ext.DeclHandler getDeclHandler() { return null; } /** @return true iff we're building this model incrementally (eg * we're partnered with a IncrementalSAXSource) and thus require that the * transformation and the parse run simultaneously. Guidance to the * DTMManager. * */ public boolean needsTwoThreads() { return false; } // ========== Direct SAX Dispatch, for optimization purposes ======== /** * Returns whether the specified <var>ch</var> conforms to the XML 1.0 definition * of whitespace. Refer to <A href="http://www.w3.org/TR/1998/REC-xml-19980210#NT-S"> * the definition of <CODE>S</CODE></A> for details. * @param ch Character to check as XML whitespace. * @return =true if <var>ch</var> is XML whitespace; otherwise =false. */ private static boolean isSpace(char ch) { return XMLCharacterRecognizer.isWhiteSpace(ch); // Take the easy way out for now. } /** * Directly call the * characters method on the passed ContentHandler for the * string-value of the given node (see http://www.w3.org/TR/xpath#data-model * for the definition of a node's string-value). Multiple calls to the * ContentHandler's characters methods may well occur for a single call to * this method. * * @param nodeHandle The node ID. * @param ch A non-null reference to a ContentHandler. * * @throws org.xml.sax.SAXException */ public void dispatchCharactersEvents( int nodeHandle, org.xml.sax.ContentHandler ch, boolean normalize) throws org.xml.sax.SAXException { if(normalize) { XMLString str = getStringValue(nodeHandle); str = str.fixWhiteSpace(true, true, false); str.dispatchCharactersEvents(ch); } else { int type = getNodeType(nodeHandle); Node node = getNode(nodeHandle); dispatchNodeData(node, ch, 0); // Text coalition -- a DTM text node may represent multiple // DOM nodes. if(TEXT_NODE == type || CDATA_SECTION_NODE == type) { while( null != (node=logicalNextDOMTextNode(node)) ) { dispatchNodeData(node, ch, 0); } } } } /** * Retrieve the text content of a DOM subtree, appending it into a * user-supplied FastStringBuffer object. Note that attributes are * not considered part of the content of an element. * <p> * There are open questions regarding whitespace stripping. * Currently we make no special effort in that regard, since the standard * DOM doesn't yet provide DTD-based information to distinguish * whitespace-in-element-context from genuine #PCDATA. Note that we * should probably also consider xml:space if/when we address this. * DOM Level 3 may solve the problem for us. * <p> * %REVIEW% Note that as a DOM-level operation, it can be argued that this * routine _shouldn't_ perform any processing beyond what the DOM already * does, and that whitespace stripping and so on belong at the DTM level. * If you want a stripped DOM view, wrap DTM2DOM around DOM2DTM. * * @param node Node whose subtree is to be walked, gathering the * contents of all Text or CDATASection nodes. */ protected static void dispatchNodeData(Node node, org.xml.sax.ContentHandler ch, int depth) throws org.xml.sax.SAXException { switch (node.getNodeType()) { case Node.DOCUMENT_FRAGMENT_NODE : case Node.DOCUMENT_NODE : case Node.ELEMENT_NODE : { for (Node child = node.getFirstChild(); null != child; child = child.getNextSibling()) { dispatchNodeData(child, ch, depth+1); } } break; case Node.PROCESSING_INSTRUCTION_NODE : // %REVIEW% case Node.COMMENT_NODE : if(0 != depth) break; // NOTE: Because this operation works in the DOM space, it does _not_ attempt // to perform Text Coalition. That should only be done in DTM space. case Node.TEXT_NODE : case Node.CDATA_SECTION_NODE : case Node.ATTRIBUTE_NODE : String str = node.getNodeValue(); if(ch instanceof CharacterNodeHandler) { ((CharacterNodeHandler)ch).characters(node); } else { ch.characters(str.toCharArray(), 0, str.length()); } break; // /* case Node.PROCESSING_INSTRUCTION_NODE : // // warning(XPATHErrorResources.WG_PARSING_AND_PREPARING); // break; */ default : // ignore break; } } TreeWalker m_walker = new TreeWalker(null); /** * Directly create SAX parser events from a subtree. * * @param nodeHandle The node ID. * @param ch A non-null reference to a ContentHandler. * * @throws org.xml.sax.SAXException */ public void dispatchToEvents(int nodeHandle, org.xml.sax.ContentHandler ch) throws org.xml.sax.SAXException { TreeWalker treeWalker = m_walker; ContentHandler prevCH = treeWalker.getContentHandler(); if(null != prevCH) { treeWalker = new TreeWalker(null); } treeWalker.setContentHandler(ch); try { Node node = getNode(nodeHandle); treeWalker.traverse(node); } finally { treeWalker.setContentHandler(null); } } public interface CharacterNodeHandler { public void characters(Node node) throws org.xml.sax.SAXException; } /** * For the moment all the run time properties are ignored by this * class. * * @param property a <code>String</code> value * @param value an <code>Object</code> value */ public void setProperty(String property, Object value) { } /** * No source information is available for DOM2DTM, so return * <code>null</code> here. * * @param node an <code>int</code> value * @return null */ public SourceLocator getSourceLocatorFor(int node) { return null; } }