// XPathAPI.java example
//
// Explorer
// mappingtools-master
package com.openMap1.mapper.util;

import org.eclipse.emf.ecore.EObject;
import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.openMap1.mapper.core.MapperException;
import com.openMap1.mapper.core.NamespaceSet;
import com.openMap1.mapper.core.Xpth;
import com.openMap1.mapper.core.namespace;
import com.openMap1.mapper.impl.ElementDefImpl;
import com.openMap1.mapper.mapping.linkCondition;
import com.openMap1.mapper.AssocEndMapping;
import com.openMap1.mapper.ConditionTest;
import com.openMap1.mapper.CrossCondition;
import com.openMap1.mapper.Mapping;

import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathConstants;

import java.util.Hashtable;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.Vector;

/**
 * Class used by the XOReader to follow XPaths
 * in the input instance, with various performance tweaks.
 * 
 * @author robert
 *
 */

public class XPathAPI
{
	  
	  /**
	   * Timed version of selectNodeVector, for performance tuning: the evaluation is
	   * bracketed by the {@code Timer.XPATH} timer.
	   *
	   * @param timer the timer on which {@code Timer.XPATH} is started and stopped
	   * @param contextNode the node to start searching from
	   * @param str the XPath string
	   * @param context the namespace set passed on to the untimed version
	   * @return the nodes found by the untimed version
	   * @throws XPathExpressionException declared by the untimed version
	   * @throws MapperException if the untimed version fails
	   */
	  public static Vector<Node> selectNodeVector(Timer timer, Node contextNode, String str, NamespaceSet context)
	  throws XPathExpressionException,MapperException
	  {
		  timer.start(Timer.XPATH);
		  try
		  {
			  return selectNodeVector(contextNode, str, context);
		  }
		  // stop the timer even if the evaluation throws, so that it is never left running
		  finally
		  {
			  timer.stop(Timer.XPATH);
		  }
	  }

 /**
   * Use an XPath string to select a Vector of nodes.
   * XPath namespace prefixes are resolved from the supplied namespace set.
   * 
   * If the namespace set contains a namespace with prefix "" (other than the
   * placeholder whose URI is "no target namespace"), then that prefix is changed
   * to a non-empty one, both in a copy of the namespace set and in the XPath,
   * before the path is followed.
   * This conversion assumes that all element names have form = qualified.
   * 
   * If the path ends in the virtual element-position attribute, the path to the
   * owning element is followed instead, and the elements are returned.
   *
   * @param contextNode The node to start searching from.
   * @param pathString A valid XPath string.
   * @param context The namespace set from which prefixes in the XPath are resolved.
   * @return A Vector of nodes, as delivered by fastNodeVector.
   * @throws XPathExpressionException declared for callers
   * @throws MapperException if the path cannot be converted or followed
   */
  public static Vector<Node> selectNodeVector(Node contextNode, String pathString, NamespaceSet context)
    throws XPathExpressionException,MapperException
  {
	  /* if the path ends in the special virtual attribute introduced to track the ordinal
	   * position of an Element beneath its parent, because the attribute does not exist
	   * use the shorter path to the element, and return the elements. */
	  String path = pathString;
	  if (pathString.endsWith(ElementDefImpl.ELEMENT_POSITION_ATTRIBUTE))
	  {
		  // strip off the virtual attribute name and '/@'
		  int len = pathString.length() - ElementDefImpl.ELEMENT_POSITION_ATTRIBUTE.length() - 2;
		  // len is -1 in the common case when pathString is just '@..'; then stay on the context node
		  path = (len < 0) ? "." : pathString.substring(0, len);
	  }

	  boolean hasDefaultNamespace = (context.getByPrefix("") != null)
			  && !context.getByPrefix("").URI().equals("no target namespace");

	  // (1) no default namespace in the set: follow the path as it stands
	  if (!hasDefaultNamespace)
	  {
		  return fastNodeVector(contextNode,path,context);
	  }

	  /* (2) there is a default namespace: give it a prefix which clashes with no other,
	   * because XPath does not match namespaced names without a prefix */
	  String prefix = context.nonClashPrefix();
	  NamespaceSet newContext = context.withPrefixForDefaultNamespace(prefix);
	  String newPath = new Xpth(context,path).convertPrefixes(newContext).stringForm();
	  return fastNodeVector(contextNode,newPath,newContext);
  }
  
  /**
   * Retrieve the nodes at the end of an XPath, treating the commonest simple cases
   * directly on the DOM, without using the XPath engine, as a speedup.
   * 
   * The shortcut is taken only for relative paths of one or two steps from an Element,
   * in which every step is a plain child element name: 'name', 'prefix:name', or either
   * of these preceded by 'child::'. Every other path (predicates, wildcards, node type
   * tests, attributes, other axes, '//', absolute paths) is given to the XPath engine.
   * 
   * FIXME - the shortcut hands XMLUtil.namedChildElements the step name with its prefix
   * removed, so it is not careful about namespaces.
   * 
   * @param contextNode the node to start from
   * @param str the XPath string
   * @param context namespace set given to the XPath engine as its namespace context
   * @return the nodes found; never null
   * @throws MapperException if the XPath evaluation fails; the original exception is
   * attached as the cause where the MapperException allows it
   */
  private static Vector<Node> fastNodeVector(Node contextNode, String str, NamespaceSet context)
  throws MapperException
  {
	  Vector<Node> nodes = new Vector<Node>();

	  // don't need to follow the trivial 'stay still' XPath
	  if (str.equals(".") || str.equals("self::node()"))
	  {
		  nodes.add(contextNode);
	  }
	  else
	  {
		  String[] stepNames = plainChildStepNames(contextNode, str);

		  // one or two plain child steps: walk the child elements directly
		  if (stepNames != null)
		  {
			  Vector<Element> reached = new Vector<Element>();
			  reached.add((Element)contextNode);
			  for (int s = 0; s < stepNames.length; s++)
			  {
				  Vector<Element> next = new Vector<Element>();
				  for (int k = 0; k < reached.size(); k++)
				  {
					  next.addAll(XMLUtil.namedChildElements(reached.get(k), stepNames[s]));
				  }
				  reached = next;
			  }
			  nodes.addAll(reached);
		  }

		  /* A single step to an attribute is deliberately not short-cut; the shortcut went
		   * wrong when the attribute is in a namespace. */

		  // All other cases; follow the XPath
		  else try
		  {
			  XPath xp = XPathFactory.newInstance().newXPath();
			  xp.setNamespaceContext(context);
			  NodeList nl = (NodeList)xp.evaluate(str, contextNode, XPathConstants.NODESET);
			  for (int i = 0; i < nl.getLength(); i++) nodes.add(nl.item(i));
		  }
		  catch (Exception ex)
		  {
			  MapperException failure = new MapperException(ex.getMessage());
			  // keep the original exception for diagnosis
			  try {failure.initCause(ex);}
			  catch (IllegalStateException ignored) {/* the constructor has already fixed the cause */}
			  throw failure;
		  }
	  }

	  // diagnostics for mysterious failures
	  boolean tracing = false;
	  if (tracing)
	  {
		  if (nodes.size() > 0) message("FastNodeVector " + str + "; " + nodes.size());
		  else if ((nodes.size() == 0) && (contextNode instanceof Element))
		  {
			  String cands = "";
			  Vector<Element> els = XMLUtil.childElements((Element)contextNode);
			  for (int i = 0; i < els.size(); i++) cands = cands + els.get(i).getLocalName() + "; " ;
			  message("*** Child elements of " + ((Element)contextNode).getLocalName() + ": " + cands
					  + " when looking for " + noPrefix(str));
		  }
	  }

	  return nodes;
  }

  /**
   * Decide whether a path can be followed by the DOM shortcut of fastNodeVector.
   * 
   * @param contextNode the node the path starts from
   * @param str the XPath string
   * @return the local names of the one or two steps of the path, if the path starts
   * from an Element and consists only of plain child element steps; otherwise null
   */
  private static String[] plainChildStepNames(Node contextNode, String str)
  {
	  boolean ascending = str.startsWith("parent") || str.startsWith("ancestor") || str.startsWith("..");
	  if (!(contextNode instanceof Element) || ascending) return null;

	  // absolute paths, '//' and a trailing '/' are left to the XPath engine
	  if (str.startsWith("/") || str.endsWith("/") || (str.indexOf("//") >= 0)) return null;

	  StringTokenizer steps = new StringTokenizer(str,"/");
	  int stepCount = steps.countTokens();
	  if ((stepCount < 1) || (stepCount > 2)) return null;

	  String[] names = new String[stepCount];
	  for (int i = 0; i < stepCount; i++)
	  {
		  names[i] = plainChildStepName(steps.nextToken());
		  if (names[i] == null) return null; // one awkward step rules out the shortcut
	  }
	  return names;
  }

  /**
   * @param step one '/'-separated step of an XPath
   * @return the local name tested by the step, if the step is just a child element name
   * ('name', 'prefix:name', or either preceded by 'child::'); null for any other step
   */
  private static String plainChildStepName(String step)
  {
	  final String childAxis = "child::";
	  String nameTest = step.startsWith(childAxis) ? step.substring(childAxis.length()) : step;

	  if ((nameTest.length() == 0) || nameTest.equals(".") || nameTest.equals("..")) return null;

	  // characters of predicates, functions, node type tests, wildcards, attributes and operators
	  for (int i = 0; i < nameTest.length(); i++)
	  {
		  if ("[]()@*|=<>!'\" \t".indexOf(nameTest.charAt(i)) >= 0) return null;
	  }

	  int colon = nameTest.indexOf(':');
	  if (colon < 0) return nameTest; // unprefixed name
	  // a second colon means some other axis ('::') or a malformed name
	  if (colon != nameTest.lastIndexOf(':')) return null;
	  if ((colon == 0) || (colon == nameTest.length() - 1)) return null;
	  return nameTest.substring(colon + 1);
  }
  
  /**
   * Remove any namespace prefix (and any explicit axis) from a name test.
   * 
   * @param s a name test as written in an XPath step, e.g. 'name', 'prefix:name'
   * or 'child::prefix:name'
   * @return the part of s after its last ':'; s itself if it contains no ':'
   */
  private static String noPrefix(String s)
  {
	  // lastIndexOf gives -1 when there is no colon, so the whole string is kept
	  return s.substring(s.lastIndexOf(':') + 1);
  }
  


  
  //----------------------------------------------------------------------------------------------
  //      delivering the node set in the presence of indexed link conditions
  //----------------------------------------------------------------------------------------------
  
  /**
   * follow a cross path from a node, where there may be link conditions which have been indexed
   * for efficient retrieval of a small set of nodes.
   * If there are indexes, use one of them to find a small node set, 
   * and then check that each node in the node set can be reached by the XPath.
   * If no index can be used, just follow the XPath.
   * 
   * The link conditions will all need to be tested again, because 
   * (a) only one condition will be used for the indexed retrieval, and
   * (b) sometimes (when there are further conditions on the link condition paths) the indexing
   * lets in some nodes which do not satisfy the link condition used for indexing
   * 
   * @param timer used to time the whole retrieval (INDEXED_TOTAL) and, within it, either
   * the unindexed XPath (UNINDEXED_XPATH) or the check of indexed candidates (INDEXED_XPATH)
   * @param contextNode the start node of the path
   * @param str string form of the XPath
   * @param context Namespace set - used by XPath to handle e.g. changed namespace prefixes
   * @param parent the parent object in the mapping set;
   * either the mapping for which this is a cross path; and which may have 
   * one or more cross conditions (= link conditions);
   * or  the condition which this is a nested condition on
   * @param pathIsToLHS true if the XPath leads to the node on the LHS of any 
   * cross conditions (for property mappings, this is the property node; for association
   * mappings, this is the association node)
   * @param nodeIndex nodes in the document, indexed by (1) the string form of the path from
   * the root to the condition end node (outer key), (2) the string form of the path from that
   * node to the string value in a link condition (middle key) and (3) the string value (inner key)
   * @return a set of nodes - should be small - which are reachable by the XPath and are highly likely
   * to satisfy the selected link condition; and include all those which do
   * @throws XPathExpressionException declared by the unindexed retrieval
   * @throws MapperException if the parent is not a Mapping, or a path cannot be followed
   */
  public static Vector<Node> indexedSelectNodeVector(Timer timer, Node contextNode, String str, NamespaceSet context,
		  EObject parent, boolean pathIsToLHS, 
		  Hashtable<String,Hashtable<String,Hashtable<String,Vector<Node>>>> nodeIndex)
  throws XPathExpressionException,MapperException
  {
	  timer.start(Timer.INDEXED_TOTAL);
	  // every timer is stopped in a finally block, so that none is left running after a failure
	  try
	  {
		  /* find out which path from the condition end to a node containing the string value 
		   * (if any) is best to use for the indexed retrieval,
		   * giving the smallest node set; and find its node set*/
		  Vector<Node> candidates = nodesForBestIndex(contextNode, str, context,
				  parent, pathIsToLHS, nodeIndex);

		  //no useful index was found; just follow the XPath to get all nodes
		  if (candidates == null)
		  {
			  timer.start(Timer.UNINDEXED_XPATH);
			  try
			  {
				  return selectNodeVector(contextNode, str, context);
			  }
			  finally
			  {
				  timer.stop(Timer.UNINDEXED_XPATH);
			  }
		  }

		  //check all nodes in the smallest node set, to see if they can be reached by the XPath
		  Vector<Node> res = new Vector<Node>();
		  timer.start(Timer.INDEXED_XPATH);
		  try
		  {
			  for (Node candidate : candidates)
			  {
				  if (canReachByPath(contextNode,candidate,str,context)) res.add(candidate);
			  }
		  }
		  finally
		  {
			  timer.stop(Timer.INDEXED_XPATH);
		  }
		  return res;
	  }
	  finally
	  {
		  timer.stop(Timer.INDEXED_TOTAL);
	  }
  }
  
  /**
   * Choose, among the cross conditions of a mapping, the index lookup which yields
   * the fewest candidate nodes, and deliver those candidates.
   * 
   * Only equality conditions with no function applied to the value on the path side
   * are considered. For each such condition the value on the other side is evaluated
   * from the context node and looked up in the index.
   * 
   * @param contextNode the start node of the XPath
   * @param str  string form of the XPath (not used here)
   * @param context the current namespace set
   * @param parent the parent object in the mapping set; it must be a Mapping
   * (nested link conditions are not yet supported)
   * @param pathIsToLHS true if the XPath leads to the node on the LHS of any 
   * cross conditions (for property mappings, this is the property node; for association
   * mappings, this is the association node)
   * @param nodeIndex nodes in the document, keyed in turn by root path, relative path
   * to the value, and the string value
   * @return null if no condition could use an index; an empty Vector if some usable index
   * has no entry for the value sought; otherwise the smallest node set found
   * @throws XPathExpressionException
   * @throws MapperException if the parent is not a Mapping
   */
  private static Vector<Node> nodesForBestIndex(Node contextNode, String str, NamespaceSet context,
		  EObject parent, boolean pathIsToLHS, 
		  Hashtable<String,Hashtable<String,Hashtable<String,Vector<Node>>>> nodeIndex)
  throws XPathExpressionException,MapperException
  {
	  if (!(parent instanceof Mapping))
	  {
		  throw new MapperException("Cannot yet do indexed node retrievals for nested link conditions");
	  }
	  Mapping mapping = (Mapping)parent;

	  // end (0 for property mappings, 1 or 2 for association ends) is needed to construct linkConditions
	  int end = 0;
	  if (parent instanceof AssocEndMapping)
	  {
		  end = ((AssocEndMapping)parent).getEnd();
	  }

	  // smallest candidate set seen so far; null until some index has been usable
	  Vector<Node> best = null;

	  Iterator<CrossCondition> conditions = mapping.getCrossConditions().iterator();
	  while (conditions.hasNext())
	  {
		  CrossCondition crossCondition = conditions.next();
		  linkCondition linkCond = new linkCondition(crossCondition,end);

		  String rootPath = null;
		  String relPath = null;
		  String value = null;

		  /* only equality link conditions, in which no function is applied to the
		   * string value of the target node, can be answered from the index */
		  if (pathIsToLHS)
		  {
			  if (crossCondition.getLeftFunction().equals("")
					  && crossCondition.getTest().equals(ConditionTest.EQUALS))
			  {
				  rootPath = linkCond.rootToLHSNode().stringForm();
				  relPath = linkCond.lhsEndToLeftValue().stringForm();
				  value = linkCond.rightValue(contextNode, context);
			  }
		  }
		  else
		  {
			  if (crossCondition.getRightFunction().equals("")
					  && crossCondition.getTest().equals(ConditionTest.EQUALS))
			  {
				  rootPath = linkCond.rootToRHSEnd().stringForm();
				  relPath = linkCond.rhsEndToRightValue().stringForm();
				  value = linkCond.leftValue(contextNode, context);
			  }
		  }

		  // no value to look up for this condition
		  if (value == null) continue;

		  Hashtable<String,Hashtable<String,Vector<Node>>> byRelPath = nodeIndex.get(rootPath);
		  if (byRelPath == null) continue;

		  Hashtable<String,Vector<Node>> byValue = byRelPath.get(relPath);
		  if (byValue == null) continue;

		  Vector<Node> matches = byValue.get(value);
		  // if any index eliminates all candidate nodes, return an empty node set
		  if (matches == null) return new Vector<Node>();

		  // keep the smallest set; the first of equal size wins
		  if ((best == null) || (matches.size() < best.size()))
		  {
			  best = matches;
		  }
	  }

	  return best;
  }
  
  
  /**
   * Test whether a mapping has any cross condition which could be answered from an index.
   * 
   * @param mapping a property mapping or association end mapping
   * @param pathIsToLHS true if the path leads to the LHS of cross-conditions
   * (= the property node or association node, not the object node)
   * i.e only false for the second leg of following an association mapping
   * @return true if at least one cross-condition of the mapping is an equality test
   * with no function applied on the side the path leads to, so that the XOReader
   * can use an index rather than follow the XPath
   * @throws MapperException
   */
  public static boolean isIndexed(Mapping mapping, boolean pathIsToLHS)
  throws MapperException
  {
	  boolean found = false;

	  Iterator<CrossCondition> conditions = mapping.getCrossConditions().iterator();
	  while (conditions.hasNext())
	  {
		  CrossCondition condition = conditions.next();

		  // the function on the side which the path leads to must be empty...
		  boolean noFunction = pathIsToLHS
				  ? condition.getLeftFunction().equals("")
				  : condition.getRightFunction().equals("");

		  // ...and the condition must be a test of equality
		  if (noFunction && condition.getTest().equals(ConditionTest.EQUALS))
		  {
			  found = true;
		  }
	  }

	  return found;
  }


  /**
   * Test, without evaluating the XPath, whether one node can be reached from another
   * by a path. The test compares node names along the parent chains of the two nodes
   * with the node tests of the steps of the path.
   * 
   * @param start the start node of an XPath
   * @param end the end node of an XPath
   * @param pathString string form of an XPath
   * @param context the namespace Set
   * @return true if the end node can be reached by the path from the 
   * start node. There are restrictions on the form of the path
   * (which could be removed given time):
   * (1) It must consist of a set of ascending steps (possibly empty)
   * followed by a set of descending steps
   * (2) If the ascending steps contain an 'ancestor' step, it must be the last of them
   * and must define a node name (i.e not be 'ancestor::node()')
   * (3) If the descending steps contain as 'descendant' step, it must be the first of them
   * The test is made in three stages:
   * (a) the initial ascending steps fit
   * (b) both nodes are under the path apex
   * (c) the descending steps work
   * @throws MapperException declared for callers; presumably raised by Xpth - TODO confirm
   * 
   * NOTE(review): stage (c) checks every step from the last one back to the first
   * descending step, and firstDescendingStep stays 0 if no descending step is found;
   * so a path with only ascending steps looks as if it always gives false - confirm
   * against Xpth.descending.
   * NOTE(review): the apex is remembered by name only, and for 'parent' steps that name
   * comes from getNodeName() (document prefixes), while stage (b) compares it through
   * namespaceEqualName (mapping set prefixes) - confirm the two agree.
   */
  public static boolean canReachByPath(Node start, Node end, String pathString, NamespaceSet context)
  throws MapperException
  {
	  Xpth path = new Xpth(context,pathString);
	  // index of the first descending step; remains 0 if there is none
	  int firstDescendingStep = 0;
	  // the node reached so far by 'parent' steps from the start node
	  Node current = start;
	  // name of the highest node of the path; the start node itself if there are no ascending steps
	  String apexName = current.getNodeName();
	  // true until the first descending step has been seen
	  boolean ascending = true;
	  
	  /* (a) match ascending steps. They should all be 'parent' except the last, 
	   * which may be 'ancestor' (-or-self).
	   * The loop visits every step of the path; steps on axes which are neither
	   * 'parent', ascending nor descending are passed over here. */
	  for (int s = 0; s < path.size(); s++)
	  {
		  String axis = path.step(s).axis();
		  String test = path.step(s).nodeTest();
		  if (axis.equals("parent"))
		  {
			  current = parentElement(current);
			  if (current == null) return false; // ran out at the top of the document
			  // '|' does not short-circuit, so the name comparison is made even for 'node()'
			  if ((test.equals("node()"))|(namespaceEqualName(current,test,context))) 
			  	{apexName = current.getNodeName();}
			  else return false; // could not match a parent step
		  }
		  else if (Xpth.ascending(axis))
		  {
			  // fail to match an ancestor step
			  if (!hasAncestorOrSelf(current,test,context)) return false;
			  // 'current' is not moved up; only the name of the ancestor is kept
			  apexName = test;
		  }
		  else if (Xpth.descending(axis))
		  {
			  // record only the first descending step
			  if (ascending) firstDescendingStep = s;
			  ascending = false;
		  }
	  }
	  
	  // (b) check the end node is somewhere under the apex node
	  // (the check is by name: some ancestor-or-self of the end node has the apex name)
	  if (!hasAncestorOrSelf(end,apexName,context)) return false;
	  
	  /* (c) check descending steps in reverse order. 
	   * They should all be 'child' except the last of the reverse order.
	   * Starting at the end node, each step's node test is compared with the current node,
	   * which then moves to its parent element. */
	  current = end;
	  for (int s = path.size()-1; s > firstDescendingStep-1; s--)
	  {
		  String axis = path.step(s).axis();
		  String test = path.step(s).nodeTest();		  
		  if (Xpth.descending(axis))
		  {
			  // this test works for Elements and attributes
			  if ((test.equals("node()"))|(namespaceEqualName(current,test,context))) 
				  {current = parentElement(current);} // pass; prepare for next step up
			  else return false; // could not match a child or descendant step
		  }
		  else return false; // some other axis, such as 'sibling'
	  }
	  
	  // survived all tests
	  return true; 
  }
  
  /**
   * Step up from a node to the Element above it.
   * 
   * @param node an Element or an attribute (any other kind of node gives null)
   * @return for an Element, its parent node if that is an Element; 
   * for an attribute, its owner element; otherwise null
   */
  private static Element parentElement(Node node)
  {
	  // only Elements are asked for their parent node
	  Node above = (node instanceof Element) ? node.getParentNode() : null;
	  if (above instanceof Element) return (Element)above;

	  // attributes have no parent node; use the owning element
	  if (node instanceof Attr) return ((Attr)node).getOwnerElement();

	  return null;
  }
  
  
  /**
   * Search upwards from a node for a node of a given name.
   * 
   * @param el a node; may be null
   * @param name a name, with namespace prefix as in the mapping set
   * @param context the set of namespaces with prefixes as in the mapping set
   * @return true if the node, or any node above it as found by parentElement, has the name
   */
  private static boolean hasAncestorOrSelf(Node el, String name, NamespaceSet context)
  {
	  // climb until a match is found, or until the climb runs off the top of the document
	  for (Node current = el; current != null; current = parentElement(current))
	  {
		  if (namespaceEqualName(current,name,context)) return true;
	  }
	  return false;
  }
  
  /**
   * Compare the name of a node in a document with a name from the mapping set.
   * 
   * @param nd a node in a document
   * @param name a node name, using namespace prefixes as in the mapping set (not the document)
   * @param context the set of namespaces with prefixes as in the mapping set
   * @return true if the node name matches the supplied name, taking account of 
   * possible different namespace prefixes in the document and the mapping set;
   * false if the node is in a namespace which the mapping set does not have
   */
  private static boolean namespaceEqualName(Node nd, String name, NamespaceSet context)
  {
	  /* if the node is in no namespace, or the namespace has no prefix in the mapping set
	   * the converted name will be the local name. 
	   * Nodes which are neither Elements nor attributes get the empty name. */
	  String convertedName = "";
	  if (nd instanceof Element) {convertedName = XMLUtil.getLocalName((Element)nd);}
	  else if (nd instanceof Attr) {convertedName = nd.getLocalName();}

	  /* DOM gives a null local name to nodes created without namespace support; 
	   * fall back to the node name, rather than fail on the null below */
	  if (convertedName == null) {convertedName = nd.getNodeName();}

	  String nsURI = nd.getNamespaceURI();
	  // if the node is in a namespace..
	  if (nsURI != null)
	  {
		  namespace ns = context.getByURI(nsURI); // find the namespace in the mapping set
		  // if the namespace URI is not in the namespace set of the mapping set, the names cannot match
		  if (ns == null) return false;
		  /* if the namespace prefix in the mapping set is non-empty, add it before the local name; 
		   * otherwise, use just the local name */
		  if (!ns.prefix().equals("")) convertedName = ns.prefix() + ":" + convertedName;
	  }

	  // compare the converted name with the name from the mapping set
	  return convertedName.equals(name);
  }
  
  /**
   * Write a diagnostic message to standard output.
   * 
   * @param s the text to write
   */
  static void message(String s)
  {
	  System.out.println(s);
  }
  

}