package com.openMap1.mapper.util;
import org.eclipse.emf.ecore.EObject;
import org.w3c.dom.Attr;
import org.w3c.dom.Element;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import com.openMap1.mapper.core.MapperException;
import com.openMap1.mapper.core.NamespaceSet;
import com.openMap1.mapper.core.Xpth;
import com.openMap1.mapper.core.namespace;
import com.openMap1.mapper.impl.ElementDefImpl;
import com.openMap1.mapper.mapping.linkCondition;
import com.openMap1.mapper.AssocEndMapping;
import com.openMap1.mapper.ConditionTest;
import com.openMap1.mapper.CrossCondition;
import com.openMap1.mapper.Mapping;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathFactory;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathConstants;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.StringTokenizer;
import java.util.Vector;
/**
* Class used by the XOReader to follow XPaths
* in the input instance, with various performance tweaks.
*
* @author robert
*
*/
public class XPathAPI
{
/**
* Timed version of selectNodeVector, for performance tuning.
* The time spent following the XPath is recorded under Timer.XPATH.
*
* @param timer the timer that accumulates the time spent in XPath evaluation
* @param contextNode the node to start searching from
* @param str string form of the XPath
* @param context the namespace set used to resolve prefixes in the XPath
* @return the nodes selected by the XPath
* @throws XPathExpressionException declared by the untimed selectNodeVector
* @throws MapperException if the untimed selectNodeVector fails
*/
public static Vector<Node> selectNodeVector(Timer timer, Node contextNode, String str, NamespaceSet context)
throws XPathExpressionException,MapperException
{
    timer.start(Timer.XPATH);
    try
    {
        return selectNodeVector(contextNode, str, context);
    }
    finally
    {
        // stop the timer even when the evaluation throws, so that a failure does not leave it running
        timer.stop(Timer.XPATH);
    }
}
/**
* Use an XPath string to select a Vector of nodes.
* XPath namespace prefixes are resolved from the supplied namespace set.
*
* If the path ends in the virtual attribute ElementDefImpl.ELEMENT_POSITION_ATTRIBUTE
* (introduced to track the ordinal position of an Element beneath its parent),
* the attribute does not exist in the document, so the shorter path to the owning
* element is followed and the elements are returned instead.
*
* If the namespace set contains a namespace with prefix "" (other than the
* "no target namespace" placeholder), the path is converted to use a namespace set in which
* that namespace has a non-empty prefix, because XPath does not work without prefixes.
* This conversion assumes that all element names have form = qualified.
*
* @param contextNode The node to start searching from.
* @param pathString A valid XPath string.
* @param context The namespace set from which prefixes in the XPath are resolved.
* @return A Vector of nodes, never null.
* @throws XPathExpressionException declared for callers; not thrown directly by this method
* @throws MapperException if the XPath cannot be followed
*/
public static Vector<Node> selectNodeVector(Node contextNode, String pathString, NamespaceSet context)
throws XPathExpressionException,MapperException
{
    String path = pathString;
    if (pathString.endsWith(ElementDefImpl.ELEMENT_POSITION_ATTRIBUTE))
    {
        // strip off the virtual attribute name and the preceding '/@'
        int len = pathString.length() - (ElementDefImpl.ELEMENT_POSITION_ATTRIBUTE).length() - 2;
        if (len < 0) path = "."; // common case when pathString is just '@..'; and len = -1
        else path = pathString.substring(0, len);
    }

    boolean hasDefaultNamespace = ((context.getByPrefix("") != null) &&
        (!context.getByPrefix("").URI().equals("no target namespace")));

    // (1) no default namespace in the set: the path can be followed as it stands
    if (!hasDefaultNamespace)
    {
        return fastNodeVector(contextNode,path,context);
    }

    /* (2) there is a default namespace: give it a prefix that clashes with no other,
     * and rewrite the path to use the new prefixes */
    String prefix = context.nonClashPrefix();
    NamespaceSet newContext = context.withPrefixForDefaultNamespace(prefix);
    String newPath = new Xpth(context,path).convertPrefixes(newContext).stringForm();
    return fastNodeVector(contextNode,newPath,newContext);
}
/**
* Retrieve the nodes at the end of an XPath, treating a few simple cases by direct
* DOM navigation (as a speed-up) and passing every other path to the XPath engine.
*
* Cases handled without XPath:
* (1) '.' or 'self::node()': the context node itself is returned;
* (2) a one-step path from an Element: the child elements found by
*     XMLUtil.namedChildElements for the step name with any prefix removed;
* (3) a two-step path from an Element: the same, applied to each child in turn.
* Cases (2) and (3) are only used for relative paths that do not start with
* 'parent', 'ancestor' or '..' and are not recognised as attribute paths.
*
* @param contextNode the node to start from
* @param str string form of the XPath
* @param context namespace set handed to the XPath engine to resolve prefixes
* @return the nodes found, in the order they were found; never null
* @throws MapperException carrying the message of any exception raised while evaluating the XPath
* FIXME - I don't think this implementation is careful enough about namespaces
*/
private static Vector<Node> fastNodeVector(Node contextNode, String str, NamespaceSet context)
throws MapperException
{
    Vector<Node> found = new Vector<Node>();

    // the trivial 'stay still' XPath needs no navigation at all
    if (str.equals(".") || str.equals("self::node()"))
    {
        found.add(contextNode);
    }
    else
    {
        boolean goesUp = str.startsWith("parent")
            || str.startsWith("ancestor")
            || str.startsWith("..");

        StringTokenizer stepTokens = new StringTokenizer(str,"/");
        // absolute paths are never evaluated on the fast route, so give them a step count of 0
        int stepCount = str.startsWith("/") ? 0 : stepTokens.countTokens();

        boolean touchesAttribute = (new StringTokenizer(str,"@").countTokens() == 2)
            || str.startsWith("@");

        boolean fastRoute = !touchesAttribute && !goesUp && (contextNode instanceof Element);

        if ((stepCount == 1) && fastRoute)
        {
            // one descending step to an Element
            found.addAll(XMLUtil.namedChildElements((Element)contextNode, noPrefix(str)));
        }
        else if ((stepCount == 2) && fastRoute)
        {
            // two descending steps to an Element
            String firstName = noPrefix(stepTokens.nextToken());
            String secondName = noPrefix(stepTokens.nextToken());
            for (Element child : XMLUtil.namedChildElements((Element)contextNode, firstName))
            {
                found.addAll(XMLUtil.namedChildElements(child, secondName));
            }
        }
        /* A fast route for a single step to an attribute (getAttributeNode on the
         * context Element) used to sit here; it is bypassed because it goes wrong
         * when the attribute is in a namespace. */
        else
        {
            // all other cases; follow the XPath
            try
            {
                XPath evaluator = XPathFactory.newInstance().newXPath();
                evaluator.setNamespaceContext(context);
                NodeList hits = (NodeList)evaluator.evaluate(str, contextNode, XPathConstants.NODESET);
                int count = hits.getLength();
                for (int i = 0; i < count; i++)
                {
                    found.add(hits.item(i));
                }
            }
            catch (Exception ex)
            {
                throw new MapperException(ex.getMessage());
            }
        }
    }

    // diagnostics for mysterious failures; switch on by hand when needed
    boolean tracing = false;
    if (tracing)
    {
        if (!found.isEmpty())
        {
            message("FastNodeVector " + str + "; " + found.size());
        }
        else if (contextNode instanceof Element)
        {
            Element contextElement = (Element)contextNode;
            String cands = "";
            for (Element candidate : XMLUtil.childElements(contextElement))
            {
                cands = cands + candidate.getLocalName() + "; " ;
            }
            message("*** Child elements of " + contextElement.getLocalName() + ": " + cands
                + " when looking for " + noPrefix(str));
        }
    }
    return found;
}
/**
* Remove a namespace prefix from a name.
* The name is split at ':' characters; if there are two or more pieces the second
* piece is returned, otherwise the only piece is returned.
*
* @param s a name, possibly of the form 'prefix:localName'
* @return the second ':'-separated piece if there is one, otherwise the first
*/
private static String noPrefix(String s)
{
    StringTokenizer pieces = new StringTokenizer(s,":");
    String first = pieces.nextToken();
    return pieces.hasMoreTokens() ? pieces.nextToken() : first;
}
//----------------------------------------------------------------------------------------------
// delivering the node set in the presence of indexed link conditions
//----------------------------------------------------------------------------------------------
/**
* Follow a cross path from a node, where there may be link conditions which have been indexed
* for efficient retrieval of a small set of nodes.
* If there are indexes, use one of them to find a small node set,
* and then check that each node in the node set can be reached by the XPath.
* If no index can be used, the XPath is simply followed to get all nodes.
*
* The link conditions will all need to be tested again, because
* (a) only one condition will be used for the indexed retrieval, and
* (b) sometimes (when there are further conditions on the link condition paths) the indexing
* lets in some nodes which do not satisfy the link condition used for indexing
*
* Time is recorded under Timer.INDEXED_TOTAL for the whole call, and under
* Timer.UNINDEXED_XPATH or Timer.INDEXED_XPATH for the route actually taken.
*
* @param timer the timer used to record the time spent
* @param contextNode the start node of the path
* @param str string form of the XPath
* @param context Namespace set - used by XPath to handle e.g. changed namespace prefixes
* @param parent the parent object in the mapping set;
* either the mapping for which this is a cross path; and which may have
* one or more cross conditions (= link conditions);
* or the condition which this is a nested condition on
* @param pathIsToLHS true if the XPath leads to the node on the LHS of any
* cross conditions (for property mappings, this is the property node; for association
* mappings, this is the association node)
* @param nodeIndex nodes of the document, looked up by three keys in turn:
* the root path of the link condition end, the relative path from that end to the
* string value, and the string value itself
* @return a set of nodes - should be small - which are reachable by the XPath and are highly likely
* to satisfy the selected link condition; and include all those which do
* @throws XPathExpressionException declared by the methods called
* @throws MapperException if the parent is not a Mapping, or a path cannot be followed
*/
public static Vector<Node> indexedSelectNodeVector(Timer timer, Node contextNode, String str, NamespaceSet context,
EObject parent, boolean pathIsToLHS,
Hashtable<String,Hashtable<String,Hashtable<String,Vector<Node>>>> nodeIndex)
throws XPathExpressionException,MapperException
{
    timer.start(Timer.INDEXED_TOTAL);
    try
    {
        /* find out which path from the condition end to a node containing the string value
         * (if any) is best to use for the indexed retrieval,
         * giving the smallest node set; and find its node set */
        Vector<Node> candidates = nodesForBestIndex(contextNode, str, context,
            parent, pathIsToLHS, nodeIndex);

        // no useful index was found; just follow the XPath to get all nodes
        if (candidates == null)
        {
            timer.start(Timer.UNINDEXED_XPATH);
            try
            {
                return selectNodeVector(contextNode, str, context);
            }
            finally
            {
                timer.stop(Timer.UNINDEXED_XPATH);
            }
        }

        // check all nodes in the smallest node set, to see if they can be reached by the XPath
        Vector<Node> res = new Vector<Node>();
        timer.start(Timer.INDEXED_XPATH);
        try
        {
            for (Iterator<Node> it = candidates.iterator(); it.hasNext();)
            {
                Node candidate = it.next();
                if (canReachByPath(contextNode,candidate,str,context)) res.add(candidate);
            }
        }
        finally
        {
            timer.stop(Timer.INDEXED_XPATH);
        }
        return res;
    }
    finally
    {
        // always stop the overall timer, even when a callee throws
        timer.stop(Timer.INDEXED_TOTAL);
    }
}
/**
* Choose the link condition whose index gives the smallest candidate node set,
* and return that node set.
*
* Only equality link conditions with no function applied on the side the path
* leads to are considered. For each of them, the string value on the other side is
* evaluated at the context node and looked up in the index.
*
* @param contextNode the start node of the XPath
* @param str string form of the XPath (not used in the lookup itself)
* @param context the current namespace set
* @param parent the parent object in the mapping set;
* either the mapping for which this is a cross path; and which may have
* one or more cross conditions (= link conditions);
* or the condition which this is a nested condition on (not yet supported)
* @param pathIsToLHS true if the XPath leads to the node on the LHS of any
* cross conditions (for property mappings, this is the property node; for association
* mappings, this is the association node)
* @param nodeIndex nodes of the document, looked up by three keys in turn:
* the root path of the link condition end, the relative path from that end to the
* string value, and the string value itself
* @return the smallest indexed node set found; an empty Vector as soon as some index
* holds no nodes for the value sought; or null if no link condition could use an index
* @throws XPathExpressionException declared for callers
* @throws MapperException if the parent is not a Mapping
*/
private static Vector<Node> nodesForBestIndex(Node contextNode, String str, NamespaceSet context,
EObject parent, boolean pathIsToLHS,
Hashtable<String,Hashtable<String,Hashtable<String,Vector<Node>>>> nodeIndex)
throws XPathExpressionException,MapperException
{
    if (!(parent instanceof Mapping))
    {
        throw new MapperException("Cannot yet do indexed node retrievals for nested link conditions");
    }
    Mapping mapping = (Mapping)parent;

    // end (0 for property mappings, 1 or 2 for association ends) is needed to construct linkConditions
    int end = (parent instanceof AssocEndMapping) ? ((AssocEndMapping)parent).getEnd() : 0;

    Vector<Node> best = null; // stays null until some index has supplied a node set
    int bestSize = -1;

    for (CrossCondition crossCondition : mapping.getCrossConditions())
    {
        linkCondition linkCond = new linkCondition(crossCondition,end);

        /* only consider equality link conditions in which no function
         * is applied to the string value of the target node */
        boolean noFunction = pathIsToLHS
            ? crossCondition.getLeftFunction().equals("")
            : crossCondition.getRightFunction().equals("");
        if (!(noFunction && crossCondition.getTest().equals(ConditionTest.EQUALS))) continue;

        // the two index keys for the end the path leads to, and the value from the other end
        String indexedRootPath;
        String indexedRelPath;
        String valueSought;
        if (pathIsToLHS)
        {
            indexedRootPath = linkCond.rootToLHSNode().stringForm();
            indexedRelPath = linkCond.lhsEndToLeftValue().stringForm();
            valueSought = linkCond.rightValue(contextNode, context);
        }
        else
        {
            indexedRootPath = linkCond.rootToRHSEnd().stringForm();
            indexedRelPath = linkCond.rhsEndToRightValue().stringForm();
            valueSought = linkCond.leftValue(contextNode, context);
        }
        if (valueSought == null) continue;

        Hashtable<String,Hashtable<String,Vector<Node>>> indexForAbsPath = nodeIndex.get(indexedRootPath);
        if (indexForAbsPath == null) continue;

        Hashtable<String,Vector<Node>> indexForRelPath = indexForAbsPath.get(indexedRelPath);
        if (indexForRelPath == null) continue;

        Vector<Node> nodesForValue = indexForRelPath.get(valueSought);
        // if any index eliminates all candidate nodes, return an empty node set
        if (nodesForValue == null) return new Vector<Node>();

        // keep the smallest node set seen so far
        int size = nodesForValue.size();
        if ((bestSize == -1) || (size < bestSize))
        {
            bestSize = size;
            best = nodesForValue;
        }
    }
    return best;
}
/**
* Test whether a mapping has any cross-condition that could be served by an index.
* A cross-condition qualifies if it is an equality test and no function is applied
* on the side that the path leads to.
*
* @param mapping a property mapping or association end mapping
* @param pathIsToLHS true if the path leads to the LHS of cross-conditions
* (= the property node or association node, not the object node)
* i.e only false for the second leg of following an association mapping
* @return true if any cross-conditions in the mapping allow the XOReader to use
* an index, rather than follow the XPath
* @throws MapperException declared for callers
*/
public static boolean isIndexed(Mapping mapping, boolean pathIsToLHS)
throws MapperException
{
    boolean indexed = false;
    for (CrossCondition crossCondition : mapping.getCrossConditions())
    {
        // the function that matters is the one on the side the path leads to
        boolean noFunction = pathIsToLHS
            ? crossCondition.getLeftFunction().equals("")
            : crossCondition.getRightFunction().equals("");
        if (noFunction && crossCondition.getTest().equals(ConditionTest.EQUALS))
        {
            indexed = true;
        }
    }
    return indexed;
}
/**
* Test whether one node can be reached from another by an XPath, without evaluating the XPath.
*
* @param start the start node of an XPath
* @param end the end node of an XPath
* @param pathString string form of an XPath
* @param context the namespace Set
* @return true if the end node can be reached by the path from the
* start node. There are restrictions on the form of the path
* (which could be removed given time):
* (1) It must consist of a set of ascending steps (possibly empty)
* followed by a set of descending steps
* (2) If the ascending steps contain an 'ancestor' step, it must be the last of them
* and must define a node name (i.e not be 'ancestor::node()')
* (3) If the descending steps contain a 'descendant' step, it must be the first of them
* The test is made in three stages:
* (a) the initial ascending steps fit
* (b) both nodes are under the path apex
* (c) the descending steps work
* @throws MapperException declared for the construction of the path
*/
public static boolean canReachByPath(Node start, Node end, String pathString, NamespaceSet context)
throws MapperException
{
    Xpth path = new Xpth(context,pathString);
    int firstDescendingStep = 0;
    Node current = start;
    String apexName = current.getNodeName();
    boolean ascending = true;

    /* (a) match ascending steps. They should all be 'parent' except the last,
     * which may be 'ancestor' (-or-self) */
    for (int s = 0; s < path.size(); s++)
    {
        String axis = path.step(s).axis();
        String test = path.step(s).nodeTest();
        if (axis.equals("parent"))
        {
            current = parentElement(current);
            if (current == null) return false; // ran out at the top of the document
            // short-circuit: 'node()' matches any parent, so no name comparison is needed
            if (test.equals("node()") || namespaceEqualName(current,test,context))
            {
                apexName = current.getNodeName();
            }
            else return false; // could not match a parent step
        }
        else if (Xpth.ascending(axis))
        {
            // fail to match an ancestor step
            if (!hasAncestorOrSelf(current,test,context)) return false;
            apexName = test;
        }
        else if (Xpth.descending(axis))
        {
            if (ascending) firstDescendingStep = s;
            ascending = false;
        }
    }

    /* (b) check the end node is somewhere under the apex node.
     * NOTE(review): this compares names only, so an end node lying under a different
     * element that has the same name as the apex also passes - confirm this is acceptable. */
    if (!hasAncestorOrSelf(end,apexName,context)) return false;

    /* (c) check descending steps in reverse order.
     * They should all be 'child' except the last of the reverse order */
    current = end;
    for (int s = path.size()-1; s > firstDescendingStep-1; s--)
    {
        String axis = path.step(s).axis();
        String test = path.step(s).nodeTest();
        if (!Xpth.descending(axis)) return false; // some other axis, such as 'sibling'

        /* the end node has fewer element ancestors than the path has descending steps,
         * so the path cannot lead to it (previously this caused a NullPointerException) */
        if (current == null) return false;

        // this test works for Elements and attributes
        if (test.equals("node()") || namespaceEqualName(current,test,context))
        {
            current = parentElement(current); // pass; prepare for next step up
        }
        else return false; // could not match a child or descendant step
    }

    // survived all tests
    return true;
}
/**
* Find the Element directly above a node.
*
* @param node an Element or an attribute; any other kind of node gives null
* @return for an Element, its parent node if that is an Element;
* for an attribute, its owner Element; otherwise null
*/
private static Element parentElement(Node node)
{
    if (node instanceof Element)
    {
        Node above = node.getParentNode();
        if (above instanceof Element) return (Element)above;
    }
    if (node instanceof Attr)
    {
        return ((Attr)node).getOwnerElement();
    }
    return null;
}
/**
* Walk up from a node, through parent Elements, looking for a name.
*
* @param el a node; may be null, in which case the result is false
* @param name a node name, using namespace prefixes as in the mapping set
* @param context the set of namespaces with prefixes as in the mapping set
* @return true if the node, or any of its ancestors, has the name
*/
private static boolean hasAncestorOrSelf(Node el, String name, NamespaceSet context)
{
    Node cursor = el;
    while (cursor != null)
    {
        if (namespaceEqualName(cursor,name,context)) return true;
        cursor = parentElement(cursor);
    }
    // ran off the top of the document without a match
    return false;
}
/**
* Compare the name of a document node with a name taken from the mapping set.
*
* @param nd a node in a document; must not be null
* @param name a node name, using namespace prefixes as in the mapping set (not the document)
* @param context the set of namespaces with prefixes as in the mapping set
* @return true if the node name matches the supplied name, taking account of
* possible different namespace prefixes in the document and the mapping set
*/
private static boolean namespaceEqualName(Node nd, String name, NamespaceSet context)
{
    /* if the node is in no namespace, or the namespace has no prefix in the mapping set
     * the converted name will be the local name. */
    String convertedName = "";
    if (nd instanceof Element) {convertedName = XMLUtil.getLocalName((Element)nd);}
    else if (nd instanceof Attr) {convertedName = nd.getLocalName();}

    /* A node with no local name (the DOM allows null here, e.g. for nodes created without
     * namespace support) used to be reported on standard output and then caused a
     * NullPointerException or a name such as "p:null"; use its node name instead. */
    if (convertedName == null) {convertedName = nd.getNodeName();}

    String nsURI = nd.getNamespaceURI();
    // if the node is in a namespace..
    if (nsURI != null)
    {
        namespace ns = context.getByURI(nsURI); // find the namespace in the mapping set
        // if the namespace URI is not in the namespace set of the mapping set, the names cannot match
        if (ns == null) return false;
        /* if the namespace prefix in the mapping set is non-empty, add it before the local name;
         * otherwise, use just the local name */
        if (!ns.prefix().equals("")) convertedName = ns.prefix() + ":" + convertedName;
    }

    // compare the converted name with the name from the mapping set
    return (convertedName.equals(name));
}
/**
* Write a diagnostic message to standard output.
*
* @param s the text to write
*/
static void message(String s)
{
    System.out.println(s);
}
}