package client.net.sf.saxon.ce.trans; import client.net.sf.saxon.ce.Configuration; import client.net.sf.saxon.ce.Controller; import client.net.sf.saxon.ce.expr.*; import client.net.sf.saxon.ce.expr.instruct.SlotManager; import client.net.sf.saxon.ce.expr.sort.LocalOrderComparer; import client.net.sf.saxon.ce.lib.StringCollator; import client.net.sf.saxon.ce.om.*; import client.net.sf.saxon.ce.pattern.Pattern; import client.net.sf.saxon.ce.tree.iter.AxisIterator; import client.net.sf.saxon.ce.tree.iter.EmptyIterator; import client.net.sf.saxon.ce.tree.iter.ListIterator; import client.net.sf.saxon.ce.tree.iter.SingleNodeIterator; import client.net.sf.saxon.ce.type.AtomicType; import client.net.sf.saxon.ce.type.BuiltInAtomicType; import client.net.sf.saxon.ce.type.BuiltInType; import client.net.sf.saxon.ce.type.Type; import client.net.sf.saxon.ce.value.AtomicValue; import client.net.sf.saxon.ce.value.DoubleValue; import client.net.sf.saxon.ce.value.NumericValue; import java.util.*; /** * KeyManager manages the set of key definitions in a stylesheet, and the indexes * associated with these key definitions. It handles xsl:sort-key as well as xsl:key * definitions. * * <p>The memory management in this class is subtle, with extensive use of weak references. * The idea is that an index should continue to exist in memory so long as both the compiled * stylesheet and the source document exist in memory: if either is removed, the index should * go too. The document itself holds no reference to the index. The compiled stylesheet (which * owns the KeyManager) holds a weak reference to the index. The index, of course, holds strong * references to the nodes in the document. The Controller holds a strong reference to the * list of indexes used for each document, so that indexes remain in memory for the duration * of a transformation even if the documents themselves are garbage collected.</p> * * <p>Potentially there is a need for more than one index for a given key name, depending * on the primitive type of the value provided to the key() function. An index is built * corresponding to the type of the requested value; if subsequently the key() function is * called with the same name and a different type of value, then a new index is built.</p> * * <p>For XSLT-defined keys, equality matching follows the rules of the eq operator, which means * that untypedAtomic values are treated as strings. In backwards compatibility mode, <i>all</i> * values are converted to strings.</p> * * <p>This class is also used for internal indexes constructed (a) to support the idref() function, * and (b) (in Saxon-EE only) to support filter expressions of the form /a/b/c[d=e], where the * path expression being filtered must be a single-document context-free path rooted at a document node, * where exactly one of d and e must be dependent on the focus, and where certain other conditions apply * such as the filter predicate not being positional. The operator in this case may be either "=" or "eq". * If it is "eq", then the semantics are very similar to xsl:key indexes, except that use of non-comparable * types gives an error rather than a non-match. If the operator is "=", however, then the rules for * handling untypedAtomic values are different: these must be converted to the type of the other operand. * In this situation the following rules apply. Assume that the predicate is [use=value], where use is * dependent on the focus (the indexed value), and value is the sought value.</p> * * <ul> * <li>If value is a type other than untypedAtomic, say T, then we build an index for type T, in which any * untypedAtomic values that arise in evaluating "use" are converted to type T. A conversion failure results * in an error. A value of a type that is not comparable to T also results in an error.</li> * <li>If value is untypedAtomic, then we build an index for every type actually encountered in evaluating * the use expression (treating untypedAtomic as string), and then search each of these indexes. (Note that * it is not an error if the use expression returns a mixture of say numbers and dates, provided that the * sought value is untypedAtomic).</li> * </ul> * * @author Michael H. Kay */ public class KeyManager { private HashMap<StructuredQName, KeyDefinitionSet> keyMap; // one entry for each named key; the entry contains // a KeyDefinitionSet holding the key definitions with that name private transient HashMap<DocumentInfo, HashMap<Long, Object>> docIndexes; // one entry for each document that is in memory; // the entry contains a HashMap mapping the fingerprint of // the key name plus the primitive item type // to the HashMap that is the actual index // of key/value pairs. /** * Create a KeyManager and initialise variables */ public KeyManager() { keyMap = new HashMap<StructuredQName, KeyDefinitionSet>(10); docIndexes = new HashMap<DocumentInfo, HashMap<Long, Object>>(10); } /** * Pre-register a key definition. This simply registers that a key with a given name exists, * without providing any details. * @param keyName the name of the key to be pre-registered */ public void preRegisterKeyDefinition(StructuredQName keyName) { KeyDefinitionSet keySet = keyMap.get(keyName); if (keySet==null) { keySet = new KeyDefinitionSet(keyName, keyMap.size()); keyMap.put(keyName, keySet); } } /** * Register a key definition. Note that multiple key definitions with the same name are * allowed * @param keyName Structured QName representing the name of the key * @param keydef The details of the key's definition * @param config The configuration * @throws XPathException if this key definition is inconsistent with existing key definitions having the same name */ public void addKeyDefinition(StructuredQName keyName, KeyDefinition keydef, Configuration config) throws XPathException { KeyDefinitionSet keySet = keyMap.get(keyName); if (keySet==null) { keySet = new KeyDefinitionSet(keyName, keyMap.size()); keyMap.put(keyName, keySet); } keySet.addKeyDefinition(keydef); boolean backwardsCompatible = keySet.isBackwardsCompatible(); if (backwardsCompatible) { // In backwards compatibility mode, convert all the use-expression results to sequences of strings List v = keySet.getKeyDefinitions(); for (int i=0; i<v.size(); i++) { KeyDefinition kd = (KeyDefinition)v.get(i); kd.setBackwardsCompatible(true); if (!kd.getBody().getItemType(config.getTypeHierarchy()).equals(BuiltInAtomicType.STRING)) { Expression exp = new AtomicSequenceConverter(kd.getBody(), BuiltInAtomicType.STRING); kd.setBody(exp); } } } } /** * Get all the key definitions that match a particular name * @param qName The name of the required key * @return The set of key definitions of the named key if there are any, or null otherwise. */ public KeyDefinitionSet getKeyDefinitionSet(StructuredQName qName) { return keyMap.get(qName); } /** * Build the index for a particular document for a named key * @param keySet The set of key definitions with this name * @param itemType the type of the values to be indexed. * @param foundItemTypes Optional (may be null). If supplied, a set that is to be populated with * the set of primitive types actually found among the "use" values. * @param doc The source document in question * @param context The dynamic context * @return the index in question, as a HashMap mapping a key value onto a ArrayList of nodes */ private synchronized HashMap buildIndex(KeyDefinitionSet keySet, BuiltInAtomicType itemType, Set<BuiltInAtomicType> foundItemTypes, DocumentInfo doc, XPathContext context) throws XPathException { List<KeyDefinition> definitions = keySet.getKeyDefinitions(); HashMap<Object, List<NodeInfo>> index = new HashMap<Object, List<NodeInfo>>(100); // There may be multiple xsl:key definitions with the same name. Index them all. for (int k=0; k<definitions.size(); k++) { constructIndex( doc, index, definitions.get(k), itemType, foundItemTypes, context, k == 0); } return index; } /** * Process one key definition to add entries to an index * @param doc the document to be indexed * @param index the index to be built * @param keydef the key definition used to build the index * @param soughtItemType the primitive type of the value that the user is searching for on the call * to the key() function that triggered this index to be built * @param foundItemTypes Optional (may be null): if supplied, a Set to be populated with the set of * primitive types actually found for the use expression * @param context the XPath dynamic evaluation context * @param isFirst true if this is the first index to be built for this key */ private void constructIndex( DocumentInfo doc, HashMap<Object, List<NodeInfo>> index, KeyDefinition keydef, BuiltInAtomicType soughtItemType, Set<BuiltInAtomicType> foundItemTypes, XPathContext context, boolean isFirst) throws XPathException { //System.err.println("build index for doc " + doc.getDocumentNumber()); Pattern match = keydef.getMatch(); //NodeInfo curr; XPathContextMajor xc = context.newContext(); // The use expression (or sequence constructor) may contain local variables. SlotManager map = keydef.getStackFrameMap(); if (map != null) { xc.openStackFrame(map); } SequenceIterator iter = match.selectNodes(doc, xc); while (true) { Item item = iter.next(); if (item == null) { break; } processKeyNode((NodeInfo)item, soughtItemType, foundItemTypes, keydef, index, xc, isFirst); } } /** * Process one matching node, adding entries to the index if appropriate * @param curr the node being processed * @param soughtItemType the primitive item type of the argument to the key() function that triggered * this index to be built * @param foundItemTypes Optional (may be null): if supplied, a Set to be populated with the set of * primitive types actually found for the use expression * @param keydef the key definition * @param index the index being constructed * @param xc the context for evaluating expressions * @param isFirst indicates whether this is the first key definition with a given key name (which means * no sort of the resulting key entries is required) */ private void processKeyNode( NodeInfo curr, BuiltInAtomicType soughtItemType, Set<BuiltInAtomicType> foundItemTypes, KeyDefinition keydef, HashMap<Object, List<NodeInfo>> index, XPathContext xc, boolean isFirst) throws XPathException { // Make the node we are testing the context node, // with context position and context size set to 1 AxisIterator si = SingleNodeIterator.makeIterator(curr); si.next(); // need to position iterator at first node xc.setCurrentIterator(si); StringCollator collation = keydef.getCollation(); // Evaluate the "use" expression against this context node Expression use = keydef.getUse(); SequenceIterator useval = use.iterate(xc); while (true) { AtomicValue item = (AtomicValue)useval.next(); if (item == null) { break; } BuiltInAtomicType actualItemType = item.getPrimitiveType(); if (foundItemTypes != null) { foundItemTypes.add(actualItemType); } if (!Type.isComparable(actualItemType, soughtItemType, false)) { // the types aren't comparable // simply ignore this key value continue; } Object val; if (soughtItemType.equals(BuiltInAtomicType.UNTYPED_ATOMIC) || soughtItemType.equals(BuiltInAtomicType.STRING) || soughtItemType.equals(BuiltInAtomicType.ANY_URI)) { // If the supplied key value is untyped atomic, we build an index using the // actual type returned by the use expression // If the supplied key value is a string, there is no match unless the use expression // returns a string or an untyped atomic value if (collation == null) { val = item.getStringValue(); } else { val = collation.getCollationKey(item.getStringValue()); } } else { // Ignore NaN values if (item.isNaN()) { break; } try { AtomicValue av = item.convert(soughtItemType, true).asAtomic(); val = av.getXPathComparable(false, collation, xc); } catch (XPathException err) { // ignore values that can't be converted to the required type break; } } List<NodeInfo> nodes = index.get(val); if (nodes==null) { // this is the first node with this key value nodes = new ArrayList<NodeInfo>(4); index.put(val, nodes); nodes.add(curr); } else { // this is not the first node with this key value. // add the node to the list of nodes for this key, // unless it's already there if (isFirst) { // if this is the first index definition that we're processing, // then this node must be after all existing nodes in document // order, or the same node as the last existing node if (nodes.get(nodes.size()-1)!=curr) { nodes.add(curr); } } else { // otherwise, we need to insert the node at the correct // position in document order. This code does an insertion sort: // not ideal for performance, but it's very unusual to have more than // one key definition for a key. LocalOrderComparer comparer = LocalOrderComparer.getInstance(); boolean found = false; for (int i=0; i<nodes.size(); i++) { int d = comparer.compare(curr, nodes.get(i)); if (d<=0) { if (d==0) { // node already in list; do nothing } else { // add the node at this position nodes.add(i, curr); } found = true; break; } // else continue round the loop } // if we're still here, add the new node at the end if (!found) { nodes.add(curr); } } } } } /** * Get the nodes with a given key value * @param keySet The set of key definitions identified by the key name used in the call to the key() function * @param doc The source document in question * @param soughtValue The required key value * @param context The dynamic context, needed only the first time when the key is being built * @return an iteration of the selected nodes, always in document order with no duplicates */ public SequenceIterator selectByKey( KeyDefinitionSet keySet, DocumentInfo doc, AtomicValue soughtValue, XPathContext context) throws XPathException { //System.err.println("*********** USING KEY ************"); if (soughtValue == null) { return EmptyIterator.getInstance(); } List definitions = keySet.getKeyDefinitions(); // if (definitions == null) { // throw new XPathException("Key " + context.getNamePool().getDisplayName(keyNameFingerprint) + // " has not been defined", "XTDE1260", context); // } KeyDefinition definition = (KeyDefinition)definitions.get(0); // the itemType and collation and BC mode will be the same for all keys with the same name StringCollator collation = definition.getCollation(); if (keySet.isBackwardsCompatible()) { // if backwards compatibility is in force, treat all values as strings soughtValue = soughtValue.convert(BuiltInAtomicType.STRING, true).asAtomic(); } else { // If the key value is numeric, promote it to a double // TODO: this could result in two decimals comparing equal because they convert to the same double BuiltInAtomicType itemType = soughtValue.getPrimitiveType(); if (itemType.equals(BuiltInAtomicType.INTEGER) || itemType.equals(BuiltInAtomicType.DECIMAL) || itemType.equals(BuiltInAtomicType.FLOAT)) { soughtValue = new DoubleValue(((NumericValue)soughtValue).getDoubleValue()); } } // NOTE: This is much more elaborate than it needs to be. The option convertUntypedToOther // is used for an index used to support a general comparison. This reports an error if two // non-comparable values are compared. We could report an error immediately if foundItemTypes // includes a type that is not comparable to the soughtValue. In practice we only need a maximum // of two indexes: one for the sought item type, and one for untypedAtomic. HashSet<BuiltInAtomicType> foundItemTypes = null; AtomicValue value = soughtValue; // No special action needed for anyURI to string promotion (it just seems to work: tests idky44, 45) int keySetNumber = keySet.getKeySetNumber(); BuiltInAtomicType itemType = value.getPrimitiveType(); HashMap index; Object indexObject = getIndex(doc, keySetNumber, itemType); if (indexObject instanceof String) { // index is under construction XPathException de = new XPathException("Key definition is circular"); de.setXPathContext(context); de.setErrorCode("XTDE0640"); throw de; } index = (HashMap)indexObject; // If the index does not yet exist, then create it. if (index==null) { // Mark the index as being under construction, in case the definition is circular putIndex(doc, keySetNumber, itemType, "Under Construction", context); index = buildIndex(keySet, itemType, foundItemTypes, doc, context); putIndex(doc, keySetNumber, itemType, index, context); if (foundItemTypes != null) { // build indexes for each item type actually found for (Iterator<BuiltInAtomicType> f = foundItemTypes.iterator(); f.hasNext();) { BuiltInAtomicType t = f.next(); if (!t.equals(BuiltInAtomicType.STRING)) { putIndex(doc, keySetNumber, t, "Under Construction", context); index = buildIndex(keySet, t, null, doc, context); putIndex(doc, keySetNumber, t, index, context); } } } } if (foundItemTypes == null) { ArrayList nodes = (ArrayList)index.get(getCollationKey(value, itemType, collation, context)); if (nodes==null) { return EmptyIterator.getInstance(); } else { return new client.net.sf.saxon.ce.tree.iter.ListIterator(nodes); } } else { // we need to search the indexes for all possible types, and combine the results. SequenceIterator result = null; HashMap<Long, Object> docIndex = docIndexes.get(doc); if (docIndex != null) { for (Iterator<Long> i= index.keySet().iterator(); i.hasNext();) { long key = (i.next()).longValue(); if (((key >> 32)) == keySetNumber) { int typefp = (int)key; BuiltInAtomicType type = (BuiltInAtomicType)BuiltInType.getSchemaType(typefp); Object indexObject2 = getIndex(doc, keySetNumber, type); if (indexObject2 instanceof String) { // index is under construction XPathException de = new XPathException("Key definition is circular"); de.setXPathContext(context); de.setErrorCode("XTDE0640"); throw de; } HashMap index2 = (HashMap)indexObject2; // NOTE: we've been known to encounter a null index2 here, but it doesn't seem possible if (!index2.isEmpty()) { value = soughtValue.convert(type, true).asAtomic(); ArrayList nodes = (ArrayList)index2.get(getCollationKey(value, type, collation, context)); if (nodes != null) { if (result == null) { result = new ListIterator(nodes); } else { result = new UnionEnumeration(result, new ListIterator(nodes), LocalOrderComparer.getInstance()); } } } } } } if (result == null) { return EmptyIterator.getInstance(); } else { return result; } } } private static Object getCollationKey(AtomicValue value, BuiltInAtomicType itemType, StringCollator collation, XPathContext context) throws XPathException { Object val; if (itemType.equals(BuiltInAtomicType.STRING) || itemType.equals(BuiltInAtomicType.UNTYPED_ATOMIC) || itemType.equals(BuiltInAtomicType.ANY_URI)) { if (collation==null) { val = value.getStringValue(); } else { val = collation.getCollationKey(value.getStringValue()); } } else { val = value.getXPathComparable(false, collation, context); } return val; } /** * Save the index associated with a particular key, a particular item type, * and a particular document. This * needs to be done in such a way that the index is discarded by the garbage collector * if the document is discarded. We therefore use a WeakHashMap indexed on the DocumentInfo, * which returns HashMap giving the index for each key fingerprint. This index is itself another * HashMap. * The methods need to be synchronized because several concurrent transformations (which share * the same KeyManager) may be creating indexes for the same or different documents at the same * time. * @param doc the document being indexed * @param keyFingerprint represents the name of the key definition * @param itemType the primitive type of the values being indexed * @param index the index being saved * @param context the dynamic evaluation context */ private synchronized void putIndex(DocumentInfo doc, int keyFingerprint, AtomicType itemType, Object index, XPathContext context) { if (docIndexes==null) { // it's transient, so it will be null when reloading a compiled stylesheet docIndexes = new HashMap<DocumentInfo, HashMap<Long, Object>>(10); } HashMap<Long, Object> indexRef = docIndexes.get(doc); HashMap<Long, Object> indexList; if (indexRef==null) { indexList = new HashMap<Long, Object>(10); // Ensure there is a firm reference to the indexList for the duration of a transformation // But for keys associated with temporary trees, or documents that have been discarded from // the document pool, keep the reference within the document node itself. Controller controller = context.getController(); if (controller.getDocumentPool().contains(doc)) { context.getController().setUserData(doc, "saxon:key-index-list", indexList); } else { doc.setUserData("saxon:key-index-list", indexList); } docIndexes.put(doc, new HashMap<Long, Object>(indexList)); } else { indexList = indexRef; } indexList.put(Long.valueOf(((long)keyFingerprint)<<32 | itemType.getFingerprint()), index); } /** * Get the index associated with a particular key, a particular source document, * and a particular primitive item type * @param doc the document whose index is required * @param keyFingerprint the name of the key definition * @param itemType the primitive item type of the values being indexed * @return either an index (as a HashMap), or the String "under construction", or null */ private synchronized Object getIndex(DocumentInfo doc, int keyFingerprint, AtomicType itemType) { if (docIndexes==null) { // it's transient, so it will be null when reloading a compiled stylesheet docIndexes = new HashMap<DocumentInfo, HashMap<Long, Object>>(10); } HashMap<Long, Object> docIndex = docIndexes.get(doc); if (docIndex==null) return null; return docIndex.get(Long.valueOf(((long)keyFingerprint)<<32 | itemType.getFingerprint())); } } // This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // This Source Code Form is “Incompatible With Secondary Licenses”, as defined by the Mozilla Public License, v. 2.0.