package client.net.sf.saxon.ce.expr; import client.net.sf.saxon.ce.event.SequenceReceiver; import client.net.sf.saxon.ce.expr.instruct.Block; import client.net.sf.saxon.ce.expr.instruct.Choose; import client.net.sf.saxon.ce.expr.instruct.ValueOf; import client.net.sf.saxon.ce.om.Item; import client.net.sf.saxon.ce.om.NodeInfo; import client.net.sf.saxon.ce.om.SequenceIterator; import client.net.sf.saxon.ce.pattern.NodeKindTest; import client.net.sf.saxon.ce.trans.XPathException; import client.net.sf.saxon.ce.tree.util.FastStringBuffer; import client.net.sf.saxon.ce.tree.util.Orphan; import client.net.sf.saxon.ce.type.ItemType; import client.net.sf.saxon.ce.type.Type; import client.net.sf.saxon.ce.type.TypeHierarchy; import client.net.sf.saxon.ce.value.Cardinality; /** * This class performs the first phase of processing in "constructing simple content": * it takes an input sequence, eliminates empty text nodes, and combines adjacent text nodes * into one. * @since 9.3 */ public class AdjacentTextNodeMerger extends UnaryExpression { public AdjacentTextNodeMerger(Expression p0) { super(p0); } @Override public Expression typeCheck(ExpressionVisitor visitor, ItemType contextItemType) throws XPathException { Expression e = super.typeCheck(visitor, contextItemType); if (e != this) { return e; } // This wrapper expression is unnecessary if the base expression cannot return text nodes, // or if it can return at most one item TypeHierarchy th = visitor.getConfiguration().getTypeHierarchy(); if (th.relationship(getBaseExpression().getItemType(th), NodeKindTest.TEXT) == TypeHierarchy.DISJOINT) { return getBaseExpression(); } if (!Cardinality.allowsMany(getBaseExpression().getCardinality())) { return getBaseExpression(); } // In a choose expression, we can push the wrapper down to the action branches (whence it may disappear) if (getBaseExpression() instanceof Choose) { Choose choose = (Choose) getBaseExpression(); Expression[] actions = choose.getActions(); for (int i=0; i<actions.length; i++) { AdjacentTextNodeMerger atm2 = new AdjacentTextNodeMerger(actions[i]); actions[i] = atm2.typeCheck(visitor, contextItemType); } return choose; } // In a Block expression, check whether adjacent text nodes can occur (used in test strmode089) // Code deleted: if (getBaseExpression() instanceof Block) { Block block = (Block) getBaseExpression(); Expression[] actions = block.getChildren(); boolean prevtext = false; boolean needed = false; boolean maybeEmpty = false; for (int i=0; i<actions.length; i++) { boolean maybetext; if (actions[i] instanceof ValueOf) { maybetext = true; Expression content = ((ValueOf)actions[i]).getContentExpression(); if (content instanceof StringLiteral) { // if it's empty, we could remove it now, but that's awkward and probably doesn't happen maybeEmpty |= ((StringLiteral)content).getStringValue().length() == 0; } else { maybeEmpty = true; } } else { maybetext = th.relationship(actions[i].getItemType(th), NodeKindTest.TEXT) != TypeHierarchy.DISJOINT; maybeEmpty |= maybetext; } if (prevtext && maybetext) { needed = true; break; // may contain adjacent text nodes } if (maybetext && Cardinality.allowsMany(actions[i].getCardinality())) { needed = true; break; // may contain adjacent text nodes } prevtext = maybetext; } if (!needed) { // We don't need to merge adjacent text nodes, we only need to remove empty ones. if (maybeEmpty) { return new EmptyTextNodeRemover(block); } else { return block; } } } return this; } /** * Determine the data type of the expression, if possible. The default * implementation for unary expressions returns the item type of the operand * @param th the type hierarchy cache * @return the item type of the items in the result sequence, insofar as this * is known statically. */ @Override public ItemType getItemType(TypeHierarchy th) { return getBaseExpression().getItemType(th); } @Override public int computeCardinality() { return getBaseExpression().getCardinality() | StaticProperty.ALLOWS_ZERO; } /** * An implementation of Expression must provide at least one of the methods evaluateItem(), iterate(), or process(). * This method indicates which of these methods is prefered. */ public int getImplementationMethod() { return Expression.PROCESS_METHOD | Expression.ITERATE_METHOD; } /** * Return an Iterator to iterate over the values of a sequence. The value of every * expression can be regarded as a sequence, so this method is supported for all * expressions. This default implementation handles iteration for expressions that * return singleton values: for non-singleton expressions, the subclass must * provide its own implementation. * @param context supplies the context for evaluation * @return a SequenceIterator that can be used to iterate over the result * of the expression * @throws client.net.sf.saxon.ce.trans.XPathException * if any dynamic error occurs evaluating the * expression */ @Override public SequenceIterator iterate(XPathContext context) throws XPathException { return new AdjacentTextNodeMergingIterator(getBaseExpression().iterate(context)); } /** * Process the instruction, without returning any tail calls * @param context The dynamic context, giving access to the current node, * the current variables, etc. */ public void process(XPathContext context, int locationId, int options) throws XPathException { SequenceReceiver out = context.getReceiver(); FastStringBuffer fsb = new FastStringBuffer(FastStringBuffer.MEDIUM); SequenceIterator iter = getBaseExpression().iterate(context); boolean prevText = false; while (true) { Item item = iter.next(); if (item == null) { break; } if (isTextNode(item)) { CharSequence s = item.getStringValueCS(); if (s.length() > 0) { fsb.append(s); prevText = true; } } else { if (prevText) { out.characters(fsb); } prevText = false; fsb.setLength(0); out.append(item, options); } } if (prevText) { out.characters(fsb); } } /** * AdjacentTextNodeMergingIterator is an iterator that eliminates zero-length text nodes * and merges adjacent text nodes from the underlying iterator */ public static class AdjacentTextNodeMergingIterator implements SequenceIterator { private SequenceIterator base; private Item current; private Item next; private int position = 0; public AdjacentTextNodeMergingIterator(SequenceIterator base) throws XPathException { this.base = base; next = base.next(); } public Item next() throws XPathException { current = next; if (current == null) { position = -1; return null; } if (next != null) { next = base.next(); } if (isTextNode(current)) { FastStringBuffer fsb = new FastStringBuffer(FastStringBuffer.MEDIUM); fsb.append(current.getStringValueCS()); while (next != null && isTextNode(next)) { fsb.append(next.getStringValueCS()); next = base.next(); } if (fsb.length() == 0) { return next(); } else { Orphan o = new Orphan(((NodeInfo)current).getConfiguration()); o.setNodeKind(Type.TEXT); o.setStringValue(fsb); current = o; position++; return current; } } else { position++; return current; } } public Item current() { return current; } public int position() { return position; } public SequenceIterator getAnother() throws XPathException { return new AdjacentTextNodeMergingIterator(base.getAnother()); } public int getProperties() { return 0; } } /** * Ask whether an item is a text node * @param item the item in question * @return true if the item is a node of kind text */ public static boolean isTextNode(Item item) { return item instanceof NodeInfo && ((NodeInfo)item).getNodeKind() == Type.TEXT; } } // This Source Code Form is subject to the terms of the Mozilla Public License, v. 2.0. // If a copy of the MPL was not distributed with this file, You can obtain one at http://mozilla.org/MPL/2.0/. // This Source Code Form is “Incompatible With Secondary Licenses”, as defined by the Mozilla Public License, v. 2.0.