/*
 *  eXist Open Source Native XML Database
 *  Copyright (C) 2001-09 The eXist Project
 *  http://exist-db.org
 *
 *  This program is free software; you can redistribute it and/or
 *  modify it under the terms of the GNU Lesser General Public License
 *  as published by the Free Software Foundation; either version 2
 *  of the License, or (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU Lesser General Public License for more details.
 *
 *  You should have received a copy of the GNU Lesser General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *  $Id$
 */
package org.exist.xquery.functions.text;

import java.util.ArrayList;
import java.util.List;

import org.exist.dom.Match;
import org.exist.dom.NodeProxy;
import org.exist.dom.QName;
import org.exist.dom.TextImpl;
import org.exist.memtree.MemTreeBuilder;
import org.exist.util.FastQSort;
import org.exist.util.XMLString;
import org.exist.xquery.BasicFunction;
import org.exist.xquery.Cardinality;
import org.exist.xquery.FunctionCall;
import org.exist.xquery.FunctionSignature;
import org.exist.xquery.XPathException;
import org.exist.xquery.XQueryContext;
import org.exist.xquery.value.FunctionParameterSequenceType;
import org.exist.xquery.value.FunctionReturnSequenceType;
import org.exist.xquery.value.FunctionReference;
import org.exist.xquery.value.NodeValue;
import org.exist.xquery.value.Sequence;
import org.exist.xquery.value.SequenceIterator;
import org.exist.xquery.value.SequenceType;
import org.exist.xquery.value.StringValue;
import org.exist.xquery.value.Type;
import org.exist.xquery.value.ValueSequence;
import org.w3c.dom.DOMException;

/**
 * XQuery function <code>highlight-matches</code> of the text module.
 *
 * For every stored text node passed in, the fulltext match offsets recorded
 * on the node are looked up. The text between matches is copied into
 * in-memory text nodes; each matched substring is handed to a callback
 * function and whatever the callback returns is inserted in its place.
 * In-memory nodes are passed through unchanged.
 */
public class HighlightMatches extends BasicFunction {

    public final static FunctionSignature signature = new FunctionSignature(
        new QName("highlight-matches", TextModule.NAMESPACE_URI, TextModule.PREFIX),
        "Highlight matching strings within text nodes that resulted from a fulltext search. " +
        "When searching with one of the fulltext operators or functions, eXist keeps track of " +
        "the fulltext matches within the text. Usually, the serializer will mark those matches by enclosing them " +
        "into an 'exist:match' element. One can then use an XSLT stylesheet to replace those match elements " +
        "and highlight matches to the user. However, this is not always possible, so Instead of using an XSLT " +
        "to post-process the serialized output, the " +
        "highlight-matches function provides direct access to the matching portions of the text within XQuery. " +
        "The function takes a sequence of text nodes as first argument $source and a callback function (defined with " +
        "util:function) as second parameter. $parameters may contain a sequence of additional values that will be passed " +
        "to the callback functions third parameter. Text nodes without matches will be returned as they are. However, " +
        "if the text contains a match marker, the matching character sequence is reported to the callback function, and the " +
        "result of the function call is inserted into the resulting node set where the matching sequence occurred. For example, " +
        "you can use this to mark all matching terms with a <span class=\"highlight\">abc</span>.",
        new SequenceType[]{
            new FunctionParameterSequenceType("source", Type.TEXT, Cardinality.ZERO_OR_MORE,
                "The sequence of text nodes"),
            new FunctionParameterSequenceType("callback-function-ref", Type.FUNCTION_REFERENCE, Cardinality.EXACTLY_ONE,
                "The callback function (defined with util:function)"),
            new FunctionParameterSequenceType("parameters", Type.ITEM, Cardinality.ZERO_OR_MORE,
                "The sequence of additional values that will be passed to the callback functions third parameter.")
        },
        new FunctionReturnSequenceType(Type.NODE, Cardinality.ZERO_OR_MORE,
            "the source with the added highlights"));

    public HighlightMatches(XQueryContext context) {
        super(context, signature);
    }

    /**
     * Evaluates the function.
     *
     * @param args args[0]: the source nodes; args[1]: the callback function
     *             reference; args[2]: extra values forwarded to the callback
     * @param contextSequence not used by this function
     * @return the empty sequence if args[0] is empty; otherwise a sequence
     *         holding, in input order, the untouched in-memory nodes and the
     *         text/callback results produced for stored nodes
     * @throws XPathException if the callback or a sequence operation fails
     */
    public Sequence eval(Sequence[] args, Sequence contextSequence) throws XPathException {
        if (args[0].isEmpty())
            return Sequence.EMPTY_SEQUENCE;

        FunctionReference func = (FunctionReference) args[1].itemAt(0);
        FunctionCall call = func.getFunctionCall();

        context.pushDocumentContext();
        try {
            MemTreeBuilder builder = context.getDocumentBuilder();
            ValueSequence result = new ValueSequence();
            for (SequenceIterator i = args[0].iterate(); i.hasNext(); ) {
                NodeValue v = (NodeValue) i.nextItem();
                if (v.getImplementationType() == NodeValue.IN_MEMORY_NODE) {
                    // in-memory nodes are returned as they are
                    result.add(v);
                } else {
                    processText(builder, (NodeProxy) v, result, call, args[2]);
                }
            }
            return result;
        } finally {
            // Always undo the push above, also when the callback or the
            // text processing aborts with an exception.
            context.popDocumentContext();
        }
    }

    /**
     * Splits one stored text node at its match offsets and appends the
     * pieces to <code>result</code>: unmatched stretches become new text
     * nodes created through <code>builder</code>, matched stretches are
     * replaced by the callback's return value.
     *
     * The callback is invoked with three arguments: the matched string,
     * the node proxy and <code>extraArgs</code>.
     *
     * @param builder   builder used to create the in-memory text nodes
     * @param proxy     the stored node; its node is cast to TextImpl
     * @param result    sequence receiving the output
     * @param callback  function called once per match offset
     * @param extraArgs passed through to the callback as third argument
     */
    private void processText(MemTreeBuilder builder, NodeProxy proxy, Sequence result,
            FunctionCall callback, Sequence extraArgs) throws DOMException, XPathException {
        TextImpl text = (TextImpl) proxy.getNode();
        List offsets = collectOffsets(proxy.getMatches(), text);

        if (offsets.isEmpty()) {
            // nothing recorded for this node: copy the text unchanged
            addTextNode(builder, result, builder.characters(text.getXMLString()));
            return;
        }

        FastQSort.sort(offsets, 0, offsets.size() - 1);

        XMLString str = text.getXMLString();
        int pos = 0;
        for (int i = 0; i < offsets.size(); i++) {
            Match.Offset offset = (Match.Offset) offsets.get(i);
            int start = offset.getOffset();
            // note: substring is called here with (start, length),
            // not (start, end)
            if (start > pos) {
                addTextNode(builder, result, builder.characters(str.substring(pos, start - pos)));
            }
            Sequence params[] = {
                new StringValue(str.substring(start, offset.getLength())),
                proxy,
                extraArgs
            };
            result.addAll(callback.evalFunction(null, null, params));
            pos = start + offset.getLength();
        }
        if (pos < str.length()) {
            // trailing text after the last match
            addTextNode(builder, result, builder.characters(str.substring(pos, str.length() - pos)));
        }
    }

    /**
     * Walks the match chain starting at <code>first</code> and gathers all
     * offsets of those matches whose node id equals the id of
     * <code>text</code>.
     *
     * @return the (unsorted) offsets; empty if <code>first</code> is null
     *         or no match refers to the text node
     */
    private List collectOffsets(Match first, TextImpl text) {
        List offsets = new ArrayList();
        for (Match next = first; next != null; next = next.getNextMatch()) {
            if (next.getNodeId().equals(text.getNodeId())) {
                int freq = next.getFrequency();
                for (int i = 0; i < freq; i++) {
                    offsets.add(next.getOffset(i));
                }
            }
        }
        return offsets;
    }

    /**
     * Appends the node with number <code>nodeNr</code> of the builder's
     * document to <code>result</code>.
     */
    private void addTextNode(MemTreeBuilder builder, Sequence result, int nodeNr)
            throws XPathException {
        result.add(builder.getDocument().getNode(nodeNr));
    }
}