package lux.index.analysis; import lux.xml.Offsets; import net.sf.saxon.s9api.Processor; import net.sf.saxon.s9api.XdmNode; import net.sf.saxon.s9api.XdmNodeKind; import org.apache.lucene.analysis.Analyzer; import org.apache.lucene.analysis.TokenStream; /** * Extracts tokens from an s9api XML document tree (XdmNode) in order to make them * available to Lucene classes that accept TokenStreams, like the indexer and highlighter. */ public final class XmlTextTokenStream extends TextOffsetTokenStream { /** * Creates a TokenStream returning tokens drawn from the text content of the document. * @param fieldName nominally: the field to be analyzed; the analyzer receives this when the * token stream is reset at node boundaries * @param analyzer specifies what text processing to apply to node text * @param wrapped a TokenStream generated by the analyzer * @param doc tokens will be drawn from all of the text in this document * @param offsets if provided, character offsets are captured in this object * In theory this can be used for faster highlighting, but until that is proven, * this should always be null. * @param processor the Saxon XPath/XQuery processor that created the document doc */ public XmlTextTokenStream(String fieldName, Analyzer analyzer, TokenStream wrapped, XdmNode doc, Offsets offsets, Processor processor) { super(fieldName, analyzer, wrapped, doc, offsets, processor); if (qnameTokenFilter == wrapped) { // don't add qnames to our tokens setWrappedTokenStream (qnameTokenFilter.getInput()); } contentIter = new TextIterator(doc); } @Override public boolean updateNodeAtts() { AncestorIterator nodeAncestors = new AncestorIterator(curNode); while (nodeAncestors.hasNext()) { XdmNode e = (XdmNode) nodeAncestors.next(); assert (e.getNodeKind() == XdmNodeKind.ELEMENT); int namecode = e.getUnderlyingNode().getNameCode(); if (eltVis.get(namecode) == ElementVisibility.HIDDEN) { return false; } } return true; } } /* * This Source Code Form is subject to the terms of the Mozilla Public License, * v. 2.0. If a copy of the MPL was not distributed with this file, You can * obtain one at http://mozilla.org/MPL/2.0/. */