package lux.index.analysis;

import java.io.IOException;
import java.util.Map;

import lux.index.attribute.QNameAttribute;
import lux.xml.QName;

import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.CharsRef;

/**
 * Expands each input term into one token per node name (QName) supplied by the
 * {@link QNameAttribute}. Every emitted token has the form
 * <code>&lt;node-name&gt;:&lt;term&gt;</code>; tokens after the first one produced for a given
 * input term are stacked on the same position (position increment 0).
 *
 * <p>When processing is namespace-aware the node name is serialized in reverse-Clark
 * format: <code>localname{namespace-uri}</code>. Otherwise it is serialized as a lexical
 * QName, <code>prefix:localname</code>, without regard to any namespace uri binding.</p>
 *
 * TODO: remove the unused namespace-unaware processing, or put it in another class?
 */
public final class QNameTokenFilter extends TokenFilter {

    private final QNameAttribute qnameAtt = addAttribute(QNameAttribute.class);
    private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posAtt = addAttribute(PositionIncrementAttribute.class);

    private final ElementVisibility defVis;
    private final Map<String,ElementVisibility> elVis;

    private boolean namespaceAware;

    /** Copy of the most recent term read from the input; it is re-emitted once per node name. */
    private CharsRef term;

    /**
     * Creates a filter whose default element visibility is {@link ElementVisibility#OPAQUE}
     * and which has no per-element visibility map.
     *
     * @param input the token stream to wrap
     */
    protected QNameTokenFilter(TokenStream input) {
        this(input, ElementVisibility.OPAQUE, null);
    }

    /**
     * Creates a namespace-aware filter.
     *
     * @param input the token stream to wrap
     * @param defVis the default element visibility, returned by {@link #getDefaultVisibility()}
     * @param elVis per-element visibility settings, returned by {@link #getElementVisibility()};
     *        may be null
     */
    protected QNameTokenFilter(TokenStream input, ElementVisibility defVis, Map<String,ElementVisibility> elVis) {
        super(input);
        this.defVis = defVis;
        this.elVis = elVis;
        this.namespaceAware = true;
        this.term = new CharsRef();
    }

    /**
     * Sanity check only: asserts that the given stream exposes the same
     * {@link CharTermAttribute} instance as this filter's current input. No state is
     * changed, and nothing happens at all when assertions are disabled.
     *
     * @param inputAgain the stream expected to share attributes with the wrapped input
     */
    public final void reset(TokenStream inputAgain) {
        assert (input.getAttribute(CharTermAttribute.class) == inputAgain.getAttribute(CharTermAttribute.class));
    }

    /**
     * Emits the next <code>&lt;node-name&gt;:&lt;term&gt;</code> token. A new term is pulled
     * from the input whenever the node names for the previous term have been used up (or
     * the name iteration is at its first entry); otherwise the saved term is reused
     * with a position increment of 0.
     *
     * @return false once the wrapped input has no more tokens, true otherwise
     * @throws IOException if the wrapped input fails while advancing
     */
    @Override
    public boolean incrementToken() throws IOException {
        final boolean moreNamesForCurrentTerm = qnameAtt.hasNext() && !qnameAtt.onFirst();
        if (moreNamesForCurrentTerm) {
            // not the first token emitted for this term: stack it on the same position
            posAtt.setPositionIncrement(0);
        } else {
            if (!input.incrementToken()) {
                return false;
            }
            // remember the raw term; the term attribute is overwritten with the prefixed form below
            term.copyChars(termAtt.buffer(), 0, termAtt.length());
        }

        final QName qname = qnameAtt.next();
        termAtt.setEmpty();
        appendNodeName(qname);
        termAtt.append(':');
        termAtt.append(term);
        return true;
    }

    /**
     * Appends the serialized node name to the term attribute: the encoded name when
     * namespace-aware, otherwise the optional <code>prefix:</code> followed by the local part.
     */
    private void appendNodeName(QName qname) {
        if (namespaceAware) {
            termAtt.append(qname.getEncodedName());
            return;
        }
        if (qname.getPrefix().length() > 0) {
            termAtt.append(qname.getPrefix()).append(':');
        }
        termAtt.append(qname.getLocalPart());
    }

    /**
     * @return if true, indexed QNames include the namespace URI; otherwise they include the prefix.
     */
    public boolean isNamespaceAware() {
        return namespaceAware;
    }

    /**
     * @param namespaceAware if true, indexed QNames include the namespace URI; otherwise
     *        they include the prefix.
     */
    public void setNamespaceAware(boolean namespaceAware) {
        this.namespaceAware = namespaceAware;
    }

    /**
     * @return the default element visibility supplied at construction
     */
    public ElementVisibility getDefaultVisibility() {
        return defVis;
    }

    /**
     * @return the per-element visibility map supplied at construction; may be null
     */
    public Map<String, ElementVisibility> getElementVisibility() {
        return elVis;
    }

    /**
     * @return the token stream wrapped by this filter
     */
    public TokenStream getInput() {
        return input;
    }

}

/*
 * This Source Code Form is subject to the terms of the Mozilla Public License,
 * v. 2.0. If a copy of the MPL was not distributed with this file, You can
 * obtain one at http://mozilla.org/MPL/2.0/.
 */