// NodeQueryBuilder.java example
//
// Explorer
// lux-master
// - src
package lux.query.parser;

import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import lux.index.analysis.WildcardAnalyzer;

import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.queryparser.xml.QueryBuilder;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.w3c.dom.Element;

/**
 * Builds Lucene queries for node (element/attribute) text from an XML query element.
 *
 * <p>Each generated term has the form {@code <encoded-qname>:<token>}, where the
 * encoded QName is produced from the {@code qName} attribute and the prefix bindings
 * registered through {@link #bindNamespacePrefix(String, String)}. When no QName is
 * given, the term is just the analyzed token.
 *
 * <p>The namespace prefix map is a plain {@link HashMap} mutated by
 * {@link #bindNamespacePrefix(String, String)} and {@link #clearNamespaces()}; this
 * class performs no synchronization of its own.
 */
public class NodeQueryBuilder implements QueryBuilder {

    /** A QName of the form {@code prefix:rest}; group 1 is the prefix, group 2 the rest. */
    private static final Pattern PREFIXED_QNAME = Pattern.compile("([^{:]+):(.*)");

    /** Any single whitespace character; used with {@code find()} to detect phrases. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s");

    private final Map<String,String> nsMap;
    private final Analyzer analyzer;
    private final Analyzer wildcardAnalyzer;
    private final boolean namespaceAware;

    /**
     * @param analyzer the analyzer applied to query text that is not treated as a wildcard term
     * @param namespaceAware when true, a QName whose prefix has no binding is rejected
     *        with a {@link ParserException}; when false, such a QName is used verbatim
     */
    public NodeQueryBuilder(Analyzer analyzer, boolean namespaceAware) {
        this.analyzer = analyzer;
        this.wildcardAnalyzer = new WildcardAnalyzer();
        nsMap = new HashMap<String, String>();
        this.namespaceAware = namespaceAware;
    }

    /**
     * Builds a query from the element's {@code fieldName} and {@code qName} attributes
     * (both looked up with inheritance), its text content, and its {@code boost}
     * attribute (1.0 when absent).
     *
     * @param e the XML query element
     * @return the query produced by {@link #parseQueryTerm(String, String, String, float)}
     * @throws ParserException if the attribute lookup fails or the term cannot be parsed
     */
    @Override
    public Query getQuery(Element e) throws ParserException {
        String fieldName = DOMUtils.getAttributeWithInheritanceOrFail(e, "fieldName");
        String qName = DOMUtils.getAttributeWithInheritance(e, "qName");
        String text = DOMUtils.getText(e);
        float boost = DOMUtils.getAttribute(e, "boost", 1.0f);
        return parseQueryTerm(fieldName, qName, text, boost);
    }

    /**
     * Binds a namespace prefix to a URI, or removes the binding when the URI is
     * null or empty.
     *
     * @param prefix the namespace prefix
     * @param namespaceURI the URI to bind, or null/empty to unbind the prefix
     */
    void bindNamespacePrefix (String prefix, String namespaceURI) {
        if (StringUtils.isEmpty(namespaceURI)) {
            nsMap.remove(prefix);
        } else {
            nsMap.put(prefix, namespaceURI);
        }
    }

    /** Removes all namespace prefix bindings. */
    void clearNamespaces () {
        nsMap.clear();
    }

    /**
     * Creates a query for the given text, scoped to a node name when one is supplied.
     *
     * <ul>
     * <li>No tokens after analysis: {@link MatchAllDocsQuery} (the boost is not applied).</li>
     * <li>One token: a {@link WildcardQuery} if the QName prefix is {@code *} or the text
     *     contains '*' or '?' and no whitespace; otherwise a {@link TermQuery}.</li>
     * <li>Several tokens: a {@link PhraseQuery}; wildcard characters in text that
     *     contains whitespace are not treated as wildcards.</li>
     * </ul>
     *
     * @param fieldName the index field to query
     * @param qName the node name, optionally prefixed ({@code prefix:local} or
     *        {@code *:local}); may be null or empty
     * @param text the text to search for
     * @param boost the boost applied to the resulting term, wildcard or phrase query
     * @return the query
     * @throws ParserException if the prefix is unbound and this builder is namespace-aware,
     *         if a wildcard query would span more than one token, or if analyzing the
     *         text fails with an I/O error
     */
    Query parseQueryTerm(final String fieldName, final String qName, final String text, final float boost) throws ParserException {
        StringBuilder termText = new StringBuilder();
        boolean isWild = appendQNamePrefix(termText, qName);
        final int prefixLength = termText.length();

        final Analyzer termAnalyzer;
        boolean hasWildcardChars = text.indexOf('*') >= 0 || text.indexOf('?') >= 0;
        // Wildcard characters are ignored when the text contains any whitespace,
        // since it is then handled as a phrase. find() is used so that text spanning
        // several lines is recognized as containing whitespace too.
        if (hasWildcardChars && !WHITESPACE.matcher(text).find()) {
            isWild = true;
            termAnalyzer = wildcardAnalyzer;
        } else {
            termAnalyzer = analyzer;
        }

        // We assume that we are never embedded in a SpanQuery of any sort, which is valid
        // because the query syntax doesn't provide any means of specifying Spans.
        PhraseQuery pq = new PhraseQuery();
        Term term = null;
        try {
            TokenStream ts = termAnalyzer.tokenStream(fieldName, new StringReader(text));
            try {
                CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
                ts.reset();
                while (ts.incrementToken()) {
                    // Reuse the builder: drop the previous token, keep the QName prefix.
                    termText.setLength(prefixLength);
                    termText.append(termAtt.buffer(), 0, termAtt.length());
                    term = new Term(fieldName, termText.toString());
                    pq.add(term);
                }
                ts.end();
            } finally {
                // Release the stream even when analysis fails part-way through.
                ts.close();
            }
        } catch (IOException ioe) {
            // Report the failure rather than returning a query built from partial
            // (or no) tokens, which would silently match the wrong documents.
            throw new ParserException("failed to analyze query text '" + text
                    + "' for field '" + fieldName + "'", ioe);
        }

        Query q;
        if (pq.getTerms().length > 1) {
            if (isWild) {
                throw new ParserException("wildcarded namespace prefix cannot be combined with a multi-word phrase");
            }
            q = pq;
        } else if (term == null) {
            // The analyzer threw all the text away, or it was empty to begin with.
            // TODO: if we have value indexes, we could search explicitly for the empty value
            return new MatchAllDocsQuery();
        } else if (isWild) {
            q = new WildcardQuery(term);
        } else {
            q = new TermQuery(term);
        }
        q.setBoost(boost);
        return q;
    }

    /**
     * Appends the encoded form of {@code qName}, followed by ':', to {@code termText}.
     * Appends nothing when {@code qName} is null or empty.
     *
     * <p>Encodings: {@code *:local} becomes {@code local*:}; {@code prefix:local} with a
     * bound prefix becomes {@code local{namespaceURI}:}; anything else (no prefix, or an
     * unbound prefix when not namespace-aware) becomes {@code qName:}.
     *
     * @return true if the encoded name contains a wildcard (the prefix was {@code *})
     * @throws ParserException if the prefix is unbound and this builder is namespace-aware
     */
    private boolean appendQNamePrefix(StringBuilder termText, String qName) throws ParserException {
        if (StringUtils.isEmpty(qName)) {
            return false;
        }
        Matcher prefixed = PREFIXED_QNAME.matcher(qName);
        if (!prefixed.matches()) {
            termText.append(qName).append(':');
            return false;
        }
        String prefix = prefixed.group(1);
        String name = prefixed.group(2);
        if ("*".equals(prefix)) {
            termText.append(name).append("*:");
            return true;
        }
        String namespaceURI = nsMap.get(prefix);
        if (namespaceURI != null) {
            termText.append(name).append('{').append(namespaceURI).append("}:");
        } else if (namespaceAware) {
            throw new ParserException ("unbound namespace prefix '" + prefix + "'");
        } else {
            termText.append(qName).append(':');
        }
        return false;
    }

}