package lux.query.parser;
import java.io.IOException;
import java.io.StringReader;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import lux.index.analysis.WildcardAnalyzer;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.xml.DOMUtils;
import org.apache.lucene.queryparser.xml.ParserException;
import org.apache.lucene.queryparser.xml.QueryBuilder;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.WildcardQuery;
import org.w3c.dom.Element;
public class NodeQueryBuilder implements QueryBuilder {
private final Map<String,String> nsMap;
private final Analyzer analyzer;
private final Analyzer wildcardAnalyzer;
private final boolean namespaceAware;
public NodeQueryBuilder(Analyzer analyzer, boolean namespaceAware) {
this.analyzer = analyzer;
this.wildcardAnalyzer = new WildcardAnalyzer();
nsMap = new HashMap<String, String>();
this.namespaceAware = namespaceAware;
}
@Override
public Query getQuery(Element e) throws ParserException {
String fieldName=DOMUtils.getAttributeWithInheritanceOrFail(e,"fieldName");
String qName=DOMUtils.getAttributeWithInheritance(e,"qName");
String text=DOMUtils.getText(e);
float boost = DOMUtils.getAttribute (e, "boost", 1.0f);
return parseQueryTerm(fieldName, qName, text, boost);
}
void bindNamespacePrefix (String prefix, String namespaceURI) {
if (StringUtils.isEmpty(namespaceURI)) {
nsMap.remove(prefix);
} else {
nsMap.put(prefix, namespaceURI);
}
}
void clearNamespaces () {
nsMap.clear();
}
Query parseQueryTerm(final String fieldName, final String qName, final String text, final float boost) throws ParserException {
StringBuilder termText = new StringBuilder();
boolean isWild = false;
if (StringUtils.isNotEmpty(qName)) {
if (qName.matches("[^{:]+:.*")) {
String[] parts = qName.split(":", 2);
String prefix = parts[0];
String name = parts[1];
if ("*".equals(prefix)) {
termText.append(name).append("*:");
isWild = true;
} else {
String namespaceURI = nsMap.get(prefix);
if (namespaceURI == null) {
if (namespaceAware) {
throw new ParserException ("unbound namespace prefix '" + prefix + "'");
}
termText.append(qName).append(':');
} else {
termText.append(name).append('{').append(namespaceURI).append("}:");
}
}
}
else {
termText.append(qName).append(':');
}
}
int prefixLength = termText.length();
Analyzer termAnalyzer;
if ((text.indexOf('*') >= 0 || text.indexOf('?') >= 0)) {
if (text.matches(".*\\s.*")) {
// warning? we just ignore the "wildcards" if this is a phrase query
termAnalyzer = analyzer;
} else {
isWild = true;
termAnalyzer = wildcardAnalyzer;
}
} else {
termAnalyzer = analyzer;
}
// We assume that we are never embedded in a SpanQuery of any sort, which is valid
// because the query syntax doesn't provide any means of specifying Spans.
PhraseQuery pq=new PhraseQuery();
Term term = null;
try {
TokenStream ts = termAnalyzer.tokenStream(fieldName, new StringReader(text));
CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
ts.reset();
if (ts.incrementToken()) {
termText.append(termAtt.buffer(), 0, termAtt.length());
term = new Term(fieldName, termText.toString());
pq.add(term);
while (ts.incrementToken()) {
termText.setLength(prefixLength);
termText.append(termAtt.buffer(), 0, termAtt.length());
// create from previous to save fieldName.intern overhead
term = new Term (fieldName, termText.toString());
pq.add(term);
}
}
ts.end();
ts.close();
}
catch (IOException ioe) { }
Query q;
if (pq.getTerms().length > 1) {
if (isWild) {
throw new ParserException("wildcarded namespace prefix cannot be combined with a multi-word phrase");
}
q = pq;
} else {
if (term == null) {
// if the analyzer threw all the text away, or it was empty to begin with
// q = new WildcardQuery (new Term(fieldName, termText.toString() + "*"));
// q = new TermQuery (new Term(fieldName, termText.toString()));
// TODO: if we have value indexes, we could search explicitly for the empty value
return new MatchAllDocsQuery();
}
else if (isWild) {
q = new WildcardQuery(term);
} else {
q = new TermQuery (term);
}
}
q.setBoost(boost);
return q;
}
}