package ch.x42.terye.query;
import java.text.DateFormat;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.jcr.query.InvalidQueryException;
import ch.x42.terye.utils.DateUtils;
/**
* This class represents one query term (e.g. 'msg:hello') that will be
* translated into a Solr query statement.
*/
public class QueryTerm {

    /**
     * Format of allowed query terms: an optional leading '-' (negation), a
     * non-empty property name made of ASCII letters, a colon and the
     * expression (everything after that colon, possibly empty).
     */
    private static final Pattern TERM_PATTERN = Pattern
            .compile("([-])?([a-zA-Z]+):(.*)");

    /** Separator placed between the per-type clauses of the Solr query. */
    private static final String OR = " OR ";

    /** Whether the term was prefixed with '-'. */
    private final boolean negated;

    /** The property name, i.e. the part before the colon. */
    private final String property;

    /** The raw expression, i.e. the part after the colon. */
    private final String expression;

    /**
     * Parses a single query term such as "msg:hello" or "-msg:hello".
     *
     * @param statement the query term to parse
     * @throws InvalidQueryException if the statement is null, has an empty
     *         property name or otherwise doesn't match the allowed format
     */
    public QueryTerm(String statement) throws InvalidQueryException {
        // a null statement is reported like any other malformed term
        // instead of surfacing as a NullPointerException
        Matcher matcher = statement == null ? null : TERM_PATTERN
                .matcher(statement);
        if (matcher == null || !matcher.matches()) {
            throw new InvalidQueryException("Invalid query: " + statement);
        }
        negated = matcher.group(1) != null;
        property = matcher.group(2);
        expression = matcher.group(3);
    }

    /**
     * Translates this term into a Solr query string and returns it.
     *
     * Query terms represent a condition on a property name and since
     * properties are stored as dynamic fields in Solr, the corresponding
     * field name consists of the property name and a type suffix (e.g.
     * "msg_String" for strings). Since we don't know the type of the property
     * in advance (we cannot deduce it from the query), we must create a
     * disjunctive Solr query listing all possible types like this:
     *
     * msg_String:hello OR msg_Boolean:hello OR msg_Long:hello OR ...
     *
     * Since some expressions cannot be converted to specific types (i.e.
     * above 'hello' can not be converted to a long), we must anticipate and
     * not include those types into the query (otherwise Solr throws an
     * exception).
     *
     * XXX: this is hacky... better solution?
     *
     * NOTE(review): for a negated term the types the expression cannot be
     * converted to are left out as well, so e.g. '-msg:hello' emits no
     * msg_Long clause -- confirm that this is intended.
     *
     * @return the clauses for all applicable types, joined with " OR "
     */
    public String toSolrQuery() {
        List<String> terms = new LinkedList<String>();
        // expressions are treated as strings by default
        terms.add(makeTerm("_String"));
        // all strings can be converted to booleans
        terms.add(makeTerm("_Boolean"));
        if (isLong()) {
            terms.add(makeTerm("_Long"));
        }
        if (isDouble()) {
            terms.add(makeTerm("_Double"));
        }
        if (isDate()) {
            terms.add(makeTerm("_Date"));
        }

        // assemble disjunctive query
        StringBuilder query = new StringBuilder();
        for (Iterator<String> it = terms.iterator(); it.hasNext();) {
            query.append(it.next());
            if (it.hasNext()) {
                query.append(OR);
            }
        }
        return query.toString();
    }

    /** Returns true if the expression can be parsed to a long. */
    private boolean isLong() {
        try {
            Long.parseLong(expression);
            return true;
        } catch (NumberFormatException ignored) {
            // not a long: the caller simply omits the corresponding clause
            return false;
        }
    }

    /** Returns true if the expression can be parsed to a double. */
    private boolean isDouble() {
        try {
            Double.parseDouble(expression);
            return true;
        } catch (NumberFormatException ignored) {
            // not a double: the caller simply omits the corresponding clause
            return false;
        }
    }

    /**
     * Returns true if the expression, with all double quotes removed, can be
     * parsed to a date using the format defined by DateUtils.FORMAT.
     */
    private boolean isDate() {
        // a formatter is created per call, so no instance is shared
        DateFormat formatter = new SimpleDateFormat(DateUtils.FORMAT);
        try {
            // parse(String) only needs to match from the start of the text;
            // trailing characters do not cause a failure
            formatter.parse(expression.replace("\"", ""));
            return true;
        } catch (ParseException ignored) {
            // not a date: the caller simply omits the corresponding clause
            return false;
        }
    }

    /**
     * Builds the Solr clause for the field made of the property name and the
     * given type suffix.
     *
     * @param suffix the type suffix of the dynamic field, e.g. "_String"
     * @return "field:expression", or for a negated term a parenthesized
     *         clause that requires the field to exist and to differ from
     *         the expression
     */
    private String makeTerm(String suffix) {
        String field = property + suffix;
        if (negated) {
            // for a negated query we only want the docs that contain
            // the queried field but with a value other than 'expression'
            // (we don't want the docs that don't even contain 'field')
            return "(" + field + ":[* TO *] AND -" + field + ":" + expression
                    + ")";
        }
        return field + ":" + expression;
    }
}