package org.molgenis.data.elasticsearch.request;
import com.google.common.collect.Iterables;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.index.query.*;
import org.molgenis.data.*;
import org.molgenis.data.QueryRule.Operator;
import org.molgenis.data.elasticsearch.index.MappingsBuilder;
import org.molgenis.data.meta.AttributeType;
import org.molgenis.data.meta.model.Attribute;
import org.molgenis.data.meta.model.EntityType;
import org.molgenis.util.MolgenisDateFormat;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import static java.lang.String.format;
import static org.molgenis.data.elasticsearch.index.ElasticsearchIndexCreator.DEFAULT_ANALYZER;
/**
* Creates Elasticsearch query from MOLGENIS query
*/
public class QueryGenerator implements QueryPartGenerator
{
static final String ATTRIBUTE_SEPARATOR = ".";
@Override
public void generate(SearchRequestBuilder searchRequestBuilder, Query<Entity> query, EntityType entityType)
{
List<QueryRule> queryRules = query.getRules();
if (queryRules == null || queryRules.isEmpty()) return;
QueryBuilder q = createQueryBuilder(queryRules, entityType);
searchRequestBuilder.setQuery(q);
}
public QueryBuilder createQueryBuilder(List<QueryRule> queryRules, EntityType entityType)
{
QueryBuilder queryBuilder;
final int nrQueryRules = queryRules.size();
if (nrQueryRules == 1)
{
// simple query consisting of one query clause
queryBuilder = createQueryClause(queryRules.get(0), entityType);
}
else
{
// boolean query consisting of combination of query clauses
Operator occur = null;
BoolQueryBuilder boolQuery = QueryBuilders.boolQuery();
for (int i = 0; i < nrQueryRules; i += 2)
{
QueryRule queryRule = queryRules.get(i);
// determine whether this query is a 'not' query
if (queryRule.getOperator() == Operator.NOT)
{
occur = Operator.NOT;
queryRule = queryRules.get(i + 1);
i += 1;
}
else if (i + 1 < nrQueryRules)
{
QueryRule occurQueryRule = queryRules.get(i + 1);
Operator occurOperator = occurQueryRule.getOperator();
if (occurOperator == null) throw new MolgenisQueryException("Missing expected occur operator");
switch (occurOperator)
{
case AND:
case OR:
if (occur != null && occurOperator != occur)
{
throw new MolgenisQueryException(
"Mixing query operators not allowed, use nested queries");
}
occur = occurOperator;
break;
// $CASES-OMITTED$
default:
throw new MolgenisQueryException(
"Expected query occur operator instead of [" + occurOperator + "]");
}
}
QueryBuilder queryPartBuilder = createQueryClause(queryRule, entityType);
if (queryPartBuilder == null) continue; // skip SHOULD and DIS_MAX query rules
// add query part to query
switch (occur)
{
case AND:
boolQuery.must(queryPartBuilder);
break;
case OR:
boolQuery.should(queryPartBuilder).minimumNumberShouldMatch(1);
break;
case NOT:
boolQuery.mustNot(queryPartBuilder);
break;
// $CASES-OMITTED$
default:
throw new MolgenisQueryException("Unknown occurence operator [" + occur + "]");
}
}
queryBuilder = boolQuery;
}
return queryBuilder;
}
/**
* Create query clause for query rule
*
* @param queryRule
* @param entityType
* @return query class or null for SHOULD and DIS_MAX query rules
*/
@SuppressWarnings("unchecked")
private QueryBuilder createQueryClause(QueryRule queryRule, EntityType entityType)
{
// create query rule
String queryField = queryRule.getField();
Operator queryOperator = queryRule.getOperator();
Object queryValue = queryRule.getValue();
QueryBuilder queryBuilder;
switch (queryOperator)
{
case AND:
case OR:
case NOT:
throw new MolgenisQueryException("Unexpected query operator [" + queryOperator + ']');
case SHOULD:
BoolQueryBuilder boolQueryBuilder = QueryBuilders.boolQuery();
for (QueryRule subQuery : queryRule.getNestedRules())
{
boolQueryBuilder.should(createQueryClause(subQuery, entityType));
}
queryBuilder = boolQueryBuilder;
break;
case DIS_MAX:
DisMaxQueryBuilder disMaxQueryBuilder = QueryBuilders.disMaxQuery();
for (QueryRule subQuery : queryRule.getNestedRules())
{
disMaxQueryBuilder.add(createQueryClause(subQuery, entityType));
}
disMaxQueryBuilder.tieBreaker((float) 0.0);
if (queryRule.getValue() != null)
{
disMaxQueryBuilder.boost(Float.parseFloat(queryRule.getValue().toString()));
}
queryBuilder = disMaxQueryBuilder;
break;
case EQUALS:
{
// As a general rule, filters should be used instead of queries:
// - for binary yes/no searches
// - for queries on exact values
// Workaround for Elasticsearch Date to String conversion issue
if (queryValue instanceof Date)
{
String[] attributePath = parseAttributePath(queryField);
Attribute attr = getAttribute(entityType, attributePath);
queryValue = getESDateQueryValue((Date) queryValue, attr);
}
FilterBuilder filterBuilder;
String[] attributePath = parseAttributePath(queryField);
Attribute attr = getAttribute(entityType, attributePath);
// construct query part
if (queryValue != null)
{
AttributeType attrType = attr.getDataType();
switch (attrType)
{
case BOOL:
case DATE:
case DATE_TIME:
case DECIMAL:
case INT:
case LONG:
{
filterBuilder = FilterBuilders.termFilter(queryField, queryValue);
filterBuilder = nestedFilterBuilder(attributePath, filterBuilder);
break;
}
case EMAIL:
case ENUM:
case HTML:
case HYPERLINK:
case SCRIPT:
case STRING:
case TEXT:
{
filterBuilder = FilterBuilders
.termFilter(queryField + '.' + MappingsBuilder.FIELD_NOT_ANALYZED, queryValue);
filterBuilder = nestedFilterBuilder(attributePath, filterBuilder);
break;
}
case CATEGORICAL:
case CATEGORICAL_MREF:
case XREF:
case MREF:
case FILE:
case ONE_TO_MANY:
{
if (attributePath.length > 1)
throw new UnsupportedOperationException("Can not filter on references deeper than 1.");
// support both entity as entity id as value
Object queryIdValue = queryValue instanceof Entity ? ((Entity) queryValue)
.getIdValue() : queryValue;
Attribute refIdAttr = attr.getRefEntity().getIdAttribute();
String indexFieldName = getXRefEqualsInSearchFieldName(refIdAttr, queryField);
filterBuilder = FilterBuilders
.nestedFilter(queryField, FilterBuilders.termFilter(indexFieldName, queryIdValue));
break;
}
case COMPOUND:
throw new MolgenisQueryException(
format("Illegal attribute type [%s]", attrType.toString()));
default:
throw new RuntimeException(format("Unknown attribute type [%s]", attrType.toString()));
}
}
else
{
AttributeType dataType = attr.getDataType();
switch (dataType)
{
case BOOL:
case DATE:
case DATE_TIME:
case DECIMAL:
case EMAIL:
case ENUM:
case HTML:
case HYPERLINK:
case INT:
case LONG:
case SCRIPT:
case STRING:
case TEXT:
filterBuilder = FilterBuilders.missingFilter(queryField).existence(true).nullValue(true);
break;
case CATEGORICAL:
case CATEGORICAL_MREF:
case FILE:
case MREF:
case XREF:
Attribute refIdAttr = attr.getRefEntity().getIdAttribute();
String indexFieldName = getXRefEqualsInSearchFieldName(refIdAttr, queryField);
// see https://github.com/elastic/elasticsearch/issues/3495
filterBuilder = FilterBuilders.notFilter(FilterBuilders
.nestedFilter(queryField, FilterBuilders.existsFilter(indexFieldName)));
break;
case COMPOUND:
throw new MolgenisQueryException(
"Illegal data type [" + dataType + "] for operator [" + queryOperator + "]");
default:
throw new RuntimeException("Unknown data type [" + dataType + "]");
}
}
queryBuilder = QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), filterBuilder);
break;
}
case GREATER:
{
if (queryValue == null) throw new MolgenisQueryException("Query value cannot be null");
validateNumericalQueryField(queryField, entityType);
String[] attributePath = parseAttributePath(queryField);
// Workaround for Elasticsearch Date to String conversion issue
if (queryValue instanceof Date)
{
Attribute attr = getAttribute(entityType, attributePath);
queryValue = getESDateQueryValue((Date) queryValue, attr);
}
FilterBuilder filterBuilder = FilterBuilders.rangeFilter(queryField).gt(queryValue);
filterBuilder = nestedFilterBuilder(attributePath, filterBuilder);
queryBuilder = QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), filterBuilder);
break;
}
case GREATER_EQUAL:
{
if (queryValue == null) throw new MolgenisQueryException("Query value cannot be null");
validateNumericalQueryField(queryField, entityType);
String[] attributePath = parseAttributePath(queryField);
// Workaround for Elasticsearch Date to String conversion issue
if (queryValue instanceof Date)
{
Attribute attr = getAttribute(entityType, attributePath);
queryValue = getESDateQueryValue((Date) queryValue, attr);
}
FilterBuilder filterBuilder = FilterBuilders.rangeFilter(queryField).gte(queryValue);
filterBuilder = nestedFilterBuilder(attributePath, filterBuilder);
queryBuilder = QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), filterBuilder);
break;
}
case IN:
{
if (queryValue == null) throw new MolgenisQueryException("Query value cannot be null");
if (!(queryValue instanceof Iterable<?>))
{
throw new MolgenisQueryException(
"Query value must be a Iterable instead of [" + queryValue.getClass().getSimpleName()
+ "]");
}
Iterable<?> iterable = (Iterable<?>) queryValue;
String[] attributePath = parseAttributePath(queryField);
Attribute attr = getAttribute(entityType, attributePath);
AttributeType dataType = attr.getDataType();
FilterBuilder filterBuilder;
switch (dataType)
{
case BOOL:
case DATE:
case DATE_TIME:
case DECIMAL:
case EMAIL:
case ENUM:
case HTML:
case HYPERLINK:
case INT:
case LONG:
case SCRIPT:
case STRING:
case TEXT:
// note: inFilter expects array, not iterable
filterBuilder = FilterBuilders
.inFilter(getFieldName(attr, queryField), Iterables.toArray(iterable, Object.class));
filterBuilder = nestedFilterBuilder(attributePath, filterBuilder);
break;
case CATEGORICAL:
case CATEGORICAL_MREF:
case MREF:
case XREF:
case FILE:
if (attributePath.length > 1)
throw new UnsupportedOperationException("Can not filter on references deeper than 1.");
// support both entity iterable as entity id iterable as value
Iterable<Object> idValues;
if (isEntityIterable(iterable))
{
idValues = Iterables.transform((Iterable<Entity>) iterable, Entity::getIdValue);
}
else
{
idValues = (Iterable<Object>) iterable;
}
// note: inFilter expects array, not iterable
filterBuilder = FilterBuilders.nestedFilter(queryField, FilterBuilders.inFilter(
getXRefEqualsInSearchFieldName(attr.getRefEntity().getIdAttribute(), queryField),
Iterables.toArray(idValues, Object.class)));
break;
case COMPOUND:
throw new MolgenisQueryException(
"Illegal data type [" + dataType + "] for operator [" + queryOperator + "]");
default:
throw new RuntimeException("Unknown data type [" + dataType + "]");
}
queryBuilder = QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), filterBuilder);
break;
}
case LESS:
{
if (queryValue == null) throw new MolgenisQueryException("Query value cannot be null");
validateNumericalQueryField(queryField, entityType);
String[] attributePath = parseAttributePath(queryField);
// Workaround for Elasticsearch Date to String conversion issue
if (queryValue instanceof Date)
{
Attribute attr = getAttribute(entityType, attributePath);
queryValue = getESDateQueryValue((Date) queryValue, attr);
}
FilterBuilder filterBuilder = FilterBuilders.rangeFilter(queryField).lt(queryValue);
filterBuilder = nestedFilterBuilder(attributePath, filterBuilder);
queryBuilder = QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), filterBuilder);
break;
}
case LESS_EQUAL:
{
if (queryValue == null) throw new MolgenisQueryException("Query value cannot be null");
validateNumericalQueryField(queryField, entityType);
String[] attributePath = parseAttributePath(queryField);
// Workaround for Elasticsearch Date to String conversion issue
if (queryValue instanceof Date)
{
Attribute attr = getAttribute(entityType, attributePath);
queryValue = getESDateQueryValue((Date) queryValue, attr);
}
FilterBuilder filterBuilder = FilterBuilders.rangeFilter(queryField).lte(queryValue);
filterBuilder = nestedFilterBuilder(attributePath, filterBuilder);
queryBuilder = QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), filterBuilder);
break;
}
case RANGE:
{
if (queryValue == null) throw new MolgenisQueryException("Query value cannot be null");
if (!(queryValue instanceof Iterable<?>))
{
throw new MolgenisQueryException(
"Query value must be a Iterable instead of [" + queryValue.getClass().getSimpleName()
+ "]");
}
Iterable<?> iterable = (Iterable<?>) queryValue;
validateNumericalQueryField(queryField, entityType);
Iterator<?> iterator = iterable.iterator();
String[] attributePath = parseAttributePath(queryField);
Attribute attr = getAttribute(entityType, attributePath);
Object queryValueFrom = iterator.next();
// Workaround for Elasticsearch Date to String conversion issue
if (queryValueFrom instanceof Date)
{
queryValueFrom = getESDateQueryValue((Date) queryValueFrom, attr);
}
Object queryValueTo = iterator.next();
// Workaround for Elasticsearch Date to String conversion issue
if (queryValueTo instanceof Date)
{
queryValueTo = getESDateQueryValue((Date) queryValueTo, attr);
}
FilterBuilder filterBuilder = FilterBuilders.rangeFilter(queryField).gte(queryValueFrom)
.lte(queryValueTo);
filterBuilder = nestedFilterBuilder(attributePath, filterBuilder);
queryBuilder = QueryBuilders.filteredQuery(QueryBuilders.matchAllQuery(), filterBuilder);
break;
}
case NESTED:
List<QueryRule> nestedQueryRules = queryRule.getNestedRules();
if (nestedQueryRules == null || nestedQueryRules.isEmpty())
{
throw new MolgenisQueryException("Missing nested rules for nested query");
}
queryBuilder = createQueryBuilder(nestedQueryRules, entityType);
break;
case LIKE:
{
String[] attributePath = parseAttributePath(queryField);
Attribute attr = getAttribute(entityType, attributePath);
// construct query part
AttributeType dataType = attr.getDataType();
switch (dataType)
{
case BOOL:
case DATE:
case DATE_TIME:
case DECIMAL:
case COMPOUND:
case INT:
case LONG:
throw new MolgenisQueryException(
"Illegal data type [" + dataType + "] for operator [" + queryOperator + "]");
case CATEGORICAL:
case CATEGORICAL_MREF:
case MREF:
case XREF:
case FILE:
case SCRIPT: // due to size would result in large amount of ngrams
case TEXT: // due to size would result in large amount of ngrams
case HTML: // due to size would result in large amount of ngrams
throw new UnsupportedOperationException(
"Query with operator [" + queryOperator + "] and data type [" + dataType
+ "] not supported");
case EMAIL:
case ENUM:
case HYPERLINK:
case STRING:
queryBuilder = QueryBuilders
.matchQuery(queryField + '.' + MappingsBuilder.FIELD_NGRAM_ANALYZED, queryValue)
.analyzer(DEFAULT_ANALYZER);
queryBuilder = nestedQueryBuilder(attributePath, queryBuilder);
break;
default:
throw new RuntimeException("Unknown data type [" + dataType + "]");
}
break;
}
case SEARCH:
{
if (queryValue == null) throw new MolgenisQueryException("Query value cannot be null");
// 1. attribute: search in attribute
// 2. no attribute: search in all
if (queryField == null)
{
queryBuilder = QueryBuilders.matchPhraseQuery("_all", queryValue).slop(10);
}
else
{
String[] attributePath = parseAttributePath(queryField);
Attribute attr = getAttribute(entityType, attributePath);
// construct query part
AttributeType dataType = attr.getDataType();
switch (dataType)
{
case BOOL:
throw new MolgenisQueryException(
"Cannot execute search query on [" + dataType + "] attribute");
case DATE:
case DATE_TIME:
case DECIMAL:
case EMAIL:
case ENUM:
case HTML:
case HYPERLINK:
case INT:
case LONG:
case SCRIPT:
case STRING:
case TEXT:
queryBuilder = QueryBuilders.matchQuery(queryField, queryValue);
queryBuilder = nestedQueryBuilder(attributePath, queryBuilder);
break;
case CATEGORICAL:
case CATEGORICAL_MREF:
case MREF:
case XREF:
case FILE:
if (attributePath.length > 1)
throw new UnsupportedOperationException("Can not filter on references deeper than 1.");
queryBuilder = QueryBuilders.nestedQuery(queryField,
QueryBuilders.matchQuery(queryField + '.' + "_all", queryValue));
break;
case COMPOUND:
throw new MolgenisQueryException(
"Illegal data type [" + dataType + "] for operator [" + queryOperator + "]");
default:
throw new RuntimeException("Unknown data type [" + dataType + "]");
}
}
break;
}
case FUZZY_MATCH:
{
if (queryValue == null) throw new MolgenisQueryException("Query value cannot be null");
if (queryField == null)
{
queryBuilder = QueryBuilders.matchQuery("_all", queryValue);
}
else
{
Attribute attr = entityType.getAttribute(queryField);
if (attr == null) throw new UnknownAttributeException(queryField);
// construct query part
AttributeType dataType = attr.getDataType();
switch (dataType)
{
case DATE:
case DATE_TIME:
case DECIMAL:
case EMAIL:
case ENUM:
case HTML:
case HYPERLINK:
case INT:
case LONG:
case SCRIPT:
case STRING:
case TEXT:
queryBuilder = QueryBuilders.queryStringQuery(queryField + ":(" + queryValue + ")");
break;
case MREF:
case XREF:
case CATEGORICAL:
case CATEGORICAL_MREF:
case FILE:
queryField = attr.getName() + "." + attr.getRefEntity().getLabelAttribute().getName();
queryBuilder = QueryBuilders.nestedQuery(attr.getName(),
QueryBuilders.queryStringQuery(queryField + ":(" + queryValue + ")"))
.scoreMode("max");
break;
case BOOL:
case COMPOUND:
throw new MolgenisQueryException(
"Illegal data type [" + dataType + "] for operator [" + queryOperator + "]");
default:
throw new RuntimeException("Unknown data type [" + dataType + "]");
}
}
break;
}
case FUZZY_MATCH_NGRAM:
{
if (queryValue == null) throw new MolgenisQueryException("Query value cannot be null");
if (queryField == null)
{
queryBuilder = QueryBuilders.matchQuery("_all", queryValue);
}
else
{
Attribute attr = entityType.getAttribute(queryField);
if (attr == null) throw new UnknownAttributeException(queryField);
// construct query part
AttributeType dataType = attr.getDataType();
switch (dataType)
{
case DATE:
case DATE_TIME:
case DECIMAL:
case EMAIL:
case ENUM:
case HTML:
case HYPERLINK:
case INT:
case LONG:
case SCRIPT:
case STRING:
case TEXT:
queryField = queryField + ".ngram";
queryBuilder = QueryBuilders.queryStringQuery(queryField + ":(" + queryValue + ")");
break;
case MREF:
case XREF:
queryField =
attr.getName() + "." + attr.getRefEntity().getLabelAttribute().getName() + ".ngram";
queryBuilder = QueryBuilders.nestedQuery(attr.getName(),
QueryBuilders.queryStringQuery(queryField + ":(" + queryValue + ")"))
.scoreMode("max");
break;
default:
throw new RuntimeException("Unknown data type [" + dataType + "]");
}
}
break;
}
default:
throw new MolgenisQueryException("Unknown query operator [" + queryOperator + "]");
}
return queryBuilder;
}
private String getFieldName(Attribute attr, String queryField)
{
AttributeType dataType = attr.getDataType();
switch (dataType)
{
case XREF:
case CATEGORICAL:
case CATEGORICAL_MREF:
case MREF:
case FILE:
return queryField;
case BOOL:
case DATE:
case DATE_TIME:
case DECIMAL:
case INT:
case LONG:
return queryField;
case EMAIL:
case ENUM:
case HTML:
case HYPERLINK:
case SCRIPT:
case STRING:
case TEXT:
return new StringBuilder(queryField).append('.').append(MappingsBuilder.FIELD_NOT_ANALYZED).toString();
case COMPOUND:
throw new MolgenisQueryException("Illegal data type [" + dataType + "] not supported");
default:
throw new RuntimeException("Unknown data type [" + dataType + "]");
}
}
private String getXRefEqualsInSearchFieldName(Attribute refIdAttr, String queryField)
{
String indexFieldName = queryField + '.' + refIdAttr.getName();
return getFieldName(refIdAttr, indexFieldName);
}
private void validateNumericalQueryField(String queryField, EntityType entityType)
{
String[] attributePath = parseAttributePath(queryField);
AttributeType dataType = getAttribute(entityType, attributePath).getDataType();
switch (dataType)
{
case DATE:
case DATE_TIME:
case DECIMAL:
case INT:
case LONG:
break;
case BOOL:
case CATEGORICAL:
case CATEGORICAL_MREF:
case COMPOUND:
case EMAIL:
case ENUM:
case FILE:
case HTML:
case HYPERLINK:
case MREF:
case SCRIPT:
case STRING:
case TEXT:
case XREF:
throw new MolgenisQueryException("Range query not allowed for type [" + dataType + "]");
default:
throw new RuntimeException("Unknown data type [" + dataType + "]");
}
}
private boolean isEntityIterable(Iterable<?> iterable)
{
Iterator<?> it = iterable.iterator();
boolean isEntity = it.hasNext() && (it.next() instanceof Entity);
return isEntity;
}
private String[] parseAttributePath(String queryField)
{
return queryField.split("\\" + ATTRIBUTE_SEPARATOR);
}
/**
* Wraps the filter in a nested filter when a query is done on a reference entity. Returns the original filter when
* it is applied to the current entity.
*/
private FilterBuilder nestedFilterBuilder(String[] attributePath, FilterBuilder filterBuilder)
{
if (attributePath.length == 1)
{
return filterBuilder;
}
else if (attributePath.length == 2)
{
return FilterBuilders.nestedFilter(attributePath[0], filterBuilder);
}
else
{
throw new UnsupportedOperationException("Can not filter on references deeper than 1.");
}
}
/**
* Wraps the query in a nested query when a query is done on a reference entity. Returns the original query when it
* is applied to the current entity.
*/
private QueryBuilder nestedQueryBuilder(String[] attributePath, QueryBuilder queryBuilder)
{
if (attributePath.length == 1)
{
return queryBuilder;
}
else if (attributePath.length == 2)
{
return QueryBuilders.nestedQuery(attributePath[0], queryBuilder);
}
else
{
throw new UnsupportedOperationException("Can not filter on references deeper than 1.");
}
}
/**
* Returns the target attribute. Looks in the reference entity when it is a nested query.
*/
private Attribute getAttribute(EntityType entityType, String[] attributePath)
{
if (attributePath.length > 2)
throw new UnsupportedOperationException("Can not filter on references deeper than 1.");
if (attributePath.length == 0) throw new MolgenisQueryException("Attribute path length is 0!");
if (attributePath.length == 1)
{
Attribute attr = entityType.getAttribute(attributePath[0]);
if (attr == null) throw new UnknownAttributeException(attributePath[0]);
return attr;
}
else
{
Attribute attr = entityType.getAttribute(attributePath[0]);
if (attr == null) throw new UnknownAttributeException(attributePath[0]);
attr = attr.getRefEntity().getAttribute(attributePath[1]);
if (attr == null) throw new UnknownAttributeException(attributePath[0] + "." + attributePath[1]);
return attr;
}
}
private String getESDateQueryValue(Date queryValue, Attribute attr)
{
if (attr.getDataType() == AttributeType.DATE_TIME)
{
return MolgenisDateFormat.getDateTimeFormat().format(queryValue);
}
return MolgenisDateFormat.getDateFormat().format(queryValue);
}
}