package org.molgenis.data.elasticsearch.request; import org.elasticsearch.action.search.SearchRequestBuilder; import org.elasticsearch.search.aggregations.AbstractAggregationBuilder; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.bucket.nested.NestedBuilder; import org.elasticsearch.search.aggregations.bucket.nested.ReverseNestedBuilder; import org.elasticsearch.search.aggregations.metrics.cardinality.CardinalityBuilder; import org.molgenis.data.elasticsearch.index.MappingsBuilder; import org.molgenis.data.meta.AttributeType; import org.molgenis.data.meta.model.Attribute; import java.util.ArrayList; import java.util.Collections; import java.util.LinkedList; import java.util.List; import static java.lang.Integer.MAX_VALUE; import static org.molgenis.data.support.EntityTypeUtils.isReferenceType; public class AggregateQueryGenerator { public static final String AGGREGATION_MISSING_POSTFIX = "_missing"; public static final String AGGREGATION_REVERSE_POSTFIX = "_reverse"; public static final String AGGREGATION_NESTED_POSTFIX = "_nested"; public static final String AGGREGATION_DISTINCT_POSTFIX = "_distinct"; public static final String AGGREGATION_TERMS_POSTFIX = "_terms"; public void generate(SearchRequestBuilder searchRequestBuilder, Attribute aggAttr1, Attribute aggAttr2, Attribute aggAttrDistinct) { // validate request if (aggAttr1 == null) { throw new IllegalArgumentException("Aggregation requires at least one isAggregatable attribute"); } if (!aggAttr1.isAggregatable()) { throw new IllegalArgumentException("Attribute is not isAggregatable [ " + aggAttr1.getName() + "]"); } if (aggAttr2 != null && !aggAttr2.isAggregatable()) { throw new IllegalArgumentException("Attribute is not isAggregatable [ " + aggAttr2.getName() + "]"); } if (aggAttrDistinct != null && aggAttrDistinct.isNillable()) { // see: https://github.com/molgenis/molgenis/issues/1938 throw new IllegalArgumentException("Distinct isAggregatable attribute cannot be nillable"); } AttributeType dataType1 = aggAttr1.getDataType(); if (aggAttr1.isNillable() && isReferenceType(aggAttr1)) { // see: https://github.com/molgenis/molgenis/issues/1937 throw new IllegalArgumentException( "Aggregatable attribute of type [" + dataType1 + "] cannot be nillable"); } if (aggAttr2 != null) { // see: https://github.com/molgenis/molgenis/issues/1937 AttributeType dataType2 = aggAttr2.getDataType(); if (aggAttr2.isNillable() && isReferenceType(aggAttr2)) { throw new IllegalArgumentException( "Aggregatable attribute of type [" + dataType2 + "] cannot be nillable"); } } // collect aggregates searchRequestBuilder.setSize(0); LinkedList<Attribute> aggAttrs = new LinkedList<Attribute>(); aggAttrs.add(aggAttr1); if (aggAttr2 != null) { aggAttrs.add(aggAttr2); } List<AggregationBuilder<?>> aggregationBuilders = createAggregations(aggAttrs, null, aggAttrDistinct); // add all aggregations to builder for (AggregationBuilder<?> aggregationBuilder : aggregationBuilders) { searchRequestBuilder.addAggregation(aggregationBuilder); } } private List<AggregationBuilder<?>> createAggregations(LinkedList<Attribute> attrs, Attribute parentAttr, Attribute distinctAttr) { Attribute attr = attrs.pop(); List<AggregationBuilder<?>> aggs = new ArrayList<AggregationBuilder<?>>(); // term aggregation String termsAggName = attr.getName() + AGGREGATION_TERMS_POSTFIX; String termsAggFieldName = getAggregateFieldName(attr); AggregationBuilder<?> termsAgg = AggregationBuilders.terms(termsAggName).size(MAX_VALUE) .field(termsAggFieldName); aggs.add(termsAgg); // missing term aggregation if (attr.isNillable()) { String missingAggName = attr.getName() + AGGREGATION_MISSING_POSTFIX; String missingAggFieldName = attr.getName(); AggregationBuilder<?> missingTermsAgg = AggregationBuilders.missing(missingAggName) .field(missingAggFieldName); aggs.add(missingTermsAgg); } // add distinct term aggregations if (attrs.isEmpty() && distinctAttr != null) { // http://www.elasticsearch.org/guide/en/elasticsearch/reference/1.x/search-aggregations-metrics-cardinality-aggregation.html // The precision_threshold options allows to trade memory for accuracy, and defines a unique count below // which counts are expected to be close to accurate. Above this value, counts might become a bit more // fuzzy. The maximum supported value is 40000, thresholds above this number will have the same effect as a // threshold of 40000. String cardinalityAggName = distinctAttr.getName() + AGGREGATION_DISTINCT_POSTFIX; String cardinalityAggFieldName = getAggregateFieldName(distinctAttr); CardinalityBuilder distinctAgg = AggregationBuilders.cardinality(cardinalityAggName) .field(cardinalityAggFieldName).precisionThreshold(40000L); // CardinalityBuilder does not implement AggregationBuilder interface, so we need some more code AbstractAggregationBuilder wrappedDistinctAgg; if (isNestedType(distinctAttr)) { String nestedAggName = distinctAttr.getName() + AGGREGATION_NESTED_POSTFIX; String nestedAggFieldName = distinctAttr.getName(); NestedBuilder nestedBuilder = AggregationBuilders.nested(nestedAggName).path(nestedAggFieldName); nestedBuilder.subAggregation(distinctAgg); if (isNestedType(attr)) { String reverseAggName = attr.getName() + AggregateQueryGenerator.AGGREGATION_REVERSE_POSTFIX; ReverseNestedBuilder reverseNestedBuilder = AggregationBuilders.reverseNested(reverseAggName); reverseNestedBuilder.subAggregation(nestedBuilder); wrappedDistinctAgg = reverseNestedBuilder; } else { wrappedDistinctAgg = nestedBuilder; } } else { if (isNestedType(attr)) { String reverseAggName = attr.getName() + AggregateQueryGenerator.AGGREGATION_REVERSE_POSTFIX; ReverseNestedBuilder reverseNestedBuilder = AggregationBuilders.reverseNested(reverseAggName); reverseNestedBuilder.subAggregation(distinctAgg); wrappedDistinctAgg = reverseNestedBuilder; } else { wrappedDistinctAgg = distinctAgg; } } // add wrapped distinct term aggregation to aggregations for (AggregationBuilder<?> agg : aggs) { agg.subAggregation(wrappedDistinctAgg); } } // add sub aggregations if (!attrs.isEmpty()) { List<AggregationBuilder<?>> subAggs = createAggregations(attrs, attr, distinctAttr); for (AggregationBuilder<?> agg : aggs) { for (AggregationBuilder<?> subAgg : subAggs) { agg.subAggregation(subAgg); } } } // wrap in nested aggregation is this aggregation is nested if (isNestedType(attr)) { String nestedAggName = attr.getName() + AGGREGATION_NESTED_POSTFIX; String nestedAggFieldName = attr.getName(); NestedBuilder nestedAgg = AggregationBuilders.nested(nestedAggName).path(nestedAggFieldName); for (AggregationBuilder<?> agg : aggs) { nestedAgg.subAggregation(agg); } aggs = Collections.<AggregationBuilder<?>>singletonList(nestedAgg); } // wrap in reverse nested aggregation if parent aggregation is nested if (parentAttr != null && isNestedType(parentAttr)) { String reverseAggName = parentAttr.getName() + AggregateQueryGenerator.AGGREGATION_REVERSE_POSTFIX; ReverseNestedBuilder reverseNestedAgg = AggregationBuilders.reverseNested(reverseAggName); for (AggregationBuilder<?> agg : aggs) { reverseNestedAgg.subAggregation(agg); } aggs = Collections.<AggregationBuilder<?>>singletonList(reverseNestedAgg); } return aggs; } public static boolean isNestedType(Attribute attr) { return isReferenceType(attr); } private String getAggregateFieldName(Attribute attr) { String attrName = attr.getName(); AttributeType dataType = attr.getDataType(); switch (dataType) { case BOOL: case INT: case LONG: case DECIMAL: return attrName; case DATE: case DATE_TIME: case EMAIL: case ENUM: case HTML: case HYPERLINK: case SCRIPT: case STRING: case TEXT: // use non-analyzed field return attrName + '.' + MappingsBuilder.FIELD_NOT_ANALYZED; case CATEGORICAL: case CATEGORICAL_MREF: case XREF: case MREF: case FILE: // use id attribute of nested field return attrName + '.' + getAggregateFieldName(attr.getRefEntity().getIdAttribute()); case COMPOUND: throw new UnsupportedOperationException(); default: throw new RuntimeException("Unknown data type [" + dataType + "]"); } } }