/* * Copyright 2016 Red Hat, Inc. and/or its affiliates. *   * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at *   *    http://www.apache.org/licenses/LICENSE-2.0 *   * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.dashbuilder.dataprovider.backend.elasticsearch.rest.impl; import org.dashbuilder.dataprovider.backend.elasticsearch.ElasticSearchClientFactory; import org.dashbuilder.dataprovider.backend.elasticsearch.rest.ElasticSearchClient; import org.dashbuilder.dataprovider.backend.elasticsearch.rest.exception.ElasticSearchClientGenericException; import org.dashbuilder.dataprovider.backend.elasticsearch.rest.model.SearchRequest; import org.dashbuilder.dataprovider.backend.elasticsearch.rest.util.ElasticSearchUtils; import org.dashbuilder.dataset.ColumnType; import org.dashbuilder.dataset.DataColumn; import org.dashbuilder.dataset.DataSetMetadata; import org.dashbuilder.dataset.IntervalBuilderDynamicDate; import org.dashbuilder.dataset.date.DayOfWeek; import org.dashbuilder.dataset.date.Month; import org.dashbuilder.dataset.def.DataSetDef; import org.dashbuilder.dataset.def.ElasticSearchDataSetDef; import org.dashbuilder.dataset.group.*; import org.elasticsearch.script.Script; import org.elasticsearch.search.aggregations.AbstractAggregationBuilder; import org.elasticsearch.search.aggregations.AggregationBuilder; import org.elasticsearch.search.aggregations.AggregationBuilders; import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramBuilder; import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramInterval; import 
org.elasticsearch.search.aggregations.bucket.histogram.Histogram;
import org.elasticsearch.search.aggregations.bucket.histogram.HistogramBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.Terms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsBuilder;
import org.elasticsearch.search.aggregations.metrics.ValuesSourceMetricsAggregationBuilder;
import org.elasticsearch.search.aggregations.metrics.min.MinBuilder;

import java.text.MessageFormat;
import java.util.*;

/**
 * Helper class for the ELS native client that provides the different <code>AggregationBuilder</code>'s given a group operation.
 *
 * <p>The group functions of a {@link DataSetGroup} are translated into metric aggregations and,
 * when a column group is present, nested as sub-aggregations of a single bucket aggregation
 * (terms, histogram or date histogram, depending on the type of the grouped column).</p>
 *
 * @since 0.5.0
 */
public class NativeClientAggregationsBuilder {

    /** Name of the helper sub-aggregation used to sort fixed date buckets by a custom first month / first day. */
    private static final String SORT_ORDER_AGGREGATION = "_sortOrder";

    private final ElasticSearchClientFactory clientFactory;
    private final IntervalBuilderDynamicDate intervalBuilder;
    private final ElasticSearchUtils utils;
    private final DataSetMetadata metadata;
    private final List<DataColumn> columns;
    private final SearchRequest request;

    public NativeClientAggregationsBuilder(ElasticSearchClientFactory clientFactory,
                                           IntervalBuilderDynamicDate intervalBuilder,
                                           ElasticSearchUtils utils,
                                           DataSetMetadata metadata,
                                           List<DataColumn> columns,
                                           SearchRequest request) {
        this.clientFactory = clientFactory;
        this.intervalBuilder = intervalBuilder;
        this.utils = utils;
        this.metadata = metadata;
        this.columns = columns;
        this.request = request;
    }

    /**
     * Builds the aggregations for the given group operation.
     *
     * @param groupOp The group operation to translate.
     * @return A list holding the single group-by aggregation (with the core functions nested as
     *         sub-aggregations) when the operation has a column group that could be translated;
     *         otherwise the plain list of core function aggregations.
     * @throws ElasticSearchClientGenericException If the date limits required by a dynamic date grouping cannot be calculated.
     * @throws RuntimeException If a column pickup is used without grouping, or a referenced column does not exist in the metadata.
     */
    public List<AbstractAggregationBuilder> build( DataSetGroup groupOp ) throws ElasticSearchClientGenericException {
        ColumnGroup columnGroup = groupOp.getColumnGroup();
        List<GroupFunction> groupFunctions = groupOp.getGroupFunctions();
        List<GroupFunction> columnPickUps = new ArrayList<>();

        // Group functions.
        final List<AbstractAggregationBuilder> aggregationBuilders = new ArrayList<>();
        if (groupFunctions != null) {
            for (GroupFunction groupFunction : groupFunctions) {
                // If not a "group" lookup operation (not the groupby column), serialize the core function.
                if (groupFunction.getFunction() != null) {
                    ValuesSourceMetricsAggregationBuilder coreAggregation = serializeCoreFunction( groupFunction );
                    if ( null != coreAggregation ) {
                        aggregationBuilders.add( coreAggregation );
                    }
                } else {
                    columnPickUps.add( groupFunction );
                }
            }
        }

        if (columnGroup == null) {
            // If there is no group function, cannot use column pickups.
            if ( !columnPickUps.isEmpty() ) {
                throw new RuntimeException("Column [" + columnPickUps.get(0).getSourceId() + "] pickup failed. " +
                        "No grouping is set for this column.");
            }
            return aggregationBuilders;
        }

        // Group by columns.
        String columnId = columnGroup.getColumnId();
        String sourceId = columnGroup.getSourceId();

        // A column pickup over the grouped column provides the identifier of the resulting column.
        for (GroupFunction pickUp : columnPickUps) {
            if (sourceId.equals(pickUp.getSourceId())) {
                columnId = pickUp.getColumnId();
                if ( !existColumnInMetadataDef( sourceId ) ) {
                    throw new RuntimeException("Aggregation by column [" + sourceId + "] failed. No column with the given id.");
                }
            }
        }

        AbstractAggregationBuilder groupByAggregation = serializeGroupByFunction( columnGroup, columnId, aggregationBuilders );
        if ( null == groupByAggregation ) {
            return aggregationBuilders;
        }

        // Plain mutable list: avoids the anonymous subclass (and the captured outer instance) of double-brace initialization.
        List<AbstractAggregationBuilder> result = new ArrayList<>(1);
        result.add( groupByAggregation );
        return result;
    }

    /**
     * <p>Serializes a core function.</p>
     * <p>Example of SUM function serialization:</p>
     * <code>
     *     "column_id" : {
     *          "sum" : { "field" : "change" }
     *     }
     * </code>
     *
     * @param groupFunction The function to serialize. When it has no source column the first metadata column is used;
     *                      when it has no column identifier the source identifier is used as the aggregation name.
     * @return The metric aggregation builder, or <code>null</code> if <code>groupFunction</code> is <code>null</code>.
     * @throws RuntimeException If the source column does not exist in the metadata or the function type has no translation.
     * @throws IllegalArgumentException If no source column can be resolved.
     */
    protected ValuesSourceMetricsAggregationBuilder serializeCoreFunction(GroupFunction groupFunction) {
        if ( null == groupFunction ) {
            return null;
        }

        String sourceId = groupFunction.getSourceId();
        if ( sourceId != null && !existColumnInMetadataDef( sourceId ) ) {
            throw new RuntimeException("Aggregation by column [" + sourceId + "] failed. No column with the given id.");
        }
        if (sourceId == null) {
            sourceId = metadata.getColumnId(0);
        }
        if (sourceId == null) {
            throw new IllegalArgumentException("Aggregation from unknown column id.");
        }

        String columnId = groupFunction.getColumnId();
        if (columnId == null) {
            columnId = sourceId;
        }

        AggregateFunctionType type = groupFunction.getFunction();
        switch (type) {
            case COUNT:
                return AggregationBuilders.count(columnId).field(sourceId);
            case DISTINCT:
                return AggregationBuilders.cardinality(columnId).field(sourceId);
            case AVERAGE:
                return AggregationBuilders.avg(columnId).field(sourceId);
            case SUM:
                return AggregationBuilders.sum(columnId).field(sourceId);
            case MIN:
                return AggregationBuilders.min(columnId).field(sourceId);
            case MAX:
                return AggregationBuilders.max(columnId).field(sourceId);
            default:
                throw new RuntimeException( "Core function not supported as an Elastic Search aggregation [type=" + type.name() + "]" );
        }
    }

    /**
     * Serializes a column group into a bucket aggregation, nesting the given aggregations as sub-aggregations.
     *
     * <ul>
     *     <li>LABEL columns are translated into a terms aggregation.</li>
     *     <li>NUMBER columns are translated into a histogram aggregation.</li>
     *     <li>DATE columns are translated into a scripted terms aggregation (fixed strategy)
     *         or into a date histogram aggregation (dynamic strategy).</li>
     * </ul>
     *
     * <p>When the resulting column is present in the data set columns, its column group
     * (and, for dates, its type and interval type) is updated as well.</p>
     *
     * @param columnGroup The column group to serialize.
     * @param resultingColumnId The name for the aggregation. Defaults to the group's source identifier when <code>null</code>.
     * @param aggregationBuilders The aggregations to nest into the resulting bucket aggregation. May be <code>null</code> or empty.
     * @return The bucket aggregation builder, or <code>null</code> if <code>columnGroup</code> or the metadata is <code>null</code>.
     * @throws ElasticSearchClientGenericException If the date limits required by a dynamic date grouping cannot be calculated.
     * @throws RuntimeException If the column type / group strategy combination has no translation.
     */
    protected AbstractAggregationBuilder serializeGroupByFunction( ColumnGroup columnGroup,
                                                                   String resultingColumnId,
                                                                   List<AbstractAggregationBuilder> aggregationBuilders ) throws ElasticSearchClientGenericException {
        if (columnGroup == null || metadata == null) {
            return null;
        }

        String sourceId = columnGroup.getSourceId();
        if (resultingColumnId == null) {
            resultingColumnId = sourceId;
        }
        boolean asc = columnGroup.isAscendingOrder();
        ColumnType columnType = metadata.getColumnType(sourceId);
        GroupStrategy groupStrategy = columnGroup.getStrategy();
        String intervalSize = columnGroup.getIntervalSize();
        boolean areEmptyIntervalsAllowed = columnGroup.areEmptyIntervalsAllowed();
        int minDocCount = areEmptyIntervalsAllowed ? 0 : 1;
        // TODO: Support for maxIntervals.

        if (ColumnType.LABEL.equals(columnType)) {
            // Translate into a TERMS aggregation.
            TermsBuilder termsBuilder = new TermsBuilder( resultingColumnId )
                    .field( sourceId )
                    .size( 0 )
                    .minDocCount( minDocCount )
                    .order( Terms.Order.term(asc) );
            addSubAggregations( termsBuilder, aggregationBuilders );

            // Add the resulting data set column.
            DataColumn column = getColumn(resultingColumnId);
            if (column != null) {
                column.setColumnGroup( copyColumnGroup( columnGroup, sourceId, resultingColumnId ) );
            }
            return termsBuilder;
        }

        if (ColumnType.NUMBER.equals(columnType)) {
            // Translate into a HISTOGRAM aggregation.
            HistogramBuilder histogramBuilder = new HistogramBuilder( resultingColumnId )
                    .field( sourceId )
                    .minDocCount( minDocCount )
                    .order( asc ? Histogram.Order.KEY_ASC : Histogram.Order.KEY_DESC );
            if ( null != intervalSize ) {
                histogramBuilder.interval( Long.parseLong(intervalSize) );
            }
            addSubAggregations( histogramBuilder, aggregationBuilders );

            // Add the resulting dataset column.
            DataColumn column = getColumn(resultingColumnId);
            if (column != null) {
                column.setColumnGroup( copyColumnGroup( columnGroup, sourceId, resultingColumnId ) );
            }
            return histogramBuilder;
        }

        if (ColumnType.DATE.equals(columnType)) {
            DateIntervalType dateIntervalType;
            AbstractAggregationBuilder dateAggregation;

            if (GroupStrategy.FIXED.equals(groupStrategy)) {
                // Fixed grouping -> use term field aggregation with a date format script.
                if (intervalSize == null) {
                    throw new RuntimeException("Column [" + columnGroup.getColumnId() + "] is type Date and grouped using a fixed strategy, but the date interval type is not specified. Please specify it.");
                }
                dateIntervalType = DateIntervalType.valueOf(intervalSize);
                dateAggregation = buildFixedDateAggregation( columnGroup, sourceId, resultingColumnId, minDocCount, asc, aggregationBuilders );
            } else if (GroupStrategy.DYNAMIC.equals(groupStrategy)) {
                // Dynamic grouping -> use date histograms.
                dateIntervalType = resolveDynamicDateInterval( columnGroup, intervalSize );
                dateAggregation = buildDynamicDateAggregation( dateIntervalType, sourceId, resultingColumnId, minDocCount, asc, aggregationBuilders );
            } else {
                // Neither fixed nor dynamic: there is no interval type to register, so fail explicitly.
                throw unsupportedGrouping( sourceId, groupStrategy );
            }

            // Add the resulting dataset column.
            DataColumn column = getColumn(resultingColumnId);
            if (column != null) {
                column.setColumnType(ColumnType.LABEL);
                column.setIntervalType(dateIntervalType.name());
                ColumnGroup cg = copyColumnGroup( columnGroup, sourceId, resultingColumnId );
                cg.setEmptyIntervalsAllowed(areEmptyIntervalsAllowed);
                cg.setFirstMonthOfYear(columnGroup.getFirstMonthOfYear());
                cg.setFirstDayOfWeek(columnGroup.getFirstDayOfWeek());
                column.setColumnGroup(cg);
            }
            return dateAggregation;
        }

        throw unsupportedGrouping( sourceId, groupStrategy );
    }

    /** Builds the scripted terms aggregation used for date columns grouped with a fixed strategy. */
    private AbstractAggregationBuilder buildFixedDateAggregation( ColumnGroup columnGroup,
                                                                  String sourceId,
                                                                  String resultingColumnId,
                                                                  int minDocCount,
                                                                  boolean asc,
                                                                  List<AbstractAggregationBuilder> aggregationBuilders ) {
        String[] scripts = buildIntervalExtractorScript(sourceId, columnGroup);
        String valueScript = scripts[0];
        String orderScript = scripts[1];

        TermsBuilder termsBuilder = new TermsBuilder( resultingColumnId )
                .size( 0 )
                .minDocCount( minDocCount )
                .script( new Script( valueScript ) );

        if ( null == orderScript ) {
            termsBuilder.order( Terms.Order.term(asc) );
        } else {
            // The positions computed by the order script already reflect the asc/desc direction.
            termsBuilder.order( Terms.Order.aggregation( SORT_ORDER_AGGREGATION, true) );
        }

        addSubAggregations( termsBuilder, aggregationBuilders );

        if ( null != orderScript ) {
            MinBuilder orderAggBuilder = new MinBuilder( SORT_ORDER_AGGREGATION );
            orderAggBuilder.script( new Script( orderScript ) );
            termsBuilder.subAggregation( orderAggBuilder );
        }
        return termsBuilder;
    }

    /** Builds the date histogram aggregation used for date columns grouped with a dynamic strategy. */
    private AbstractAggregationBuilder buildDynamicDateAggregation( DateIntervalType dateIntervalType,
                                                                    String sourceId,
                                                                    String resultingColumnId,
                                                                    int minDocCount,
                                                                    boolean asc,
                                                                    List<AbstractAggregationBuilder> aggregationBuilders ) {
        String intervalPattern = DateIntervalPattern.getPattern(dateIntervalType);
        DateHistogramBuilder builder = new DateHistogramBuilder( resultingColumnId )
                .field( sourceId )
                .interval( getInterval( dateIntervalType ) )
                .format( intervalPattern )
                .minDocCount( minDocCount )
                .order( asc ? Histogram.Order.KEY_ASC : Histogram.Order.KEY_DESC );
        addSubAggregations( builder, aggregationBuilders );
        return builder;
    }

    /**
     * Resolves the interval type for a dynamic date grouping: the one given by the lookup group operation if any,
     * otherwise the one that fits the current date limits of the grouped field.
     */
    private DateIntervalType resolveDynamicDateInterval( ColumnGroup columnGroup, String intervalSize ) throws ElasticSearchClientGenericException {
        DateIntervalType dateIntervalType = intervalSize != null
                ? DateIntervalType.valueOf(intervalSize)
                : calculateDateIntervalFromLimits( columnGroup );

        // Not limits found. No matches. No matter the interval type used.
        return dateIntervalType != null ? dateIntervalType : DateIntervalType.MILLISECOND;
    }

    /**
     * Calculates the interval type from the date limits of the grouped field, using a dedicated client
     * that is always closed, even if the calculation fails.
     *
     * @return The calculated interval type, or <code>null</code> if no date limits are found.
     */
    private DateIntervalType calculateDateIntervalFromLimits( ColumnGroup columnGroup ) throws ElasticSearchClientGenericException {
        try {
            ElasticSearchClient anotherClient = clientFactory.newClient( (ElasticSearchDataSetDef) metadata.getDefinition() );
            try {
                Date[] limits = utils.calculateDateLimits(anotherClient, metadata, columnGroup.getSourceId(), this.request != null ? this.request.getQuery() : null);
                if (limits == null) {
                    return null;
                }
                return intervalBuilder.calculateIntervalSize(limits[0], limits[1], columnGroup);
            } finally {
                if (anotherClient != null) {
                    anotherClient.close();
                }
            }
        } catch (Exception e) {
            throw new ElasticSearchClientGenericException("Cannot calculate date limits.", e);
        }
    }

    /** Creates the column group to set into the resulting data set column. */
    private ColumnGroup copyColumnGroup( ColumnGroup columnGroup, String sourceId, String resultingColumnId ) {
        return new ColumnGroup(sourceId, resultingColumnId, columnGroup.getStrategy(), columnGroup.getMaxIntervals(), columnGroup.getIntervalSize());
    }

    /** Creates the exception for a column group that cannot be translated. Tolerates a <code>null</code> strategy. */
    private RuntimeException unsupportedGrouping( String sourceId, GroupStrategy groupStrategy ) {
        String strategyName = groupStrategy != null ? groupStrategy.name() : "null";
        return new RuntimeException("No translation supported for column group with sourceId [" + sourceId + "] and group strategy [" + strategyName + "].");
    }

    /** Nests every given aggregation into the parent one. A <code>null</code> or empty list is ignored. */
    private void addSubAggregations( AggregationBuilder parent, List<AbstractAggregationBuilder> aggregationBuilders ) {
        if ( null == aggregationBuilders ) {
            return;
        }
        for ( AbstractAggregationBuilder child : aggregationBuilders ) {
            parent.subAggregation( child );
        }
    }

    /**
     * Maps a date interval type to the interval expression used by the date histogram aggregation.
     *
     * @param dateIntervalType The interval type.
     * @return The date histogram interval.
     * @throws RuntimeException If the interval type has no mapping.
     */
    protected DateHistogramInterval getInterval(DateIntervalType dateIntervalType) {
        String intervalExpression;
        switch (dateIntervalType) {
            case MILLISECOND:
                intervalExpression = "0.001s";
                break;
            case HUNDRETH:
                intervalExpression = "0.01s";
                break;
            case TENTH:
                intervalExpression = "0.1s";
                break;
            case SECOND:
                intervalExpression = "1s";
                break;
            case MINUTE:
                intervalExpression = "1m";
                break;
            case HOUR:
                intervalExpression = "1h";
                break;
            case DAY:
                intervalExpression = "1d";
                break;
            case DAY_OF_WEEK:
                intervalExpression = "1d";
                break;
            case WEEK:
                intervalExpression = "1w";
                break;
            case MONTH:
                intervalExpression = "1M";
                break;
            case QUARTER:
                intervalExpression = "1q";
                break;
            case YEAR:
                intervalExpression = "1y";
                break;
            case DECADE:
                intervalExpression = "10y";
                break;
            case CENTURY:
                intervalExpression = "100y";
                break;
            case MILLENIUM:
                intervalExpression = "1000y";
                break;
            default:
                throw new RuntimeException("No interval mapping for date interval type [" + dateIntervalType.name() + "].");
        }
        return new DateHistogramInterval( intervalExpression );
    }

    /**
     * Builds the scripts used by a fixed date grouping.
     *
     * @return A two element array: the script that extracts the interval value from the date field and
     *         the script that calculates the bucket position. The latter is <code>null</code> unless
     *         a first month (MONTH interval) or a first day of week (DAY_OF_WEEK interval) is set.
     * @throws UnsupportedOperationException If the interval type is not supported by the fixed strategy.
     */
    private String[] buildIntervalExtractorScript(String sourceId, ColumnGroup columnGroup) {
        DateIntervalType intervalType = DateIntervalType.getByName(columnGroup.getIntervalSize());
        Month firstMonth = columnGroup.getFirstMonthOfYear();
        DayOfWeek firstDayOfWeek = columnGroup.getFirstDayOfWeek();

        // The {0} placeholder is replaced by the source column identifier below.
        String script = "new Date(doc[\"{0}\"].value).toCalendar().";
        switch (intervalType) {
            case QUARTER:
                // For quarters use this pseudocode script: <code>quarter = round-up(date.month / 3)</code>
                script = "ceil( ( " + script + "get(Calendar.MONTH) + 1 ) / 3 ).toInteger()";
                break;
            case MONTH:
                script = script + "get(Calendar.MONTH) + 1";
                break;
            case DAY_OF_WEEK:
                script = script + "get(Calendar.DAY_OF_WEEK)";
                break;
            case HOUR:
                script = script + "get(Calendar.HOUR_OF_DAY)";
                break;
            case MINUTE:
                script = script + "get(Calendar.MINUTE)";
                break;
            case SECOND:
                script = script + "get(Calendar.SECOND)";
                break;
            default:
                throw new UnsupportedOperationException("Fixed grouping strategy by interval type " + intervalType.name() + " is not supported.");
        }

        String valueScript = MessageFormat.format( script, sourceId );

        String orderScript = null;
        if (firstMonth != null && intervalType.equals(DateIntervalType.MONTH)) {
            int firstMonthIndex = firstMonth.getIndex();
            int[] positions = buildPositionsArray(firstMonthIndex, 12, columnGroup.isAscendingOrder());
            orderScript = "month="+valueScript+".toInteger(); list = "+Arrays.toString(positions)+"; list.indexOf(month)";
        }

        if (firstDayOfWeek != null && intervalType.equals(DateIntervalType.DAY_OF_WEEK)) {
            int firstDayIndex = firstDayOfWeek.getIndex();
            int[] positions = buildPositionsArray(firstDayIndex, 7, columnGroup.isAscendingOrder());
            orderScript = "day="+valueScript+".toInteger(); list = "+Arrays.toString(positions)+"; list.indexOf(day)";
        }

        return new String[] { valueScript, orderScript };
    }

    /**
     * Builds the sequence of the values <code>1..end</code> starting at the given element and wrapping around,
     * walking upwards when ascending and downwards otherwise.
     */
    private int[] buildPositionsArray( int firstElementIndex, int end, boolean asc) {
        int[] positions = new int[end];
        int current = firstElementIndex;
        for (int x = 0; x < end; x++) {
            if ( current > end ) {
                current = 1;
            }
            if ( current < 1 ) {
                current = end;
            }
            positions[x] = current;
            current += asc ? 1 : -1;
        }
        return positions;
    }

    /**
     * Checks whether the metadata contains a column with the given identifier.
     *
     * @return <code>false</code> if the name or the metadata is <code>null</code>, or if no column matches.
     */
    protected boolean existColumnInMetadataDef( String name ) {
        if (name == null || metadata == null) {
            return false;
        }

        int cols = metadata.getNumberOfColumns();
        for (int x = 0; x < cols; x++) {
            if (name.equals(metadata.getColumnId(x))) {
                return true;
            }
        }
        return false;
    }

    /**
     * Looks up a data set column by its identifier.
     *
     * @return The first column with the given identifier, or <code>null</code> if there are no columns,
     *         the identifier is <code>null</code> or no column matches.
     */
    protected DataColumn getColumn(String columnId) {
        if (columns == null || columnId == null) {
            return null;
        }
        for (DataColumn column : columns) {
            if (columnId.equals(column.getId())) {
                return column;
            }
        }
        return null;
    }
}