/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.core.query.reduce;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.linkedin.pinot.common.data.FieldSpec;
import com.linkedin.pinot.common.exception.QueryException;
import com.linkedin.pinot.common.metrics.BrokerMeter;
import com.linkedin.pinot.common.metrics.BrokerMetrics;
import com.linkedin.pinot.common.query.ReduceService;
import com.linkedin.pinot.common.request.BrokerRequest;
import com.linkedin.pinot.common.request.GroupBy;
import com.linkedin.pinot.common.request.Selection;
import com.linkedin.pinot.common.response.ServerInstance;
import com.linkedin.pinot.common.response.broker.AggregationResult;
import com.linkedin.pinot.common.response.broker.BrokerResponseNative;
import com.linkedin.pinot.common.response.broker.GroupByResult;
import com.linkedin.pinot.common.response.broker.QueryProcessingException;
import com.linkedin.pinot.common.response.broker.SelectionResults;
import com.linkedin.pinot.common.utils.DataSchema;
import com.linkedin.pinot.common.utils.DataTable;
import com.linkedin.pinot.core.query.aggregation.function.AggregationFunction;
import com.linkedin.pinot.core.query.aggregation.function.AggregationFunctionUtils;
import com.linkedin.pinot.core.query.aggregation.groupby.AggregationGroupByTrimmingService;
import com.linkedin.pinot.core.query.selection.SelectionOperatorService;
import com.linkedin.pinot.core.query.selection.SelectionOperatorUtils;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.annotation.concurrent.ThreadSafe;
/**
 * The <code>BrokerReduceService</code> class provides service to reduce data tables gathered from multiple servers
 * to {@link BrokerResponseNative}.
 */
@ThreadSafe
public class BrokerReduceService implements ReduceService<BrokerResponseNative> {
  private static final Logger LOGGER = LoggerFactory.getLogger(BrokerReduceService.class);

  /**
   * Reduces the server data tables into a single broker response, without updating broker metrics.
   *
   * @param brokerRequest broker request sent to the servers.
   * @param instanceResponseMap map from server to its data table response.
   * @return reduced broker response.
   */
  @Nonnull
  @Override
  public BrokerResponseNative reduceOnDataTable(@Nonnull BrokerRequest brokerRequest,
      @Nonnull Map<ServerInstance, DataTable> instanceResponseMap) {
    // Delegate to the full overload with broker metrics disabled.
    return reduceOnDataTable(brokerRequest, instanceResponseMap, null);
  }

  /**
   * Reduces the server data tables into a single broker response, optionally updating broker metrics.
   * <p>Processing order: merge per-server metadata (trace info, exceptions, execution statistics), prune data
   * tables without data rows, then reduce the remaining data tables according to the query type (selection,
   * aggregation-only, or aggregation group-by).
   *
   * @param brokerRequest broker request sent to the servers.
   * @param dataTableMap map from server to its data table response (pruned in place).
   * @param brokerMetrics broker metrics to update, or <code>null</code> to skip metric updates.
   * @return reduced broker response.
   */
  @Nonnull
  @Override
  public BrokerResponseNative reduceOnDataTable(@Nonnull BrokerRequest brokerRequest,
      @Nonnull Map<ServerInstance, DataTable> dataTableMap, @Nullable BrokerMetrics brokerMetrics) {
    if (dataTableMap.isEmpty()) {
      // Empty response.
      return BrokerResponseNative.empty();
    }

    BrokerResponseNative brokerResponseNative = new BrokerResponseNative();
    List<QueryProcessingException> processingExceptions = brokerResponseNative.getProcessingExceptions();
    long numDocsScanned = 0L;
    long numEntriesScannedInFilter = 0L;
    long numEntriesScannedPostFilter = 0L;
    long numTotalRawDocs = 0L;

    // Cache a data schema from data tables (try to cache one with data rows associated with it).
    DataSchema cachedDataSchema = null;

    // Process server response metadata.
    Iterator<Map.Entry<ServerInstance, DataTable>> iterator = dataTableMap.entrySet().iterator();
    while (iterator.hasNext()) {
      Map.Entry<ServerInstance, DataTable> entry = iterator.next();
      ServerInstance serverInstance = entry.getKey();
      DataTable dataTable = entry.getValue();
      Map<String, String> metadata = dataTable.getMetadata();

      // Reduce on trace info.
      if (brokerRequest.isEnableTrace()) {
        brokerResponseNative.getTraceInfo()
            .put(serverInstance.getHostname(), metadata.get(DataTable.TRACE_INFO_METADATA_KEY));
      }

      // Reduce on exceptions. The error code is appended after the exception metadata key prefix, so derive the
      // substring offset from the prefix length instead of hard-coding it.
      for (Map.Entry<String, String> metadataEntry : metadata.entrySet()) {
        String key = metadataEntry.getKey();
        if (key.startsWith(DataTable.EXCEPTION_METADATA_KEY)) {
          int errorCode = Integer.parseInt(key.substring(DataTable.EXCEPTION_METADATA_KEY.length()));
          processingExceptions.add(new QueryProcessingException(errorCode, metadataEntry.getValue()));
        }
      }

      // Reduce on execution statistics (each statistic is optional in the metadata).
      String numDocsScannedString = metadata.get(DataTable.NUM_DOCS_SCANNED_METADATA_KEY);
      if (numDocsScannedString != null) {
        numDocsScanned += Long.parseLong(numDocsScannedString);
      }
      String numEntriesScannedInFilterString = metadata.get(DataTable.NUM_ENTRIES_SCANNED_IN_FILTER_METADATA_KEY);
      if (numEntriesScannedInFilterString != null) {
        numEntriesScannedInFilter += Long.parseLong(numEntriesScannedInFilterString);
      }
      String numEntriesScannedPostFilterString = metadata.get(DataTable.NUM_ENTRIES_SCANNED_POST_FILTER_METADATA_KEY);
      if (numEntriesScannedPostFilterString != null) {
        numEntriesScannedPostFilter += Long.parseLong(numEntriesScannedPostFilterString);
      }
      String numTotalRawDocsString = metadata.get(DataTable.TOTAL_DOCS_METADATA_KEY);
      if (numTotalRawDocsString != null) {
        numTotalRawDocs += Long.parseLong(numTotalRawDocsString);
      }

      // After processing the metadata, remove data tables without data rows inside.
      DataSchema dataSchema = dataTable.getDataSchema();
      if (dataSchema == null) {
        iterator.remove();
      } else {
        // Try to cache a data table with data rows inside, or cache one with data schema inside.
        if (dataTable.getNumberOfRows() == 0) {
          if (cachedDataSchema == null) {
            cachedDataSchema = dataSchema;
          }
          iterator.remove();
        } else {
          cachedDataSchema = dataSchema;
        }
      }
    }

    // Set execution statistics.
    brokerResponseNative.setNumDocsScanned(numDocsScanned);
    brokerResponseNative.setNumEntriesScannedInFilter(numEntriesScannedInFilter);
    brokerResponseNative.setNumEntriesScannedPostFilter(numEntriesScannedPostFilter);
    brokerResponseNative.setTotalDocs(numTotalRawDocs);

    // Update broker metrics.
    String tableName = brokerRequest.getQuerySource().getTableName();
    if (brokerMetrics != null) {
      brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.DOCUMENTS_SCANNED, numDocsScanned);
      brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.ENTRIES_SCANNED_IN_FILTER, numEntriesScannedInFilter);
      brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.ENTRIES_SCANNED_POST_FILTER,
          numEntriesScannedPostFilter);
    }

    if (dataTableMap.isEmpty()) {
      // For empty data table map, construct empty result using the cached data schema.
      // This should only happen to selection query; check isSetSelections() so a non-selection query that ends up
      // here with a schema-only data table cannot throw a NullPointerException on getSelections().
      if (cachedDataSchema != null && brokerRequest.isSetSelections()) {
        List<String> selectionColumns =
            SelectionOperatorUtils.getSelectionColumns(brokerRequest.getSelections().getSelectionColumns(),
                cachedDataSchema);
        brokerResponseNative.setSelectionResults(
            new SelectionResults(selectionColumns, new ArrayList<Serializable[]>(0)));
      }
    } else {
      // Reduce server responses data and set query results into the broker response.
      assert cachedDataSchema != null;

      if (brokerRequest.isSetSelections()) {
        // Selection query.

        // For data table map with more than one data tables, remove conflicting data tables.
        DataSchema masterDataSchema = cachedDataSchema.clone();
        if (dataTableMap.size() > 1) {
          List<String> droppedServers = removeConflictingResponses(masterDataSchema, dataTableMap);
          if (!droppedServers.isEmpty()) {
            String errorMessage =
                QueryException.MERGE_RESPONSE_ERROR.getMessage() + ": responses for table: " + tableName
                    + " from servers: " + droppedServers + " got dropped due to data schema inconsistency.";
            LOGGER.info(errorMessage);
            if (brokerMetrics != null) {
              brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.RESPONSE_MERGE_EXCEPTIONS, 1);
            }
            brokerResponseNative.addToExceptions(
                new QueryProcessingException(QueryException.MERGE_RESPONSE_ERROR_CODE, errorMessage));
          }
        }
        setSelectionResults(brokerResponseNative, brokerRequest.getSelections(), dataTableMap, masterDataSchema);
      } else {
        // Aggregation query.
        AggregationFunction[] aggregationFunctions =
            AggregationFunctionUtils.getAggregationFunctions(brokerRequest.getAggregationsInfo());
        if (!brokerRequest.isSetGroupBy()) {
          // Aggregation only query.
          setAggregationResults(brokerResponseNative, aggregationFunctions, dataTableMap, cachedDataSchema);
        } else {
          // Aggregation group-by query.
          setGroupByResults(brokerResponseNative, aggregationFunctions, brokerRequest.getGroupBy(), dataTableMap);
        }
      }
    }

    return brokerResponseNative;
  }

  /**
   * Given a data schema, remove data tables that are not compatible with this data schema.
   * <p>Upgrade the data schema passed in to cover all remaining data schemas.
   *
   * @param dataSchema data schema.
   * @param dataTableMap map from server to data table.
   * @return list of server names where the data table got removed.
   */
  @Nonnull
  private List<String> removeConflictingResponses(@Nonnull DataSchema dataSchema,
      @Nonnull Map<ServerInstance, DataTable> dataTableMap) {
    List<String> droppedServers = new ArrayList<>();
    Iterator<Map.Entry<ServerInstance, DataTable>> iterator = dataTableMap.entrySet().iterator();
    while (iterator.hasNext()) {
      Map.Entry<ServerInstance, DataTable> entry = iterator.next();
      DataSchema dataSchemaToCompare = entry.getValue().getDataSchema();
      // Data tables without schema were pruned earlier in reduceOnDataTable().
      assert dataSchemaToCompare != null;
      if (!dataSchema.isTypeCompatibleWith(dataSchemaToCompare)) {
        droppedServers.add(entry.getKey().toString());
        iterator.remove();
      } else {
        dataSchema.upgradeToCover(dataSchemaToCompare);
      }
    }
    return droppedServers;
  }

  /**
   * Reduce selection results from multiple servers and set them into BrokerResponseNative passed in.
   *
   * @param brokerResponseNative broker response.
   * @param selection selection information.
   * @param dataTableMap map from server to data table.
   * @param dataSchema data schema.
   */
  private void setSelectionResults(@Nonnull BrokerResponseNative brokerResponseNative, @Nonnull Selection selection,
      @Nonnull Map<ServerInstance, DataTable> dataTableMap, @Nonnull DataSchema dataSchema) {
    // Reduce the selection results.
    SelectionResults selectionResults;
    int selectionSize = selection.getSize();
    if (selection.isSetSelectionSortSequence() && selectionSize != 0) {
      // Selection order-by.
      SelectionOperatorService selectionService = new SelectionOperatorService(selection, dataSchema);
      selectionService.reduceWithOrdering(dataTableMap);
      selectionResults = selectionService.renderSelectionResultsWithOrdering();
    } else {
      // Selection only.
      selectionResults = SelectionOperatorUtils.renderSelectionResultsWithoutOrdering(
          SelectionOperatorUtils.reduceWithoutOrdering(dataTableMap, selectionSize), dataSchema,
          SelectionOperatorUtils.getSelectionColumns(selection.getSelectionColumns(), dataSchema));
    }
    brokerResponseNative.setSelectionResults(selectionResults);
  }

  /**
   * Reduce aggregation results from multiple servers and set them into BrokerResponseNative passed in.
   * <p>Each data table holds one row with one column per aggregation function; intermediate results are merged
   * across servers, then each merged result is finalized and formatted.
   *
   * @param brokerResponseNative broker response.
   * @param aggregationFunctions array of aggregation functions.
   * @param dataTableMap map from server to data table.
   * @param dataSchema data schema.
   */
  @SuppressWarnings("unchecked")
  private void setAggregationResults(@Nonnull BrokerResponseNative brokerResponseNative,
      @Nonnull AggregationFunction[] aggregationFunctions, @Nonnull Map<ServerInstance, DataTable> dataTableMap,
      @Nonnull DataSchema dataSchema) {
    int numAggregationFunctions = aggregationFunctions.length;

    // Merge results from all data tables.
    Object[] intermediateResults = new Object[numAggregationFunctions];
    for (DataTable dataTable : dataTableMap.values()) {
      for (int i = 0; i < numAggregationFunctions; i++) {
        Object intermediateResultToMerge;
        FieldSpec.DataType columnType = dataSchema.getColumnType(i);
        switch (columnType) {
          case LONG:
            intermediateResultToMerge = dataTable.getLong(0, i);
            break;
          case DOUBLE:
            intermediateResultToMerge = dataTable.getDouble(0, i);
            break;
          case OBJECT:
            intermediateResultToMerge = dataTable.getObject(0, i);
            break;
          default:
            throw new IllegalStateException("Illegal column type in aggregation results: " + columnType);
        }
        Object mergedIntermediateResult = intermediateResults[i];
        if (mergedIntermediateResult == null) {
          // First data table contributing to this aggregation: nothing to merge yet.
          intermediateResults[i] = intermediateResultToMerge;
        } else {
          intermediateResults[i] = aggregationFunctions[i].merge(mergedIntermediateResult, intermediateResultToMerge);
        }
      }
    }

    // Extract final results and set them into the broker response.
    List<AggregationResult> reducedAggregationResults = new ArrayList<>(numAggregationFunctions);
    for (int i = 0; i < numAggregationFunctions; i++) {
      String formattedResult =
          AggregationFunctionUtils.formatValue(aggregationFunctions[i].extractFinalResult(intermediateResults[i]));
      reducedAggregationResults.add(new AggregationResult(dataSchema.getColumnName(i), formattedResult));
    }
    brokerResponseNative.setAggregationResults(reducedAggregationResults);
  }

  /**
   * Reduce group-by results from multiple servers and set them into BrokerResponseNative passed in.
   * <p>Each data table holds one row per aggregation function with the column name at index 0 and the
   * group-key-to-intermediate-result map at index 1; maps are merged across servers per group key, finalized,
   * then trimmed to the requested topN.
   *
   * @param brokerResponseNative broker response.
   * @param aggregationFunctions array of aggregation functions.
   * @param groupBy group-by information.
   * @param dataTableMap map from server to data table.
   */
  @SuppressWarnings("unchecked")
  private void setGroupByResults(@Nonnull BrokerResponseNative brokerResponseNative,
      @Nonnull AggregationFunction[] aggregationFunctions, @Nonnull GroupBy groupBy,
      @Nonnull Map<ServerInstance, DataTable> dataTableMap) {
    int numAggregationFunctions = aggregationFunctions.length;

    // Merge results from all data tables.
    String[] columnNames = new String[numAggregationFunctions];
    Map<String, Object>[] intermediateResultMaps = new Map[numAggregationFunctions];
    for (DataTable dataTable : dataTableMap.values()) {
      for (int i = 0; i < numAggregationFunctions; i++) {
        if (columnNames[i] == null) {
          // First data table contributing to this aggregation: take its column name and result map as-is.
          columnNames[i] = dataTable.getString(i, 0);
          intermediateResultMaps[i] = dataTable.getObject(i, 1);
        } else {
          Map<String, Object> mergedIntermediateResultMap = intermediateResultMaps[i];
          Map<String, Object> intermediateResultMapToMerge = dataTable.getObject(i, 1);
          for (Map.Entry<String, Object> entry : intermediateResultMapToMerge.entrySet()) {
            String groupKey = entry.getKey();
            Object intermediateResultToMerge = entry.getValue();
            if (mergedIntermediateResultMap.containsKey(groupKey)) {
              Object mergedIntermediateResult = mergedIntermediateResultMap.get(groupKey);
              mergedIntermediateResultMap.put(groupKey,
                  aggregationFunctions[i].merge(mergedIntermediateResult, intermediateResultToMerge));
            } else {
              mergedIntermediateResultMap.put(groupKey, intermediateResultToMerge);
            }
          }
        }
      }
    }

    // Extract final result maps from the merged intermediate result maps.
    Map<String, Comparable>[] finalResultMaps = new Map[numAggregationFunctions];
    for (int i = 0; i < numAggregationFunctions; i++) {
      Map<String, Object> intermediateResultMap = intermediateResultMaps[i];
      Map<String, Comparable> finalResultMap = new HashMap<>();
      for (Map.Entry<String, Object> entry : intermediateResultMap.entrySet()) {
        finalResultMap.put(entry.getKey(), aggregationFunctions[i].extractFinalResult(entry.getValue()));
      }
      finalResultMaps[i] = finalResultMap;
    }

    // Trim the final result maps to topN and set them into the broker response.
    AggregationGroupByTrimmingService aggregationGroupByTrimmingService =
        new AggregationGroupByTrimmingService(aggregationFunctions, (int) groupBy.getTopN());
    List<GroupByResult>[] groupByResultLists = aggregationGroupByTrimmingService.trimFinalResults(finalResultMaps);
    List<AggregationResult> aggregationResults = new ArrayList<>(numAggregationFunctions);
    for (int i = 0; i < numAggregationFunctions; i++) {
      List<GroupByResult> groupByResultList = groupByResultLists[i];
      // Prefer group-by expressions; fall back to plain group-by columns for older requests.
      List<String> groupByColumns = groupBy.getExpressions();
      if (groupByColumns == null) {
        groupByColumns = groupBy.getColumns();
      }
      aggregationResults.add(new AggregationResult(groupByResultList, groupByColumns, columnNames[i]));
    }
    brokerResponseNative.setAggregationResults(aggregationResults);
  }
}