/** * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com) * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.linkedin.pinot.broker.requesthandler; import com.google.common.base.Splitter; import com.linkedin.pinot.common.config.TableNameBuilder; import com.linkedin.pinot.common.exception.QueryException; import com.linkedin.pinot.common.metrics.BrokerMeter; import com.linkedin.pinot.common.metrics.BrokerMetrics; import com.linkedin.pinot.common.metrics.BrokerQueryPhase; import com.linkedin.pinot.common.query.ReduceService; import com.linkedin.pinot.common.query.ReduceServiceRegistry; import com.linkedin.pinot.common.request.BrokerRequest; import com.linkedin.pinot.common.request.FilterOperator; import com.linkedin.pinot.common.request.FilterQuery; import com.linkedin.pinot.common.request.FilterQueryMap; import com.linkedin.pinot.common.request.InstanceRequest; import com.linkedin.pinot.common.response.BrokerResponse; import com.linkedin.pinot.common.response.BrokerResponseFactory; import com.linkedin.pinot.common.response.BrokerResponseFactory.ResponseType; import com.linkedin.pinot.common.response.ProcessingException; import com.linkedin.pinot.common.response.ServerInstance; import com.linkedin.pinot.common.utils.DataTable; import com.linkedin.pinot.core.common.datatable.DataTableFactory; import com.linkedin.pinot.pql.parsers.Pql2Compiler; import com.linkedin.pinot.routing.RoutingTable; import com.linkedin.pinot.routing.RoutingTableLookupRequest; import com.linkedin.pinot.routing.TimeBoundaryService; import com.linkedin.pinot.routing.TimeBoundaryService.TimeBoundaryInfo; import com.linkedin.pinot.serde.SerDe; import com.linkedin.pinot.transport.common.BucketingSelection; import com.linkedin.pinot.transport.common.CompositeFuture; import com.linkedin.pinot.transport.common.ReplicaSelection; import com.linkedin.pinot.transport.common.ReplicaSelectionGranularity; import com.linkedin.pinot.transport.common.RoundRobinReplicaSelection; import com.linkedin.pinot.transport.common.SegmentIdSet; import com.linkedin.pinot.transport.scattergather.ScatterGather; import com.linkedin.pinot.transport.scattergather.ScatterGatherRequest; import com.linkedin.pinot.transport.scattergather.ScatterGatherStats; import io.netty.buffer.ByteBuf; import java.net.InetAddress; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicLong; import javax.annotation.Nonnull; import javax.annotation.Nullable; import javax.annotation.concurrent.ThreadSafe; import org.apache.commons.configuration.Configuration; import org.apache.thrift.protocol.TCompactProtocol; import org.json.JSONObject; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * The <code>BrokerRequestHandler</code> class is a thread-safe broker request handler. Clients can submit multiple * requests to be processed parallel. */ @ThreadSafe public class BrokerRequestHandler { private static final Logger LOGGER = LoggerFactory.getLogger(BrokerRequestHandler.class); private static final Pql2Compiler REQUEST_COMPILER = new Pql2Compiler(); private static final int DEFAULT_BROKER_QUERY_RESPONSE_LIMIT = Integer.MAX_VALUE; private static final String BROKER_QUERY_RESPONSE_LIMIT_CONFIG = "pinot.broker.query.response.limit"; public static final long DEFAULT_BROKER_TIME_OUT_MS = 10 * 1000L; private static final String BROKER_TIME_OUT_CONFIG = "pinot.broker.timeoutMs"; private static final String DEFAULT_BROKER_ID; public static final String BROKER_ID_CONFIG_KEY = "pinot.broker.id"; private static final ResponseType DEFAULT_BROKER_RESPONSE_TYPE = ResponseType.BROKER_RESPONSE_TYPE_NATIVE; static { String defaultBrokerId = ""; try { defaultBrokerId = InetAddress.getLocalHost().getHostName(); } catch (UnknownHostException e) { LOGGER.error("Failed to read default broker id.", e); } DEFAULT_BROKER_ID = defaultBrokerId; } private final RoutingTable _routingTable; private final ScatterGather _scatterGatherer; private final ReduceServiceRegistry _reduceServiceRegistry; private final BrokerMetrics _brokerMetrics; private final TimeBoundaryService _timeBoundaryService; private final long _brokerTimeOutMs; private final BrokerRequestOptimizer _optimizer; private final int _queryResponseLimit; private final AtomicLong _requestIdGenerator; private final String _brokerId; // TODO: Currently only using RoundRobin selection. But, this can be allowed to be configured. private RoundRobinReplicaSelection _replicaSelection; public BrokerRequestHandler(RoutingTable table, TimeBoundaryService timeBoundaryService, ScatterGather scatterGatherer, ReduceServiceRegistry reduceServiceRegistry, BrokerMetrics brokerMetrics, Configuration config) { _routingTable = table; _timeBoundaryService = timeBoundaryService; _reduceServiceRegistry = reduceServiceRegistry; _scatterGatherer = scatterGatherer; _replicaSelection = new RoundRobinReplicaSelection(); _brokerMetrics = brokerMetrics; _optimizer = new BrokerRequestOptimizer(); _requestIdGenerator = new AtomicLong(0); _queryResponseLimit = config.getInt(BROKER_QUERY_RESPONSE_LIMIT_CONFIG, DEFAULT_BROKER_QUERY_RESPONSE_LIMIT); _brokerTimeOutMs = config.getLong(BROKER_TIME_OUT_CONFIG, DEFAULT_BROKER_TIME_OUT_MS); _brokerId = config.getString(BROKER_ID_CONFIG_KEY, DEFAULT_BROKER_ID); LOGGER.info("Broker response limit is: " + _queryResponseLimit); LOGGER.info("Broker timeout is - " + _brokerTimeOutMs + " ms"); LOGGER.info("Broker id: " + _brokerId); } /** * Process a JSON format request. * * @param request JSON format request to be processed. * @return broker response. * @throws Exception */ @Nonnull public BrokerResponse handleRequest(@Nonnull JSONObject request) throws Exception { long requestId = _requestIdGenerator.incrementAndGet(); String pql = request.getString("pql"); LOGGER.debug("Query string for requestId {}: {}", requestId, pql); boolean isTraceEnabled = false; if (request.has("trace")) { isTraceEnabled = Boolean.parseBoolean(request.getString("trace")); LOGGER.debug("Trace is set to: {} for requestId {}: {}", isTraceEnabled, requestId, pql); } Map<String, String> debugOptions = null; if (request.has("debugOptions")) { String routingOptionParameter = request.getString("debugOptions"); debugOptions = Splitter.on(';').omitEmptyStrings().trimResults().withKeyValueSeparator('=').split(routingOptionParameter); LOGGER.debug("Debug options are set to: {} for requestId {}: {}", debugOptions, requestId, pql); } // Compile and validate the request. long compilationStartTime = System.nanoTime(); BrokerRequest brokerRequest; try { brokerRequest = REQUEST_COMPILER.compileToBrokerRequest(pql); } catch (Exception e) { LOGGER.info("Parsing error on requestId {}: {}, {}", requestId, pql, e.getMessage()); _brokerMetrics.addMeteredGlobalValue(BrokerMeter.REQUEST_COMPILATION_EXCEPTIONS, 1); return BrokerResponseFactory.getBrokerResponseWithException(DEFAULT_BROKER_RESPONSE_TYPE, QueryException.getException(QueryException.PQL_PARSING_ERROR, e)); } String tableName = brokerRequest.getQuerySource().getTableName(); try { validateRequest(brokerRequest); } catch (Exception e) { LOGGER.info("Validation error on requestId {}: {}, {}", requestId, pql, e.getMessage()); _brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.QUERY_VALIDATION_EXCEPTIONS, 1); return BrokerResponseFactory.getBrokerResponseWithException(DEFAULT_BROKER_RESPONSE_TYPE, QueryException.getException(QueryException.QUERY_VALIDATION_ERROR, e)); } if (isTraceEnabled) { brokerRequest.setEnableTrace(true); } if (debugOptions != null) { brokerRequest.setDebugOptions(debugOptions); } brokerRequest.setResponseFormat(ResponseType.BROKER_RESPONSE_TYPE_NATIVE.name()); _brokerMetrics.addPhaseTiming(tableName, BrokerQueryPhase.REQUEST_COMPILATION, System.nanoTime() - compilationStartTime); _brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.QUERIES, 1); // Execute the query. long executionStartTime = System.nanoTime(); ScatterGatherStats scatterGatherStats = new ScatterGatherStats(); BrokerResponse brokerResponse = processBrokerRequest(brokerRequest, scatterGatherStats, requestId); _brokerMetrics.addPhaseTiming(tableName, BrokerQueryPhase.QUERY_EXECUTION, System.nanoTime() - executionStartTime); // Set total query processing time. long totalTimeMs = TimeUnit.MILLISECONDS.convert(System.nanoTime() - compilationStartTime, TimeUnit.NANOSECONDS); brokerResponse.setTimeUsedMs(totalTimeMs); LOGGER.debug("Broker Response: {}", brokerResponse); // Table name might have been changed (with suffix _OFFLINE/_REALTIME appended). LOGGER.info("RequestId: {}, table: {}, totalTimeMs: {}, numDocsScanned: {}, numEntriesScannedInFilter: {}, " + "numEntriesScannedPostFilter: {}, totalDocs: {}, scatterGatherStats: {}, query: {}", requestId, brokerRequest.getQuerySource().getTableName(), totalTimeMs, brokerResponse.getNumDocsScanned(), brokerResponse.getNumEntriesScannedInFilter(), brokerResponse.getNumEntriesScannedPostFilter(), brokerResponse.getTotalDocs(), scatterGatherStats, pql); return brokerResponse; } /** * Broker side validation on the broker request. * <p>Throw RuntimeException if query does not pass validation. * <p>Current validations are: * <ul> * <li>Value for 'TOP' for aggregation group-by query is <= configured value.</li> * <li>Value for 'LIMIT' for selection query is <= configured value.</li> * </ul> * * @param brokerRequest broker request to be validated. */ public void validateRequest(@Nonnull BrokerRequest brokerRequest) { if (brokerRequest.isSetAggregationsInfo()) { if (brokerRequest.isSetGroupBy()) { long topN = brokerRequest.getGroupBy().getTopN(); if (topN > _queryResponseLimit) { throw new RuntimeException( "Value for 'TOP' " + topN + " exceeded maximum allowed value of " + _queryResponseLimit); } } } else { int limit = brokerRequest.getSelections().getSize(); if (limit > _queryResponseLimit) { throw new RuntimeException( "Value for 'LIMIT' " + limit + " exceeded maximum allowed value of " + _queryResponseLimit); } } } /** * Main method to process the request. * <p>Following lifecycle stages: * <ul> * <li>1. Find the candidate servers to be queried for each set of segments from the routing table.</li> * <li>2. Select servers for each segment set and scatter request to the servers.</li> * <li>3. Gather responses from the servers.</li> * <li>4. Deserialize the server responses.</li> * <li>5. Reduce (merge) the server responses and create a broker response to be returned.</li> * </ul> * * @param brokerRequest broker request to be processed. * @param scatterGatherStats scatter-gather statistics. * @param requestId broker request ID. * @return broker response. * @throws InterruptedException */ @Nonnull public BrokerResponse processBrokerRequest(@Nonnull BrokerRequest brokerRequest, @Nonnull ScatterGatherStats scatterGatherStats, long requestId) throws InterruptedException { String tableName = brokerRequest.getQuerySource().getTableName(); ResponseType responseType = BrokerResponseFactory.getResponseType(brokerRequest.getResponseFormat()); LOGGER.debug("Broker Response Type: {}", responseType.name()); String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(tableName); if (!_routingTable.routingTableExists(offlineTableName)) { offlineTableName = null; } String realtimeTableName = TableNameBuilder.REALTIME.tableNameWithType(tableName); if (!_routingTable.routingTableExists(realtimeTableName)) { realtimeTableName = null; } if ((offlineTableName == null) && (realtimeTableName == null)) { // No table matches the broker request. LOGGER.info("No table matches the name: {}", tableName); _brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.RESOURCE_MISSING_EXCEPTIONS, 1); return BrokerResponseFactory.getStaticNoTableHitBrokerResponse(responseType); } else { // At least one table matches the broker request. BrokerRequest offlineBrokerRequest = null; BrokerRequest realtimeBrokerRequest = null; // TODO: get time column name from schema or table config so that we can apply it in realtime only use case. // We get timeColumnName from time boundary service currently, which only exists for offline table. String timeColumnName = (offlineTableName != null) ? getTimeColumnName(offlineTableName) : null; if ((offlineTableName != null) && (realtimeTableName != null)) { // Hybrid table. offlineBrokerRequest = _optimizer.optimize(getOfflineBrokerRequest(brokerRequest), timeColumnName); realtimeBrokerRequest = _optimizer.optimize(getRealtimeBrokerRequest(brokerRequest), timeColumnName); } else if (offlineTableName != null) { // Offline table only. brokerRequest.getQuerySource().setTableName(offlineTableName); offlineBrokerRequest = _optimizer.optimize(brokerRequest, timeColumnName); } else { // Realtime table only. brokerRequest.getQuerySource().setTableName(realtimeTableName); realtimeBrokerRequest = _optimizer.optimize(brokerRequest, timeColumnName); } ReduceService reduceService = _reduceServiceRegistry.get(responseType); // TODO: wire up the customized BucketingSelection. return processOptimizedBrokerRequests(brokerRequest, offlineBrokerRequest, realtimeBrokerRequest, reduceService, scatterGatherStats, null, requestId); } } /** * Returns the time column name for the table name from the time boundary service. * Can return null if the time boundary service does not have the information. * * @param tableName Name of table for which to get the time column name * @return Time column name for the table. */ @Nullable private String getTimeColumnName(@Nonnull String tableName) { TimeBoundaryInfo timeBoundary = _timeBoundaryService.getTimeBoundaryInfoFor(tableName); return (timeBoundary != null) ? timeBoundary.getTimeColumn() : null; } /** * Given a broker request, use it to create an offline broker request. * * @param brokerRequest original broker request. * @return offline broker request. */ @Nonnull private BrokerRequest getOfflineBrokerRequest(@Nonnull BrokerRequest brokerRequest) { BrokerRequest offlineRequest = brokerRequest.deepCopy(); String hybridTableName = brokerRequest.getQuerySource().getTableName(); String offlineTableName = TableNameBuilder.OFFLINE.tableNameWithType(hybridTableName); offlineRequest.getQuerySource().setTableName(offlineTableName); attachTimeBoundary(hybridTableName, offlineRequest, true); return offlineRequest; } /** * Given a broker request, use it to create a realtime broker request. * * @param brokerRequest original broker request. * @return realtime broker request. */ @Nonnull private BrokerRequest getRealtimeBrokerRequest(@Nonnull BrokerRequest brokerRequest) { BrokerRequest realtimeRequest = brokerRequest.deepCopy(); String hybridTableName = brokerRequest.getQuerySource().getTableName(); String realtimeTableName = TableNameBuilder.REALTIME.tableNameWithType(hybridTableName); realtimeRequest.getQuerySource().setTableName(realtimeTableName); attachTimeBoundary(hybridTableName, realtimeRequest, false); return realtimeRequest; } /** * Attach time boundary to a broker request. * * @param hybridTableName hybrid table name. * @param brokerRequest original broker request. * @param isOfflineRequest flag for offline/realtime request. */ private void attachTimeBoundary(@Nonnull String hybridTableName, @Nonnull BrokerRequest brokerRequest, boolean isOfflineRequest) { TimeBoundaryInfo timeBoundaryInfo = _timeBoundaryService.getTimeBoundaryInfoFor(TableNameBuilder.OFFLINE.tableNameWithType(hybridTableName)); if (timeBoundaryInfo == null || timeBoundaryInfo.getTimeColumn() == null || timeBoundaryInfo.getTimeValue() == null) { LOGGER.warn("No time boundary attached for table: {}", hybridTableName); return; } // Create a range filter based on the request type. String timeValue = timeBoundaryInfo.getTimeValue(); FilterQuery timeFilterQuery = new FilterQuery(); timeFilterQuery.setOperator(FilterOperator.RANGE); timeFilterQuery.setColumn(timeBoundaryInfo.getTimeColumn()); timeFilterQuery.setNestedFilterQueryIds(new ArrayList<Integer>()); List<String> values = new ArrayList<>(); if (isOfflineRequest) { values.add("(*\t\t" + timeValue + ")"); } else { values.add("[" + timeValue + "\t\t*)"); } timeFilterQuery.setValue(values); timeFilterQuery.setId(-1); // Attach the range filter to the current filter. FilterQuery currentFilterQuery = brokerRequest.getFilterQuery(); if (currentFilterQuery != null) { FilterQuery andFilterQuery = new FilterQuery(); andFilterQuery.setOperator(FilterOperator.AND); List<Integer> nestedFilterQueryIds = new ArrayList<>(); nestedFilterQueryIds.add(currentFilterQuery.getId()); nestedFilterQueryIds.add(timeFilterQuery.getId()); andFilterQuery.setNestedFilterQueryIds(nestedFilterQueryIds); andFilterQuery.setId(-2); FilterQueryMap filterSubQueryMap = brokerRequest.getFilterSubQueryMap(); filterSubQueryMap.putToFilterQueryMap(timeFilterQuery.getId(), timeFilterQuery); filterSubQueryMap.putToFilterQueryMap(andFilterQuery.getId(), andFilterQuery); brokerRequest.setFilterQuery(andFilterQuery); brokerRequest.setFilterSubQueryMap(filterSubQueryMap); } else { FilterQueryMap filterSubQueryMap = new FilterQueryMap(); filterSubQueryMap.putToFilterQueryMap(timeFilterQuery.getId(), timeFilterQuery); brokerRequest.setFilterQuery(timeFilterQuery); brokerRequest.setFilterSubQueryMap(filterSubQueryMap); } } /** * Process the optimized broker requests for both OFFLINE and REALTIME table. * * @param originalBrokerRequest original broker request. * @param offlineBrokerRequest broker request for OFFLINE table. * @param realtimeBrokerRequest broker request for REALTIME table. * @param reduceService reduce service. * @param bucketingSelection customized bucketing selection. * @param scatterGatherStats scatter-gather statistics. * @param requestId request ID. * @return broker response. * @throws InterruptedException */ @Nonnull private BrokerResponse processOptimizedBrokerRequests(@Nonnull BrokerRequest originalBrokerRequest, @Nullable BrokerRequest offlineBrokerRequest, @Nullable BrokerRequest realtimeBrokerRequest, @Nonnull ReduceService reduceService, @Nonnull ScatterGatherStats scatterGatherStats, @Nullable BucketingSelection bucketingSelection, long requestId) throws InterruptedException { String originalTableName = originalBrokerRequest.getQuerySource().getTableName(); ResponseType serverResponseType = BrokerResponseFactory.getResponseType(originalBrokerRequest.getResponseFormat()); PhaseTimes phaseTimes = new PhaseTimes(); // Step 1: find the candidate servers to be queried for each set of segments from the routing table. // Step 2: select servers for each segment set and scatter request to the servers. String offlineTableName = null; CompositeFuture<ByteBuf> offlineCompositeFuture = null; if (offlineBrokerRequest != null) { offlineTableName = offlineBrokerRequest.getQuerySource().getTableName(); offlineCompositeFuture = routeAndScatterBrokerRequest(offlineBrokerRequest, phaseTimes, scatterGatherStats, true, bucketingSelection, requestId); } String realtimeTableName = null; CompositeFuture<ByteBuf> realtimeCompositeFuture = null; if (realtimeBrokerRequest != null) { realtimeTableName = realtimeBrokerRequest.getQuerySource().getTableName(); realtimeCompositeFuture = routeAndScatterBrokerRequest(realtimeBrokerRequest, phaseTimes, scatterGatherStats, false, bucketingSelection, requestId); } if ((offlineCompositeFuture == null) && (realtimeCompositeFuture == null)) { // No server found in either OFFLINE or REALTIME table. return BrokerResponseFactory.getStaticEmptyBrokerResponse(serverResponseType); } // Step 3: gather response from the servers. int numServersQueried = 0; long gatherStartTime = System.nanoTime(); List<ProcessingException> processingExceptions = new ArrayList<>(); Map<ServerInstance, ByteBuf> offlineServerResponseMap = null; Map<ServerInstance, ByteBuf> realtimeServerResponseMap = null; if (offlineCompositeFuture != null) { numServersQueried += offlineCompositeFuture.getNumFutures(); offlineServerResponseMap = gatherServerResponses(offlineCompositeFuture, scatterGatherStats, true, offlineTableName, processingExceptions); } if (realtimeCompositeFuture != null) { numServersQueried += realtimeCompositeFuture.getNumFutures(); realtimeServerResponseMap = gatherServerResponses(realtimeCompositeFuture, scatterGatherStats, false, realtimeTableName, processingExceptions); } phaseTimes.addToGatherTime(System.nanoTime() - gatherStartTime); if ((offlineServerResponseMap == null) && (realtimeServerResponseMap == null)) { // No response gathered. return BrokerResponseFactory.getBrokerResponseWithExceptions(serverResponseType, processingExceptions); } //Step 4: deserialize the server responses. int numServersResponded = 0; long deserializationStartTime = System.nanoTime(); Map<ServerInstance, DataTable> dataTableMap = new HashMap<>(); if (offlineServerResponseMap != null) { numServersResponded += offlineServerResponseMap.size(); deserializeServerResponses(offlineServerResponseMap, true, dataTableMap, offlineTableName, processingExceptions); } if (realtimeServerResponseMap != null) { numServersResponded += realtimeServerResponseMap.size(); deserializeServerResponses(realtimeServerResponseMap, false, dataTableMap, realtimeTableName, processingExceptions); } phaseTimes.addToDeserializationTime(System.nanoTime() - deserializationStartTime); // Step 5: reduce (merge) the server responses and create a broker response to be returned. long reduceStartTime = System.nanoTime(); BrokerResponse brokerResponse = reduceService.reduceOnDataTable(originalBrokerRequest, dataTableMap, _brokerMetrics); phaseTimes.addToReduceTime(System.nanoTime() - reduceStartTime); // Set processing exceptions and number of servers queried/responded. brokerResponse.setExceptions(processingExceptions); brokerResponse.setNumServersQueried(numServersQueried); brokerResponse.setNumServersResponded(numServersResponded); // Update broker metrics. phaseTimes.addPhaseTimesToBrokerMetrics(_brokerMetrics, originalTableName); if (brokerResponse.getExceptionsSize() > 0) { _brokerMetrics.addMeteredTableValue(originalTableName, BrokerMeter.BROKER_RESPONSES_WITH_PROCESSING_EXCEPTIONS, 1); } if (numServersQueried > numServersResponded) { _brokerMetrics.addMeteredTableValue(originalTableName, BrokerMeter.BROKER_RESPONSES_WITH_PARTIAL_SERVERS_RESPONDED, 1); } return brokerResponse; } /** * Route and scatter the broker request. * * @return composite future used to gather responses. */ @Nullable private CompositeFuture<ByteBuf> routeAndScatterBrokerRequest(@Nonnull BrokerRequest brokerRequest, @Nonnull PhaseTimes phaseTimes, @Nonnull ScatterGatherStats scatterGatherStats, boolean isOfflineTable, @Nullable BucketingSelection bucketingSelection, long requestId) throws InterruptedException { // Step 1: find the candidate servers to be queried for each set of segments from the routing table. // TODO: add checks for whether all segments are covered. long routingStartTime = System.nanoTime(); Map<ServerInstance, SegmentIdSet> segmentServices = findCandidateServers(brokerRequest); phaseTimes.addToRoutingTime(System.nanoTime() - routingStartTime); if (segmentServices == null || segmentServices.isEmpty()) { String tableName = brokerRequest.getQuerySource().getTableName(); LOGGER.info("No server found for table: {}", tableName); _brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.NO_SERVER_FOUND_EXCEPTIONS, 1); return null; } // Step 2: select servers for each segment set and scatter request to the servers. long scatterStartTime = System.nanoTime(); ScatterGatherRequestImpl scatterRequest = new ScatterGatherRequestImpl(brokerRequest, segmentServices, _replicaSelection, ReplicaSelectionGranularity.SEGMENT_ID_SET, brokerRequest.getBucketHashKey(), 0, bucketingSelection, requestId, _brokerTimeOutMs, _brokerId); CompositeFuture<ByteBuf> compositeFuture = _scatterGatherer.scatterGather(scatterRequest, scatterGatherStats, isOfflineTable, _brokerMetrics); phaseTimes.addToScatterTime(System.nanoTime() - scatterStartTime); return compositeFuture; } /** * Find the candidate servers to be queried for each set of segments from the routing table. * * @param brokerRequest broker request. * @return map from server to set of segments. */ @Nullable private Map<ServerInstance, SegmentIdSet> findCandidateServers(@Nonnull BrokerRequest brokerRequest) { String tableName = brokerRequest.getQuerySource().getTableName(); List<String> routingOptions; Map<String, String> debugOptions = brokerRequest.getDebugOptions(); if (debugOptions == null || !debugOptions.containsKey("routingOptions")) { routingOptions = Collections.emptyList(); } else { routingOptions = Splitter.on(",").omitEmptyStrings().trimResults().splitToList(debugOptions.get("routingOptions")); } RoutingTableLookupRequest routingTableLookupRequest = new RoutingTableLookupRequest(tableName, routingOptions); return _routingTable.findServers(routingTableLookupRequest); } /** * Gather responses from servers, append processing exceptions to the processing exception list passed in. * * @param compositeFuture composite future returned from scatter phase. * @param scatterGatherStats scatter-gather statistics. * @param isOfflineTable whether the scatter-gather target is an OFFLINE table. * @param tableName table name. * @param processingExceptions list of processing exceptions. * @return server response map. */ @Nullable private Map<ServerInstance, ByteBuf> gatherServerResponses( @Nonnull CompositeFuture<ByteBuf> compositeFuture, @Nonnull ScatterGatherStats scatterGatherStats, boolean isOfflineTable, @Nonnull String tableName, @Nonnull List<ProcessingException> processingExceptions) { try { Map<ServerInstance, ByteBuf> serverResponseMap = compositeFuture.get(); Map<ServerInstance, Long> responseTimes = compositeFuture.getResponseTimes(); scatterGatherStats.setResponseTimeMillis(responseTimes, isOfflineTable); return serverResponseMap; } catch (Exception e) { LOGGER.error("Caught exception while fetching responses for table: {}", tableName, e); _brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.RESPONSE_FETCH_EXCEPTIONS, 1); processingExceptions.add(QueryException.getException(QueryException.BROKER_GATHER_ERROR, e)); return null; } } /** * Deserialize the server responses, put the de-serialized data table into the data table map passed in, append * processing exceptions to the processing exception list passed in. * <p>For hybrid use case, multiple responses might be from the same instance. Use response sequence to distinguish * them. * * @param responseMap map from server to response. * @param isOfflineTable whether the responses are from an OFFLINE table. * @param dataTableMap map from server to data table. * @param tableName table name. * @param processingExceptions list of processing exceptions. */ private void deserializeServerResponses(@Nonnull Map<ServerInstance, ByteBuf> responseMap, boolean isOfflineTable, @Nonnull Map<ServerInstance, DataTable> dataTableMap, @Nonnull String tableName, @Nonnull List<ProcessingException> processingExceptions) { for (Entry<ServerInstance, ByteBuf> entry : responseMap.entrySet()) { ServerInstance serverInstance = entry.getKey(); if (!isOfflineTable) { serverInstance = new ServerInstance(serverInstance.getHostname(), serverInstance.getPort(), 1); } ByteBuf byteBuf = entry.getValue(); try { byte[] byteArray = new byte[byteBuf.readableBytes()]; byteBuf.readBytes(byteArray); dataTableMap.put(serverInstance, DataTableFactory.getDataTable(byteArray)); } catch (Exception e) { LOGGER.error("Caught exceptions while deserializing response for table: {} from server: {}", tableName, serverInstance, e); _brokerMetrics.addMeteredTableValue(tableName, BrokerMeter.DATA_TABLE_DESERIALIZATION_EXCEPTIONS, 1); processingExceptions.add(QueryException.getException(QueryException.DATA_TABLE_DESERIALIZATION_ERROR, e)); } } } /** * Container for time statistics in all phases. */ private static class PhaseTimes { private long _routingTime = 0L; private long _scatterTime = 0L; private long _gatherTime = 0L; private long _deserializationTime = 0L; private long _reduceTime = 0L; public void addToRoutingTime(long routingTime) { _routingTime += routingTime; } public void addToScatterTime(long scatterTime) { _scatterTime += scatterTime; } public void addToGatherTime(long gatherTime) { _gatherTime += gatherTime; } public void addToDeserializationTime(long deserializationTime) { _deserializationTime += deserializationTime; } public void addToReduceTime(long reduceTime) { _reduceTime += reduceTime; } public void addPhaseTimesToBrokerMetrics(BrokerMetrics brokerMetrics, String tableName) { brokerMetrics.addPhaseTiming(tableName, BrokerQueryPhase.QUERY_ROUTING, _routingTime); brokerMetrics.addPhaseTiming(tableName, BrokerQueryPhase.SCATTER_GATHER, _scatterTime + _gatherTime); brokerMetrics.addPhaseTiming(tableName, BrokerQueryPhase.DESERIALIZATION, _deserializationTime); brokerMetrics.addPhaseTiming(tableName, BrokerQueryPhase.REDUCE, _reduceTime); } } private static class ScatterGatherRequestImpl implements ScatterGatherRequest { private final BrokerRequest _brokerRequest; private final Map<ServerInstance, SegmentIdSet> _segmentServices; private final ReplicaSelection _replicaSelection; private final ReplicaSelectionGranularity _replicaSelectionGranularity; private final Object _hashKey; private final int _numSpeculativeRequests; private final BucketingSelection _bucketingSelection; private final long _requestId; private final long _requestTimeoutMs; private final String _brokerId; public ScatterGatherRequestImpl(BrokerRequest request, Map<ServerInstance, SegmentIdSet> segmentServices, ReplicaSelection replicaSelection, ReplicaSelectionGranularity replicaSelectionGranularity, Object hashKey, int numSpeculativeRequests, BucketingSelection bucketingSelection, long requestId, long requestTimeoutMs, String brokerId) { _brokerRequest = request; _segmentServices = segmentServices; _replicaSelection = replicaSelection; _replicaSelectionGranularity = replicaSelectionGranularity; _hashKey = hashKey; _numSpeculativeRequests = numSpeculativeRequests; _bucketingSelection = bucketingSelection; _requestId = requestId; _requestTimeoutMs = requestTimeoutMs; _brokerId = brokerId; } @Override public Map<ServerInstance, SegmentIdSet> getSegmentsServicesMap() { return _segmentServices; } @Override public byte[] getRequestForService(ServerInstance service, SegmentIdSet querySegments) { InstanceRequest r = new InstanceRequest(); r.setRequestId(_requestId); r.setEnableTrace(_brokerRequest.isEnableTrace()); r.setQuery(_brokerRequest); r.setSearchSegments(querySegments.getSegmentsNameList()); r.setBrokerId(_brokerId); // _serde is not threadsafe. return getSerde().serialize(r); // return _serde.serialize(r); } @Override public ReplicaSelection getReplicaSelection() { return _replicaSelection; } @Override public ReplicaSelectionGranularity getReplicaSelectionGranularity() { return _replicaSelectionGranularity; } @Override public Object getHashKey() { return _hashKey; } @Override public int getNumSpeculativeRequests() { return _numSpeculativeRequests; } @Override public BucketingSelection getPredefinedSelection() { return _bucketingSelection; } @Override public long getRequestId() { return _requestId; } @Override public long getRequestTimeoutMS() { return _requestTimeoutMs; } public SerDe getSerde() { return new SerDe(new TCompactProtocol.Factory()); } @Override public BrokerRequest getBrokerRequest() { return _brokerRequest; } } public String getRoutingTableSnapshot(String tableName) throws Exception { return _routingTable.dumpSnapshot(tableName); } }