/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.controller.validation;

import com.linkedin.pinot.common.config.AbstractTableConfig;
import com.linkedin.pinot.common.config.TableNameBuilder;
import com.linkedin.pinot.common.metadata.ZKMetadataProvider;
import com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata;
import com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata;
import com.linkedin.pinot.common.metadata.stream.KafkaStreamMetadata;
import com.linkedin.pinot.common.metrics.ValidationMetrics;
import com.linkedin.pinot.common.segment.SegmentMetadata;
import com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType;
import com.linkedin.pinot.common.utils.HLCSegmentName;
import com.linkedin.pinot.common.utils.LLCSegmentName;
import com.linkedin.pinot.common.utils.SegmentName;
import com.linkedin.pinot.common.utils.helix.HelixHelper;
import com.linkedin.pinot.common.utils.time.TimeUtils;
import com.linkedin.pinot.controller.ControllerConf;
import com.linkedin.pinot.controller.helix.core.PinotHelixResourceManager;
import com.linkedin.pinot.controller.helix.core.PinotHelixSegmentOnlineOfflineStateModelGenerator;
import com.linkedin.pinot.controller.helix.core.realtime.PinotLLCRealtimeSegmentManager;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import org.apache.helix.ZNRecord;
import org.apache.helix.model.IdealState;
import org.apache.helix.store.zk.ZkHelixPropertyStore;
import org.joda.time.Duration;
import org.joda.time.Interval;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* Manages the segment validation metrics, to ensure that all offline segments are contiguous (no missing segments) and
* that the offline push delay isn't too high.
*
* Dec 10, 2014
*/
public class ValidationManager {
private static final Logger LOGGER = LoggerFactory.getLogger(ValidationManager.class);
private final ValidationMetrics _validationMetrics;
private final ScheduledExecutorService _executorService;
private final PinotHelixResourceManager _pinotHelixResourceManager;
private final long _validationIntervalSeconds;
private final boolean _autoCreateOnError;
  private final PinotLLCRealtimeSegmentManager _llcRealtimeSegmentManager;

  /**
   * Constructs the validation manager.
   *
   * @param validationMetrics The validation metrics utility used to publish the metrics
   * @param pinotHelixResourceManager The resource manager used to interact with Helix
   * @param config The controller configuration, from which the validation frequency is read
   * @param llcRealtimeSegmentManager The manager for low-level consumer (LLC) realtime segments
   */
public ValidationManager(ValidationMetrics validationMetrics, PinotHelixResourceManager pinotHelixResourceManager,
ControllerConf config, PinotLLCRealtimeSegmentManager llcRealtimeSegmentManager) {
_validationMetrics = validationMetrics;
_pinotHelixResourceManager = pinotHelixResourceManager;
_validationIntervalSeconds = config.getValidationControllerFrequencyInSeconds();
_autoCreateOnError = true;
_llcRealtimeSegmentManager = llcRealtimeSegmentManager;
_executorService = Executors.newSingleThreadScheduledExecutor(new ThreadFactory() {
@Override
public Thread newThread(Runnable runnable) {
Thread thread = new Thread(runnable);
thread.setName("PinotValidationManagerExecutorService");
return thread;
}
});
  }

  /**
* Starts the validation manager.
*/
public void start() {
LOGGER.info("Starting validation manager");
    // Set up an executor that runs the validation task periodically, starting two minutes after startup
_executorService.scheduleWithFixedDelay(new Runnable() {
@Override
public void run() {
try {
runValidation();
} catch (Exception e) {
LOGGER.warn("Caught exception while running validation", e);
}
}
}, 120, _validationIntervalSeconds, TimeUnit.SECONDS);
  }

  /**
* Stops the validation manager.
*/
public void stop() {
// Shut down the executor
_executorService.shutdown();
  }

  /**
* Runs a validation pass over the currently loaded tables.
*/
public void runValidation() {
if (!_pinotHelixResourceManager.isLeader()) {
LOGGER.info("Skipping validation, not leader!");
return;
}
LOGGER.info("Starting validation");
// Fetch the list of tables
List<String> allTableNames = _pinotHelixResourceManager.getAllTables();
ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
for (String tableName : allTableNames) {
List<SegmentMetadata> segmentMetadataList = new ArrayList<SegmentMetadata>();
TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
AbstractTableConfig tableConfig = null;
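      // Rebuild the broker resource for this table from the currently tagged broker instances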
_pinotHelixResourceManager.rebuildBrokerResourceFromHelixTags(tableName);
// For each table, fetch the metadata for all its segments
if (tableType.equals(TableType.OFFLINE)) {
validateOfflineSegmentPush(propertyStore, tableName, segmentMetadataList);
} else if (tableType.equals(TableType.REALTIME)) {
LOGGER.info("Starting to validate table {}", tableName);
        List<RealtimeSegmentZKMetadata> realtimeSegmentZKMetadatas =
            ZKMetadataProvider.getRealtimeSegmentZKMetadataListForTable(propertyStore, tableName);
boolean countHLCSegments = true; // false if this table has ONLY LLC segments (i.e. fully migrated)
KafkaStreamMetadata streamMetadata = null;
try {
tableConfig = _pinotHelixResourceManager.getRealtimeTableConfig(tableName);
streamMetadata = new KafkaStreamMetadata(tableConfig.getIndexingConfig().getStreamConfigs());
if (streamMetadata.hasSimpleKafkaConsumerType() && !streamMetadata.hasHighLevelKafkaConsumerType()) {
countHLCSegments = false;
}
for (RealtimeSegmentZKMetadata realtimeSegmentZKMetadata : realtimeSegmentZKMetadatas) {
SegmentMetadata segmentMetadata = new SegmentMetadataImpl(realtimeSegmentZKMetadata);
segmentMetadataList.add(segmentMetadata);
}
// Update the gauge to contain the total document count in the segments
_validationMetrics.updateTotalDocumentsGauge(tableName, computeRealtimeTotalDocumentInSegments(segmentMetadataList,
countHLCSegments));
if (streamMetadata.hasSimpleKafkaConsumerType()) {
validateLLCSegments(tableName, tableConfig);
}
} catch (Exception e) {
          if (tableConfig == null) {
            LOGGER.warn("Cannot get realtime table config for {}", tableName);
          } else if (streamMetadata == null) {
            LOGGER.warn("Cannot get stream config for {}", tableName);
} else {
LOGGER.error("Exception while validating table {}", tableName, e);
}
}
} else {
LOGGER.warn("Ignoring table type {} for table {}", tableType, tableName);
}
}
LOGGER.info("Validation completed");
  }

  // For LLC segments, validate that there is at least one segment in CONSUMING state for every partition.
void validateLLCSegments(final String realtimeTableName, AbstractTableConfig tableConfig) {
LOGGER.info("Validating LLC Segments for {}", realtimeTableName);
Map<String, String> streamConfigs = tableConfig.getIndexingConfig().getStreamConfigs();
ZNRecord partitionAssignment = _llcRealtimeSegmentManager.getKafkaPartitionAssignment(realtimeTableName);
if (partitionAssignment == null) {
LOGGER.warn("No partition assignment found for table {}", realtimeTableName);
return;
}
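    // The partition assignment maps each Kafka partition id (as a string) to the list of instances hosting it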
Map<String, List<String>> partitionToHostsMap = partitionAssignment.getListFields();
    // Keep a set of Kafka partitions, and remove a partition from the set when we find a segment in CONSUMING state
    // in that partition.
Set<Integer> nonConsumingKafkaPartitions = new HashSet<>(partitionToHostsMap.size());
for (String partitionStr : partitionToHostsMap.keySet()) {
nonConsumingKafkaPartitions.add(Integer.valueOf(partitionStr));
}
IdealState idealState =
HelixHelper.getTableIdealState(_pinotHelixResourceManager.getHelixZkManager(), realtimeTableName);
if (!idealState.isEnabled()) {
// No validation to be done.
LOGGER.info("Skipping validation for {} since it is disabled", realtimeTableName);
return;
}
    // Walk through all segments in the ideal state, looking for an instance in CONSUMING state for each segment. When
    // we find one, remove the Kafka partition that the segment belongs to from the non-consuming partition set.
    // Also make sure that there are at least some LLC segments in place; if there are none, it is possible that this
    // table is in the process of being disabled for LLC.
Set<String> segmentIds = idealState.getPartitionSet();
List<String> llcSegments = new ArrayList<>(segmentIds.size());
for (String segmentId : segmentIds) {
if (SegmentName.isLowLevelConsumerSegmentName(segmentId)) {
llcSegments.add(segmentId);
Map<String, String> stateMap = idealState.getInstanceStateMap(segmentId);
Iterator<String> iterator = stateMap.values().iterator();
// If there is at least one instance in CONSUMING state, we are good.
boolean foundConsuming = false;
while (iterator.hasNext() && !foundConsuming) {
String stateString = iterator.next();
if (stateString.equals(PinotHelixSegmentOnlineOfflineStateModelGenerator.CONSUMING_STATE)) {
LOGGER.info("Found CONSUMING segment {}", segmentId);
foundConsuming = true;
}
}
if (foundConsuming) {
LLCSegmentName llcSegmentName = new LLCSegmentName(segmentId);
nonConsumingKafkaPartitions.remove(llcSegmentName.getPartitionId());
}
}
}
    // The set now contains all the Kafka partitions that do not have any segments in CONSUMING state.
    if (!llcSegments.isEmpty()) {
      // Raise the metric only if there is at least one LLC segment in the ideal state.
_validationMetrics.updateNumNonConsumingPartitionsMetric(realtimeTableName, nonConsumingKafkaPartitions.size());
// Recreate a segment for the partitions that are missing one.
for (Integer kafkaPartition : nonConsumingKafkaPartitions) {
LOGGER.warn("Table {}, kafka partition {} has no segments in CONSUMING state (out of {} llc segments)",
realtimeTableName, kafkaPartition, llcSegments.size());
}
if (_autoCreateOnError) {
_llcRealtimeSegmentManager.createConsumingSegment(realtimeTableName, nonConsumingKafkaPartitions, llcSegments,
tableConfig);
_llcRealtimeSegmentManager.completeCommittingSegments(realtimeTableName, llcSegments);
}
}
    // Make this call after the other validations (so that we verify consistency with the existing partition
    // assignment). This call may end up changing the Kafka partition assignment for the table.
_llcRealtimeSegmentManager.updateKafkaPartitionsIfNecessary(realtimeTableName, tableConfig);
  }

  // For offline segment pushes, validate that there are no missing segments, and update metrics
private void validateOfflineSegmentPush(ZkHelixPropertyStore<ZNRecord> propertyStore, String tableName,
List<SegmentMetadata> segmentMetadataList) {
List<OfflineSegmentZKMetadata> offlineSegmentZKMetadatas = ZKMetadataProvider
.getOfflineSegmentZKMetadataListForTable(propertyStore, tableName);
for (OfflineSegmentZKMetadata offlineSegmentZKMetadata : offlineSegmentZKMetadatas) {
SegmentMetadata segmentMetadata = new SegmentMetadataImpl(offlineSegmentZKMetadata);
segmentMetadataList.add(segmentMetadata);
}
// Calculate missing segments only for offline tables
int missingSegmentCount = 0;
    // Compute the missing segments if there are more than two segments
if (2 < segmentMetadataList.size()) {
List<Interval> segmentIntervals = new ArrayList<Interval>();
for (SegmentMetadata segmentMetadata : segmentMetadataList) {
Interval timeInterval = segmentMetadata.getTimeInterval();
if (timeInterval != null && TimeUtils.timeValueInValidRange(timeInterval.getStartMillis()) && TimeUtils
.timeValueInValidRange(timeInterval.getEndMillis())) {
segmentIntervals.add(timeInterval);
}
}
List<Interval> missingIntervals = computeMissingIntervals(segmentIntervals, segmentMetadataList.get(0).getTimeGranularity());
missingSegmentCount = missingIntervals.size();
for (Interval missingInterval : missingIntervals) {
LOGGER.warn("Missing data in table {} for time interval {}", tableName, missingInterval);
}
}
// Update the gauge that contains the number of missing segments
_validationMetrics.updateMissingSegmentsGauge(tableName, missingSegmentCount);
// Compute the max segment end time and max segment push time
long maxSegmentEndTime = Long.MIN_VALUE;
long maxSegmentPushTime = Long.MIN_VALUE;
for (SegmentMetadata segmentMetadata : segmentMetadataList) {
Interval segmentInterval = segmentMetadata.getTimeInterval();
if (segmentInterval != null && maxSegmentEndTime < segmentInterval.getEndMillis()) {
maxSegmentEndTime = segmentInterval.getEndMillis();
}
long segmentPushTime = segmentMetadata.getPushTime();
long segmentRefreshTime = segmentMetadata.getRefreshTime();
long segmentUpdateTime = Math.max(segmentPushTime, segmentRefreshTime);
if (maxSegmentPushTime < segmentUpdateTime) {
maxSegmentPushTime = segmentUpdateTime;
}
}
// Update the gauges that contain the delay between the current time and last segment end time
_validationMetrics.updateOfflineSegmentDelayGauge(tableName, maxSegmentEndTime);
_validationMetrics.updateLastPushTimeGauge(tableName, maxSegmentPushTime);
// Update the gauge to contain the total document count in the segments
_validationMetrics.updateTotalDocumentsGauge(tableName, computeOfflineTotalDocumentInSegments(segmentMetadataList));
// Update the gauge to contain the total number of segments for this table
_validationMetrics.updateSegmentCountGauge(tableName, segmentMetadataList.size());
}
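
  /**
   * Computes the total raw document count across the given offline segments.
   *
   * @param segmentMetadataList The list of segment metadata for the table
   * @return The total raw document count across all segments
   */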
public static long computeOfflineTotalDocumentInSegments(List<SegmentMetadata> segmentMetadataList) {
long totalDocumentCount = 0;
for (SegmentMetadata segmentMetadata : segmentMetadataList) {
totalDocumentCount += segmentMetadata.getTotalRawDocs();
}
return totalDocumentCount;
}
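
  /**
   * Computes the total raw document count across the given realtime segments. When countHLCSegments is true, only
   * high-level consumer (HLC) segments belonging to a single group id are counted, since segments with other group
   * ids are replicas; otherwise, only low-level consumer (LLC) segments are counted.
   *
   * @param segmentMetadataList The list of segment metadata for the table
   * @param countHLCSegments True to count HLC segments only, false to count LLC segments only
   * @return The total raw document count
   */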
public static long computeRealtimeTotalDocumentInSegments(List<SegmentMetadata> segmentMetadataList,
boolean countHLCSegments) {
long totalDocumentCount = 0;
String groupId = "";
for (SegmentMetadata segmentMetadata : segmentMetadataList) {
String segmentName = segmentMetadata.getName();
if (SegmentName.isHighLevelConsumerSegmentName(segmentName)) {
if (countHLCSegments) {
HLCSegmentName hlcSegmentName = new HLCSegmentName(segmentName);
String segmentGroupIdName = hlcSegmentName.getGroupId();
if (groupId.isEmpty()) {
groupId = segmentGroupIdName;
}
          // Discard all segments with a different group id, as they are replicas of the segments in the first group
if (groupId.equals(segmentGroupIdName) && segmentMetadata.getTotalRawDocs() >= 0) {
totalDocumentCount += segmentMetadata.getTotalRawDocs();
}
}
} else {
// Low level segments
if (!countHLCSegments) {
totalDocumentCount += segmentMetadata.getTotalRawDocs();
}
}
}
return totalDocumentCount;
  }

  /**
* Computes a list of missing intervals, given a list of existing intervals and the expected frequency of the
* intervals.
*
* @param segmentIntervals The list of existing intervals
* @param frequency The expected interval frequency
* @return The list of missing intervals
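   *
   * <p>A hypothetical illustration (the intervals and dates are made up): two daily segments with a two-day hole
   * between them.
   * <pre>{@code
   *   List<Interval> intervals = Arrays.asList(
   *       new Interval(new DateTime(2014, 1, 1, 0, 0), new DateTime(2014, 1, 1, 23, 59)),
   *       new Interval(new DateTime(2014, 1, 4, 0, 0), new DateTime(2014, 1, 4, 23, 59)));
   *   // Returns two intervals, one covering Jan 2 and one covering Jan 3
   *   List<Interval> missing = computeMissingIntervals(intervals, Duration.standardDays(1));
   * }</pre>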
*/
public static List<Interval> computeMissingIntervals(List<Interval> segmentIntervals, Duration frequency) {
    // Sanity check for frequency
if (frequency == null) {
return Collections.emptyList();
}
    // Default the segment granularity to day level if it is smaller than one hour
if (frequency.getMillis() < Duration.standardHours(1).getMillis()) {
frequency = Duration.standardDays(1);
}
// If there are less than two segments, none can be missing
if (segmentIntervals.size() < 2) {
return Collections.emptyList();
}
// Sort the intervals by ascending starting time
List<Interval> sortedSegmentIntervals = new ArrayList<Interval>(segmentIntervals);
    Collections.sort(sortedSegmentIntervals, new Comparator<Interval>() {
      @Override
      public int compare(Interval first, Interval second) {
        return Long.compare(first.getStartMillis(), second.getStartMillis());
      }
    });
// Find the minimum starting time and maximum ending time
final long startTime = sortedSegmentIntervals.get(0).getStartMillis();
long endTime = Long.MIN_VALUE;
for (Interval sortedSegmentInterval : sortedSegmentIntervals) {
if (endTime < sortedSegmentInterval.getEndMillis()) {
endTime = sortedSegmentInterval.getEndMillis();
}
}
final long frequencyMillis = frequency.getMillis();
int lastEndIntervalCount = 0;
List<Interval> missingIntervals = new ArrayList<Interval>(10);
for (Interval segmentInterval : sortedSegmentIntervals) {
int startIntervalCount = (int) ((segmentInterval.getStartMillis() - startTime) / frequencyMillis);
int endIntervalCount = (int) ((segmentInterval.getEndMillis() - startTime) / frequencyMillis);
// If there is at least one complete missing interval between the end of the previous interval and the start of
// the current interval, then mark the missing interval(s) as missing
if (lastEndIntervalCount < startIntervalCount - 1) {
for (int missingIntervalIndex = lastEndIntervalCount + 1; missingIntervalIndex < startIntervalCount; ++missingIntervalIndex) {
missingIntervals.add(new Interval(startTime + frequencyMillis * missingIntervalIndex, startTime + frequencyMillis * (missingIntervalIndex + 1) - 1));
}
}
lastEndIntervalCount = Math.max(lastEndIntervalCount, endIntervalCount);
}
return missingIntervals;
  }

  /**
* Counts the number of missing segments, given their start times and their expected frequency.
*
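   * <p>A hypothetical illustration (the start times are made up): with daily frequency, segments starting on Jan 1,
   * Jan 2 and Jan 5 leave Jan 3 and Jan 4 without a segment, so the method returns 2.
   * <pre>{@code
   *   long[] sortedStartTimes = {
   *       new DateTime(2014, 1, 1, 0, 0).getMillis(),
   *       new DateTime(2014, 1, 2, 0, 0).getMillis(),
   *       new DateTime(2014, 1, 5, 0, 0).getMillis()};
   *   int missing = countMissingSegments(sortedStartTimes, TimeUnit.DAYS); // returns 2
   * }</pre>
   *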
* @param sortedStartTimes Start times for the segments, sorted in ascending order.
   * @param frequency The expected segment frequency (e.g. daily, hourly)
   * @return The number of missing segments
*/
public static int countMissingSegments(long[] sortedStartTimes, TimeUnit frequency) {
// If there are less than two segments, none can be missing
if (sortedStartTimes.length < 2) {
return 0;
}
final long frequencyMillis = frequency.toMillis(1);
final long halfFrequencyMillis = frequencyMillis / 2;
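    // Adding half the frequency before dividing rounds the computed interval counts to the nearest whole interval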
final long firstStartTime = sortedStartTimes[0];
final long lastStartTime = sortedStartTimes[sortedStartTimes.length - 1];
final int expectedSegmentCount = (int) ((lastStartTime + halfFrequencyMillis - firstStartTime) / frequencyMillis);
int missingSegments = 0;
int currentIndex = 1;
for (int expectedIntervalCount = 1; expectedIntervalCount <= expectedSegmentCount;) {
// Count the number of complete intervals that are found
final int intervalCount =
(int) ((sortedStartTimes[currentIndex] + halfFrequencyMillis - firstStartTime) / frequencyMillis);
// Does this segment have the expected interval count?
if (intervalCount == expectedIntervalCount) {
// Yes, advance both the current index and expected interval count
++expectedIntervalCount;
++currentIndex;
} else {
if (intervalCount < expectedIntervalCount) {
// Duplicate segment, just advance the index
++currentIndex;
} else {
// Missing segment(s), advance the index, increment the number of missing segments by the number of missing
// intervals and set the expected interval to the following one
missingSegments += intervalCount - expectedIntervalCount;
expectedIntervalCount = intervalCount + 1;
++currentIndex;
}
}
}
return missingSegments;
}
}