/**
 * Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.linkedin.pinot.controller.helix.core.retention;

import com.linkedin.pinot.common.config.AbstractTableConfig;
import com.linkedin.pinot.common.config.SegmentsValidationAndRetentionConfig;
import com.linkedin.pinot.common.config.TableNameBuilder;
import com.linkedin.pinot.common.metadata.ZKMetadataProvider;
import com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata;
import com.linkedin.pinot.common.metadata.segment.RealtimeSegmentZKMetadata;
import com.linkedin.pinot.common.metadata.segment.SegmentZKMetadata;
import com.linkedin.pinot.common.utils.CommonConstants;
import com.linkedin.pinot.common.utils.CommonConstants.Helix.TableType;
import com.linkedin.pinot.common.utils.CommonConstants.Segment.Realtime.Status;
import com.linkedin.pinot.common.utils.SegmentName;
import com.linkedin.pinot.common.utils.helix.HelixHelper;
import com.linkedin.pinot.controller.helix.core.PinotHelixResourceManager;
import com.linkedin.pinot.controller.helix.core.retention.strategy.RetentionStrategy;
import com.linkedin.pinot.controller.helix.core.retention.strategy.TimeRetentionStrategy;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nonnull;
import org.apache.helix.ZNRecord;
import org.apache.helix.model.IdealState;
import org.apache.helix.store.zk.ZkHelixPropertyStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * RetentionManager is scheduled to run only on the leader controller.
 * It first scans the table configs to determine each table's segment retention strategy, and then
 * purges the segments that fall outside of that retention.
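 *
 * <p>A minimal usage sketch (the run frequency value below is illustrative, and the resource manager is
 * assumed to be the one already created by the controller):
 * <pre>{@code
 *   RetentionManager retentionManager = new RetentionManager(pinotHelixResourceManager, 6 * 60 * 60);
 *   retentionManager.start();
 *   // ... and on controller shutdown:
 *   retentionManager.stop();
 * }</pre>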
 */
public class RetentionManager {
  public static final Logger LOGGER = LoggerFactory.getLogger(RetentionManager.class);

  private final PinotHelixResourceManager _pinotHelixResourceManager;

  private final Map<String, RetentionStrategy> _tableDeletionStrategy = new HashMap<>();
  private final Map<String, List<SegmentZKMetadata>> _segmentMetadataMap = new HashMap<>();
  private final Object _lock = new Object();

  private final ScheduledExecutorService _executorService;
  private final int _runFrequencyInSeconds;
  private final int _deletedSegmentsRetentionInDays;

  private static final int RETENTION_TIME_FOR_OLD_LLC_SEGMENTS_DAYS = 5;
  private static final int DEFAULT_RETENTION_FOR_DELETED_SEGMENTS_DAYS = 7;

  public RetentionManager(PinotHelixResourceManager pinotHelixResourceManager, int runFrequencyInSeconds,
      int deletedSegmentsRetentionInDays) {
    _pinotHelixResourceManager = pinotHelixResourceManager;
    _runFrequencyInSeconds = runFrequencyInSeconds;
    _deletedSegmentsRetentionInDays = deletedSegmentsRetentionInDays;
    _executorService = Executors.newSingleThreadScheduledExecutor(new ThreadFactory() {
      @Override
      public Thread newThread(@Nonnull Runnable runnable) {
        Thread thread = new Thread(runnable);
        thread.setName("PinotRetentionManagerExecutorService");
        return thread;
      }
    });
  }

  public RetentionManager(PinotHelixResourceManager pinotHelixResourceManager, int runFrequencyInSeconds) {
    this(pinotHelixResourceManager, runFrequencyInSeconds, DEFAULT_RETENTION_FOR_DELETED_SEGMENTS_DAYS);
  }

  public static long getRetentionTimeForOldLLCSegmentsDays() {
    return RETENTION_TIME_FOR_OLD_LLC_SEGMENTS_DAYS;
  }

  public void start() {
    scheduleRetentionThreadWithFrequency(_runFrequencyInSeconds);
    LOGGER.info("RetentionManager is started!");
  }

  private void scheduleRetentionThreadWithFrequency(int runFrequencyInSeconds) {
    _executorService.scheduleWithFixedDelay(new Runnable() {
      @Override
      public void run() {
        synchronized (getLock()) {
          execute();
        }
      }
    }, Math.min(50, runFrequencyInSeconds), runFrequencyInSeconds, TimeUnit.SECONDS);
  }

  private Object getLock() {
    return _lock;
  }

  private void execute() {
    try {
      if (_pinotHelixResourceManager.isLeader()) {
        LOGGER.info("Trying to run retentionManager!");
        updateDeletionStrategiesForEntireCluster();
        LOGGER.info("Finished update deletion strategies for entire cluster!");
        updateSegmentMetadataForEntireCluster();
        LOGGER.info("Finished update segment metadata for entire cluster!");
        scanSegmentMetadataAndPurge();
        LOGGER.info("Finished segment purge for entire cluster!");
        removeAgedDeletedSegments();
        LOGGER.info("Finished remove aged deleted segments!");
      } else {
        LOGGER.info("Not leader of the controller, sleep!");
      }
    } catch (Exception e) {
      LOGGER.error("Caught exception while running retention", e);
    }
  }

  private void scanSegmentMetadataAndPurge() {
    for (String tableName : _segmentMetadataMap.keySet()) {
      List<SegmentZKMetadata> segmentZKMetadataList = _segmentMetadataMap.get(tableName);
      List<String> segmentsToDelete = new ArrayList<>(128);
      IdealState idealState = null;
      try {
        if (TableNameBuilder.getTableTypeFromTableName(tableName).equals(TableType.REALTIME)) {
          idealState = HelixHelper.getTableIdealState(_pinotHelixResourceManager.getHelixZkManager(), tableName);
        }
      } catch (Exception e) {
        LOGGER.warn("Could not get idealstate for {}", tableName, e);
        // Ignore, worst case we have some old inactive segments in place.
      }
      for (SegmentZKMetadata segmentZKMetadata : segmentZKMetadataList) {
        RetentionStrategy deletionStrategy = _tableDeletionStrategy.get(tableName);
        if (deletionStrategy == null) {
          LOGGER.info("No Retention strategy found for segment: {}", segmentZKMetadata.getSegmentName());
          continue;
        }
        if (segmentZKMetadata instanceof RealtimeSegmentZKMetadata) {
          final RealtimeSegmentZKMetadata realtimeSegmentZKMetadata = (RealtimeSegmentZKMetadata) segmentZKMetadata;
          if (realtimeSegmentZKMetadata.getStatus() == Status.IN_PROGRESS) {
            final String segmentId = realtimeSegmentZKMetadata.getSegmentName();
            if (SegmentName.isHighLevelConsumerSegmentName(segmentId)) {
              continue;
            }
            // This is an in-progress LLC segment. Delete any old ones hanging around. Do not delete
            // segments that are current since there may be a race with the ValidationManager trying to
            // auto-create LLC segments.
            if (shouldDeleteInProgressLLCSegment(segmentId, idealState, realtimeSegmentZKMetadata)) {
              segmentsToDelete.add(segmentId);
            }
            continue;
          }
        }
        if (deletionStrategy.isPurgeable(segmentZKMetadata)) {
          LOGGER.info("Marking segment to delete: {}", segmentZKMetadata.getSegmentName());
          segmentsToDelete.add(segmentZKMetadata.getSegmentName());
        }
      }
      if (segmentsToDelete.size() > 0) {
        LOGGER.info("Trying to delete {} segments for table {}", segmentsToDelete.size(), tableName);
        _pinotHelixResourceManager.deleteSegments(tableName, segmentsToDelete);
      }
    }
  }

  private void removeAgedDeletedSegments() {
    // Trigger clean-up for deleted segments from the deleted directory
    _pinotHelixResourceManager.getSegmentDeletionManager().removeAgedDeletedSegments(_deletedSegmentsRetentionInDays);
  }

  private boolean shouldDeleteInProgressLLCSegment(final String segmentId, final IdealState idealState,
      RealtimeSegmentZKMetadata segmentZKMetadata) {
    if (idealState == null) {
      return false;
    }
    Map<String, String> stateMap = idealState.getInstanceStateMap(segmentId);
    if (stateMap == null) {
      // Segment is there in the propertystore but not in the idealstate. Mark for deletion.
      return true;
    } else {
      Set<String> states = new HashSet<>(stateMap.values());
      if (states.size() == 1
          && states.contains(CommonConstants.Helix.StateModel.SegmentOnlineOfflineStateModel.OFFLINE)) {
        // All replicas of this segment are offline, delete it if it is old enough
        final long now = System.currentTimeMillis();
        if (now - segmentZKMetadata.getCreationTime() >= TimeUnit.DAYS.toMillis(
            RETENTION_TIME_FOR_OLD_LLC_SEGMENTS_DAYS)) {
          return true;
        }
      }
    }
    return false;
  }

  private void updateDeletionStrategiesForEntireCluster() {
    List<String> tableNames = _pinotHelixResourceManager.getAllTables();
    for (String tableName : tableNames) {
      updateDeletionStrategyForTable(tableName);
    }
  }

  private void updateDeletionStrategyForTable(String tableName) {
    TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
    assert tableType != null;
    switch (tableType) {
      case OFFLINE:
        updateDeletionStrategyForOfflineTable(tableName);
        break;
      case REALTIME:
        updateDeletionStrategyForRealtimeTable(tableName);
        break;
      default:
        throw new IllegalArgumentException("No table type matches table name: " + tableName);
    }
  }

  /**
   * Update deletion strategy for offline table.
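   *
   * <p>For illustration, a retention section of an offline table config that maps to a time-based strategy
   * might look like the hypothetical JSON fragment below; the section name and values are illustrative, and
   * the field names simply mirror the {@link SegmentsValidationAndRetentionConfig} getters used in this method:
   * <pre>{@code
   *   "segmentsConfig": {
   *     "segmentPushType": "APPEND",
   *     "retentionTimeUnit": "DAYS",
   *     "retentionTimeValue": "365"
   *   }
   * }</pre>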
   * <ul>
   *   <li>Keep the current deletion strategy when any of the following happens:
   *     <ul>
   *       <li>Failed to fetch the retention config.</li>
   *       <li>Push type is not valid (neither 'APPEND' nor 'REFRESH').</li>
   *     </ul>
   *   </li>
   *   <li>Remove the deletion strategy when any of the following happens:
   *     <ul>
   *       <li>Push type is set to 'REFRESH'.</li>
   *       <li>No valid retention time is set.</li>
   *     </ul>
   *   </li>
   *   <li>Update the deletion strategy when push type is set to 'APPEND' and a valid retention time is set.</li>
   * </ul>
   */
  private void updateDeletionStrategyForOfflineTable(String offlineTableName) {
    // Fetch table config.
    AbstractTableConfig offlineTableConfig;
    try {
      offlineTableConfig =
          ZKMetadataProvider.getOfflineTableConfig(_pinotHelixResourceManager.getPropertyStore(), offlineTableName);
      if (offlineTableConfig == null) {
        LOGGER.error("Table config is null, skip updating deletion strategy for table: {}.", offlineTableName);
        return;
      }
    } catch (Exception e) {
      LOGGER.error("Caught exception while fetching table config, skip updating deletion strategy for table: {}.",
          offlineTableName, e);
      return;
    }

    // Fetch validation config.
    SegmentsValidationAndRetentionConfig validationConfig = offlineTableConfig.getValidationConfig();
    if (validationConfig == null) {
      LOGGER.error("Validation config is null, skip updating deletion strategy for table: {}.", offlineTableName);
      return;
    }

    // Fetch push type.
    String segmentPushType = validationConfig.getSegmentPushType();
    if ((segmentPushType == null)
        || (!segmentPushType.equalsIgnoreCase("APPEND") && !segmentPushType.equalsIgnoreCase("REFRESH"))) {
      LOGGER.error(
          "Segment push type: {} is not valid ('APPEND' or 'REFRESH'), skip updating deletion strategy for table: {}.",
          segmentPushType, offlineTableName);
      return;
    }
    if (segmentPushType.equalsIgnoreCase("REFRESH")) {
      LOGGER.info("Segment push type is set to 'REFRESH', remove deletion strategy for table: {}.", offlineTableName);
      _tableDeletionStrategy.remove(offlineTableName);
      return;
    }

    // Fetch retention time unit and value.
    String retentionTimeUnit = validationConfig.getRetentionTimeUnit();
    String retentionTimeValue = validationConfig.getRetentionTimeValue();
    if (((retentionTimeUnit == null) || retentionTimeUnit.isEmpty())
        || ((retentionTimeValue == null) || retentionTimeValue.isEmpty())) {
      LOGGER.info("Retention time unit/value is not set, remove deletion strategy for table: {}.", offlineTableName);
      _tableDeletionStrategy.remove(offlineTableName);
      return;
    }

    // Update time retention strategy.
    try {
      TimeRetentionStrategy timeRetentionStrategy = new TimeRetentionStrategy(retentionTimeUnit, retentionTimeValue);
      _tableDeletionStrategy.put(offlineTableName, timeRetentionStrategy);
      LOGGER.info("Updated deletion strategy for table: {} using retention time: {} {}.", offlineTableName,
          retentionTimeValue, retentionTimeUnit);
    } catch (Exception e) {
      LOGGER.error(
          "Caught exception while building deletion strategy with retention time: {} {}, remove deletion strategy for table: {}.",
          retentionTimeValue, retentionTimeUnit, offlineTableName, e);
      _tableDeletionStrategy.remove(offlineTableName);
    }
  }

  /**
   * Update deletion strategy for realtime table.
   * <ul>
   *   <li>Keep the current deletion strategy when failed to get a valid retention time.</li>
   *   <li>Update the deletion strategy when a valid retention time is set.</li>
   * </ul>
   * The reason for this is that we don't allow realtime tables without a deletion strategy.
   */
  private void updateDeletionStrategyForRealtimeTable(String realtimeTableName) {
    try {
      AbstractTableConfig realtimeTableConfig =
          ZKMetadataProvider.getRealtimeTableConfig(_pinotHelixResourceManager.getPropertyStore(), realtimeTableName);
      assert realtimeTableConfig != null;
      SegmentsValidationAndRetentionConfig validationConfig = realtimeTableConfig.getValidationConfig();
      TimeRetentionStrategy timeRetentionStrategy =
          new TimeRetentionStrategy(validationConfig.getRetentionTimeUnit(), validationConfig.getRetentionTimeValue());
      _tableDeletionStrategy.put(realtimeTableName, timeRetentionStrategy);
    } catch (Exception e) {
      LOGGER.error("Caught exception while updating deletion strategy, skip updating deletion strategy for table: {}.",
          realtimeTableName, e);
    }
  }

  private void updateSegmentMetadataForEntireCluster() {
    List<String> tableNames = _pinotHelixResourceManager.getAllTables();
    for (String tableName : tableNames) {
      _segmentMetadataMap.put(tableName, retrieveSegmentMetadataForTable(tableName));
    }
  }

  private List<SegmentZKMetadata> retrieveSegmentMetadataForTable(String tableName) {
    List<SegmentZKMetadata> segmentMetadataList = new ArrayList<>();
    ZkHelixPropertyStore<ZNRecord> propertyStore = _pinotHelixResourceManager.getPropertyStore();
    TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
    assert tableType != null;
    switch (tableType) {
      case OFFLINE:
        List<OfflineSegmentZKMetadata> offlineSegmentZKMetadatas =
            ZKMetadataProvider.getOfflineSegmentZKMetadataListForTable(propertyStore, tableName);
        for (OfflineSegmentZKMetadata offlineSegmentZKMetadata : offlineSegmentZKMetadatas) {
          segmentMetadataList.add(offlineSegmentZKMetadata);
        }
        break;
      case REALTIME:
        List<RealtimeSegmentZKMetadata> realtimeSegmentZKMetadatas =
            ZKMetadataProvider.getRealtimeSegmentZKMetadataListForTable(propertyStore, tableName);
        for (RealtimeSegmentZKMetadata realtimeSegmentZKMetadata : realtimeSegmentZKMetadatas) {
          segmentMetadataList.add(realtimeSegmentZKMetadata);
        }
        break;
      default:
        throw new IllegalArgumentException("No table type matches table name: " + tableName);
    }
    return segmentMetadataList;
  }

  public void stop() {
    _executorService.shutdown();
  }
}