/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.server.starter.helix;
import com.linkedin.pinot.common.Utils;
import com.linkedin.pinot.common.config.AbstractTableConfig;
import com.linkedin.pinot.common.config.TableNameBuilder;
import com.linkedin.pinot.common.data.DataManager;
import com.linkedin.pinot.common.data.Schema;
import com.linkedin.pinot.common.metadata.ZKMetadataProvider;
import com.linkedin.pinot.common.metadata.segment.OfflineSegmentZKMetadata;
import com.linkedin.pinot.common.segment.SegmentMetadata;
import com.linkedin.pinot.common.segment.SegmentMetadataLoader;
import com.linkedin.pinot.common.segment.fetcher.SegmentFetcherFactory;
import com.linkedin.pinot.common.utils.CommonConstants;
import com.linkedin.pinot.common.utils.TarGzCompressionUtils;
import com.linkedin.pinot.core.segment.index.SegmentMetadataImpl;
import com.linkedin.pinot.core.segment.index.loader.LoaderUtils;
import com.linkedin.pinot.core.segment.index.loader.V3RemoveIndexException;
import java.io.File;
import java.util.concurrent.TimeUnit;
import org.apache.commons.configuration.Configuration;
import org.apache.commons.io.FileUtils;
import org.apache.helix.ZNRecord;
import org.apache.helix.store.zk.ZkHelixPropertyStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Makes segments available to the {@link DataManager}: downloads OFFLINE segments to local disk (or reuses a copy
 * that is already on disk) and reloads segments that are already being served.
 *
 * <p>The maximum number of download attempts and the minimum delay between attempts are read from the
 * configuration passed to the constructor, falling back to the defaults in {@link CommonConstants.Server}.
 */
public class SegmentFetcherAndLoader {
  private static final Logger LOGGER = LoggerFactory.getLogger(SegmentFetcherAndLoader.class);

  private final ZkHelixPropertyStore<ZNRecord> _propertyStore;
  private final DataManager _dataManager;
  private final SegmentMetadataLoader _metadataLoader;
  private final String _instanceId;
  private final int _segmentLoadMaxRetryCount;
  private final long _segmentLoadMinRetryDelayMs; // Min delay (in msecs) between retries

  /**
   * Creates the loader, reads the retry settings and initializes the {@link SegmentFetcherFactory}.
   *
   * @param dataManager data manager that segments are added to / reloaded in
   * @param metadataLoader loader used to read the metadata of a freshly downloaded segment directory
   * @param propertyStore property store from which segment metadata, table configs and schemas are read
   * @param pinotHelixProperties configuration holding the retry settings; also passed to the segment fetcher factory
   * @param instanceId id of this instance
   */
  public SegmentFetcherAndLoader(DataManager dataManager, SegmentMetadataLoader metadataLoader,
      ZkHelixPropertyStore<ZNRecord> propertyStore, Configuration pinotHelixProperties, String instanceId) {
    _propertyStore = propertyStore;
    _dataManager = dataManager;
    _metadataLoader = metadataLoader;
    _instanceId = instanceId;

    int maxRetries = Integer.parseInt(CommonConstants.Server.DEFAULT_SEGMENT_LOAD_MAX_RETRY_COUNT);
    try {
      maxRetries = pinotHelixProperties
          .getInt(CommonConstants.Server.CONFIG_OF_SEGMENT_LOAD_MAX_RETRY_COUNT, maxRetries);
    } catch (Exception e) {
      // Keep the default value, but make the problem visible instead of swallowing it
      LOGGER.warn("Could not read config {}, using default value {}",
          CommonConstants.Server.CONFIG_OF_SEGMENT_LOAD_MAX_RETRY_COUNT, maxRetries, e);
    }
    _segmentLoadMaxRetryCount = maxRetries;

    long minRetryDelayMillis =
        Long.parseLong(CommonConstants.Server.DEFAULT_SEGMENT_LOAD_MIN_RETRY_DELAY_MILLIS);
    try {
      minRetryDelayMillis = pinotHelixProperties.getLong(
          CommonConstants.Server.CONFIG_OF_SEGMENT_LOAD_MIN_RETRY_DELAY_MILLIS,
          minRetryDelayMillis);
    } catch (Exception e) {
      // Keep the default value, but make the problem visible instead of swallowing it
      LOGGER.warn("Could not read config {}, using default value {}",
          CommonConstants.Server.CONFIG_OF_SEGMENT_LOAD_MIN_RETRY_DELAY_MILLIS, minRetryDelayMillis, e);
    }
    _segmentLoadMinRetryDelayMs = minRetryDelayMillis;

    SegmentFetcherFactory.initSegmentFetcherFactory(pinotHelixProperties);
  }

  /**
   * Adds the given OFFLINE segment to the data manager, or replaces it if the CRC recorded in the property store
   * differs from the CRC of the copy this server has.
   *
   * <p>Order of checks: (1) if the segment is not loaded in memory but a readable copy with a matching CRC exists on
   * disk, that copy is loaded and the method returns; (2) otherwise, if the metadata differs (or no local copy
   * exists), the segment is downloaded from its download URL and loaded; (3) otherwise nothing is done.
   *
   * @param tableName name of the table the segment belongs to
   * @param segmentId name of the segment
   * @param retryOnFailure if true, a failed download/load is retried up to the configured maximum number of
   *     attempts; if false, exactly one attempt is made
   */
  public void addOrReplaceOfflineSegment(String tableName, String segmentId, boolean retryOnFailure) {
    OfflineSegmentZKMetadata offlineSegmentZKMetadata =
        ZKMetadataProvider.getOfflineSegmentZKMetadata(_propertyStore, tableName, segmentId);

    // Try to load table schema from Helix property store.
    // This schema is used for adding default values for newly added columns.
    Schema schema = ZKMetadataProvider.getOfflineTableSchema(_propertyStore, tableName);

    LOGGER.info("Adding or replacing segment {} for table {}, metadata {}", segmentId, tableName,
        offlineSegmentZKMetadata);
    try {
      SegmentMetadata segmentMetadataForCheck = new SegmentMetadataImpl(offlineSegmentZKMetadata);

      // We lock the segment in order to get its metadata, and then release the lock, so it is possible
      // that the segment is dropped after we get its metadata.
      SegmentMetadata localSegmentMetadata = _dataManager.getSegmentMetadata(tableName, segmentId);

      if (localSegmentMetadata == null) {
        LOGGER.info("Segment {} of table {} is not loaded in memory, checking disk", segmentId, tableName);
        File indexDir = new File(getSegmentLocalDirectory(tableName, segmentId));
        // Restart during segment reload might leave segment in inconsistent state (index directory might not exist but
        // segment backup directory existed), need to first try to recover from reload failure before checking the
        // existence of the index directory and loading segment metadata from it
        LoaderUtils.reloadFailureRecovery(indexDir);
        if (indexDir.exists()) {
          LOGGER.info("Segment {} of table {} found on disk, attempting to load it", segmentId, tableName);
          try {
            localSegmentMetadata = new SegmentMetadataImpl(indexDir);
            LOGGER.info("Found segment {} of table {} with crc {} on disk", segmentId, tableName,
                localSegmentMetadata.getCrc());
          } catch (Exception e) {
            // The on-disk copy is unreadable: remove it so that the segment is downloaded again below.
            LOGGER.error("Failed to load segment metadata from {}. Deleting it.", indexDir, e);
            FileUtils.deleteQuietly(indexDir);
            localSegmentMetadata = null;
          }
          try {
            if (!isNewSegmentMetadata(localSegmentMetadata, segmentMetadataForCheck, segmentId, tableName)) {
              LOGGER.info("Segment metadata same as before, loading {} of table {} (crc {}) from disk", segmentId,
                  tableName, localSegmentMetadata.getCrc());
              AbstractTableConfig tableConfig = ZKMetadataProvider.getOfflineTableConfig(_propertyStore, tableName);
              _dataManager.addSegment(localSegmentMetadata, tableConfig, schema);
              // TODO Update zk metadata with CRC for this instance
              return;
            }
          } catch (V3RemoveIndexException e) {
            LOGGER.info(
                "Unable to remove local index from V3 format segment: {}, table: {}, try to reload it from controller.",
                segmentId, tableName, e);
            FileUtils.deleteQuietly(indexDir);
            localSegmentMetadata = null;
          } catch (Exception e) {
            LOGGER.error("Failed to load {} of table {} from local, will try to reload it from controller!", segmentId,
                tableName, e);
            FileUtils.deleteQuietly(indexDir);
            localSegmentMetadata = null;
          }
        }
      }

      // There is a very unlikely race condition: we may have gotten the metadata of a segment that was not yet
      // dropped when we checked, but was dropped after the check above. That is possible only if we get two helix
      // transitions (to drop, and then to add back) for the segment at the same time, or very close to each other.
      // If the race condition triggers, and the two segments are the same in metadata, then we may end up NOT adding
      // back the segment that is in the process of being dropped.

      // If we get here, then either we have the segment loaded in memory (and therefore present on disk), or we
      // need to download it. In the former case, we still need to check if the metadata that we have is different
      // from that in zookeeper.
      if (isNewSegmentMetadata(localSegmentMetadata, segmentMetadataForCheck, segmentId, tableName)) {
        if (localSegmentMetadata == null) {
          LOGGER.info("Loading new segment {} of table {} from controller", segmentId, tableName);
        } else {
          LOGGER.info("Trying to refresh segment {} of table {} with new data.", segmentId, tableName);
        }
        downloadAndLoadSegment(tableName, segmentId, offlineSegmentZKMetadata, schema, retryOnFailure);
      } else {
        LOGGER.info("Got already loaded segment {} of table {} crc {} again, will do nothing.", segmentId, tableName,
            localSegmentMetadata.getCrc());
      }
    } catch (final Exception e) {
      LOGGER.error("Cannot load segment : " + segmentId + " for table " + tableName, e);
      Utils.rethrowException(e);
      throw new AssertionError("Should not reach this");
    }
  }

  /**
   * Downloads the segment from its download URL and adds it to the data manager, retrying with a randomized
   * exponential back-off when an attempt fails.
   *
   * @throws RuntimeException if every attempt failed; the exception of the last attempt is attached as the cause
   */
  private void downloadAndLoadSegment(String tableName, String segmentId,
      OfflineSegmentZKMetadata offlineSegmentZKMetadata, Schema schema, boolean retryOnFailure) {
    // Always make at least one attempt, even if the configured retry count is zero or negative.
    final int maxRetryCount = retryOnFailure ? Math.max(1, _segmentLoadMaxRetryCount) : 1;
    Exception lastException = null;

    for (int retryCount = 0; retryCount < maxRetryCount; ++retryCount) {
      long attemptStartTime = System.currentTimeMillis();
      try {
        AbstractTableConfig tableConfig = ZKMetadataProvider.getOfflineTableConfig(_propertyStore, tableName);
        final String uri = offlineSegmentZKMetadata.getDownloadUrl();
        final String localSegmentDir = downloadSegmentToLocal(uri, tableName, segmentId);
        final SegmentMetadata segmentMetadata =
            _metadataLoader.loadIndexSegmentMetadataFromDir(localSegmentDir);
        _dataManager.addSegment(segmentMetadata, tableConfig, schema);
        LOGGER.info("Downloaded segment {} of table {} crc {} from controller", segmentId, tableName,
            segmentMetadata.getCrc());
        // Successfully loaded the segment, no further attempts needed
        return;
      } catch (Exception e) {
        lastException = e;
        long attemptDurationMillis = System.currentTimeMillis() - attemptStartTime;
        LOGGER.warn("Caught exception while loading segment " + segmentId + "(table " + tableName + "), attempt "
            + (retryCount + 1) + " of " + maxRetryCount, e);

        // Do we need to wait for the next retry attempt?
        if (retryCount < maxRetryCount - 1) {
          // Randomized exponential back off: wait for (minDelay + attemptDuration) * m, where m is picked
          // uniformly from [1.0, 2^(retryCount + 1) + 1.0)
          double maxRetryDurationMultiplier = Math.pow(2.0, (retryCount + 1));
          double retryDurationMultiplier = Math.random() * maxRetryDurationMultiplier + 1.0;
          long waitTime =
              (long) ((_segmentLoadMinRetryDelayMs + attemptDurationMillis) * retryDurationMultiplier);

          LOGGER.warn("Waiting for " + TimeUnit.MILLISECONDS.toSeconds(waitTime)
              + " seconds to retry(" + segmentId + " of table " + tableName);
          waitBeforeRetry(waitTime);
        }
      }
    }

    throw new RuntimeException(
        "Failed to download and load segment " + segmentId + " (table " + tableName + " after " + maxRetryCount
            + " retries", lastException);
  }

  /**
   * Blocks the calling thread until at least {@code waitTimeMs} milliseconds have elapsed. An interrupt received
   * while waiting does not shorten the wait.
   */
  private static void waitBeforeRetry(long waitTimeMs) {
    long waitEndTime = System.currentTimeMillis() + waitTimeMs;
    long remainingMs = waitTimeMs;
    while (remainingMs > 0) {
      try {
        // Sleep for the time that is actually left, not for a 1 ms tick
        Thread.sleep(remainingMs);
      } catch (InterruptedException ignored) {
        // Deliberately ignored: the full back-off is always honoured. The interrupt flag is not restored here
        // because that would make the next sleep() throw immediately and turn this loop into a busy spin.
      }
      remainingMs = waitEndTime - System.currentTimeMillis();
    }
  }

  /**
   * Decides whether the locally known segment differs from the one described by the reference metadata.
   *
   * @param segmentMetadataFromServer metadata of the copy this server has, or null if it has none
   * @param segmentMetadataForCheck reference metadata to compare against
   * @param segmentName segment name, used for logging only
   * @param tableName table name, used for logging only
   * @return false only if both metadata objects are present, the local CRC is known (neither null nor the string
   *     "null") and it equals the reference CRC; true otherwise
   */
  private boolean isNewSegmentMetadata(SegmentMetadata segmentMetadataFromServer,
      SegmentMetadata segmentMetadataForCheck, String segmentName, String tableName) {
    if (segmentMetadataFromServer == null || segmentMetadataForCheck == null) {
      LOGGER.info("segmentMetadataForCheck = null? {}, segmentMetadataFromServer = null? {} for {} of table {}",
          segmentMetadataForCheck == null, segmentMetadataFromServer == null, segmentName, tableName);
      return true;
    }

    LOGGER.info("segmentMetadataForCheck.crc={},segmentMetadataFromServer.crc={} for {} of table {}",
        segmentMetadataForCheck.getCrc(), segmentMetadataFromServer.getCrc(), segmentName, tableName);

    String localCrc = segmentMetadataFromServer.getCrc();
    // A missing CRC cannot be compared, so treat the segment as new rather than failing with an NPE
    if (localCrc == null || localCrc.equalsIgnoreCase("null")) {
      return true;
    }
    return !localCrc.equals(segmentMetadataForCheck.getCrc());
  }

  /**
   * Fetches the segment tar file from the given URI, decompresses it into a temporary directory and moves the
   * first entry of that directory to the segment's local data directory, replacing any existing directory there.
   *
   * @param uri location to fetch the segment tar file from
   * @param tableName name of the table the segment belongs to
   * @param segmentId name of the segment
   * @return absolute path of the local segment directory
   * @throws Exception if fetching, decompressing or moving fails; temporary files are deleted before rethrowing
   */
  private String downloadSegmentToLocal(String uri, String tableName, String segmentId)
      throws Exception {
    File tempSegmentFile = null;
    File tempFile = null;
    try {
      tempSegmentFile = new File(_dataManager.getSegmentFileDirectory() + "/"
          + tableName + "/temp_" + segmentId + "_" + System.currentTimeMillis());
      tempFile = new File(_dataManager.getSegmentFileDirectory(), segmentId + ".tar.gz");
      SegmentFetcherFactory.getSegmentFetcherBasedOnURI(uri).fetchSegmentToLocal(uri, tempFile);
      LOGGER.info("Downloaded file from {} to {}; Length of downloaded file: {}; segmentName: {}; table: {}", uri,
          tempFile, tempFile.length(), segmentId, tableName);
      LOGGER.info("Trying to decompress segment tar file from {} to {} for table {}", tempFile, tempSegmentFile,
          tableName);

      TarGzCompressionUtils.unTar(tempFile, tempSegmentFile);
      FileUtils.deleteQuietly(tempFile);
      final File segmentDir = new File(new File(_dataManager.getSegmentDataDirectory(), tableName), segmentId);
      Thread.sleep(1000);

      // Verify that decompression produced something BEFORE removing the existing segment directory, so that a
      // bad archive does not destroy the current local copy.
      final File[] untarredFiles = tempSegmentFile.listFiles();
      if (untarredFiles == null || untarredFiles.length == 0) {
        throw new IllegalStateException(
            "Nothing found in " + tempSegmentFile + " after decompressing " + tempFile + " for segment " + segmentId
                + " of table " + tableName);
      }
      final File untarredSegmentDir = untarredFiles[0];

      if (segmentDir.exists()) {
        LOGGER.info("Deleting the directory {} and recreating it again table {} ", segmentDir.getAbsolutePath(),
            tableName);
        FileUtils.deleteDirectory(segmentDir);
      }
      LOGGER.info("Move the dir - " + untarredSegmentDir + " to "
          + segmentDir.getAbsolutePath() + " for " + segmentId + " of table " + tableName);
      FileUtils.moveDirectory(untarredSegmentDir, segmentDir);
      FileUtils.deleteDirectory(tempSegmentFile);
      Thread.sleep(1000);
      LOGGER.info("Was able to succesfully rename the dir to match the segment {} for table {}", segmentId, tableName);

      // Marker file written once the segment directory is in place
      new File(segmentDir, "finishedLoading").createNewFile();
      return segmentDir.getAbsolutePath();
    } catch (Exception e) {
      FileUtils.deleteQuietly(tempSegmentFile);
      FileUtils.deleteQuietly(tempFile);
      LOGGER.error("Caught exception downloading segment {} for table {}", segmentId, tableName, e);
      Utils.rethrowException(e);
      throw new AssertionError("Should not reach this");
    }
  }

  /**
   * Returns the path of the local directory for the given segment:
   * {@code <segment data directory>/<tableName>/<segmentId>}.
   */
  public String getSegmentLocalDirectory(String tableName, String segmentId) {
    return _dataManager.getSegmentDataDirectory() + "/" + tableName + "/" + segmentId;
  }

  /**
   * Reloads a segment that is known to the data manager. Does nothing (apart from logging) if the segment is
   * unknown, if its metadata has no index directory, or if the table type cannot be derived from the table name.
   *
   * @param tableName name of the table, from which the table type is derived
   * @param segmentName name of the segment to reload
   * @throws Exception if the reload fails
   */
  public void reloadSegment(String tableName, String segmentName)
      throws Exception {
    SegmentMetadata segmentMetadata = _dataManager.getSegmentMetadata(tableName, segmentName);
    if (segmentMetadata == null) {
      LOGGER.warn("Cannot locate segment: {} in table: {}", segmentName, tableName);
      return;
    }

    String indexDir = segmentMetadata.getIndexDir();
    if (indexDir == null) {
      LOGGER.info("Skip reloading REALTIME consuming segment: {} in table: {}", segmentName, tableName);
      return;
    }

    CommonConstants.Helix.TableType tableType = TableNameBuilder.getTableTypeFromTableName(tableName);
    if (tableType == null) {
      LOGGER.error("Invalid table name: {}, neither OFFLINE or REALTIME table", tableName);
      return;
    }

    switch (tableType) {
      case OFFLINE:
        AbstractTableConfig offlineTableConfig = ZKMetadataProvider.getOfflineTableConfig(_propertyStore, tableName);
        // For OFFLINE table, try to get schema for default columns
        Schema schema = ZKMetadataProvider.getOfflineTableSchema(_propertyStore, tableName);
        _dataManager.reloadSegment(segmentMetadata, tableType, offlineTableConfig, schema);
        break;
      case REALTIME:
        AbstractTableConfig realtimeTableConfig = ZKMetadataProvider.getRealtimeTableConfig(_propertyStore, tableName);
        // For REALTIME table, ignore schema for default columns
        _dataManager.reloadSegment(segmentMetadata, tableType, realtimeTableConfig, null);
        break;
      default:
        LOGGER.error("Invalid table name: {}, neither OFFLINE or REALTIME table", tableName);
        break;
    }
  }
}