/* This file is part of VoltDB.
* Copyright (C) 2008-2017 VoltDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
*/
package org.voltdb.sysprocs.saverestore;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicInteger;
import org.json_voltpatches.JSONObject;
import org.voltcore.utils.CoreUtils;
import org.voltcore.utils.InstanceId;
import org.voltdb.DefaultSnapshotDataTarget;
import org.voltdb.ExtensibleSnapshotDigestData;
import org.voltdb.SnapshotDataFilter;
import org.voltdb.SnapshotDataTarget;
import org.voltdb.SnapshotFormat;
import org.voltdb.SnapshotSiteProcessor;
import org.voltdb.SnapshotTableTask;
import org.voltdb.SystemProcedureExecutionContext;
import org.voltdb.TheHashinator;
import org.voltdb.TheHashinator.HashinatorType;
import org.voltdb.VoltDB;
import org.voltdb.VoltTable;
import org.voltdb.catalog.Table;
import org.voltdb.compiler.deploymentfile.DrRoleType;
import org.voltdb.dtxn.SiteTracker;
import org.voltdb.export.ExportManager;
import org.voltdb.sysprocs.SnapshotRegistry;
import org.voltdb.utils.CatalogUtil;
import com.google_voltpatches.common.collect.Maps;
/**
* Create a snapshot write plan for a native snapshot. This will attempt to
* write every table at every site on every node to disk, with one file per
* table per node. Replicated tables are written on every node, but the
* responsibility for writing them is spread round-robin across the sites on a
* node. Partitioned tables are written to the same target per table by every
* site on a node.
*/
public class NativeSnapshotWritePlan extends SnapshotWritePlan
{
    /**
     * Plan the setup work for a native snapshot on this host.
     *
     * Thin entry point that delegates to {@link #createSetupInternal},
     * supplying the current partition count from the execution context.
     *
     * @param file_path directory the snapshot files are written to
     * @param file_nonce unique nonce identifying this snapshot
     * @param txnId transaction id of the snapshot
     * @param partitionTransactionIds last committed txn id per partition
     * @param jsData optional JSON request parameters (may be null)
     * @param result table that per-table status rows are appended to
     * @param hashinatorData hashinator config to persist (required for elastic)
     * @return a Callable performing all deferred IO setup; it is run later on
     *         the dedicated snapshot IO thread, not on the caller's thread
     */
    @Override
    public Callable<Boolean> createSetup(String file_path, String pathType,
                                         String file_nonce,
                                         long txnId,
                                         Map<Integer, Long> partitionTransactionIds,
                                         JSONObject jsData,
                                         SystemProcedureExecutionContext context,
                                         final VoltTable result,
                                         ExtensibleSnapshotDigestData extraSnapshotData,
                                         SiteTracker tracker,
                                         HashinatorSnapshotData hashinatorData,
                                         long timestamp)
    {
        return createSetupInternal(file_path, pathType, file_nonce, txnId, partitionTransactionIds,
                jsData, context, result, extraSnapshotData, tracker, hashinatorData,
                timestamp, context.getNumberOfPartitions());
    }

    /**
     * Package-visible worker behind {@link #createSetup} (also called directly
     * with an explicit partition count, e.g. from tests or other plans).
     *
     * Registers the snapshot with {@link SnapshotRegistry}, builds one
     * {@link SnapshotTableTask} per table, splits the tasks into replicated
     * vs. partitioned groups, places them on the local sites, and returns a
     * Callable that performs the actual (deferred) IO setup.
     *
     * @param newPartitionCount partition count recorded in the snapshot digest
     * @throws RuntimeException if the elastic hashinator is configured but no
     *         hashinator data was supplied
     */
    Callable<Boolean> createSetupInternal(String file_path, String pathType,
                                          String file_nonce,
                                          long txnId,
                                          Map<Integer, Long> partitionTransactionIds,
                                          JSONObject jsData,
                                          SystemProcedureExecutionContext context,
                                          final VoltTable result,
                                          ExtensibleSnapshotDigestData extraSnapshotData,
                                          SiteTracker tracker,
                                          HashinatorSnapshotData hashinatorData,
                                          long timestamp,
                                          int newPartitionCount)
    {
        // A new snapshot must not start while another is in flight.
        assert(SnapshotSiteProcessor.ExecutionSitesCurrentlySnapshotting.isEmpty());
        // Elastic hashinator config must be persisted with the snapshot so a
        // restore can rebuild the partition mapping.
        if (TheHashinator.getConfiguredHashinatorType() == HashinatorType.ELASTIC && hashinatorData == null) {
            throw new RuntimeException("No hashinator data provided for elastic hashinator type.");
        }

        final SnapshotRequestConfig config = new SnapshotRequestConfig(jsData, context.getDatabase());
        final Table[] tableArray;
        // If the request named no tables (and didn't explicitly pass an empty
        // "tables" list), snapshot every eligible table in the database.
        if (config.tables.length == 0 && (jsData == null || !jsData.has("tables"))) {
            tableArray = SnapshotUtil.getTablesToSave(context.getDatabase()).toArray(new Table[0]);
        } else {
            tableArray = config.tables;
        }

        // Register the snapshot so progress/stats are visible; the record is
        // captured by the deferred setup below.
        m_snapshotRecord =
                SnapshotRegistry.startSnapshot(
                        txnId,
                        context.getHostId(),
                        file_path,
                        file_nonce,
                        SnapshotFormat.NATIVE,
                        tableArray);

        // Build one task per table; replicated and partitioned tables are
        // placed differently (see below), so keep them in separate lists.
        final ArrayList<SnapshotTableTask> partitionedSnapshotTasks =
                new ArrayList<SnapshotTableTask>();
        final ArrayList<SnapshotTableTask> replicatedSnapshotTasks =
                new ArrayList<SnapshotTableTask>();
        for (final Table table : tableArray) {
            final SnapshotTableTask task =
                    new SnapshotTableTask(
                            table,
                            new SnapshotDataFilter[0],
                            null,
                            false);

            SNAP_LOG.debug("ADDING TASK: " + task);

            if (table.getIsreplicated()) {
                replicatedSnapshotTasks.add(task);
            } else {
                partitionedSnapshotTasks.add(task);
            }

            // One status row per table; failures are reported elsewhere, so
            // every table planned here is recorded as SUCCESS.
            result.addRow(context.getHostId(),
                    CoreUtils.getHostnameOrAddress(),
                    table.getTypeName(),
                    "SUCCESS",
                    "");
        }

        // NOTE(review): every table above is added to exactly one of the two
        // lists, so this condition looks unreachable when tableArray is
        // non-empty — confirm whether task creation was ever conditional.
        // Also note there is no early return after discarding the snapshot;
        // the deferred setup below is still created — verify that is intended.
        if (tableArray.length > 0 && replicatedSnapshotTasks.isEmpty() && partitionedSnapshotTasks.isEmpty()) {
            SnapshotRegistry.discardSnapshot(m_snapshotRecord);
        }

        // Native snapshots place the partitioned tasks on every site and round-robin the
        // replicated tasks across all the sites on every host
        placePartitionedTasks(partitionedSnapshotTasks, tracker.getSitesForHost(context.getHostId()));
        placeReplicatedTasks(replicatedSnapshotTasks, tracker.getSitesForHost(context.getHostId()));

        // Presence of "truncReqId" marks a command-log truncation snapshot;
        // with no JSON data, treat the snapshot as a truncation snapshot.
        boolean isTruncationSnapshot = true;
        if (jsData != null) {
            isTruncationSnapshot = jsData.has("truncReqId");
        }

        // All IO work will be deferred and be run on the dedicated snapshot IO thread
        return createDeferredSetup(file_path, pathType, file_nonce, txnId, partitionTransactionIds,
                context, extraSnapshotData, tracker, hashinatorData, timestamp,
                newPartitionCount, tableArray, m_snapshotRecord, partitionedSnapshotTasks,
                replicatedSnapshotTasks, isTruncationSnapshot);
    }

    /**
     * Build the Callable that performs all file-system work for the snapshot:
     * registering the completion tasks (digest, catalog, etc.), creating one
     * {@link SnapshotDataTarget} per table, and wiring each task to its
     * target. Runs on the dedicated snapshot IO thread.
     */
    private Callable<Boolean> createDeferredSetup(final String file_path,
                                                  final String pathType,
                                                  final String file_nonce,
                                                  final long txnId,
                                                  final Map<Integer, Long> partitionTransactionIds,
                                                  final SystemProcedureExecutionContext context,
                                                  final ExtensibleSnapshotDigestData extraSnapshotData,
                                                  final SiteTracker tracker,
                                                  final HashinatorSnapshotData hashinatorData,
                                                  final long timestamp,
                                                  final int newPartitionCount,
                                                  final Table[] tables,
                                                  final SnapshotRegistry.Snapshot snapshotRecord,
                                                  final ArrayList<SnapshotTableTask> partitionedSnapshotTasks,
                                                  final ArrayList<SnapshotTableTask> replicatedSnapshotTasks,
                                                  final boolean isTruncationSnapshot)
    {
        return new Callable<Boolean>() {
            // One target per table (keyed by the table's relative index) so
            // all tasks for the same table share a single file target.
            private final HashMap<Integer, SnapshotDataTarget> m_createdTargets = Maps.newHashMap();

            @Override
            public Boolean call() throws Exception
            {
                // Counts down as table targets close; used by the stats
                // closure attached in createDataTargetForTable().
                final AtomicInteger numTables = new AtomicInteger(tables.length);

                NativeSnapshotWritePlan.createFileBasedCompletionTasks(file_path, pathType, file_nonce,
                        txnId, partitionTransactionIds, context, extraSnapshotData,
                        hashinatorData,
                        timestamp,
                        newPartitionCount,
                        tables);

                // Create (or reuse) a data target for every task.
                for (SnapshotTableTask task : replicatedSnapshotTasks) {
                    SnapshotDataTarget target = getSnapshotDataTarget(numTables, task);
                    task.setTarget(target);
                }

                for (SnapshotTableTask task : partitionedSnapshotTasks) {
                    SnapshotDataTarget target = getSnapshotDataTarget(numTables, task);
                    task.setTarget(target);
                }

                if (isTruncationSnapshot) {
                    // Only sync the DR Log on Native Snapshots
                    SnapshotSiteProcessor.m_tasksOnSnapshotCompletion.offer(new Runnable() {
                        @Override
                        public void run()
                        {
                            context.forceAllDRNodeBuffersToDisk(false);
                        }
                    });
                }
                // Sync export buffer for all types of snapshot
                SnapshotSiteProcessor.m_tasksOnSnapshotCompletion.offer(new Runnable() {
                    @Override
                    public void run()
                    {
                        ExportManager.sync(false);
                    }
                });

                return true;
            }

            /**
             * Return the cached target for the task's table, creating it on
             * first use.
             */
            private SnapshotDataTarget getSnapshotDataTarget(AtomicInteger numTables, SnapshotTableTask task)
                    throws IOException
            {
                SnapshotDataTarget target = m_createdTargets.get(task.m_table.getRelativeIndex());
                if (target == null) {
                    target = createDataTargetForTable(file_path, file_nonce, task.m_table, txnId,
                            context.getHostId(), context.getCluster().getTypeName(),
                            context.getDatabase().getTypeName(), context.getNumberOfPartitions(),
                            DrRoleType.XDCR.value().equals(context.getCluster().getDrrole()),
                            tracker, timestamp, numTables, snapshotRecord);
                    m_createdTargets.put(task.m_table.getRelativeIndex(), target);
                }
                return target;
            }
        };
    }

    /**
     * Create the on-disk data target (one file) for a single table on this
     * host. When the cluster is active-active DR (XDCR) and the table is
     * DR-ed, the schema written includes the DR hidden column.
     *
     * Side effects: registers the target in {@code m_targets} and attaches a
     * close handler that updates snapshot stats and decrements
     * {@code numTables}.
     *
     * @throws IOException if the target file cannot be created
     */
    private SnapshotDataTarget createDataTargetForTable(String file_path,
                                                        String file_nonce,
                                                        Table table,
                                                        long txnId,
                                                        int hostId,
                                                        String clusterName,
                                                        String databaseName,
                                                        int partitionCount,
                                                        boolean isActiveActiveDRed,
                                                        SiteTracker tracker,
                                                        long timestamp,
                                                        AtomicInteger numTables,
                                                        SnapshotRegistry.Snapshot snapshotRecord)
            throws IOException
    {
        SnapshotDataTarget sdt;

        File saveFilePath = SnapshotUtil.constructFileForTable(
                table,
                file_path,
                file_nonce,
                SnapshotFormat.NATIVE,
                hostId);

        if (isActiveActiveDRed && table.getIsdred()) {
            // Include the DR hidden column in the snapshot schema so XDCR
            // metadata survives a restore.
            sdt = new DefaultSnapshotDataTarget(saveFilePath,
                    hostId,
                    clusterName,
                    databaseName,
                    table.getTypeName(),
                    partitionCount,
                    table.getIsreplicated(),
                    tracker.getPartitionsForHost(hostId),
                    CatalogUtil.getVoltTable(table, CatalogUtil.DR_HIDDEN_COLUMN_INFO),
                    txnId,
                    timestamp);
        }
        else {
            sdt = new DefaultSnapshotDataTarget(saveFilePath,
                    hostId,
                    clusterName,
                    databaseName,
                    table.getTypeName(),
                    partitionCount,
                    table.getIsreplicated(),
                    tracker.getPartitionsForHost(hostId),
                    CatalogUtil.getVoltTable(table),
                    txnId,
                    timestamp);
        }

        m_targets.add(sdt);
        final Runnable onClose = new TargetStatsClosure(sdt, table.getTypeName(), numTables, snapshotRecord);
        sdt.setOnCloseHandler(onClose);

        return sdt;
    }

    /**
     * Queue the file-based completion work for this snapshot onto the static
     * snapshot-completion task queue: the snapshot digest, the hashinator
     * config (if supplied), the catalog jar, the completion marker, and — when
     * the digest carries a non-zero terminus — the terminus marker.
     *
     * Each writer may return null (nothing to do on this host), in which case
     * no task is queued for it.
     *
     * @throws IOException if preparing any of the writers fails
     */
    static void createFileBasedCompletionTasks(
            String file_path, String pathType, String file_nonce,
            long txnId, Map<Integer, Long> partitionTransactionIds,
            SystemProcedureExecutionContext context,
            ExtensibleSnapshotDigestData extraSnapshotData,
            HashinatorSnapshotData hashinatorData,
            long timestamp, int newPartitionCount,
            Table[] tables) throws IOException
    {
        InstanceId instId = VoltDB.instance().getHostMessenger().getInstanceId();
        Runnable completionTask = SnapshotUtil.writeSnapshotDigest(
                txnId,
                context.getCatalogCRC(),
                file_path,
                pathType,
                file_nonce,
                Arrays.asList(tables),
                context.getHostId(),
                partitionTransactionIds,
                extraSnapshotData,
                instId,
                timestamp,
                newPartitionCount,
                context.getClusterId());
        if (completionTask != null) {
            SnapshotSiteProcessor.m_tasksOnSnapshotCompletion.offer(completionTask);
        }
        // Hashinator data is only present for the elastic hashinator.
        if (hashinatorData != null) {
            completionTask = SnapshotUtil.writeHashinatorConfig(
                    instId, file_path, file_nonce, context.getHostId(), hashinatorData);
            if (completionTask != null) {
                SnapshotSiteProcessor.m_tasksOnSnapshotCompletion.offer(completionTask);
            }
        }
        completionTask = SnapshotUtil.writeSnapshotCatalog(file_path, file_nonce);
        if (completionTask != null) {
            SnapshotSiteProcessor.m_tasksOnSnapshotCompletion.offer(completionTask);
        }
        completionTask = SnapshotUtil.writeSnapshotCompletion(file_path, file_nonce, context.getHostId(), SNAP_LOG);
        if (completionTask != null) {
            SnapshotSiteProcessor.m_tasksOnSnapshotCompletion.offer(completionTask);
        }
        if (extraSnapshotData.getTerminus() != 0L) {
            // NOTE(review): unlike the writers above, this one is queued
            // without a null check — presumably writeTerminusMarker never
            // returns null; confirm.
            completionTask = SnapshotUtil.writeTerminusMarker(file_nonce, context.getPaths(), SNAP_LOG);
            SnapshotSiteProcessor.m_tasksOnSnapshotCompletion.offer(completionTask);
        }
    }
}