/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
 */

package org.voltdb.iv2;

import java.io.IOException;
import java.lang.reflect.Method;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Map;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicInteger;

import org.voltcore.logging.Level;
import org.voltcore.logging.VoltLogger;
import org.voltcore.messaging.TransactionInfoBaseMessage;
import org.voltcore.utils.CoreUtils;
import org.voltcore.utils.DBBPool;
import org.voltcore.utils.EstTime;
import org.voltcore.utils.Pair;
import org.voltdb.BackendTarget;
import org.voltdb.CatalogContext;
import org.voltdb.CatalogSpecificPlanner;
import org.voltdb.DRConsumerDrIdTracker;
import org.voltdb.DRIdempotencyResult;
import org.voltdb.DRLogSegmentId;
import org.voltdb.DependencyPair;
import org.voltdb.ExtensibleSnapshotDigestData;
import org.voltdb.HsqlBackend;
import org.voltdb.IndexStats;
import org.voltdb.LoadedProcedureSet;
import org.voltdb.MemoryStats;
import org.voltdb.NonVoltDBBackend;
import org.voltdb.ParameterSet;
import org.voltdb.PartitionDRGateway;
import org.voltdb.PostGISBackend;
import org.voltdb.PostgreSQLBackend;
import org.voltdb.ProcedureRunner;
import org.voltdb.SiteProcedureConnection;
import org.voltdb.SiteSnapshotConnection;
import org.voltdb.SnapshotDataTarget;
import org.voltdb.SnapshotFormat;
import org.voltdb.SnapshotSiteProcessor;
import org.voltdb.SnapshotTableTask;
import org.voltdb.StartAction;
import org.voltdb.StatsAgent;
import org.voltdb.StatsSelector;
import org.voltdb.SystemProcedureCatalog;
import org.voltdb.SystemProcedureExecutionContext;
import org.voltdb.TableStats;
import org.voltdb.TableStreamType;
import org.voltdb.TheHashinator;
import org.voltdb.TheHashinator.HashinatorConfig;
import org.voltdb.TupleStreamStateInfo;
import org.voltdb.VoltDB;
import org.voltdb.VoltProcedure.VoltAbortException;
import org.voltdb.VoltTable;
import org.voltdb.catalog.CatalogMap;
import org.voltdb.catalog.Cluster;
import org.voltdb.catalog.DRCatalogCommands;
import org.voltdb.catalog.DRCatalogDiffEngine;
import org.voltdb.catalog.Database;
import org.voltdb.catalog.Deployment;
import org.voltdb.catalog.Procedure;
import org.voltdb.catalog.Table;
import org.voltdb.dtxn.SiteTracker;
import org.voltdb.dtxn.TransactionState;
import org.voltdb.dtxn.UndoAction;
import org.voltdb.exceptions.EEException;
import org.voltdb.jni.ExecutionEngine;
import org.voltdb.jni.ExecutionEngine.EventType;
import org.voltdb.jni.ExecutionEngine.TaskType;
import org.voltdb.jni.ExecutionEngineIPC;
import org.voltdb.jni.ExecutionEngineJNI;
import org.voltdb.jni.MockExecutionEngine;
import org.voltdb.messaging.CompleteTransactionMessage;
import org.voltdb.messaging.FastDeserializer;
import org.voltdb.messaging.FragmentTaskMessage;
import org.voltdb.messaging.Iv2InitiateTaskMessage;
import org.voltdb.rejoin.TaskLog;
import org.voltdb.settings.ClusterSettings;
import org.voltdb.settings.NodeSettings;
import org.voltdb.sysprocs.SysProcFragmentId;
import org.voltdb.utils.CompressionService;
import org.voltdb.utils.LogKeys;
import org.voltdb.utils.MinimumRatioMaintainer;

import com.google_voltpatches.common.base.Charsets;
import com.google_voltpatches.common.base.Preconditions;

import vanilla.java.affinity.impl.PosixJNAAffinity;

public class Site implements Runnable, SiteProcedureConnection, SiteSnapshotConnection
{
    private static final VoltLogger hostLog = new VoltLogger("HOST");
    private static final VoltLogger drLog = new VoltLogger("DRAGENT");

    private static final double m_taskLogReplayRatio =
        Double.valueOf(System.getProperty("TASKLOG_REPLAY_RATIO", "0.6"));

    // Set to false to trigger shutdown.
    volatile boolean m_shouldContinue = true;

    // HSId of this site's initiator.
    final long m_siteId;

    final int m_snapshotPriority;

    // Partition count is important on SPIs, MPI doesn't use it.
    int m_numberOfPartitions;

    // What type of EE is controlled
    final BackendTarget m_backend;

    // Is the site in a rejoining mode.
    private final static int kStateRunning = 0;
    private final static int kStateRejoining = 1;
    private final static int kStateReplayingRejoin = 2;
    private int m_rejoinState;
    private final TaskLog m_rejoinTaskLog;
    private JoinProducerBase.JoinCompletionAction m_replayCompletionAction;

    // Enumerate execution sites by host.
    private static final AtomicInteger siteIndexCounter = new AtomicInteger(0);
    private final int m_siteIndex = siteIndexCounter.getAndIncrement();

    // Manages pending tasks.
    final SiteTaskerQueue m_scheduler;

    /*
     * There is really no legitimate reason to touch the initiator mailbox from the site,
     * but it turns out to be necessary at startup when restoring a snapshot. The snapshot
     * has the transaction id for the partition that it must continue from and it has to be
     * set at all replicas of the partition.
*/ final InitiatorMailbox m_initiatorMailbox; // Almighty execution engine and its (HSQL or PostgreSQL) backend sidekick ExecutionEngine m_ee; NonVoltDBBackend m_non_voltdb_backend; // Stats final TableStats m_tableStats; final IndexStats m_indexStats; final MemoryStats m_memStats; // Each execution site manages snapshot using a SnapshotSiteProcessor private SnapshotSiteProcessor m_snapshotter; // Current catalog volatile CatalogContext m_context; // Currently available procedure volatile LoadedProcedureSet m_loadedProcedures; // Cache the DR gateway here so that we can pass it to tasks as they are reconstructed from // the task log private PartitionDRGateway m_drGateway; private PartitionDRGateway m_mpDrGateway; private final boolean m_hasMPDRGateway; // true if this site has the MP gateway /* * Track the last producer-cluster unique IDs and drIds associated with an * @ApplyBinaryLogSP and @ApplyBinaryLogMP invocation so it can be provided to the * ReplicaDRGateway on repair */ private Map<Integer, Map<Integer, DRConsumerDrIdTracker>> m_maxSeenDrLogsBySrcPartition = new HashMap<Integer, Map<Integer, DRConsumerDrIdTracker>>(); private long m_lastLocalSpUniqueId = -1L; // Only populated by the Site for ApplyBinaryLog Txns private long m_lastLocalMpUniqueId = -1L; // Only populated by the Site for ApplyBinaryLog Txns // Current topology int m_partitionId; private final String m_coreBindIds; // Need temporary access to some startup parameters in order to // initialize EEs in the right thread. private static class StartupConfig { final String m_serializedCatalog; final long m_timestamp; StartupConfig(final String catalog, final long timestamp) { m_serializedCatalog = catalog; m_timestamp = timestamp; } } private StartupConfig m_startupConfig = null; // Undo token state for the corresponding EE. public final static long kInvalidUndoToken = -1L; private long m_latestUndoToken = 0L; private long m_latestUndoTxnId = Long.MIN_VALUE; private long getNextUndoToken(long txnId) { if (txnId != m_latestUndoTxnId) { m_latestUndoTxnId = txnId; return ++m_latestUndoToken; } else { return m_latestUndoToken; } } /* * Increment the undo token blindly to work around * issues using a single token per transaction * See ENG-5242 */ private long getNextUndoTokenBroken() { m_latestUndoTxnId = m_currentTxnId; return ++m_latestUndoToken; } @Override public long getLatestUndoToken() { return m_latestUndoToken; } // Advanced in complete transaction. long m_lastCommittedSpHandle = 0; long m_spHandleForSnapshotDigest = 0; long m_currentTxnId = Long.MIN_VALUE; long m_lastTxnTime = System.currentTimeMillis(); /* * The version of the hashinator currently in use at the site will be consistent * across the node because balance partitions runs everywhere and all sites update. * * There is a corner case with live rejoin where sites replay their log and some sites * can pull ahead and update the global hashinator to ones further ahead causing transactions * to not be applied correctly during replay at the other sites. To avoid this each site * maintains a reference to it's own hashinator (which will be shared if possible). 
* * When two partition transactions come online they will diverge for pretty much the entire rebalance, * but will converge at the end when the final hash function update is issued everywhere */ TheHashinator m_hashinator; SiteProcedureConnection getSiteProcedureConnection() { return this; } /** * SystemProcedures are "friends" with ExecutionSites and granted * access to internal state via m_systemProcedureContext. */ SystemProcedureExecutionContext m_sysprocContext = new SystemProcedureExecutionContext() { @Override public ClusterSettings getClusterSettings() { return m_context.getClusterSettings(); } @Override public NodeSettings getPaths() { return m_context.getNodeSettings(); } @Override public Database getDatabase() { return m_context.database; } @Override public Cluster getCluster() { return m_context.cluster; } @Override public long getSpHandleForSnapshotDigest() { return m_spHandleForSnapshotDigest; } @Override public long getSiteId() { return m_siteId; } @Override public int getLocalSitesCount() { return m_context.getNodeSettings().getLocalSitesCount(); } /* * Expensive to compute, memoize it */ private Boolean m_isLowestSiteId = null; @Override public boolean isLowestSiteId() { if (m_isLowestSiteId != null) { return m_isLowestSiteId; } else { // FUTURE: should pass this status in at construction. long lowestSiteId = VoltDB.instance().getSiteTrackerForSnapshot().getLowestSiteForHost(getHostId()); m_isLowestSiteId = m_siteId == lowestSiteId; return m_isLowestSiteId; } } @Override public int getClusterId() { return getCorrespondingClusterId(); } @Override public int getHostId() { return CoreUtils.getHostIdFromHSId(m_siteId); } @Override public int getPartitionId() { return m_partitionId; } @Override public long getCatalogCRC() { return m_context.getCatalogCRC(); } @Override public int getCatalogVersion() { return m_context.catalogVersion; } @Override public byte[] getCatalogHash() { return m_context.getCatalogHash(); } @Override public byte[] getDeploymentHash() { return m_context.deploymentHash; } @Override public SiteTracker getSiteTrackerForSnapshot() { return VoltDB.instance().getSiteTrackerForSnapshot(); } @Override public int getNumberOfPartitions() { return m_numberOfPartitions; } @Override public void setNumberOfPartitions(int partitionCount) { Site.this.setNumberOfPartitions(partitionCount); } @Override public SiteProcedureConnection getSiteProcedureConnection() { return Site.this; } @Override public SiteSnapshotConnection getSiteSnapshotConnection() { return Site.this; } @Override public void updateBackendLogLevels() { Site.this.updateBackendLogLevels(); } @Override public boolean updateCatalog(String diffCmds, CatalogContext context, CatalogSpecificPlanner csp, boolean requiresSnapshotIsolation, long uniqueId, long spHandle, boolean requireCatalogDiffCmdsApplyToEE, boolean requiresNewExportGeneration) { return Site.this.updateCatalog(diffCmds, context, csp, requiresSnapshotIsolation, false, uniqueId, spHandle, requireCatalogDiffCmdsApplyToEE, requiresNewExportGeneration); } @Override public boolean updateSettings(CatalogContext context, CatalogSpecificPlanner csp) { return Site.this.updateSettings(context, csp); } @Override public TheHashinator getCurrentHashinator() { return m_hashinator; } @Override public void updateHashinator(TheHashinator hashinator) { Site.this.updateHashinator(hashinator); } @Override public boolean activateTableStream(final int tableId, TableStreamType type, boolean undo, byte[] predicates) { return m_ee.activateTableStream(tableId, type, undo ? 
getNextUndoToken(m_currentTxnId) : Long.MAX_VALUE, predicates); } @Override public Pair<Long, int[]> tableStreamSerializeMore(int tableId, TableStreamType type, List<DBBPool.BBContainer> outputBuffers) { return m_ee.tableStreamSerializeMore(tableId, type, outputBuffers); } @Override public void forceAllDRNodeBuffersToDisk(final boolean nofsync) { if (m_drGateway != null) { m_drGateway.forceAllDRNodeBuffersToDisk(nofsync); } if (m_mpDrGateway != null) { m_mpDrGateway.forceAllDRNodeBuffersToDisk(nofsync); } } /** * Check to see if binary log is expected (start DR id adjacent to last received DR id) */ @Override public DRIdempotencyResult isExpectedApplyBinaryLog(int producerClusterId, int producerPartitionId, long lastReceivedDRId) { Map<Integer, DRConsumerDrIdTracker> clusterSources = m_maxSeenDrLogsBySrcPartition.get(producerClusterId); if (clusterSources == null) { drLog.warn(String.format("P%d binary log site idempotency check failed. " + "Site doesn't have tracker for this cluster while the last received is %s", producerPartitionId, DRLogSegmentId.getDebugStringFromDRId(lastReceivedDRId))); } else { DRConsumerDrIdTracker targetTracker = clusterSources.get(producerPartitionId); if (targetTracker == null) { drLog.warn(String.format("P%d binary log site idempotency check failed. " + "Site's tracker is null while the last received is %s", producerPartitionId, DRLogSegmentId.getDebugStringFromDRId(lastReceivedDRId))); } else { assert (targetTracker.size() > 0); final long lastDrId = targetTracker.getLastDrId(); if (lastDrId == lastReceivedDRId) { // This is what we expected return DRIdempotencyResult.SUCCESS; } if (lastDrId > lastReceivedDRId) { // This is a duplicate return DRIdempotencyResult.DUPLICATE; } if (drLog.isTraceEnabled()) { drLog.trace(String.format("P%d binary log site idempotency check failed. 
" + "Site's tracker is %s while the last received is %s", producerPartitionId, DRLogSegmentId.getDebugStringFromDRId(lastDrId), DRLogSegmentId.getDebugStringFromDRId(lastReceivedDRId))); } } } return DRIdempotencyResult.GAP; } @Override public void appendApplyBinaryLogTxns(int producerClusterId, int producerPartitionId, long localUniqueId, DRConsumerDrIdTracker tracker) { assert(tracker.size() > 0); if (UniqueIdGenerator.getPartitionIdFromUniqueId(localUniqueId) == MpInitiator.MP_INIT_PID) { m_lastLocalMpUniqueId = localUniqueId; } else { m_lastLocalSpUniqueId = localUniqueId; } Map<Integer, DRConsumerDrIdTracker> clusterSources = m_maxSeenDrLogsBySrcPartition.get(producerClusterId); if (clusterSources == null) { clusterSources = new HashMap<Integer, DRConsumerDrIdTracker>(); clusterSources.put(producerPartitionId, tracker); m_maxSeenDrLogsBySrcPartition.put(producerClusterId, clusterSources); } else { DRConsumerDrIdTracker targetTracker = clusterSources.get(producerPartitionId); if (targetTracker == null) { clusterSources.put(producerPartitionId, tracker); } else { targetTracker.mergeTracker(tracker); } } } @Override public void recoverWithDrAppliedTrackers(Map<Integer, Map<Integer, DRConsumerDrIdTracker>> trackers) { assert(m_maxSeenDrLogsBySrcPartition.size() == 0); m_maxSeenDrLogsBySrcPartition = trackers; } @Override public void resetDrAppliedTracker() { m_maxSeenDrLogsBySrcPartition.clear(); if (drLog.isDebugEnabled()) { drLog.debug("Cleared DR Applied tracker"); } m_lastLocalSpUniqueId = -1L; m_lastLocalMpUniqueId = -1L; } @Override public void resetDrAppliedTracker(byte clusterId) { m_maxSeenDrLogsBySrcPartition.remove((int) clusterId); if (drLog.isDebugEnabled()) { drLog.debug("Reset DR Applied tracker for " + clusterId); } if (m_maxSeenDrLogsBySrcPartition.isEmpty()) { m_lastLocalSpUniqueId = -1L; m_lastLocalMpUniqueId = -1L; } } @Override public boolean hasRealDrAppliedTracker(byte clusterId) { boolean has = false; if (m_maxSeenDrLogsBySrcPartition.containsKey((int) clusterId)) { for (DRConsumerDrIdTracker tracker: m_maxSeenDrLogsBySrcPartition.get((int) clusterId).values()) { if (tracker.isRealTracker()) { has = true; break; } } } return has; } @Override public void initDRAppliedTracker(Map<Byte, Integer> clusterIdToPartitionCountMap) { for (Map.Entry<Byte, Integer> entry : clusterIdToPartitionCountMap.entrySet()) { int producerClusterId = entry.getKey(); if (m_maxSeenDrLogsBySrcPartition.containsKey(producerClusterId)) { continue; } int producerPartitionCount = entry.getValue(); assert(producerPartitionCount != -1); Map<Integer, DRConsumerDrIdTracker> clusterSources = new HashMap<>(); for (int i = 0; i < producerPartitionCount; i++) { DRConsumerDrIdTracker tracker = DRConsumerDrIdTracker.createPartitionTracker( DRLogSegmentId.makeEmptyDRId(producerClusterId), Long.MIN_VALUE, Long.MIN_VALUE, i); clusterSources.put(i, tracker); } DRConsumerDrIdTracker tracker = DRConsumerDrIdTracker.createPartitionTracker( DRLogSegmentId.makeEmptyDRId(producerClusterId), Long.MIN_VALUE, Long.MIN_VALUE, MpInitiator.MP_INIT_PID); clusterSources.put(MpInitiator.MP_INIT_PID, tracker); m_maxSeenDrLogsBySrcPartition.put(producerClusterId, clusterSources); } } @Override public Map<Integer, Map<Integer, DRConsumerDrIdTracker>> getDrAppliedTrackers() { DRConsumerDrIdTracker.debugTraceTracker(drLog, m_maxSeenDrLogsBySrcPartition); return m_maxSeenDrLogsBySrcPartition; } @Override public Pair<Long, Long> getDrLastAppliedUniqueIds() { return Pair.of(m_lastLocalSpUniqueId, m_lastLocalMpUniqueId); } @Override 
public Procedure ensureDefaultProcLoaded(String procName) { ProcedureRunner runner = Site.this.m_loadedProcedures.getProcByName(procName); return runner.getCatalogProcedure(); } }; /** Create a new execution site and the corresponding EE */ public Site( SiteTaskerQueue scheduler, long siteId, BackendTarget backend, CatalogContext context, String serializedCatalog, int partitionId, int numPartitions, StartAction startAction, int snapshotPriority, InitiatorMailbox initiatorMailbox, StatsAgent agent, MemoryStats memStats, String coreBindIds, TaskLog rejoinTaskLog, boolean hasMPDRGateway) { m_siteId = siteId; m_context = context; m_partitionId = partitionId; m_numberOfPartitions = numPartitions; m_scheduler = scheduler; m_backend = backend; m_rejoinState = startAction.doesJoin() ? kStateRejoining : kStateRunning; m_snapshotPriority = snapshotPriority; // need this later when running in the final thread. m_startupConfig = new StartupConfig(serializedCatalog, context.m_uniqueId); m_lastCommittedSpHandle = TxnEgo.makeZero(partitionId).getTxnId(); m_spHandleForSnapshotDigest = m_lastCommittedSpHandle; m_currentTxnId = Long.MIN_VALUE; m_initiatorMailbox = initiatorMailbox; m_coreBindIds = coreBindIds; m_rejoinTaskLog = rejoinTaskLog; m_hasMPDRGateway = hasMPDRGateway; m_hashinator = TheHashinator.getCurrentHashinator(); if (agent != null) { m_tableStats = new TableStats(m_siteId); agent.registerStatsSource(StatsSelector.TABLE, m_siteId, m_tableStats); m_indexStats = new IndexStats(m_siteId); agent.registerStatsSource(StatsSelector.INDEX, m_siteId, m_indexStats); m_memStats = memStats; } else { // MPI doesn't need to track these stats m_tableStats = null; m_indexStats = null; m_memStats = null; } } public void setDRGateway(PartitionDRGateway drGateway, PartitionDRGateway mpDrGateway) { m_drGateway = drGateway; m_mpDrGateway = mpDrGateway; if (m_hasMPDRGateway && m_mpDrGateway == null) { throw new IllegalArgumentException("This site should contain the MP DR gateway but was not given"); } else if (!m_hasMPDRGateway && m_mpDrGateway != null) { throw new IllegalArgumentException("This site should not contain the MP DR gateway but was given"); } } /** Update the loaded procedures. 
*/ void setLoadedProcedures(LoadedProcedureSet loadedProcedure) { m_loadedProcedures = loadedProcedure; } /** Thread specific initialization */ void initialize() { if (m_backend == BackendTarget.NONE) { m_non_voltdb_backend = null; m_ee = new MockExecutionEngine(); } else if (m_backend == BackendTarget.HSQLDB_BACKEND) { m_non_voltdb_backend = HsqlBackend.initializeHSQLBackend(m_siteId, m_context); m_ee = new MockExecutionEngine(); } else if (m_backend == BackendTarget.POSTGRESQL_BACKEND) { m_non_voltdb_backend = PostgreSQLBackend.initializePostgreSQLBackend(m_context); m_ee = new MockExecutionEngine(); } else if (m_backend == BackendTarget.POSTGIS_BACKEND) { m_non_voltdb_backend = PostGISBackend.initializePostGISBackend(m_context); m_ee = new MockExecutionEngine(); } else { m_non_voltdb_backend = null; m_ee = initializeEE(); } m_snapshotter = new SnapshotSiteProcessor(m_scheduler, m_snapshotPriority, new SnapshotSiteProcessor.IdlePredicate() { @Override public boolean idle(long now) { return (now - 5) > m_lastTxnTime; } }); } /** Create a native VoltDB execution engine */ ExecutionEngine initializeEE() { String hostname = CoreUtils.getHostnameOrAddress(); HashinatorConfig hashinatorConfig = TheHashinator.getCurrentConfig(); ExecutionEngine eeTemp = null; Deployment deploy = m_context.cluster.getDeployment().get("deployment"); final int defaultDrBufferSize = Integer.getInteger("DR_DEFAULT_BUFFER_SIZE", 512 * 1024); // 512KB try { if (m_backend == BackendTarget.NATIVE_EE_JNI) { eeTemp = new ExecutionEngineJNI( m_context.cluster.getRelativeIndex(), m_siteId, m_partitionId, CoreUtils.getHostIdFromHSId(m_siteId), hostname, m_context.cluster.getDrclusterid(), defaultDrBufferSize, deploy.getSystemsettings().get("systemsettings").getTemptablemaxsize(), hashinatorConfig, m_hasMPDRGateway); } else if (m_backend == BackendTarget.NATIVE_EE_SPY_JNI){ Class<?> spyClass = Class.forName("org.mockito.Mockito"); Method spyMethod = spyClass.getDeclaredMethod("spy", Object.class); ExecutionEngine internalEE = new ExecutionEngineJNI( m_context.cluster.getRelativeIndex(), m_siteId, m_partitionId, CoreUtils.getHostIdFromHSId(m_siteId), hostname, m_context.cluster.getDrclusterid(), defaultDrBufferSize, m_context.cluster.getDeployment().get("deployment"). getSystemsettings().get("systemsettings").getTemptablemaxsize(), hashinatorConfig, m_hasMPDRGateway); eeTemp = (ExecutionEngine) spyMethod.invoke(null, internalEE); } else { // set up the EE over IPC eeTemp = new ExecutionEngineIPC( m_context.cluster.getRelativeIndex(), m_siteId, m_partitionId, CoreUtils.getHostIdFromHSId(m_siteId), hostname, m_context.cluster.getDrclusterid(), defaultDrBufferSize, deploy.getSystemsettings().get("systemsettings").getTemptablemaxsize(), m_backend, VoltDB.instance().getConfig().m_ipcPort, hashinatorConfig, m_hasMPDRGateway); } eeTemp.loadCatalog(m_startupConfig.m_timestamp, m_startupConfig.m_serializedCatalog); eeTemp.setBatchTimeout(m_context.cluster.getDeployment().get("deployment"). 
                    getSystemsettings().get("systemsettings").getQuerytimeout());
        }
        // just print error info and bail if we run into an error here
        catch (final Exception ex) {
            hostLog.l7dlog(Level.FATAL,
                    LogKeys.host_ExecutionSite_FailedConstruction.name(),
                    new Object[] { m_siteId, m_siteIndex }, ex);
            VoltDB.crashLocalVoltDB(ex.getMessage(), true, ex);
        }
        return eeTemp;
    }

    @Override
    public void run() {
        if (m_partitionId == MpInitiator.MP_INIT_PID) {
            Thread.currentThread().setName("MP Site - " + CoreUtils.hsIdToString(m_siteId));
        } else {
            Thread.currentThread().setName("SP " + m_partitionId + " Site - " + CoreUtils.hsIdToString(m_siteId));
        }
        if (m_coreBindIds != null) {
            PosixJNAAffinity.INSTANCE.setAffinity(m_coreBindIds);
        }
        initialize();
        m_startupConfig = null; // release the serializableCatalog.

        // Maintain a minimum ratio of task log (unrestricted) to live (restricted) transactions
        final MinimumRatioMaintainer mrm = new MinimumRatioMaintainer(m_taskLogReplayRatio);
        try {
            while (m_shouldContinue) {
                if (m_rejoinState == kStateRunning) {
                    // Normal operation blocks the site thread on the sitetasker queue.
                    SiteTasker task = m_scheduler.take();
                    if (task instanceof TransactionTask) {
                        m_currentTxnId = ((TransactionTask)task).getTxnId();
                        m_lastTxnTime = EstTime.currentTimeMillis();
                    }
                    task.run(getSiteProcedureConnection());
                }
                else if (m_rejoinState == kStateReplayingRejoin) {
                    // Rejoin operation: poll and try to do some catchup work. Tasks
                    // are responsible for logging any rejoin work they might have.
                    SiteTasker task = m_scheduler.peek();
                    boolean didWork = false;
                    if (task != null) {
                        didWork = true;
                        // If the task log is empty, free to execute the task.
                        // If the mrm says we can do a restricted task, go do it.
                        // Otherwise spin doing unrestricted tasks until we can bail out
                        // and do the restricted task that was polled.
                        while (!m_rejoinTaskLog.isEmpty() && !mrm.canDoRestricted()) {
                            replayFromTaskLog(mrm);
                        }
                        mrm.didRestricted();
                        // If m_rejoinState didn't change to kStateRunning because of replayFromTaskLog(),
                        // remove the task from the scheduler and give it to the task log.
                        // Otherwise, keep the task in the scheduler and let the next loop take and handle it.
                        if (m_rejoinState != kStateRunning) {
                            m_scheduler.poll();
                            task.runForRejoin(getSiteProcedureConnection(), m_rejoinTaskLog);
                        }
                    } else {
                        // If there are no tasks, do task log work.
                        didWork |= replayFromTaskLog(mrm);
                    }
                    if (!didWork) Thread.yield();
                }
                else {
                    SiteTasker task = m_scheduler.take();
                    task.runForRejoin(getSiteProcedureConnection(), m_rejoinTaskLog);
                }
            }
        }
        catch (OutOfMemoryError e) {
            // Even though OOM should be caught by the Throwable section below,
            // it sadly needs to be handled separately. The goal here is to make
            // sure VoltDB crashes.
            String errmsg = "Site: " + org.voltcore.utils.CoreUtils.hsIdToString(m_siteId) +
                    " ran out of Java memory. " + "This node will shut down.";
            VoltDB.crashLocalVoltDB(errmsg, true, e);
        }
        catch (Throwable t) {
            String errmsg = "Site: " + org.voltcore.utils.CoreUtils.hsIdToString(m_siteId) +
                    " encountered an " + "unexpected error and will die, taking this VoltDB node down.";
            VoltDB.crashLocalVoltDB(errmsg, true, t);
        }

        try {
            shutdown();
        } finally {
            CompressionService.releaseThreadLocal();
        }
    }

    ParticipantTransactionState global_replay_mpTxn = null;

    boolean replayFromTaskLog(MinimumRatioMaintainer mrm) throws IOException {
        // not yet time to catch-up.
        if (m_rejoinState != kStateReplayingRejoin) {
            return false;
        }

        TransactionInfoBaseMessage tibm = m_rejoinTaskLog.getNextMessage();
        if (tibm != null) {
            mrm.didUnrestricted();
            if (tibm instanceof Iv2InitiateTaskMessage) {
                Iv2InitiateTaskMessage m = (Iv2InitiateTaskMessage)tibm;
                SpProcedureTask t = new SpProcedureTask(m_initiatorMailbox, m.getStoredProcedureName(), null, m);
                if (!filter(tibm)) {
                    m_currentTxnId = t.getTxnId();
                    m_lastTxnTime = EstTime.currentTimeMillis();
                    t.runFromTaskLog(this);
                }
            }
            else if (tibm instanceof FragmentTaskMessage) {
                FragmentTaskMessage m = (FragmentTaskMessage)tibm;
                if (global_replay_mpTxn == null) {
                    global_replay_mpTxn = new ParticipantTransactionState(m.getTxnId(), m);
                }
                else if (global_replay_mpTxn.txnId != m.getTxnId()) {
                    VoltDB.crashLocalVoltDB("Started a MP transaction during replay before completing " +
                            " open transaction.", false, null);
                }
                TransactionTask t;
                if (m.isSysProcTask()) {
                    t = new SysprocFragmentTask(m_initiatorMailbox, m, global_replay_mpTxn);
                } else {
                    t = new FragmentTask(m_initiatorMailbox, m, global_replay_mpTxn);
                }
                if (!filter(tibm)) {
                    m_currentTxnId = t.getTxnId();
                    m_lastTxnTime = EstTime.currentTimeMillis();
                    t.runFromTaskLog(this);
                }
            }
            else if (tibm instanceof CompleteTransactionMessage) {
                // Needs improvement: completes for sysprocs aren't filterable as sysprocs.
                // Only complete transactions that are open...
                if (global_replay_mpTxn != null) {
                    CompleteTransactionMessage m = (CompleteTransactionMessage)tibm;
                    CompleteTransactionTask t = new CompleteTransactionTask(m_initiatorMailbox, global_replay_mpTxn, null, m);
                    if (!m.isRestart()) {
                        global_replay_mpTxn = null;
                    }
                    if (!filter(tibm)) {
                        t.runFromTaskLog(this);
                    }
                }
            }
            else {
                VoltDB.crashLocalVoltDB("Can not replay message type " + tibm +
                        " during live rejoin. Unexpected error.", false, null);
            }
        }

        // Exit replay being careful not to exit in the middle of a multi-partition
        // transaction. The SPScheduler doesn't have a valid transaction state for a
        // partially replayed MP txn and in case of rollback the scheduler's undo token
        // is wrong. Run MP txns fully kStateRejoining or fully kStateRunning.
        if (m_rejoinTaskLog.isEmpty() && global_replay_mpTxn == null) {
            setReplayRejoinComplete();
        }
        return tibm != null;
    }

    static boolean filter(TransactionInfoBaseMessage tibm) {
        // don't log sysproc fragments or iv2 initiate task messages.
        // this is all jealously; should be refactored to ask tibm
        // if it wants to be filtered for rejoin and eliminate this
        // horrible introspection. This implementation mimics the
        // original live rejoin code for ExecutionSite...
        // Multi-part AdHoc does not need to be checked because it's an alias and runs the procedure as planned.
        if (tibm instanceof FragmentTaskMessage && ((FragmentTaskMessage)tibm).isSysProcTask()) {
            if (!SysProcFragmentId.isDurableFragment(((FragmentTaskMessage) tibm).getPlanHash(0))) {
                return true;
            }
        }
        else if (tibm instanceof Iv2InitiateTaskMessage) {
            Iv2InitiateTaskMessage itm = (Iv2InitiateTaskMessage) tibm;
            final SystemProcedureCatalog.Config sysproc = SystemProcedureCatalog.listing.get(itm.getStoredProcedureName());
            // All durable sysprocs and non-sysprocs should not get filtered.
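            // (A return value of true from filter() means the message is skipped during task log replay.)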
return sysproc != null && !sysproc.isDurable(); } return false; } public void startShutdown() { m_shouldContinue = false; } void shutdown() { try { if (m_non_voltdb_backend != null) { m_non_voltdb_backend.shutdownInstance(); } if (m_ee != null) { m_ee.release(); } if (m_snapshotter != null) { try { m_snapshotter.shutdown(); } catch (InterruptedException e) { hostLog.warn("Interrupted during shutdown", e); } } if (m_rejoinTaskLog != null) { try { m_rejoinTaskLog.close(); } catch (IOException e) { hostLog.error("Exception closing rejoin task log", e); } } } catch (InterruptedException e) { hostLog.warn("Interrupted shutdown execution site.", e); } } // // SiteSnapshotConnection interface // @Override public void initiateSnapshots( SnapshotFormat format, Deque<SnapshotTableTask> tasks, long txnId, ExtensibleSnapshotDigestData extraSnapshotData) { m_snapshotter.initiateSnapshots(m_sysprocContext, format, tasks, txnId, extraSnapshotData); } /* * Do snapshot work exclusively until there is no more. Also blocks * until the syncing and closing of snapshot data targets has completed. */ @Override public HashSet<Exception> completeSnapshotWork() throws InterruptedException { return m_snapshotter.completeSnapshotWork(m_sysprocContext); } // // Legacy SiteProcedureConnection needed by ProcedureRunner // @Override public long getCorrespondingSiteId() { return m_siteId; } @Override public int getCorrespondingPartitionId() { return m_partitionId; } @Override public int getCorrespondingHostId() { return CoreUtils.getHostIdFromHSId(m_siteId); } @Override public int getCorrespondingClusterId() { return m_context.cluster.getDrclusterid(); } @Override public PartitionDRGateway getDRGateway() { return m_drGateway; } @Override public byte[] loadTable(long txnId, long spHandle, long uniqueId, String clusterName, String databaseName, String tableName, VoltTable data, boolean returnUniqueViolations, boolean shouldDRStream, boolean undo) throws VoltAbortException { Cluster cluster = m_context.cluster; if (cluster == null) { throw new VoltAbortException("cluster '" + clusterName + "' does not exist"); } Database db = cluster.getDatabases().get(databaseName); if (db == null) { throw new VoltAbortException("database '" + databaseName + "' does not exist in cluster " + clusterName); } Table table = db.getTables().getIgnoreCase(tableName); if (table == null) { throw new VoltAbortException("table '" + tableName + "' does not exist in database " + clusterName + "." + databaseName); } return loadTable(txnId, spHandle, uniqueId, table.getRelativeIndex(), data, returnUniqueViolations, shouldDRStream, undo); } @Override public byte[] loadTable(long txnId, long spHandle, long uniqueId, int tableId, VoltTable data, boolean returnUniqueViolations, boolean shouldDRStream, boolean undo) { // Long.MAX_VALUE is a no-op don't track undo token return m_ee.loadTable(tableId, data, txnId, spHandle, m_lastCommittedSpHandle, uniqueId, returnUniqueViolations, shouldDRStream, undo ? getNextUndoToken(m_currentTxnId) : Long.MAX_VALUE); } @Override public void updateBackendLogLevels() { m_ee.setLogLevels(org.voltdb.jni.EELoggers.getLogLevels()); } @Override public Map<Integer, List<VoltTable>> recursableRun( TransactionState currentTxnState) { return currentTxnState.recursableRun(this); } @Override public void setSpHandleForSnapshotDigest(long spHandle) { // During rejoin, the spHandle is updated even though the site is not executing the tasks. If it's a live // rejoin, all logged tasks will be replayed. 
        // So the spHandle may go backward and forward again. It should
        // stop at the same point after replay.
        m_spHandleForSnapshotDigest = Math.max(m_spHandleForSnapshotDigest, spHandle);
    }

    /**
     * Java level related stuff that is also needed to roll back
     * @param undoLog
     * @param undo
     */
    private static void handleUndoLog(List<UndoAction> undoLog, boolean undo) {
        if (undoLog == null) return;
        for (final ListIterator<UndoAction> iterator = undoLog.listIterator(undoLog.size()); iterator.hasPrevious();) {
            final UndoAction action = iterator.previous();
            if (undo) action.undo();
            else action.release();
        }
    }

    private void setLastCommittedSpHandle(long spHandle) {
        if (TxnEgo.getPartitionId(m_lastCommittedSpHandle) != m_partitionId) {
            VoltDB.crashLocalVoltDB("Mismatch SpHandle partition id " +
                    TxnEgo.getPartitionId(m_lastCommittedSpHandle) + ", " +
                    TxnEgo.getPartitionId(spHandle), true, null);
        }
        m_lastCommittedSpHandle = spHandle;
        setSpHandleForSnapshotDigest(m_lastCommittedSpHandle);
    }

    @Override
    public void truncateUndoLog(boolean rollback, long beginUndoToken, long spHandle, List<UndoAction> undoLog) {
        // Set the last committed txnId even if there is nothing to undo, as long as the txn is not rolling back.
        if (!rollback) {
            setLastCommittedSpHandle(spHandle);
        }

        // Any new txnid will create a new undo quantum, including the same txnid again.
        m_latestUndoTxnId = Long.MIN_VALUE;
        // If the begin undo token is not set the txn never did any work so there is nothing to undo/release.
        if (beginUndoToken == Site.kInvalidUndoToken) return;

        if (rollback) {
            m_ee.undoUndoToken(beginUndoToken);
        }
        else {
            assert(m_latestUndoToken != Site.kInvalidUndoToken);
            assert(m_latestUndoToken >= beginUndoToken);
            if (m_latestUndoToken > beginUndoToken) {
                m_ee.releaseUndoToken(m_latestUndoToken);
            }
        }
        // java level roll back
        handleUndoLog(undoLog, rollback);
    }

    @Override
    public void stashWorkUnitDependencies(Map<Integer, List<VoltTable>> dependencies) {
        m_ee.stashWorkUnitDependencies(dependencies);
    }

    @Override
    public DependencyPair executeSysProcPlanFragment(
            TransactionState txnState,
            Map<Integer, List<VoltTable>> dependencies,
            long fragmentId,
            ParameterSet params) {
        ProcedureRunner runner = m_loadedProcedures.getSysproc(fragmentId);
        return runner.executeSysProcPlanFragment(txnState, dependencies, fragmentId, params);
    }

    @Override
    public NonVoltDBBackend getNonVoltDBBackendIfExists() {
        return m_non_voltdb_backend;
    }

    @Override
    public long[] getUSOForExportTable(String signature) {
        return m_ee.getUSOForExportTable(signature);
    }

    @Override
    public TupleStreamStateInfo getDRTupleStreamStateInfo() {
        // Set the psetBuffer buffer capacity and clear the buffer
        m_ee.getParamBufferForExecuteTask(0);
        ByteBuffer resultBuffer = ByteBuffer.wrap(m_ee.executeTask(TaskType.GET_DR_TUPLESTREAM_STATE, ByteBuffer.allocate(0)));
        long partitionSequenceNumber = resultBuffer.getLong();
        long partitionSpUniqueId = resultBuffer.getLong();
        long partitionMpUniqueId = resultBuffer.getLong();
        int drVersion = resultBuffer.getInt();
        DRLogSegmentId partitionInfo = new DRLogSegmentId(partitionSequenceNumber, partitionSpUniqueId, partitionMpUniqueId);
        byte hasReplicatedStateInfo = resultBuffer.get();
        TupleStreamStateInfo info = null;
        if (hasReplicatedStateInfo != 0) {
            long replicatedSequenceNumber = resultBuffer.getLong();
            long replicatedSpUniqueId = resultBuffer.getLong();
            long replicatedMpUniqueId = resultBuffer.getLong();
            DRLogSegmentId replicatedInfo = new DRLogSegmentId(replicatedSequenceNumber, replicatedSpUniqueId, replicatedMpUniqueId);
            info = new
TupleStreamStateInfo(partitionInfo, replicatedInfo, drVersion); } else { info = new TupleStreamStateInfo(partitionInfo, drVersion); } return info; } @Override public void setDRSequenceNumbers(Long partitionSequenceNumber, Long mpSequenceNumber) { if (partitionSequenceNumber == null && mpSequenceNumber == null) return; ByteBuffer paramBuffer = m_ee.getParamBufferForExecuteTask(16); paramBuffer.putLong(partitionSequenceNumber != null ? partitionSequenceNumber : Long.MIN_VALUE); paramBuffer.putLong(mpSequenceNumber != null ? mpSequenceNumber : Long.MIN_VALUE); m_ee.executeTask(TaskType.SET_DR_SEQUENCE_NUMBERS, paramBuffer); } @Override public void toggleProfiler(int toggle) { m_ee.toggleProfiler(toggle); } @Override public void tick() { long time = System.currentTimeMillis(); m_ee.tick(time, m_lastCommittedSpHandle); statsTick(time); } /** * Cache the current statistics. * * @param time */ private void statsTick(long time) { /* * grab the table statistics from ee and put it into the statistics * agent. */ if (m_tableStats != null) { CatalogMap<Table> tables = m_context.database.getTables(); int[] tableIds = new int[tables.size()]; int i = 0; for (Table table : tables) { tableIds[i++] = table.getRelativeIndex(); } // data to aggregate long tupleCount = 0; long tupleDataMem = 0; long tupleAllocatedMem = 0; long indexMem = 0; long stringMem = 0; // update table stats final VoltTable[] s1 = m_ee.getStats(StatsSelector.TABLE, tableIds, false, time); if ((s1 != null) && (s1.length > 0)) { VoltTable stats = s1[0]; assert(stats != null); // rollup the table memory stats for this site while (stats.advanceRow()) { //Assert column index matches name for ENG-4092 assert(stats.getColumnName(7).equals("TUPLE_COUNT")); assert(stats.getColumnName(6).equals("TABLE_TYPE")); if ("PersistentTable".equals(stats.getString(6))){ tupleCount += stats.getLong(7); } assert(stats.getColumnName(8).equals("TUPLE_ALLOCATED_MEMORY")); tupleAllocatedMem += stats.getLong(8); assert(stats.getColumnName(9).equals("TUPLE_DATA_MEMORY")); tupleDataMem += stats.getLong(9); assert(stats.getColumnName(10).equals("STRING_DATA_MEMORY")); stringMem += stats.getLong(10); } stats.resetRowPosition(); m_tableStats.setStatsTable(stats); } else { // the EE returned no table stats, which means there are no tables. // Need to ensure the cached stats are cleared to reflect that m_tableStats.resetStatsTable(); } // update index stats final VoltTable[] s2 = m_ee.getStats(StatsSelector.INDEX, tableIds, false, time); if ((s2 != null) && (s2.length > 0)) { VoltTable stats = s2[0]; assert(stats != null); // rollup the index memory stats for this site while (stats.advanceRow()) { //Assert column index matches name for ENG-4092 assert(stats.getColumnName(11).equals("MEMORY_ESTIMATE")); indexMem += stats.getLong(11); } stats.resetRowPosition(); m_indexStats.setStatsTable(stats); } else { // the EE returned no index stats, which means there are no indexes. 
// Need to ensure the cached stats are cleared to reflect that m_indexStats.resetStatsTable(); } // update the rolled up memory statistics if (m_memStats != null) { m_memStats.eeUpdateMemStats(m_siteId, tupleCount, tupleDataMem, tupleAllocatedMem, indexMem, stringMem, m_ee.getThreadLocalPoolAllocations()); } } } @Override public void quiesce() { m_ee.quiesce(m_lastCommittedSpHandle); } @Override public void exportAction(boolean syncAction, long ackOffset, Long sequenceNumber, Integer partitionId, String tableSignature) { m_ee.exportAction(syncAction, ackOffset, sequenceNumber, partitionId, tableSignature); } @Override public VoltTable[] getStats(StatsSelector selector, int[] locators, boolean interval, Long now) { return m_ee.getStats(selector, locators, interval, now); } @Override public Future<?> doSnapshotWork() { return m_snapshotter.doSnapshotWork(m_sysprocContext, false); } @Override public void startSnapshotWithTargets(Collection<SnapshotDataTarget> targets) { m_snapshotter.startSnapshotWithTargets(targets, System.currentTimeMillis()); } @Override public void setRejoinComplete( JoinProducerBase.JoinCompletionAction replayComplete, Map<String, Map<Integer, Pair<Long, Long>>> exportSequenceNumbers, Map<Integer, Long> drSequenceNumbers, Map<Integer, Map<Integer, Map<Integer, DRConsumerDrIdTracker>>> allConsumerSiteTrackers, boolean requireExistingSequenceNumbers, long clusterCreateTime) { // transition from kStateRejoining to live rejoin replay. // pass through this transition in all cases; if not doing // live rejoin, will transfer to kStateRunning as usual // as the rejoin task log will be empty. assert(m_rejoinState == kStateRejoining); if (replayComplete == null) { throw new RuntimeException("Null Replay Complete Action."); } if (clusterCreateTime != -1) { VoltDB.instance().setClusterCreateTime(clusterCreateTime); } for (Map.Entry<String, Map<Integer, Pair<Long,Long>>> tableEntry : exportSequenceNumbers.entrySet()) { final Table catalogTable = m_context.tables.get(tableEntry.getKey()); if (catalogTable == null) { VoltDB.crashLocalVoltDB( "Unable to find catalog entry for table named " + tableEntry.getKey(), true, null); } Pair<Long,Long> sequenceNumbers = tableEntry.getValue().get(m_partitionId); if (sequenceNumbers == null) { if (requireExistingSequenceNumbers) { VoltDB.crashLocalVoltDB( "Could not find export sequence numbers for partition " + m_partitionId + " table " + tableEntry.getKey() + " have " + exportSequenceNumbers, false, null); } else { sequenceNumbers = Pair.of(0L,0L); } } exportAction( true, sequenceNumbers.getFirst().longValue(), sequenceNumbers.getSecond(), m_partitionId, catalogTable.getSignature()); } if (drSequenceNumbers != null) { Long partitionDRSequenceNumber = drSequenceNumbers.get(m_partitionId); Long mpDRSequenceNumber = drSequenceNumbers.get(MpInitiator.MP_INIT_PID); setDRSequenceNumbers(partitionDRSequenceNumber, mpDRSequenceNumber); } else if (requireExistingSequenceNumbers) { VoltDB.crashLocalVoltDB("Could not find DR sequence number for partition " + m_partitionId); } if (allConsumerSiteTrackers != null) { Map<Integer, Map<Integer, DRConsumerDrIdTracker>> thisConsumerSiteTrackers = allConsumerSiteTrackers.get(m_partitionId); if (thisConsumerSiteTrackers != null) { m_maxSeenDrLogsBySrcPartition = thisConsumerSiteTrackers; } } m_rejoinState = kStateReplayingRejoin; m_replayCompletionAction = replayComplete; } private void setReplayRejoinComplete() { // transition out of rejoin replay to normal running state. 
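        // The completion action registered in setRejoinComplete() is run here before resuming normal task execution.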
assert(m_rejoinState == kStateReplayingRejoin); m_replayCompletionAction.run(); m_rejoinState = kStateRunning; } @Override public FastDeserializer executePlanFragments( int numFragmentIds, long[] planFragmentIds, long[] inputDepIds, Object[] parameterSets, DeterminismHash determinismHash, String[] sqlTexts, boolean[] isWriteFrags, int[] sqlCRCs, long txnId, long spHandle, long uniqueId, boolean readOnly, boolean traceOn) throws EEException { return m_ee.executePlanFragments( numFragmentIds, planFragmentIds, inputDepIds, parameterSets, determinismHash, sqlTexts, isWriteFrags, sqlCRCs, txnId, spHandle, m_lastCommittedSpHandle, uniqueId, readOnly ? Long.MAX_VALUE : getNextUndoTokenBroken(), traceOn); } @Override public boolean usingFallbackBuffer() { return m_ee.usingFallbackBuffer(); } @Override public ProcedureRunner getProcedureRunner(String procedureName) { return m_loadedProcedures.getProcByName(procedureName); } /** * Update the catalog. If we're the MPI, don't bother with the EE. */ public boolean updateCatalog(String diffCmds, CatalogContext context, CatalogSpecificPlanner csp, boolean requiresSnapshotIsolationboolean, boolean isMPI, long uniqueId, long spHandle, boolean requireCatalogDiffCmdsApplyToEE, boolean requiresNewExportGeneration) { m_context = context; m_ee.setBatchTimeout(m_context.cluster.getDeployment().get("deployment"). getSystemsettings().get("systemsettings").getQuerytimeout()); m_loadedProcedures.loadProcedures(m_context, csp, false); if (isMPI) { // the rest of the work applies to sites with real EEs return true; } if (requireCatalogDiffCmdsApplyToEE == false) { // empty diff cmds for the EE to apply, so skip the JNI call hostLog.info("Skipped applying diff commands on EE."); return true; } CatalogMap<Table> tables = m_context.catalog.getClusters().get("cluster").getDatabases().get("database").getTables(); boolean DRCatalogChange = false; for (Table t : tables) { if (t.getIsdred()) { DRCatalogChange |= diffCmds.contains("tables#" + t.getTypeName()); if (DRCatalogChange) { break; } } } // if a snapshot is in process, wait for it to finish // don't bother if this isn't a schema change // if (requiresSnapshotIsolationboolean && m_snapshotter.isEESnapshotting()) { hostLog.info(String.format("Site %d performing schema change operation must block until snapshot is locally complete.", CoreUtils.getSiteIdFromHSId(m_siteId))); try { m_snapshotter.completeSnapshotWork(m_sysprocContext); hostLog.info(String.format("Site %d locally finished snapshot. Will update catalog now.", CoreUtils.getSiteIdFromHSId(m_siteId))); } catch (InterruptedException e) { VoltDB.crashLocalVoltDB("Unexpected Interrupted Exception while finishing a snapshot for a catalog update.", true, e); } } //Necessary to quiesce before updating the catalog //so export data for the old generation is pushed to Java. 
m_ee.quiesce(m_lastCommittedSpHandle); m_ee.updateCatalog(m_context.m_uniqueId, requiresNewExportGeneration, diffCmds); if (DRCatalogChange) { final DRCatalogCommands catalogCommands = DRCatalogDiffEngine.serializeCatalogCommandsForDr(m_context.catalog, -1); generateDREvent( EventType.CATALOG_UPDATE, uniqueId, m_lastCommittedSpHandle, spHandle, catalogCommands.commands.getBytes(Charsets.UTF_8)); } return true; } /** * Update the system settings * @param context catalog context * @param csp catalog specific planner * @return true if it succeeds */ public boolean updateSettings(CatalogContext context, CatalogSpecificPlanner csp) { m_context = context; // here you could bring the timeout settings m_loadedProcedures.loadProcedures(m_context, csp); return true; } @Override public void setPerPartitionTxnIds(long[] perPartitionTxnIds, boolean skipMultiPart) { boolean foundMultipartTxnId = skipMultiPart; boolean foundSinglepartTxnId = false; for (long txnId : perPartitionTxnIds) { if (TxnEgo.getPartitionId(txnId) == m_partitionId) { if (foundSinglepartTxnId) { VoltDB.crashLocalVoltDB( "Found multiple transactions ids during restore for a partition", false, null); } foundSinglepartTxnId = true; m_initiatorMailbox.setMaxLastSeenTxnId(txnId); setSpHandleForSnapshotDigest(txnId); } if (!skipMultiPart && TxnEgo.getPartitionId(txnId) == MpInitiator.MP_INIT_PID) { if (foundMultipartTxnId) { VoltDB.crashLocalVoltDB( "Found multiple transactions ids during restore for a multipart txnid", false, null); } foundMultipartTxnId = true; m_initiatorMailbox.setMaxLastSeenMultipartTxnId(txnId); } } if (!foundMultipartTxnId) { VoltDB.crashLocalVoltDB("Didn't find a multipart txnid on restore", false, null); } } public void setNumberOfPartitions(int partitionCount) { m_numberOfPartitions = partitionCount; } @Override public TheHashinator getCurrentHashinator() { return m_hashinator; } @Override public void updateHashinator(TheHashinator hashinator) { Preconditions.checkNotNull(hashinator); m_hashinator = hashinator; m_ee.updateHashinator(hashinator.pGetCurrentConfig()); } /** * For the specified list of table ids, return the number of mispartitioned rows using * the provided hashinator and hashinator config */ @Override public long[] validatePartitioning(long[] tableIds, int hashinatorType, byte[] hashinatorConfig) { ByteBuffer paramBuffer = m_ee.getParamBufferForExecuteTask(4 + (8 * tableIds.length) + 4 + 4 + hashinatorConfig.length); paramBuffer.putInt(tableIds.length); for (long tableId : tableIds) { paramBuffer.putLong(tableId); } paramBuffer.putInt(hashinatorType); paramBuffer.put(hashinatorConfig); ByteBuffer resultBuffer = ByteBuffer.wrap(m_ee.executeTask( TaskType.VALIDATE_PARTITIONING, paramBuffer)); long mispartitionedRows[] = new long[tableIds.length]; for (int ii = 0; ii < tableIds.length; ii++) { mispartitionedRows[ii] = resultBuffer.getLong(); } return mispartitionedRows; } @Override public void setBatch(int batchIndex) { m_ee.setBatch(batchIndex); } @Override public void setProcedureName(String procedureName) { m_ee.setProcedureName(procedureName); } @Override public void notifyOfSnapshotNonce(String nonce, long snapshotSpHandle) { m_initiatorMailbox.notifyOfSnapshotNonce(nonce, snapshotSpHandle); } @Override public long applyBinaryLog(long txnId, long spHandle, long uniqueId, int remoteClusterId, byte log[]) throws EEException { ByteBuffer paramBuffer = m_ee.getParamBufferForExecuteTask(4 + log.length); paramBuffer.putInt(log.length); paramBuffer.put(log); return m_ee.applyBinaryLog(paramBuffer, 
txnId, spHandle, m_lastCommittedSpHandle, uniqueId, remoteClusterId, getNextUndoToken(m_currentTxnId)); } @Override public void setBatchTimeout(int batchTimeout) { m_ee.setBatchTimeout(batchTimeout); } @Override public int getBatchTimeout() { return m_ee.getBatchTimeout(); } @Override public void setDRProtocolVersion(int drVersion) { ByteBuffer paramBuffer = m_ee.getParamBufferForExecuteTask(4); paramBuffer.putInt(drVersion); m_ee.executeTask(TaskType.SET_DR_PROTOCOL_VERSION, paramBuffer); hostLog.info("DR protocol version has been set to " + drVersion); } @Override public void setDRProtocolVersion(int drVersion, long spHandle, long uniqueId) { setDRProtocolVersion(drVersion); generateDREvent( EventType.DR_STREAM_START, uniqueId, m_lastCommittedSpHandle, spHandle, new byte[0]); } /** * Generate a in-stream DR event which pushes an event buffer to topend */ public void generateDREvent(EventType type, long uniqueId, long lastCommittedSpHandle, long spHandle, byte[] payloads) { m_ee.quiesce(lastCommittedSpHandle); ByteBuffer paramBuffer = m_ee.getParamBufferForExecuteTask(32 + payloads.length); paramBuffer.putInt(type.ordinal()); paramBuffer.putLong(uniqueId); paramBuffer.putLong(lastCommittedSpHandle); paramBuffer.putLong(spHandle); paramBuffer.putInt(payloads.length); paramBuffer.put(payloads); m_ee.executeTask(TaskType.GENERATE_DR_EVENT, paramBuffer); } @Override public SystemProcedureExecutionContext getSystemProcedureExecutionContext() { return m_sysprocContext; } public ExecutionEngine getExecutionEngine() { return m_ee; } }