/* This file is part of VoltDB. * Copyright (C) 2008-2017 VoltDB Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with VoltDB. If not, see <http://www.gnu.org/licenses/>. */ package org.voltdb.iv2; import java.io.IOException; import java.util.List; import java.util.Map; import org.voltcore.logging.VoltLogger; import org.voltcore.messaging.Mailbox; import org.voltcore.messaging.TransactionInfoBaseMessage; import org.voltcore.messaging.VoltMessage; import org.voltdb.LoadedProcedureSet; import org.voltdb.SiteProcedureConnection; import org.voltdb.StarvationTracker; import org.voltdb.VoltDB; import org.voltdb.iv2.SpScheduler.DurableUniqueIdListener; import org.voltdb.messaging.MultiPartitionParticipantMessage; import org.voltdb.rejoin.TaskLog; /** * Scheduler's rough current responsibility is to take appropriate local action * based on a received message. * * For new work (InitiateTask, FragmentTask, CompleteTransactionTask): * - Create new TransactionStates for previously unseen transactions * - Look up TransactionStates for in-progress multi-part transactions * - Create appropriate TransactionTasks and offer them to the Site (via * TransactionTaskQueue) * For responses (InitiateResponse, FragmentResponse): * - Perform response de-duping * - Offer responses to the corresponding TransactionState for MP dependency tracking * * Currently, Single- and Multi-partition schedulers extend this class and * provide the specific message handling necessary for the different * transaction types. * IZZY: This class maybe folds into InitiatorMessageHandler nicely; let's see * how it looks once partition replicas are implemented. */ abstract public class Scheduler implements InitiatorMessageHandler { protected static final VoltLogger hostLog = new VoltLogger("HOST"); // A null task that unblocks the site task queue, used during shutdown static final SiteTasker m_nullTask = new SiteTasker() { @Override public void run(SiteProcedureConnection siteConnection) { } @Override public void runForRejoin(SiteProcedureConnection siteConnection, TaskLog taskLog) throws IOException { } }; // The queue which the Site's runloop is going to poll for new work. This // is fronted here by the TransactionTaskQueue and should not be directly // offered work. // IZZY: We should refactor this to be inviolable in the future. final protected SiteTaskerQueue m_tasks; protected Mailbox m_mailbox; protected boolean m_isLeader = false; private TxnEgo m_txnEgo; final protected int m_partitionId; protected LoadedProcedureSet m_procSet; // helper class to put command log work in order protected final ReplaySequencer m_replaySequencer = new ReplaySequencer(); /* * This lock is extremely dangerous to use without known the pattern. * It is the intrinsic lock on the InitiatorMailbox. For an SpInitiator * this is a real thing, but for the MpInitiator the intrinsic lock isn't used * because it uses MpInitiatorMailbox (as subclass of InitiatorMailbox) * which uses a dedicated thread instead of locking. * * In the MpInitiator case locking on this will not provide any isolation because * the InitiatorMailbox thread doesn't use the lock. * * Right now this lock happens to only be used to gain isolation for * command logging while submitting durable tasks. Only SpInitiators log * so this is fine. * * Think twice and ask around before using it for anything else. * You should probably be going through InitiatorMailbox.deliver which automatically * handles the transition between locking vs. submitting to the MpInitiatorMailbox task queue. */ protected Object m_lock; Scheduler(int partitionId, SiteTaskerQueue taskQueue) { m_tasks = taskQueue; m_partitionId = partitionId; m_txnEgo = TxnEgo.makeZero(partitionId); } public void setMaxSeenTxnId(long maxSeenTxnId) { final TxnEgo ego = new TxnEgo(maxSeenTxnId); if (m_txnEgo.getPartitionId() != ego.getPartitionId()) { VoltDB.crashLocalVoltDB( "Received a transaction id at partition " + m_txnEgo.getPartitionId() + " for partition " + ego.getPartitionId() + ". The partition ids should match.", true, null); } if (m_txnEgo.getTxnId() < ego.getTxnId()) { m_txnEgo = ego; } } final protected TxnEgo advanceTxnEgo() { m_txnEgo = m_txnEgo.makeNext(); return m_txnEgo; } final protected long getCurrentTxnId() { return m_txnEgo.getTxnId(); } @Override public void setMailbox(Mailbox mailbox) { m_mailbox = mailbox; } public void setLeaderState(boolean isLeader) { m_isLeader = isLeader; } public SiteTaskerQueue getQueue() { return m_tasks; } public void setStarvationTracker(StarvationTracker tracker) { m_tasks.setStarvationTracker(tracker); } public void setLock(Object o) { m_lock = o; } public void setDurableUniqueIdListener(DurableUniqueIdListener listener) { // Durability Listeners should never be assigned to the MP Scheduler assert false; } public void setProcedureSet(LoadedProcedureSet procSet) { m_procSet = procSet; } /** * Update last seen uniqueIds in the replay sequencer. This is used on MPI repair. * @param message */ public void updateLastSeenUniqueIds(VoltMessage message) { long sequenceWithUniqueId = Long.MIN_VALUE; boolean commandLog = (message instanceof TransactionInfoBaseMessage && (((TransactionInfoBaseMessage)message).isForReplay())); boolean sentinel = message instanceof MultiPartitionParticipantMessage; // if replay if (commandLog || sentinel) { sequenceWithUniqueId = ((TransactionInfoBaseMessage)message).getUniqueId(); // Update last seen and last polled txnId for replicas m_replaySequencer.updateLastSeenUniqueId(sequenceWithUniqueId, (TransactionInfoBaseMessage) message); m_replaySequencer.updateLastPolledUniqueId(sequenceWithUniqueId, (TransactionInfoBaseMessage) message); } } // Dumps the content of the scheduler for debugging public void dump() {} abstract public void shutdown(); @Override abstract public void updateReplicas(List<Long> replicas, Map<Integer, Long> partitionMasters); @Override abstract public void deliver(VoltMessage message); abstract public void enableWritingIv2FaultLog(); abstract public boolean sequenceForReplay(VoltMessage m); }