/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.voltdb.iv2;

import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;

import org.voltcore.logging.VoltLogger;
import org.voltdb.CatalogContext;
import org.voltdb.CatalogSpecificPlanner;
import org.voltdb.exceptions.TransactionRestartException;
import org.voltdb.messaging.FragmentResponseMessage;
import org.voltdb.messaging.FragmentTaskMessage;

/**
 * Provide an implementation of the TransactionTaskQueue specifically for the MPI.
 * This class will manage separating the stream of reads and writes to different
 * Sites and block appropriately so that reads and writes never execute concurrently.
 */
public class MpTransactionTaskQueue extends TransactionTaskQueue
{
    protected static final VoltLogger tmLog = new VoltLogger("TM");

    // Track the current writes and reads in progress.  If writes contains
    // anything, reads must be empty, and vice versa.
    private final Map<Long, TransactionTask> m_currentWrites = new HashMap<Long, TransactionTask>();
    private final Map<Long, TransactionTask> m_currentReads = new HashMap<Long, TransactionTask>();
    private Deque<TransactionTask> m_backlog = new ArrayDeque<TransactionTask>();

    private MpRoSitePool m_sitePool = null;

    MpTransactionTaskQueue(SiteTaskerQueue queue)
    {
        super(queue);
    }

    void setMpRoSitePool(MpRoSitePool sitePool)
    {
        m_sitePool = sitePool;
    }

    synchronized void updateCatalog(String diffCmds, CatalogContext context, CatalogSpecificPlanner csp)
    {
        m_sitePool.updateCatalog(diffCmds, context, csp);
    }

    synchronized void updateSettings(CatalogContext context, CatalogSpecificPlanner csp)
    {
        m_sitePool.updateSettings(context, csp);
    }

    void shutdown()
    {
        if (m_sitePool != null) {
            m_sitePool.shutdown();
        }
    }

    /**
     * Stick this task in the backlog.
     * Many network threads may be racing to reach here; synchronize to
     * serialize queue order.  Always returns true in this case, a side
     * effect of extending TransactionTaskQueue.
     */
    @Override
    synchronized boolean offer(TransactionTask task)
    {
        Iv2Trace.logTransactionTaskQueueOffer(task);
        m_backlog.addLast(task);
        taskQueueOffer();
        return true;
    }
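    // A minimal usage sketch (illustrative only, not part of this class) of the
    // offer()/flush() lifecycle this queue expects from its caller, the MPI
    // scheduler.  The "queue", "writeTask", and "readTask" names below are
    // hypothetical stand-ins:
    //
    //   queue.offer(writeTask);              // nothing in flight: dispatched to the RW site
    //   queue.offer(readTask);               // a write is in flight: parked in m_backlog
    //   ...                                  // the write finishes on the Site thread
    //   queue.flush(writeTask.getTxnId());   // retires the write, releases backlogged reads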
    // repair() is used by MPI repair to inject a repair task into the
    // SiteTaskerQueue.  Before it does this, it unblocks the MP transaction
    // that may be running in the Site thread and causes it to roll back by
    // faking an unsuccessful FragmentResponseMessage.
    synchronized void repair(SiteTasker task, List<Long> masters, Map<Integer, Long> partitionMasters)
    {
        // We know that every Site assigned to the MPI (either the main writer or
        // any of the MP read pool) will only have one active transaction at a time,
        // and that we either have active reads or active writes, but never both.
        // Figure out which we're doing, and then poison all of the appropriate sites.
        Map<Long, TransactionTask> currentSet;
        if (!m_currentReads.isEmpty()) {
            assert(m_currentWrites.isEmpty());
            tmLog.debug("MpTTQ: repairing reads");
            for (Long txnId : m_currentReads.keySet()) {
                m_sitePool.repair(txnId, task);
            }
            currentSet = m_currentReads;
        }
        else {
            tmLog.debug("MpTTQ: repairing writes");
            m_taskQueue.offer(task);
            currentSet = m_currentWrites;
        }
        for (Entry<Long, TransactionTask> e : currentSet.entrySet()) {
            if (e.getValue() instanceof MpProcedureTask) {
                MpProcedureTask next = (MpProcedureTask)e.getValue();
                tmLog.debug("MpTTQ: poisoning task: " + next);
                next.doRestart(masters, partitionMasters);
                MpTransactionState txn = (MpTransactionState)next.getTransactionState();
                // Inject the poison pill.
                FragmentTaskMessage dummy = new FragmentTaskMessage(0L, 0L, 0L, 0L, false, false, false);
                FragmentResponseMessage poison =
                    new FragmentResponseMessage(dummy, 0L); // Don't care about source HSID here
                // Provide a TransactionRestartException which will be converted
                // into a ClientResponse.RESTART, so that the MpProcedureTask can
                // detect the restart and take the appropriate actions.
                TransactionRestartException restart = new TransactionRestartException(
                        "Transaction being restarted due to fault recovery or shutdown.", next.getTxnId());
                poison.setStatus(FragmentResponseMessage.UNEXPECTED_ERROR, restart);
                txn.offerReceivedFragmentResponse(poison);
            }
            else {
                // We don't think EveryPartitionTasks need to do anything here,
                // since they don't actually run Java; they just exist for
                // sequencing.  Any cleanup should be to the duplicate counter
                // in MpScheduler for this transaction.
            }
        }
        // Now, iterate through the backlog and update the partition masters
        // for all ProcedureTasks.
        Iterator<TransactionTask> iter = m_backlog.iterator();
        while (iter.hasNext()) {
            TransactionTask tt = iter.next();
            if (tt instanceof MpProcedureTask) {
                MpProcedureTask next = (MpProcedureTask)tt;
                tmLog.debug("Repair updating task: " + next + " with masters: " + masters);
                next.updateMasters(masters, partitionMasters);
            }
            else if (tt instanceof EveryPartitionTask) {
                EveryPartitionTask next = (EveryPartitionTask)tt;
                tmLog.debug("Repair updating EPT task: " + next + " with masters: " + masters);
                next.updateMasters(masters);
            }
        }
    }
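    // A hedged sketch of how MPI repair is assumed to invoke the entry point
    // above during leader failover ("repairTask", "newMasters", and
    // "newPartitionMasters" are illustrative names):
    //
    //   queue.repair(repairTask, newMasters, newPartitionMasters);
    //   // 1) any in-flight MP reads or write are poisoned and will restart
    //   //    with ClientResponse.RESTART
    //   // 2) backlogged MpProcedureTasks/EveryPartitionTasks are re-pointed
    //   //    at the new masters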
    private void taskQueueOffer(TransactionTask task)
    {
        Iv2Trace.logSiteTaskerQueueOffer(task);
        if (task.getTransactionState().isReadOnly()) {
            m_sitePool.doWork(task.getTxnId(), task);
        }
        else {
            m_taskQueue.offer(task);
        }
    }

    private boolean taskQueueOffer()
    {
        // Do we have something to do?
        // - If so, is it a write?
        //   - If so, are there reads or writes outstanding?
        //     - if not, pull it from the backlog, add it to the current write set, and queue it
        //     - if so, bail for now
        //   - If not, are there writes outstanding?
        //     - if not, while there are reads on the backlog and the pool has capacity:
        //       - pull the read from the backlog, add it to the current read set, and queue it
        //       - bail when done
        //     - if so, bail for now
        boolean retval = false;
        if (!m_backlog.isEmpty()) {
            // We may not queue the next task; just peek to get the read-only state.
            TransactionTask task = m_backlog.peekFirst();
            if (!task.getTransactionState().isReadOnly()) {
                if (m_currentReads.isEmpty() && m_currentWrites.isEmpty()) {
                    task = m_backlog.pollFirst();
                    m_currentWrites.put(task.getTxnId(), task);
                    taskQueueOffer(task);
                    retval = true;
                }
            }
            else if (m_currentWrites.isEmpty()) {
                while (task != null && task.getTransactionState().isReadOnly() &&
                       m_sitePool.canAcceptWork())
                {
                    task = m_backlog.pollFirst();
                    assert(task.getTransactionState().isReadOnly());
                    m_currentReads.put(task.getTxnId(), task);
                    taskQueueOffer(task);
                    retval = true;
                    // Prime the pump with the head task, if any.  If the
                    // backlog is empty, task will be null.
                    task = m_backlog.peekFirst();
                }
            }
        }
        return retval;
    }

    /**
     * Indicate that the transaction associated with txnId is complete.  Perform
     * management of reads/writes in progress, then call taskQueueOffer() to
     * submit additional tasks to be done, determined by whatever the current
     * state is.  See the giant comment at the top of taskQueueOffer() for what
     * happens.
     */
    @Override
    synchronized int flush(long txnId)
    {
        int offered = 0;
        if (m_currentReads.containsKey(txnId)) {
            m_currentReads.remove(txnId);
            m_sitePool.completeWork(txnId);
        }
        else {
            assert(m_currentWrites.containsKey(txnId));
            m_currentWrites.remove(txnId);
            assert(m_currentWrites.isEmpty());
        }
        if (taskQueueOffer()) {
            ++offered;
        }
        return offered;
    }

    /**
     * Restart the current task at the head of the queue.  This will be called
     * instead of flush by the currently blocking MP transaction in the event
     * that a restart is necessary.
     */
    @Override
    synchronized void restart()
    {
        if (!m_currentReads.isEmpty()) {
            // Re-submit all the tasks in the current read set to the pool.
            // The pool will ensure that tasks submitted with the same txnId
            // go to the MpRoSite which is currently running that transaction.
            for (TransactionTask task : m_currentReads.values()) {
                taskQueueOffer(task);
            }
        }
        else {
            assert(!m_currentWrites.isEmpty());
            TransactionTask task;
            // There should only ever be one current write.  This is the
            // awkward way to get a single value out of a Map.
            task = m_currentWrites.entrySet().iterator().next().getValue();
            taskQueueOffer(task);
        }
    }

    /**
     * How many Tasks are not yet runnable?
     * @return the number of tasks still in the backlog
     */
    @Override
    synchronized int size()
    {
        return m_backlog.size();
    }

    @Override
    public String toString()
    {
        StringBuilder sb = new StringBuilder();
        sb.append("MpTransactionTaskQueue:").append("\n");
        sb.append("\tSIZE: ").append(m_backlog.size()).append("\n");
        if (!m_backlog.isEmpty()) {
            sb.append("\tHEAD: ").append(m_backlog.getFirst()).append("\n");
        }
        return sb.toString();
    }
}
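/*
 * A self-contained, test-style sketch of the exclusivity invariant this queue
 * maintains (assumptions: "siteTaskerQueue" is an appropriately constructed
 * SiteTaskerQueue, and "fakePool", "fakeWrite", and "fakeRead" are
 * hypothetical test doubles for the pool and tasks):
 *
 *   MpTransactionTaskQueue q = new MpTransactionTaskQueue(siteTaskerQueue);
 *   q.setMpRoSitePool(fakePool);
 *   q.offer(fakeWrite(1L));   // runs immediately; joins m_currentWrites
 *   q.offer(fakeRead(2L));    // blocked behind the in-flight write
 *   assert q.size() == 1;     // the read is still backlogged
 *   q.flush(1L);              // the write completes...
 *   assert q.size() == 0;     // ...and the read is dispatched to the pool
 */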