/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 */

package org.voltdb.iv2;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import static org.mockito.Mockito.*;

import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicBoolean;

import org.apache.zookeeper_voltpatches.KeeperException;
import org.apache.zookeeper_voltpatches.ZooKeeper;
import org.json_voltpatches.JSONException;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import org.voltcore.messaging.HostMessenger;
import org.voltcore.zk.LeaderElector;
import org.voltcore.zk.ZKTestBase;
import org.voltcore.zk.ZKUtil;
import org.voltdb.AbstractTopology;
import org.voltdb.ReplicationRole;
import org.voltdb.TheHashinator;
import org.voltdb.VoltDB;
import org.voltdb.VoltDBInterface;
import org.voltdb.VoltZK;

import com.google_voltpatches.common.collect.ImmutableMap;
import com.google_voltpatches.common.collect.Maps;
import com.google_voltpatches.common.collect.Sets;

public class TestLeaderAppointer extends ZKTestBase {

    private final int NUM_AGREEMENT_SITES = 1;
    private int m_kfactor;
    private AbstractTopology m_topo;
    private Set<Integer> m_hostIds;
    private Map<Integer, String> m_hostGroups;
    private MpInitiator m_mpi = null;
    private HostMessenger m_hm = null;
    private ZooKeeper m_zk = null;
    private LeaderCache m_cache = null;
    private final AtomicBoolean m_newAppointee = new AtomicBoolean(false);
    private LeaderAppointer m_dut = null;

    LeaderCache.Callback m_changeCallback = new LeaderCache.Callback() {
        @Override
        public void run(ImmutableMap<Integer, Long> cache) {
            m_newAppointee.set(true);
        }
    };

    @Before
    public void setUp() throws Exception {
        final VoltDBInterface mock = mock(VoltDBInterface.class);
        when(mock.getReplicationRole()).thenReturn(ReplicationRole.NONE);
        VoltDB.replaceVoltDBInstanceForTest(mock);
        VoltDB.ignoreCrash = false;
        VoltDB.wasCrashCalled = false;
        setUpZK(NUM_AGREEMENT_SITES);
    }

    @After
    public void tearDown() throws Exception {
        if (m_dut != null) {
            m_dut.shutdown();
        }
        if (m_cache != null) {
            m_cache.shutdown();
        }
        if (m_zk != null) {
            m_zk.close();
        }
        tearDownZK();
    }

    void configure(int hostCount, int sitesPerHost, int replicationFactor, boolean enablePPD)
            throws Exception {
        m_hm = mock(HostMessenger.class);
        m_zk = getClient(0);
        when(m_hm.getZK()).thenReturn(m_zk);
        VoltZK.createPersistentZKNodes(m_zk);

        Map<Integer, Integer> sphMap = Maps.newHashMap();
        for (int hostId = 0; hostId < hostCount; hostId++) {
            sphMap.put(hostId, sitesPerHost);
        }
        m_hostIds = Sets.newTreeSet();
        m_hostGroups = Maps.newHashMap();
        for (int i = 0; i < hostCount; i++) {
            m_hostIds.add(i);
            m_hostGroups.put(i, "0");
        }
        m_kfactor = replicationFactor;
        m_topo = AbstractTopology.getTopology(sphMap, m_hostGroups, replicationFactor);
        int partitionCount = m_topo.getPartitionCount();
        TheHashinator.initialize(TheHashinator.getConfiguredHashinatorClass(),
                TheHashinator.getConfigureBytes(partitionCount));
        when(m_hm.getLiveHostIds()).thenReturn(m_hostIds);
        m_mpi = mock(MpInitiator.class);
        createAppointer(enablePPD);

        m_cache = new LeaderCache(m_zk, VoltZK.iv2appointees, m_changeCallback);
        m_cache.start(true);
    }
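
    /*
     * The helpers below drive the LeaderAppointer purely through ZooKeeper
     * state: addReplica()/deleteReplica() simulate replicas arriving and dying
     * by creating and removing election nodes under VoltZK.leaders_initiators,
     * registerLeader() simulates a promoted site publishing itself through the
     * VoltZK.iv2masters leader cache, and waitForAppointee() spins until the
     * appointer's choice shows up in the VoltZK.iv2appointees cache.
     */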
    void createAppointer(boolean enablePPD) throws JSONException {
        KSafetyStats stats = new KSafetyStats();
        m_dut = new LeaderAppointer(m_hm, m_topo.getPartitionCount(), m_kfactor,
                m_topo.topologyToJSON(), m_mpi, stats, false);
        m_dut.onReplayCompletion();
    }

    void addReplica(int partitionId, long HSId) throws Exception {
        LeaderElector.createParticipantNode(m_zk,
                LeaderElector.electionDirForPartition(VoltZK.leaders_initiators, partitionId),
                Long.toString(HSId), null);
    }

    void deleteReplica(int partitionId, long HSId) throws KeeperException, InterruptedException {
        String dir = LeaderElector.electionDirForPartition(VoltZK.leaders_initiators, partitionId);
        List<String> children = m_zk.getChildren(dir, false);
        // Delete the election node(s) whose name prefix matches this HSId
        for (String child : children) {
            if (LeaderElector.getPrefixFromChildName(child).equals(Long.toString(HSId))) {
                String victim = ZKUtil.joinZKPath(dir, child);
                m_zk.delete(victim, -1);
            }
        }
    }

    void registerLeader(int partitionId, long HSId) throws KeeperException, InterruptedException {
        LeaderCacheWriter iv2masters = new LeaderCache(m_zk, VoltZK.iv2masters);
        iv2masters.put(partitionId, HSId);
    }

    // Busy-wait until an appointee for the given partition appears in the appointee cache
    void waitForAppointee(int partitionId) throws InterruptedException {
        while (m_cache.pointInTimeCache().get(partitionId) == null) {
            Thread.sleep(0);
        }
    }

    @Test
    public void testBasicStartup() throws Exception {
        configure(2, 2, 0, false);
        Thread dutthread = new Thread() {
            @Override
            public void run() {
                try {
                    m_dut.acceptPromotion();
                } catch (Exception e) {
                }
            }
        };
        dutthread.start();
        addReplica(0, 0L);
        addReplica(1, 1L);
        addReplica(2, 2L);
        addReplica(3, 3L);
        waitForAppointee(3);
        assertEquals(0L, (long) m_cache.pointInTimeCache().get(0));
        assertEquals(1L, (long) m_cache.pointInTimeCache().get(1));
        assertEquals(2L, (long) m_cache.pointInTimeCache().get(2));
        assertEquals(3L, (long) m_cache.pointInTimeCache().get(3));
        registerLeader(0, 0L);
        registerLeader(1, 1L);
        registerLeader(2, 2L);
        registerLeader(3, 3L);
        dutthread.join();
        verify(m_mpi, times(1)).acceptPromotion();
    }

    @Test
    public void testStartupFailure() throws Exception {
        configure(2, 2, 1, false);
        // Write an appointee before we start to simulate a failure during startup
        m_cache.put(0, 0L);
        VoltDB.ignoreCrash = true;
        boolean threw = false;
        try {
            m_dut.acceptPromotion();
        } catch (ExecutionException e) {
            if (e.getCause() instanceof AssertionError) {
                threw = true;
            }
        }
        assertTrue(threw);
        assertTrue(VoltDB.wasCrashCalled);
    }

    @Test
    public void testStartupFailure2() throws Exception {
        configure(2, 2, 1, false);
        // Write the appointees and one master before we start to simulate a failure during startup
        m_cache.put(0, 0L);
        m_cache.put(1, 1L);
        waitForAppointee(1);
        registerLeader(0, 0L);
        VoltDB.ignoreCrash = true;
        boolean threw = false;
        try {
            m_dut.acceptPromotion();
        } catch (ExecutionException e) {
            if (e.getCause() instanceof AssertionError) {
                threw = true;
            }
        }
        assertTrue(threw);
        assertTrue(VoltDB.wasCrashCalled);
    }
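
    // A minimal illustrative helper, not part of the original test: a bounded
    // version of the unbounded Thread.sleep(0)/Thread.yield() spin loops used
    // throughout this class. The name "spinUntil" and the timeout policy are
    // assumptions, sketched only to show how these waits could fail fast with
    // a diagnostic instead of hanging the test on a missed condition, e.g.
    // spinUntil(() -> m_newAppointee.get(), 10_000);
    @SuppressWarnings("unused")
    private static void spinUntil(java.util.function.BooleanSupplier condition, long timeoutMs)
            throws InterruptedException {
        final long deadline = System.currentTimeMillis() + timeoutMs;
        while (!condition.getAsBoolean()) {
            if (System.currentTimeMillis() > deadline) {
                throw new AssertionError("Condition not met within " + timeoutMs + " ms");
            }
            Thread.sleep(1); // yield the CPU instead of hot-spinning
        }
    }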
    @Test
    public void testFailureDuringReplay() throws Exception {
        configure(2, 2, 1, false);
        Thread dutthread = new Thread() {
            @Override
            public void run() {
                try {
                    m_dut.acceptPromotion();
                } catch (Exception e) {
                }
            }
        };
        dutthread.start();
        // Need to sleep so we don't write to ZK before the LeaderAppointer appears or we'll crash
        Thread.sleep(1000);
        addReplica(0, 0L);
        addReplica(0, 1L);
        addReplica(1, 2L);
        addReplica(1, 3L);
        waitForAppointee(1);
        registerLeader(0, m_cache.pointInTimeCache().get(0));
        registerLeader(1, m_cache.pointInTimeCache().get(1));
        dutthread.join();
        // kill the appointer and delete one of the leaders
        m_dut.shutdown();
        deleteReplica(0, m_cache.pointInTimeCache().get(0));
        // create a new appointer and start it up in the replay state
        // (no onReplayCompletion() call, so it is still replaying)
        m_dut = new LeaderAppointer(m_hm, m_topo.getPartitionCount(), m_kfactor,
                m_topo.topologyToJSON(), m_mpi, new KSafetyStats(), false);
        m_newAppointee.set(false);
        VoltDB.ignoreCrash = true;
        boolean threw = false;
        try {
            m_dut.acceptPromotion();
        } catch (ExecutionException e) {
            threw = true;
        }
        assertTrue(threw);
        assertTrue(VoltDB.wasCrashCalled);
    }

    @Test
    public void testWaitsForAllReplicasAndLeaders() throws Exception {
        configure(2, 2, 1, false);
        Thread dutthread = new Thread() {
            @Override
            public void run() {
                try {
                    m_dut.acceptPromotion();
                } catch (Exception e) {
                }
            }
        };
        dutthread.start();
        // Need to sleep so we don't write to ZK before the LeaderAppointer appears or we'll crash
        Thread.sleep(1000);
        // Add only one of partition 0's two replicas; no leader should be
        // appointed until the partition has its full complement of replicas
        addReplica(0, 0L);
        // Sleep just a little to give time for the above put to hopefully
        // trickle into ZK. Not really a better condition to wait on here.
        Thread.sleep(500);
        System.out.println("pitc: " + m_cache.pointInTimeCache());
        assertNull(m_cache.pointInTimeCache().get(0));
        addReplica(0, 1L);
        waitForAppointee(0);
        addReplica(1, 2L);
        addReplica(1, 3L);
        waitForAppointee(1);
        // NOTE: these might not be the real masters, but they'll make progress for now,
        // unless we choose to add sanity checking to the LeaderAppointer
        registerLeader(0, m_cache.pointInTimeCache().get(0));
        // Bleh, still need to sleep here, since we're basically waiting on no change to state
        Thread.sleep(1000);
        verify(m_mpi, times(0)).acceptPromotion();
        registerLeader(1, m_cache.pointInTimeCache().get(1));
        dutthread.join();
        verify(m_mpi, times(1)).acceptPromotion();
    }
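
    /*
     * With kfactor=1 each partition runs two replicas, so the test below
     * expects the appointer to promote the surviving replica when one dies,
     * and to invoke VoltDB crash handling (observed via VoltDB.wasCrashCalled)
     * once the last replica of a partition is lost.
     */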
    @Test
    public void testPromotesOnReplicaDeathAndDiesOnKSafety() throws Exception {
        // run once to get to a startup state
        configure(2, 2, 1, false);
        VoltDB.ignoreCrash = true;
        Thread dutthread = new Thread() {
            @Override
            public void run() {
                try {
                    m_dut.acceptPromotion();
                } catch (Exception e) {
                }
            }
        };
        dutthread.start();
        // Need to sleep so we don't write to ZK before the LeaderAppointer appears or we'll crash
        Thread.sleep(1000);
        addReplica(0, 0L);
        addReplica(0, 1L);
        addReplica(1, 2L);
        addReplica(1, 3L);
        waitForAppointee(1);
        registerLeader(0, m_cache.pointInTimeCache().get(0));
        registerLeader(1, m_cache.pointInTimeCache().get(1));
        dutthread.join();
        // Now, delete the leader of partition 0 from ZK
        m_newAppointee.set(false);
        deleteReplica(0, m_cache.pointInTimeCache().get(0));
        while (!m_newAppointee.get()) {
            Thread.sleep(0);
        }
        assertEquals(1L, (long) m_cache.pointInTimeCache().get(0));
        // now, kill the other replica and watch everything BURN
        deleteReplica(0, m_cache.pointInTimeCache().get(0));
        while (!VoltDB.wasCrashCalled) {
            Thread.sleep(0);
        }
    }

    @Test
    public void testAppointerPromotion() throws Exception {
        // run once to get to a startup state
        configure(2, 2, 1, false);
        VoltDB.ignoreCrash = true;
        Thread dutthread = new Thread() {
            @Override
            public void run() {
                try {
                    m_dut.acceptPromotion();
                } catch (Exception e) {
                }
            }
        };
        dutthread.start();
        // Need to sleep so we don't write to ZK before the LeaderAppointer appears or we'll crash
        Thread.sleep(1000);
        addReplica(0, 0L);
        addReplica(0, 1L);
        addReplica(1, 2L);
        addReplica(1, 3L);
        waitForAppointee(1);
        registerLeader(0, m_cache.pointInTimeCache().get(0));
        registerLeader(1, m_cache.pointInTimeCache().get(1));
        dutthread.join();
        // kill the appointer and delete one of the leaders
        m_dut.shutdown();
        deleteReplica(0, m_cache.pointInTimeCache().get(0));
        // create a new appointer and start it up
        createAppointer(false);
        m_newAppointee.set(false);
        m_dut.acceptPromotion();
        while (!m_newAppointee.get()) {
            Thread.sleep(0);
        }
        assertEquals(1L, (long) m_cache.pointInTimeCache().get(0));
        // Add a new partition with two replicas, see if the newly elected leader appointer
        // picks up the new partition and elects a new leader
        addReplica(2, 4L);
        addReplica(2, 5L);
        waitForAppointee(2);
    }

    @Test
    public void testAddPartition() throws Exception {
        // run once to get to a startup state
        configure(2, 2, 1, false);
        VoltDB.ignoreCrash = true;
        Thread dutthread = new Thread() {
            @Override
            public void run() {
                try {
                    m_dut.acceptPromotion();
                } catch (Exception e) {
                }
            }
        };
        dutthread.start();
        // Need to sleep so we don't write to ZK before the LeaderAppointer appears or we'll crash
        Thread.sleep(1000);
        addReplica(0, 0L);
        addReplica(0, 1L);
        addReplica(1, 2L);
        addReplica(1, 3L);
        waitForAppointee(1);
        registerLeader(0, m_cache.pointInTimeCache().get(0));
        registerLeader(1, m_cache.pointInTimeCache().get(1));
        dutthread.join();
        assertFalse(VoltDB.wasCrashCalled);
        // Create a partition dir
        LeaderElector.createRootIfNotExist(m_zk,
                LeaderElector.electionDirForPartition(VoltZK.leaders_initiators, 2));
        Thread.sleep(500); // I'm evil
        assertFalse(VoltDB.wasCrashCalled);
        // Now, add a replica for partition 2, should be promoted
        m_newAppointee.set(false);
        addReplica(2, 4L);
        while (!m_newAppointee.get()) {
            Thread.sleep(0);
        }
        assertEquals(4L, (long) m_cache.pointInTimeCache().get(2));
        // Now deleting the only replica for partition 2 shouldn't crash because it isn't on
        // the ring for elastic, but for legacy it should crash immediately
        VoltDB.wasCrashCalled = false;
        deleteReplica(2, m_cache.pointInTimeCache().get(2));
        if (TheHashinator.getConfiguredHashinatorType() == TheHashinator.HashinatorType.LEGACY) {
            while (!VoltDB.wasCrashCalled) {
                Thread.yield();
            }
            return;
        }
        // For the elastic hashinator, do more testing
        Thread.sleep(1000);
        assertFalse(VoltDB.wasCrashCalled);
        // Now, add a replica for partition 2, should be promoted
        m_newAppointee.set(false);
        addReplica(2, 4L);
        while (!m_newAppointee.get()) {
            Thread.sleep(0);
        }
        assertEquals(4L, (long) m_cache.pointInTimeCache().get(2));
        TheHashinator.initialize(TheHashinator.getConfiguredHashinatorClass(),
                TheHashinator.getConfigureBytes(4));
        // Deleting it now should cause a crash, now that the partition is on the ring
        deleteReplica(2, m_cache.pointInTimeCache().get(2));
        while (!VoltDB.wasCrashCalled) {
            Thread.yield();
        }
    }
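
    /*
     * The final test constructs the appointer with expectSyncSnapshot=true,
     * modeling a cluster still acting as a replica that owes a sync snapshot:
     * a leader failure in that window should crash the node, while the same
     * failure after the cluster is promoted to master (ReplicationRole.NONE)
     * should not.
     */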
    @Test
    public void testFailureDuringSyncSnapshot() throws Exception {
        final VoltDBInterface mVolt = mock(VoltDBInterface.class);
        doReturn(ReplicationRole.REPLICA).when(mVolt).getReplicationRole();
        VoltDB.replaceVoltDBInstanceForTest(mVolt);
        configure(2, 2, 1, false);
        Thread dutthread = new Thread() {
            @Override
            public void run() {
                try {
                    m_dut.acceptPromotion();
                } catch (Exception e) {
                }
            }
        };
        dutthread.start();
        // Need to sleep so we don't write to ZK before the LeaderAppointer appears or we'll crash
        Thread.sleep(1000);
        addReplica(0, 0L);
        addReplica(0, 1L);
        addReplica(1, 2L);
        addReplica(1, 3L);
        waitForAppointee(1);
        registerLeader(0, m_cache.pointInTimeCache().get(0));
        registerLeader(1, m_cache.pointInTimeCache().get(1));
        dutthread.join();
        // kill the appointer and delete one of the leaders
        m_dut.shutdown();
        deleteReplica(0, m_cache.pointInTimeCache().get(0));
        // create a new appointer and start it up with expectSyncSnapshot=true
        m_dut = new LeaderAppointer(m_hm, m_topo.getPartitionCount(), m_kfactor,
                m_topo.topologyToJSON(), m_mpi, new KSafetyStats(), true);
        m_dut.onReplayCompletion();
        m_newAppointee.set(false);
        VoltDB.ignoreCrash = true;
        boolean threw = false;
        try {
            m_dut.acceptPromotion();
        } catch (ExecutionException e) {
            threw = true;
        }
        assertTrue(threw);
        assertTrue(VoltDB.wasCrashCalled);

        // Promote the replica to a master before sync snapshot, failure should not crash now.
        doReturn(ReplicationRole.NONE).when(mVolt).getReplicationRole();
        VoltDB.wasCrashCalled = false;
        m_dut.acceptPromotion();
        assertFalse(VoltDB.wasCrashCalled);
    }
}