/* This file is part of VoltDB. * Copyright (C) 2008-2017 VoltDB Inc. * * Permission is hereby granted, free of charge, to any person obtaining * a copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including * without limitation the rights to use, copy, modify, merge, publish, * distribute, sublicense, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: * * The above copyright notice and this permission notice shall be * included in all copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR * OTHER DEALINGS IN THE SOFTWARE. */ package org.voltdb.iv2; import static junit.framework.Assert.assertEquals; import static junit.framework.Assert.assertFalse; import static junit.framework.Assert.assertTrue; import static junit.framework.Assert.fail; import static org.mockito.Matchers.any; import static org.mockito.Mockito.inOrder; import static org.mockito.Mockito.mock; import static org.mockito.Mockito.times; import static org.mockito.Mockito.verify; import static org.mockito.Mockito.when; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; import java.util.Random; import java.util.concurrent.CancellationException; import java.util.concurrent.Future; import java.util.concurrent.atomic.AtomicBoolean; import org.junit.BeforeClass; import org.junit.Test; import org.mockito.InOrder; import org.voltcore.messaging.TransactionInfoBaseMessage; import org.voltcore.messaging.VoltMessage; import org.voltcore.utils.Pair; import org.voltdb.TheHashinator; import org.voltdb.TheHashinator.HashinatorType; import org.voltdb.iv2.RepairAlgo.RepairResult; import org.voltdb.messaging.CompleteTransactionMessage; import org.voltdb.messaging.Iv2InitiateTaskMessage; import org.voltdb.messaging.Iv2RepairLogResponseMessage; public class TestSpPromoteAlgo { Iv2RepairLogResponseMessage makeResponse(long spHandle) { Iv2RepairLogResponseMessage m = mock(Iv2RepairLogResponseMessage.class); when(m.getHandle()).thenReturn(spHandle); Iv2InitiateTaskMessage im = mock(Iv2InitiateTaskMessage.class); when(m.getPayload()).thenReturn(im); return m; } Iv2RepairLogResponseMessage makeStaleResponse(long spHandle, long requestId) { Iv2RepairLogResponseMessage m = makeResponse(spHandle); when(m.getRequestId()).thenReturn(requestId); Iv2InitiateTaskMessage im = mock(Iv2InitiateTaskMessage.class); when(m.getPayload()).thenReturn(im); return m; } @BeforeClass static public void initializeHashinator() { TheHashinator.setConfiguredHashinatorType(HashinatorType.ELASTIC); TheHashinator.initialize(TheHashinator.getConfiguredHashinatorClass(), TheHashinator.getConfigureBytes(8)); } // verify that responses are correctly unioned and ordered. @Test public void testUnion() throws Exception { SpPromoteAlgo term = new SpPromoteAlgo(null, null, "Test", 1); // returned sphandles in a non-trivial order, with duplicates. long returnedSpHandles[] = new long[]{1L, 5L, 2L, 5L, 6L, 3L, 5L, 1L}; long expectedUnion[] = new long[]{1L, 2L, 3L, 5L, 6L}; for (long sp : returnedSpHandles) { term.m_repairLogUnion.add(makeResponse(sp)); } assertEquals(expectedUnion.length, term.m_repairLogUnion.size()); int i = 0; for (Iv2RepairLogResponseMessage li : term.m_repairLogUnion) { assertEquals(li.getHandle(), expectedUnion[i++]); } } // verify that bad request ids are not submitted to the log. @Test public void testStaleResponse() throws Exception { SpPromoteAlgo term = new SpPromoteAlgo(null, null, "Test", 1); term.deliver(makeStaleResponse(1L, term.getRequestId() + 1)); assertEquals(0L, term.m_repairLogUnion.size()); } // verify that the all-done logic works on replica repair structs @Test public void testRepairLogsAreComplete() { SpPromoteAlgo term = new SpPromoteAlgo(null, null, "Test", 1); SpPromoteAlgo.ReplicaRepairStruct notDone1 = new SpPromoteAlgo.ReplicaRepairStruct(); notDone1.m_receivedResponses = 1; notDone1.m_expectedResponses = 2; assertFalse(notDone1.logsComplete()); SpPromoteAlgo.ReplicaRepairStruct notDone2 = new SpPromoteAlgo.ReplicaRepairStruct(); notDone2.m_receivedResponses = 0; notDone2.m_expectedResponses = 10; SpPromoteAlgo.ReplicaRepairStruct done1 = new SpPromoteAlgo.ReplicaRepairStruct(); done1.m_receivedResponses = 5; done1.m_expectedResponses = 5; assertTrue(done1.logsComplete()); SpPromoteAlgo.ReplicaRepairStruct done2 = new SpPromoteAlgo.ReplicaRepairStruct(); done2.m_receivedResponses = 10; done2.m_expectedResponses = 10; term.m_replicaRepairStructs.put(0L, notDone1); term.m_replicaRepairStructs.put(1L, done1); term.m_replicaRepairStructs.put(2L, notDone2); term.m_replicaRepairStructs.put(3L, done2); assertFalse(term.areRepairLogsComplete()); term.m_replicaRepairStructs.clear(); term.m_replicaRepairStructs.put(2L, notDone2); assertFalse(term.areRepairLogsComplete()); term.m_replicaRepairStructs.clear(); term.m_replicaRepairStructs.put(1L, done1); term.m_replicaRepairStructs.put(3L, done2); assertTrue(term.areRepairLogsComplete()); term.m_replicaRepairStructs.clear(); term.m_replicaRepairStructs.put(3L, done2); assertTrue(term.areRepairLogsComplete()); } // verify that Term asks initMailbox to send the expected repair messages. @SuppressWarnings("unchecked") @Test public void testRepairSurvivors() { InitiatorMailbox mailbox = mock(InitiatorMailbox.class); SpPromoteAlgo term = new SpPromoteAlgo(null, mailbox, "Test", 1); // missing 4, 5 SpPromoteAlgo.ReplicaRepairStruct r1 = new SpPromoteAlgo.ReplicaRepairStruct(); r1.m_maxSpHandleSeen = 3L; // complete SpPromoteAlgo.ReplicaRepairStruct r2 = new SpPromoteAlgo.ReplicaRepairStruct(); r2.m_maxSpHandleSeen = 5L; // missing 3, 4, 5 SpPromoteAlgo.ReplicaRepairStruct r3 = new SpPromoteAlgo.ReplicaRepairStruct(); r3.m_maxSpHandleSeen = 2L; term.m_replicaRepairStructs.put(1L, r1); term.m_replicaRepairStructs.put(2L, r2); term.m_replicaRepairStructs.put(3L, r3); // build the log long spHandles[] = new long[]{0L, 1L, 2L, 3L, 4L, 5L}; Iv2RepairLogResponseMessage msgs[] = new Iv2RepairLogResponseMessage[6]; for (int i=1; i < spHandles.length; ++i) { msgs[i] = makeResponse(spHandles[i]); term.m_repairLogUnion.add(msgs[i]); } // call the function being tested... term.repairSurvivors(); List<Long> repair3 = new ArrayList<Long>(); repair3.add(3L); verify(mailbox).repairReplicasWith(repair3, msgs[3].getPayload()); List<Long> repair4And5 = new ArrayList<Long>(); repair4And5.add(1L); repair4And5.add(3L); verify(mailbox).repairReplicasWith(repair4And5, msgs[4].getPayload()); verify(mailbox).repairReplicasWith(repair4And5, msgs[5].getPayload()); // verify exactly 3 repairs happened. verify(mailbox, times(3)).repairReplicasWith(any(repair3.getClass()), any(VoltMessage.class)); } // be a little a paranoid about order. note that the union test also verifies // order; unsure this is interesting... This test is on one replica because // I'm not sure that we guarentee the orer of iteration across replicas. @SuppressWarnings("unchecked") @Test public void testRepairSurvivorOrder() { InitiatorMailbox mailbox = mock(InitiatorMailbox.class); InOrder inOrder = inOrder(mailbox); SpPromoteAlgo term = new SpPromoteAlgo(null, mailbox, "Test", 1); // missing 3, 4, 5 SpPromoteAlgo.ReplicaRepairStruct r3 = new SpPromoteAlgo.ReplicaRepairStruct(); r3.m_maxSpHandleSeen = 2L; term.m_replicaRepairStructs.put(3L, r3); // build the log long spHandles[] = new long[]{0L, 1L, 2L, 3L, 4L, 5L}; Iv2RepairLogResponseMessage msgs[] = new Iv2RepairLogResponseMessage[6]; for (int i=1; i < spHandles.length; ++i) { msgs[i] = makeResponse(spHandles[i]); term.m_repairLogUnion.add(msgs[i]); } // call the function being tested... term.repairSurvivors(); List<Long> repair3 = new ArrayList<Long>(); repair3.add(3L); // verify that r3 saw 3, 4, 5 inOrder.verify(mailbox).repairReplicasWith(repair3, msgs[3].getPayload()); inOrder.verify(mailbox).repairReplicasWith(repair3, msgs[4].getPayload()); inOrder.verify(mailbox).repairReplicasWith(repair3, msgs[5].getPayload()); // verify exactly 3 repairs happened. verify(mailbox, times(3)).repairReplicasWith(any(repair3.getClass()), any(Iv2RepairLogResponseMessage.class)); } // Verify that a babysitter update causes the term to be cancelled. @Test public void testMidPromotionReplicaUpdate() throws Exception { final AtomicBoolean promotionResult = new AtomicBoolean(true); final InitiatorMailbox mailbox = mock(InitiatorMailbox.class); // Stub some portions of a concrete Term instance - this is the // object being tested. final SpPromoteAlgo term = new SpPromoteAlgo(null, mailbox, "Test", 1) { // there aren't replicas to ask for repair logs @Override void prepareForFaultRecovery() { } }; Thread promotionThread = new Thread() { @Override public void run() { try { boolean success = false; try { term.start().get(); success = true; } catch (CancellationException e) {} promotionResult.set(success); } catch (Exception e) { System.out.println("Promotion thread threw: " + e); throw new RuntimeException(e); } } }; promotionThread.start(); // cancel the term as if updateReplica() triggered. term.cancel(); promotionThread.join(); // promotion success must be false after cancel. assertFalse(promotionResult.get()); } @Test public void testFuzz() throws Exception { InitiatorMailbox mbox = mock(InitiatorMailbox.class); Map<Long, List<TransactionInfoBaseMessage>> finalStreams = new HashMap<Long, List<TransactionInfoBaseMessage>>(); Random rand = new Random(System.currentTimeMillis()); // Generate a random message stream to several "replicas", interrupted // at random points to all but one. Validate that promotion repair // results in identical, correct, repair streams to all replicas. TxnEgo sphandle = TxnEgo.makeZero(0); UniqueIdGenerator spbuig = new UniqueIdGenerator(0, 0); UniqueIdGenerator mpbuig = new UniqueIdGenerator(0, 0); sphandle = sphandle.makeNext(); RandomMsgGenerator msgGen = new RandomMsgGenerator(); boolean[] stops = new boolean[3]; RepairLog[] logs = new RepairLog[3]; for (int i = 0; i < 3; i++) { logs[i] = new RepairLog(); stops[i] = false; finalStreams.put((long)i, new ArrayList<TransactionInfoBaseMessage>()); } long maxBinaryLogSpUniqueId = Long.MIN_VALUE; for (int i = 0; i < 4000; i++) { // get next message, update the sphandle according to SpScheduler rules, // but only submit messages that would have been forwarded by the master // to the repair log. TransactionInfoBaseMessage msg = msgGen.generateRandomMessageInStream(); msg.setSpHandle(sphandle.getTxnId()); if (msg instanceof Iv2InitiateTaskMessage) { Pair<Long, Long> uids = TestRepairLog.setBinaryLogUniqueId(msg, spbuig, mpbuig); maxBinaryLogSpUniqueId = Math.max(maxBinaryLogSpUniqueId, uids.getFirst()); } sphandle = sphandle.makeNext(); if (!msg.isReadOnly() || msg instanceof CompleteTransactionMessage) { if (!stops[0]) { logs[0].deliver(msg); } if (!stops[1]) { logs[1].deliver(msg); } logs[2].deliver(msg); // Putting this inside this loop // guarantees at least one message in everyone's repair log, // which avoids having to check for the special case where a node // has an empty repair log on account of rejoin and shouldn't // be fed any transactions for (int j = 0; j < 2; j++) { // Hacky way to get spaced failures if (rand.nextDouble() < (.01 / ((j + 1) * 5))) { stops[j] = true; } } } } List<Long> survivors = new ArrayList<Long>(); survivors.add(0l); survivors.add(1l); survivors.add(2l); SpPromoteAlgo dut = new SpPromoteAlgo(survivors, mbox, "bleh ", 0); Future<RepairResult> result = dut.start(); for (int i = 0; i < 3; i++) { List<Iv2RepairLogResponseMessage> stuff = logs[i].contents(dut.getRequestId(), false); System.out.println("Repair log size from: " + i + ": " + stuff.size()); for (Iv2RepairLogResponseMessage msg : stuff) { msg.m_sourceHSId = i; dut.deliver(msg); // First message is metadata only, skip it in validation stream if (msg.getSequence() > 0) { //System.out.println("finalstreams: " + finalStreams); //System.out.println("get(i): " + i + ": " + finalStreams.get((long)i)); //System.out.println("msg: " + msg); finalStreams.get((long)i).add((TransactionInfoBaseMessage)msg.getPayload()); } } } assertFalse(result.isCancelled()); assertTrue(result.isDone()); // Unfortunately, it's painful to try to stub things to make repairSurvivors() work, so we'll // go and inspect the guts of SpPromoteAlgo instead. This iteration is largely a copy of the inner loop // of repairSurvivors() for (Iv2RepairLogResponseMessage li : dut.m_repairLogUnion) { for (Entry<Long, SpPromoteAlgo.ReplicaRepairStruct> entry : dut.m_replicaRepairStructs.entrySet()) { if (entry.getValue().needs(li.getHandle())) { // append the missing message for this 'node' to the list of messages that node has seen finalStreams.get(entry.getKey()).add((TransactionInfoBaseMessage)li.getPayload()); } } } // check that all the lists for all the nodes are identical after repair int longest = Integer.MIN_VALUE; for (Entry<Long, List<TransactionInfoBaseMessage>> entry : finalStreams.entrySet()) { System.out.println("SIZE: " + entry.getValue().size()); if (entry.getValue().size() > longest) { if (longest == Integer.MIN_VALUE) { longest = entry.getValue().size(); } else { fail("Mismatch in repair stream size!"); } } } for (int i = 0; i < longest; i++) { TransactionInfoBaseMessage current = null; for (Entry<Long, List<TransactionInfoBaseMessage>> entry : finalStreams.entrySet()) { TransactionInfoBaseMessage msg = entry.getValue().get(i); if (current == null) { current = msg; } else { assertEquals(current.getSpHandle(), msg.getSpHandle()); assertEquals(current.getClass(), msg.getClass()); } } } } }