package org.apache.bookkeeper.client; /* * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. * */ import java.io.IOException; import java.nio.ByteBuffer; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import org.apache.bookkeeper.bookie.Bookie; import org.apache.bookkeeper.bookie.BookieException; import org.apache.bookkeeper.client.AsyncCallback.AddCallback; import org.apache.bookkeeper.client.BookKeeper.DigestType; import org.apache.bookkeeper.conf.ServerConfiguration; import org.apache.bookkeeper.net.BookieSocketAddress; import org.apache.bookkeeper.proto.BookkeeperInternalCallbacks.WriteCallback; import org.apache.bookkeeper.test.BaseTestCase; import org.junit.Test; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.junit.Assert.*; /** * This unit test tests ledger recovery. * */ public class LedgerRecoveryTest extends BaseTestCase { private final static Logger LOG = LoggerFactory.getLogger(LedgerRecoveryTest.class); DigestType digestType; public LedgerRecoveryTest(DigestType digestType) { super(3); this.digestType = digestType; } private void testInternal(int numEntries) throws Exception { /* * Create ledger. */ LedgerHandle beforelh = null; beforelh = bkc.createLedger(digestType, "".getBytes()); String tmp = "BookKeeper is cool!"; for (int i = 0; i < numEntries; i++) { beforelh.addEntry(tmp.getBytes()); } long length = (long) (numEntries * tmp.length()); /* * Try to open ledger. */ LedgerHandle afterlh = bkc.openLedger(beforelh.getId(), digestType, "".getBytes()); /* * Check if has recovered properly. */ assertTrue("Has not recovered correctly: " + afterlh.getLastAddConfirmed(), afterlh.getLastAddConfirmed() == numEntries - 1); assertTrue("Has not set the length correctly: " + afterlh.getLength() + ", " + length, afterlh.getLength() == length); } @Test(timeout=60000) public void testLedgerRecovery() throws Exception { testInternal(100); } @Test(timeout=60000) public void testEmptyLedgerRecoveryOne() throws Exception { testInternal(1); } @Test(timeout=60000) public void testEmptyLedgerRecovery() throws Exception { testInternal(0); } @Test(timeout=60000) public void testLedgerRecoveryWithWrongPassword() throws Exception { // Create a ledger byte[] ledgerPassword = "aaaa".getBytes(); LedgerHandle lh = bkc.createLedger(digestType, ledgerPassword); // bkc.initMessageDigest("SHA1"); long ledgerId = lh.getId(); LOG.info("Ledger ID: " + lh.getId()); String tmp = "BookKeeper is cool!"; int numEntries = 30; for (int i = 0; i < numEntries; i++) { lh.addEntry(tmp.getBytes()); } // Using wrong password ledgerPassword = "bbbb".getBytes(); try { lh = bkc.openLedger(ledgerId, digestType, ledgerPassword); fail("Opening ledger with wrong password should fail"); } catch (BKException e) { // should failed } } @Test(timeout=60000) public void testLedgerRecoveryWithNotEnoughBookies() throws Exception { int numEntries = 3; // Create a ledger LedgerHandle beforelh = null; beforelh = bkc.createLedger(3, 3, digestType, "".getBytes()); String tmp = "BookKeeper is cool!"; for (int i = 0; i < numEntries; i++) { beforelh.addEntry(tmp.getBytes()); } // shutdown first bookie server bs.get(0).shutdown(); bs.remove(0); /* * Try to open ledger. */ try { bkc.openLedger(beforelh.getId(), digestType, "".getBytes()); fail("should not reach here!"); } catch (Exception e) { // should thrown recovery exception } // start a new bookie server startNewBookie(); LedgerHandle afterlh = bkc.openLedger(beforelh.getId(), digestType, "".getBytes()); /* * Check if has recovered properly. */ assertEquals(numEntries - 1, afterlh.getLastAddConfirmed()); } @Test(timeout=60000) public void testLedgerRecoveryWithSlowBookie() throws Exception { for (int i = 0; i < 3; i++) { LOG.info("TestLedgerRecoveryWithAckQuorum @ slow bookie {}", i); ledgerRecoveryWithSlowBookie(3, 3, 2, 1, i); } } private void ledgerRecoveryWithSlowBookie(int ensembleSize, int writeQuorumSize, int ackQuorumSize, int numEntries, int slowBookieIdx) throws Exception { // Create a ledger LedgerHandle beforelh = null; beforelh = bkc.createLedger(ensembleSize, writeQuorumSize, ackQuorumSize, digestType, "".getBytes()); // kill first bookie server to start a fake one to simulate a slow bookie // and failed to add entry on crash // until write succeed BookieSocketAddress host = beforelh.getLedgerMetadata().currentEnsemble.get(slowBookieIdx); ServerConfiguration conf = killBookie(host); Bookie fakeBookie = new Bookie(conf) { @Override public void addEntry(ByteBuffer entry, WriteCallback cb, Object ctx, byte[] masterKey) throws IOException, BookieException { // drop request to simulate a slow and failed bookie } }; bsConfs.add(conf); bs.add(startBookie(conf, fakeBookie)); // avoid not-enough-bookies case startNewBookie(); // write would still succeed with 2 bookies ack String tmp = "BookKeeper is cool!"; for (int i = 0; i < numEntries; i++) { beforelh.addEntry(tmp.getBytes()); } conf = killBookie(host); bsConfs.add(conf); // the bookie goes normally bs.add(startBookie(conf)); /* * Try to open ledger. */ LedgerHandle afterlh = bkc.openLedger(beforelh.getId(), digestType, "".getBytes()); /* * Check if has recovered properly. */ assertEquals(numEntries - 1, afterlh.getLastAddConfirmed()); } /** * {@link https://issues.apache.org/jira/browse/BOOKKEEPER-355} * A recovery during a rolling restart shouldn't affect the ability * to recovery the ledger later. * We have a ledger on ensemble B1,B2,B3. * The sequence of events is * 1. B1 brought down for maintenance * 2. Ledger recovery started * 3. B2 answers read last confirmed. * 4. B1 replaced in ensemble by B4 * 5. Write to B4 fails for some reason * 6. B1 comes back up. * 7. B2 goes down for maintenance. * 8. Ledger recovery starts (ledger is now unavailable) */ @Test(timeout=60000) public void testLedgerRecoveryWithRollingRestart() throws Exception { LedgerHandle lhbefore = bkc.createLedger(numBookies, 2, digestType, "".getBytes()); for (int i = 0; i < (numBookies*3)+1; i++) { lhbefore.addEntry("data".getBytes()); } // Add a dead bookie to the cluster ServerConfiguration conf = newServerConfiguration(); Bookie deadBookie1 = new Bookie(conf) { @Override public void recoveryAddEntry(ByteBuffer entry, WriteCallback cb, Object ctx, byte[] masterKey) throws IOException, BookieException { // drop request to simulate a slow and failed bookie throw new IOException("Couldn't write for some reason"); } }; bsConfs.add(conf); bs.add(startBookie(conf, deadBookie1)); // kill first bookie server BookieSocketAddress bookie1 = lhbefore.getLedgerMetadata().currentEnsemble.get(0); ServerConfiguration conf1 = killBookie(bookie1); // Try to recover and fence the ledger after killing one bookie in the // ensemble in the ensemble, and another bookie is available in zk, but not writtable try { bkc.openLedger(lhbefore.getId(), digestType, "".getBytes()); fail("Shouldn't be able to open ledger, there should be entries missing"); } catch (BKException.BKLedgerRecoveryException e) { // expected } // restart the first server, kill the second bsConfs.add(conf1); bs.add(startBookie(conf1)); BookieSocketAddress bookie2 = lhbefore.getLedgerMetadata().currentEnsemble.get(1); ServerConfiguration conf2 = killBookie(bookie2); // using async, because this could trigger an assertion final AtomicInteger returnCode = new AtomicInteger(0); final CountDownLatch openLatch = new CountDownLatch(1); bkc.asyncOpenLedger(lhbefore.getId(), digestType, "".getBytes(), new AsyncCallback.OpenCallback() { public void openComplete(int rc, LedgerHandle lh, Object ctx) { returnCode.set(rc); openLatch.countDown(); if (rc == BKException.Code.OK) { try { lh.close(); } catch (Exception e) { LOG.error("Exception closing ledger handle", e); } } } }, null); assertTrue("Open call should have completed", openLatch.await(5, TimeUnit.SECONDS)); assertFalse("Open should not have succeeded", returnCode.get() == BKException.Code.OK); bsConfs.add(conf2); bs.add(startBookie(conf2)); LedgerHandle lhafter = bkc.openLedger(lhbefore.getId(), digestType, "".getBytes()); assertEquals("Fenced ledger should have correct lastAddConfirmed", lhbefore.getLastAddConfirmed(), lhafter.getLastAddConfirmed()); } /** * {@link https://issues.apache.org/jira/browse/BOOKKEEPER-355} * Verify that if a recovery happens with 1 replica missing, and it's replaced * with a faulty bookie, it doesn't break future recovery from happening. * 1. Ledger is created with quorum size as 2, and entries are written * 2. Now first bookie is in the ensemble is brought down. * 3. Another client fence and trying to recover the same ledger * 4. During this time ensemble change will happen * and new bookie will be added. But this bookie is not able to write. * 5. This recovery will fail. * 7. A new non-faulty bookie comes up * 8. Another client trying to recover the same ledger. */ @Test(timeout=60000) public void testBookieFailureDuringRecovery() throws Exception { LedgerHandle lhbefore = bkc.createLedger(numBookies, 2, digestType, "".getBytes()); for (int i = 0; i < (numBookies*3)+1; i++) { lhbefore.addEntry("data".getBytes()); } // Add a dead bookie to the cluster ServerConfiguration conf = newServerConfiguration(); Bookie deadBookie1 = new Bookie(conf) { @Override public void recoveryAddEntry(ByteBuffer entry, WriteCallback cb, Object ctx, byte[] masterKey) throws IOException, BookieException { // drop request to simulate a slow and failed bookie throw new IOException("Couldn't write for some reason"); } }; bsConfs.add(conf); bs.add(startBookie(conf, deadBookie1)); // kill first bookie server BookieSocketAddress bookie1 = lhbefore.getLedgerMetadata().currentEnsemble.get(0); killBookie(bookie1); // Try to recover and fence the ledger after killing one bookie in the // ensemble in the ensemble, and another bookie is available in zk but not writtable try { bkc.openLedger(lhbefore.getId(), digestType, "".getBytes()); fail("Shouldn't be able to open ledger, there should be entries missing"); } catch (BKException.BKLedgerRecoveryException e) { // expected } // start a new good server startNewBookie(); LedgerHandle lhafter = bkc.openLedger(lhbefore.getId(), digestType, "".getBytes()); assertEquals("Fenced ledger should have correct lastAddConfirmed", lhbefore.getLastAddConfirmed(), lhafter.getLastAddConfirmed()); } /** * Verify that it doesn't break the recovery when changing ensemble in * recovery add. */ @Test(timeout = 60000) public void testEnsembleChangeDuringRecovery() throws Exception { LedgerHandle lh = bkc.createLedger(numBookies, 2, 2, digestType, "".getBytes()); int numEntries = (numBookies * 3) + 1; final AtomicInteger numPendingAdds = new AtomicInteger(numEntries); final CountDownLatch addDone = new CountDownLatch(1); for (int i = 0; i < numEntries; i++) { lh.asyncAddEntry("data".getBytes(), new AddCallback() { @Override public void addComplete(int rc, LedgerHandle lh, long entryId, Object ctx) { if (BKException.Code.OK != rc) { addDone.countDown(); return; } if (numPendingAdds.decrementAndGet() == 0) { addDone.countDown(); } } }, null); } addDone.await(10, TimeUnit.SECONDS); if (numPendingAdds.get() > 0) { fail("Failed to add " + numEntries + " to ledger handle " + lh.getId()); } // kill first 2 bookies to replace bookies BookieSocketAddress bookie1 = lh.getLedgerMetadata().currentEnsemble.get(0); ServerConfiguration conf1 = killBookie(bookie1); BookieSocketAddress bookie2 = lh.getLedgerMetadata().currentEnsemble.get(1); ServerConfiguration conf2 = killBookie(bookie2); // replace these two bookies startDeadBookie(conf1); startDeadBookie(conf2); // kick in two brand new bookies startNewBookie(); startNewBookie(); // two dead bookies are put in the ensemble which would cause ensemble // change LedgerHandle recoveredLh = bkc.openLedger(lh.getId(), digestType, "".getBytes()); assertEquals("Fenced ledger should have correct lastAddConfirmed", lh.getLastAddConfirmed(), recoveredLh.getLastAddConfirmed()); } private void startDeadBookie(ServerConfiguration conf) throws Exception { Bookie rBookie = new Bookie(conf) { @Override public void recoveryAddEntry(ByteBuffer entry, WriteCallback cb, Object ctx, byte[] masterKey) throws IOException, BookieException { // drop request to simulate a dead bookie throw new IOException("Couldn't write entries for some reason"); } }; bsConfs.add(conf); bs.add(startBookie(conf, rBookie)); } }