package org.apache.bookkeeper.client;

/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 *
 */

import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.TimeUnit;

import org.apache.bookkeeper.bookie.Bookie;
import org.apache.bookkeeper.conf.ClientConfiguration;
import org.apache.bookkeeper.conf.ServerConfiguration;
import org.apache.bookkeeper.client.BookKeeper.DigestType;
import org.apache.bookkeeper.net.BookieSocketAddress;
import org.apache.bookkeeper.proto.BookieServer;
import org.apache.bookkeeper.test.BookKeeperClusterTestCase;
import org.apache.bookkeeper.util.MathUtils;

import org.junit.Test;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static org.junit.Assert.*;

/**
 * Tests for disk-space-weighted ledger placement.
 *
 * <p>Each test replaces the cluster's bookies with bookies that advertise a chosen amount of
 * free disk space, creates a batch of ledgers through a client that has weight-based placement
 * enabled, and checks how often each bookie was picked for the first ensemble.
 */
public class BookKeeperDiskSpaceWeightedLedgerPlacementTest extends BookKeeperClusterTestCase {
    private static final Logger LOG =
            LoggerFactory.getLogger(BookKeeperDiskSpaceWeightedLedgerPlacementTest.class);

    /** Number of ledgers created per measurement round. */
    private static final int NUM_LEDGERS_PER_ROUND = 2000;

    /** Largest tolerated difference (in percent) between neighbouring bookies of equal weight. */
    private static final int MAX_UNIFORM_DEVIATION_PERCENT = 30;

    public BookKeeperDiskSpaceWeightedLedgerPlacementTest() {
        super(10);
    }

    /**
     * Starts a bookie that advertises {@code initialFreeDiskSpace} until {@code delaySecs}
     * seconds have elapsed and {@code finalFreeDiskSpace} afterwards, and registers it (and its
     * configuration) with the test cluster.
     *
     * @param conf configuration of the bookie to start
     * @param initialFreeDiskSpace free space advertised before the delay has elapsed
     * @param finalFreeDiskSpace free space advertised once the delay has elapsed
     * @param delaySecs delay in seconds; {@code 0} means the initial value is advertised forever
     * @return the started bookie server
     */
    private BookieServer restartBookie(ServerConfiguration conf, final long initialFreeDiskSpace,
            final long finalFreeDiskSpace, final int delaySecs) throws Exception {
        Bookie bookieWithCustomFreeDiskSpace = new Bookie(conf) {
            long startTime = System.currentTimeMillis();

            @Override
            public long getTotalFreeSpace() {
                // NOTE(review): presumably guards against this method being invoked from the
                // Bookie constructor, before this subclass's field initializer has run - confirm.
                if (startTime == 0) {
                    startTime = System.currentTimeMillis();
                }
                // 1000L keeps the millisecond conversion in long arithmetic.
                if (delaySecs == 0 || (System.currentTimeMillis() - startTime < delaySecs * 1000L)) {
                    return initialFreeDiskSpace;
                }
                // after delaySecs, advertise finalFreeDiskSpace
                return finalFreeDiskSpace;
            }
        };
        bsConfs.add(conf);
        BookieServer server = startBookie(conf, bookieWithCustomFreeDiskSpace);
        bs.add(server);
        return server;
    }

    /**
     * Kills the bookie at {@code bookieIdx} and restarts it so that it always advertises
     * {@code freeDiskSpace}.
     */
    private BookieServer replaceBookieWithCustomFreeDiskSpaceBookie(int bookieIdx, final long freeDiskSpace)
            throws Exception {
        return replaceBookieWithCustomFreeDiskSpaceBookie(bookieIdx, freeDiskSpace, freeDiskSpace, 0);
    }

    /**
     * Replaces the given bookie, looked up by its local address, with one that always advertises
     * {@code freeDiskSpace}.
     *
     * @throws IllegalArgumentException if no running bookie has the same local address
     */
    private BookieServer replaceBookieWithCustomFreeDiskSpaceBookie(BookieServer bookie, final long freeDiskSpace)
            throws Exception {
        for (int i = 0; i < bs.size(); i++) {
            if (bs.get(i).getLocalAddress().equals(bookie.getLocalAddress())) {
                return replaceBookieWithCustomFreeDiskSpaceBookie(i, freeDiskSpace);
            }
        }
        // Callers dereference the result immediately, so fail here with a descriptive message
        // instead of handing back null.
        throw new IllegalArgumentException("No running bookie with address " + bookie.getLocalAddress());
    }

    /**
     * Kills the bookie at {@code bookieIdx} and restarts it with the given free-disk-space
     * schedule (see {@link #restartBookie}).
     */
    private BookieServer replaceBookieWithCustomFreeDiskSpaceBookie(int bookieIdx, long initialFreeDiskSpace,
            long finalFreeDiskSpace, int delay) throws Exception {
        LOG.info("Killing bookie " + bs.get(bookieIdx).getLocalAddress());
        ServerConfiguration conf = killBookie(bookieIdx);
        return restartBookie(conf, initialFreeDiskSpace, finalFreeDiskSpace, delay);
    }

    /**
     * Replaces every bookie of the cluster, always taking the one at index 0 (the replacement is
     * appended to {@code bs}). The first {@code numBookies - 2} replacements always advertise
     * {@code baseFreeDiskSpace}; the last two follow the given initial/final/delay schedule.
     */
    private void replaceAllBookies(long baseFreeDiskSpace, long lastTwoInitialFreeDiskSpace,
            long lastTwoFinalFreeDiskSpace, int lastTwoDelaySecs) throws Exception {
        for (int i = 0; i < numBookies; i++) {
            if (i < numBookies - 2) {
                replaceBookieWithCustomFreeDiskSpaceBookie(0, baseFreeDiskSpace);
            } else {
                replaceBookieWithCustomFreeDiskSpaceBookie(0, lastTwoInitialFreeDiskSpace,
                        lastTwoFinalFreeDiskSpace, lastTwoDelaySecs);
            }
        }
    }

    /** Builds a client configuration with disk-weight-based placement enabled. */
    private ClientConfiguration newWeightedPlacementConf(int multiple) {
        return new ClientConfiguration()
                .setZkServers(zkUtil.getZooKeeperConnectString())
                .setDiskWeightBasedPlacementEnabled(true)
                .setBookieMaxWeightMultipleForWeightBasedPlacement(multiple);
    }

    /** Creates a selection-count map holding a zero entry for every currently running bookie. */
    private Map<BookieSocketAddress, Integer> newSelectionCounts() throws Exception {
        Map<BookieSocketAddress, Integer> counts = new HashMap<BookieSocketAddress, Integer>();
        resetSelectionCounts(counts);
        return counts;
    }

    /** Sets the selection count of every currently running bookie to zero. */
    private void resetSelectionCounts(Map<BookieSocketAddress, Integer> counts) throws Exception {
        for (BookieServer b : bs) {
            counts.put(b.getLocalAddress(), 0);
        }
    }

    /**
     * Creates {@link #NUM_LEDGERS_PER_ROUND} ledgers (ensemble 3, quorum 3) and increments the
     * count of every bookie that appears in each ledger's first ensemble.
     */
    private void createLedgersAndCountSelections(BookKeeper client, Map<BookieSocketAddress, Integer> counts)
            throws Exception {
        for (int i = 0; i < NUM_LEDGERS_PER_ROUND; i++) {
            LedgerHandle lh = client.createLedger(3, 3, DigestType.CRC32, "testPasswd".getBytes());
            for (BookieSocketAddress b : lh.getLedgerMetadata().getEnsemble(0)) {
                counts.put(b, counts.get(b) + 1);
            }
        }
    }

    /**
     * Asserts that {@code heavy} was selected roughly {@code multiple} times as often as
     * {@code light}. Since the number of ledgers per round is small, any ratio strictly within
     * 1 of {@code multiple} is accepted.
     */
    private void assertWeightRatio(Map<BookieSocketAddress, Integer> counts, BookieSocketAddress heavy,
            BookieSocketAddress light, int multiple) {
        double ratio = (double) counts.get(heavy) / (double) counts.get(light);
        assertTrue("Weigheted placement is not honored: " + Math.abs(ratio - multiple),
                Math.abs(ratio - multiple) < 1);
    }

    /**
     * Asserts that each of the last two bookies in {@code bs} was selected roughly
     * {@code multiple} times as often as every one of the other bookies.
     */
    private void assertLastTwoBookiesFavored(Map<BookieSocketAddress, Integer> counts, int multiple)
            throws Exception {
        for (int i = 0; i < numBookies - 2; i++) {
            BookieSocketAddress light = bs.get(i).getLocalAddress();
            assertWeightRatio(counts, bs.get(numBookies - 2).getLocalAddress(), light, multiple);
            assertWeightRatio(counts, bs.get(numBookies - 1).getLocalAddress(), light, multiple);
        }
    }

    /**
     * Asserts that, for every {@code i < bookiesToCheck}, the selection counts of bookies
     * {@code i} and {@code i + 1} differ by at most
     * {@link #MAX_UNIFORM_DEVIATION_PERCENT} percent of bookie {@code i + 1}'s count.
     */
    private void assertUniformPlacement(Map<BookieSocketAddress, Integer> counts, int bookiesToCheck)
            throws Exception {
        for (int i = 0; i < bookiesToCheck; i++) {
            double current = (double) counts.get(bs.get(i).getLocalAddress());
            double next = (double) counts.get(bs.get(i + 1).getLocalAddress());
            double delta = (Math.abs(current - next) * 100) / next;
            assertTrue("Weigheted placement is not honored: " + delta,
                    delta <= MAX_UNIFORM_DEVIATION_PERCENT);
        }
    }

    /**
     * Test to show that weight based selection honors the disk weight of bookies
     */
    @Test(timeout=60000)
    public void testDiskSpaceWeightedBookieSelection() throws Exception {
        long freeDiskSpace = 1000000L;
        int multiple = 3;

        // the first 8 bookies have freeDiskSpace of 1MB, while the remaining 2 have 3MB
        replaceAllBookies(freeDiskSpace, multiple * freeDiskSpace, multiple * freeDiskSpace, 0);
        Map<BookieSocketAddress, Integer> m = newSelectionCounts();

        // wait a couple of 100 msecs each for the bookies to come up and the bookieInfo to be
        // retrieved by the client
        ClientConfiguration conf = newWeightedPlacementConf(multiple);
        Thread.sleep(200);
        final BookKeeper client = new BookKeeper(conf);
        try {
            Thread.sleep(200);
            createLedgersAndCountSelections(client, m);
        } finally {
            client.close();
        }

        // make sure that bookies with higher weight (the last 2 bookies) are chosen 3X as often
        // as the others; since the number of ledgers created is small (2000), we allow a range
        // of 2X to 4X instead of the exact 3X
        assertLastTwoBookiesFavored(m, multiple);
    }

    /**
     * Test to show that weight based selection honors the disk weight of bookies and also adapts
     * when the bookies's weight changes.
     */
    @Test(timeout=60000)
    public void testDiskSpaceWeightedBookieSelectionWithChangingWeights() throws Exception {
        long freeDiskSpace = 1000000L;
        int multiple = 3;

        // the first 8 bookies have freeDiskSpace of 1MB, while the remaining 2 have 3MB
        replaceAllBookies(freeDiskSpace, multiple * freeDiskSpace, multiple * freeDiskSpace, 0);
        Map<BookieSocketAddress, Integer> m = newSelectionCounts();

        // wait 100 msecs each for the bookies to come up and the bookieInfo to be retrieved by
        // the client
        ClientConfiguration conf = newWeightedPlacementConf(multiple);
        Thread.sleep(100);
        final BookKeeper client = new BookKeeper(conf);
        try {
            Thread.sleep(100);
            createLedgersAndCountSelections(client, m);

            // the last 2 bookies should be chosen roughly 3X as often as the others (2X to 4X
            // is accepted because only 2000 ledgers are created)
            assertLastTwoBookiesFavored(m, multiple);

            // Restart the bookies in such a way that the first 2 bookies go from 1MB to 3MB free
            // space and the last 2 bookies go from 3MB to 1MB
            BookieServer server1 = bs.get(0);
            BookieServer server2 = bs.get(1);
            BookieServer server3 = bs.get(numBookies - 2);
            BookieServer server4 = bs.get(numBookies - 1);

            server1 = replaceBookieWithCustomFreeDiskSpaceBookie(server1, multiple * freeDiskSpace);
            server2 = replaceBookieWithCustomFreeDiskSpaceBookie(server2, multiple * freeDiskSpace);
            server3 = replaceBookieWithCustomFreeDiskSpaceBookie(server3, freeDiskSpace);
            server4 = replaceBookieWithCustomFreeDiskSpaceBookie(server4, freeDiskSpace);

            Thread.sleep(100);
            resetSelectionCounts(m);
            createLedgersAndCountSelections(client, m);

            // the two bookies that now advertise 3MB (server1 and server2) should be chosen
            // roughly 3X as often as every other bookie
            for (int i = 0; i < numBookies; i++) {
                BookieSocketAddress other = bs.get(i).getLocalAddress();
                if (server1.getLocalAddress().equals(other) || server2.getLocalAddress().equals(other)) {
                    continue;
                }
                assertWeightRatio(m, server1.getLocalAddress(), other, multiple);
                assertWeightRatio(m, server2.getLocalAddress(), other, multiple);
            }
        } finally {
            client.close();
        }
    }

    /**
     * Test to show that weight based selection honors the disk weight of bookies and also adapts
     * when bookies go away permanently.
     */
    @Test(timeout=60000)
    public void testDiskSpaceWeightedBookieSelectionWithBookiesDying() throws Exception {
        long freeDiskSpace = 1000000L;
        int multiple = 3;

        // the first 8 bookies have freeDiskSpace of 1MB, while the remaining 2 have 3MB
        replaceAllBookies(freeDiskSpace, multiple * freeDiskSpace, multiple * freeDiskSpace, 0);
        Map<BookieSocketAddress, Integer> m = newSelectionCounts();

        // wait 100 msecs each for the bookies to come up and the bookieInfo to be retrieved by
        // the client
        ClientConfiguration conf = newWeightedPlacementConf(multiple);
        Thread.sleep(100);
        final BookKeeper client = new BookKeeper(conf);
        try {
            Thread.sleep(100);
            createLedgersAndCountSelections(client, m);

            // make sure that bookies with higher weight are chosen 3X as often as the others;
            // since the number of ledgers is small (2000), there may be variation
            assertWeightRatio(m, bs.get(numBookies - 2).getLocalAddress(), bs.get(0).getLocalAddress(), multiple);
            assertWeightRatio(m, bs.get(numBookies - 1).getLocalAddress(), bs.get(1).getLocalAddress(), multiple);

            // Bring down the 2 bookies that had higher weight; after this the allocation to all
            // the remaining bookies should be uniform
            resetSelectionCounts(m);
            BookieServer server1 = bs.get(numBookies - 2);
            BookieServer server2 = bs.get(numBookies - 1);
            killBookie(numBookies - 1);
            killBookie(numBookies - 2);

            // give some time for the cluster to become stable
            Thread.sleep(100);
            createLedgersAndCountSelections(client, m);

            // the surviving bookies all have the same weight, so neighbouring counts should
            // deviate by no more than 30%
            assertUniformPlacement(m, numBookies - 3);

            // since the following 2 bookies were down, they shouldn't ever be selected
            assertTrue("Weigheted placement is not honored" + m.get(server1.getLocalAddress()),
                    m.get(server1.getLocalAddress()) == 0);
            assertTrue("Weigheted placement is not honored" + m.get(server2.getLocalAddress()),
                    m.get(server2.getLocalAddress()) == 0);
        } finally {
            client.close();
        }
    }

    /**
     * Test to show that weight based selection honors the disk weight of bookies and also adapts
     * when bookies are added.
     */
    @Test(timeout=60000)
    public void testDiskSpaceWeightedBookieSelectionWithBookiesBeingAdded() throws Exception {
        long freeDiskSpace = 1000000L;
        int multiple = 3;

        // all the bookies have freeDiskSpace of 1MB
        replaceAllBookies(freeDiskSpace, freeDiskSpace, freeDiskSpace, 0);

        // let the last two bookies be down initially
        ServerConfiguration conf1 = killBookie(numBookies - 1);
        ServerConfiguration conf2 = killBookie(numBookies - 2);
        Map<BookieSocketAddress, Integer> m = newSelectionCounts();

        // wait a bit for the bookies to come up and the bookieInfo to be retrieved by the client
        ClientConfiguration conf = newWeightedPlacementConf(multiple);
        Thread.sleep(100);
        final BookKeeper client = new BookKeeper(conf);
        try {
            Thread.sleep(100);
            createLedgersAndCountSelections(client, m);

            // all running bookies have the same weight, so neighbouring counts should deviate
            // by no more than 30%
            assertUniformPlacement(m, numBookies - 3);

            // bring up the two dead bookies; they'll also have 3X more free space than the rest
            // of the bookies
            restartBookie(conf1, multiple * freeDiskSpace, multiple * freeDiskSpace, 0);
            restartBookie(conf2, multiple * freeDiskSpace, multiple * freeDiskSpace, 0);

            // give some time for the cluster to become stable
            Thread.sleep(100);
            resetSelectionCounts(m);
            createLedgersAndCountSelections(client, m);

            // the last 2 bookies should be chosen roughly 3X as often as the others (2X to 4X
            // is accepted because only 2000 ledgers are created)
            assertLastTwoBookiesFavored(m, multiple);
        } finally {
            client.close();
        }
    }

    /**
     * Tests that the bookie selection is based on the amount of free disk space a bookie has. Also make sure that
     * the periodic bookieInfo read is working and causes the new weights to be taken into account.
     */
    @Test(timeout=60000)
    public void testDiskSpaceWeightedBookieSelectionWithPeriodicBookieInfoUpdate() throws Exception {
        long freeDiskSpace = 1000000L;
        int multiple = 3;

        // the first 8 bookies have freeDiskSpace of 1MB; the remaining 2 will advertise 1MB for
        // the first 2 seconds but then they'll advertise 3MB
        replaceAllBookies(freeDiskSpace, freeDiskSpace, multiple * freeDiskSpace, 2);
        Map<BookieSocketAddress, Integer> m = newSelectionCounts();

        // the periodic bookieInfo is read once every 6 seconds
        int updateIntervalSecs = 6;
        ClientConfiguration conf = newWeightedPlacementConf(multiple)
                .setGetBookieInfoIntervalSeconds(updateIntervalSecs, TimeUnit.SECONDS);

        // wait a bit for the bookies to come up and the bookieInfo to be retrieved by the client
        Thread.sleep(100);
        final BookKeeper client = new BookKeeper(conf);
        try {
            Thread.sleep(100);
            long startMsecs = MathUtils.now();
            createLedgersAndCountSelections(client, m);
            long elapsedMsecs = MathUtils.now() - startMsecs;

            // make sure that all the bookies are chosen pretty much uniformly
            int bookiesToCheck = numBookies - 1;
            if (elapsedMsecs > updateIntervalSecs * 1000) {
                // if this took longer than updateIntervalSecs, the weight for the last 2 bookies
                // will be higher, so skip checking them
                bookiesToCheck = numBookies - 3;
            }
            assertUniformPlacement(m, bookiesToCheck);

            if (elapsedMsecs < updateIntervalSecs * 1000) {
                // sleep until periodic bookie info retrieval kicks in and it gets the updated
                // freeDiskSpace for the last 2 bookies
                Thread.sleep(updateIntervalSecs * 1000 - elapsedMsecs);
            }

            resetSelectionCounts(m);
            createLedgersAndCountSelections(client, m);

            // the last 2 bookies should be chosen roughly 3X as often as the others (2X to 4X
            // is accepted because only 2000 ledgers are created)
            assertLastTwoBookiesFavored(m, multiple);
        } finally {
            client.close();
        }
    }
}