/**
 * Licensed to Cloudera, Inc. under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  Cloudera, Inc. licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.cloudera.flume.agent.durability;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;
import static org.junit.Assert.fail;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.TimeUnit;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.junit.Before;
import org.junit.Ignore;
import org.junit.Test;

import com.cloudera.flume.agent.DirectMasterRPC;
import com.cloudera.flume.agent.FlumeNode;
import com.cloudera.flume.agent.LogicalNode;
import com.cloudera.flume.conf.Context;
import com.cloudera.flume.conf.FlumeSpecException;
import com.cloudera.flume.core.EventSink;
import com.cloudera.flume.core.EventSource;
import com.cloudera.flume.core.EventUtil;
import com.cloudera.flume.handlers.debug.NoNlASCIISynthSource;
import com.cloudera.flume.handlers.endtoend.AckChecksumChecker;
import com.cloudera.flume.handlers.endtoend.AckListener;
import com.cloudera.flume.handlers.rolling.TimeTrigger;
import com.cloudera.flume.master.FlumeMaster;
import com.cloudera.flume.master.StatusManager.NodeState;
import com.cloudera.flume.reporter.ReportManager;
import com.cloudera.flume.reporter.aggregator.CounterSink;
import com.cloudera.util.BenchmarkHarness;
import com.cloudera.util.Clock;
import com.cloudera.util.FileUtil;

/**
 * This tests concurrent WAL managers, each writing to its own directory. Not
 * the best solution but workable.
 */
public class TestConcurrentWALMan {
  public static final Logger LOG = Logger
      .getLogger(TestConcurrentWALMan.class);

  /**
   * Raises the root logger to DEBUG before every test.
   */
  @Before
  public void setDebug() {
    Logger.getRootLogger().setLevel(Level.DEBUG);
  }

  @Test
  public void test1thread() throws IOException, InterruptedException {
    doTestConcurrentWALMans(1, 10000, 60000);
  }

  @Test
  public void test10thread() throws IOException, InterruptedException {
    doTestConcurrentWALMans(10, 10000, 60000);
  }

  @Test
  public void test100thread() throws IOException, InterruptedException {
    doTestConcurrentWALMans(100, 1000, 60000);
  }

  @Ignore("This test times out because of inefficient ack retrial/actions")
  @Test
  public void test1000thread() throws IOException, InterruptedException {
    doTestConcurrentWALMans(1000, 100, 120000);
  }

  @Test
  public void test1LogicalNode() throws IOException, InterruptedException,
      FlumeSpecException {
    doTestContextConcurrentWALMans(1, 10000, 120000);
  }

  @Test
  public void test10logicalNodes() throws IOException, InterruptedException,
      FlumeSpecException {
    doTestContextConcurrentWALMans(10, 10000, 120000);
  }

  @Test
  public void test100logicalNodes() throws IOException, InterruptedException,
      FlumeSpecException {
    doTestContextConcurrentWALMans(100, 1000, 120000);
  }

  @Ignore("this test currently blows up")
  @Test
  public void test1000logicalNodes() throws IOException,
      InterruptedException, FlumeSpecException {
    doTestContextConcurrentWALMans(1000, 100, 180000);
  }

  /**
   * Bang on the WAL mechanism as hard as you want with a number of concurrent
   * threads and a number of events per thread.
   *
   * Each thread gets its own temp directory, WAL manager and counter sink
   * named "count.&lt;idx&gt;", and pushes <code>events + idx</code> events so
   * that every counter has a distinct expected value.
   *
   * @param threads
   *          number of concurrent worker threads
   * @param events
   *          base number of events per thread
   * @param timeout
   *          maximum time in millis to wait for all workers to finish once
   *          they have all started
   */
  public void doTestConcurrentWALMans(final int threads, final int events,
      int timeout) throws IOException, InterruptedException {
    final CountDownLatch started = new CountDownLatch(threads);
    final CountDownLatch done = new CountDownLatch(threads);
    // Exceptions thrown inside workers; surfaced as a test failure below
    // instead of being only logged.
    final Queue<Throwable> failures = new ConcurrentLinkedQueue<Throwable>();

    for (int i = 0; i < threads; i++) {
      final int idx = i;
      new Thread() {
        @Override
        public void run() {
          try {
            runWalWorker(idx, events, started);
          } catch (Exception e) {
            LOG.error(e, e);
            failures.add(e);
          } finally {
            done.countDown();
          }
        }
      }.start();
    }

    // Cannot block forever: every worker counts this latch down exactly once,
    // even when its setup fails.
    started.await();
    boolean ok = done.await(timeout, TimeUnit.MILLISECONDS);
    assertTrue("Test timed out", ok);
    assertTrue("Worker threads failed: " + failures, failures.isEmpty());

    for (int i = 0; i < threads; i++) {
      CounterSink cnt = (CounterSink) ReportManager.get().getReportable(
          "count." + i);
      // check for the slightly different counts based on thread.
      int exp = events + i;
      LOG.info("expected " + exp + " but got " + cnt.getCount());
      assertEquals(exp, (int) cnt.getCount());
    }
  }

  /**
   * Body of a single worker of {@link #doTestConcurrentWALMans}: builds a WAL
   * decorated sink over a fresh temp directory and dumps a synthetic source
   * into it.
   *
   * The <code>started</code> latch is counted down exactly once, whether or
   * not setup succeeds, and the temp directory is always removed.
   */
  private void runWalWorker(int idx, int events, CountDownLatch started)
      throws Exception {
    boolean signalledStart = false;
    File walDir = null;
    try {
      walDir = FileUtil.mktempdir();
      CounterSink counter = new CounterSink("count." + idx);
      AckChecksumChecker<EventSink> checker = new AckChecksumChecker<EventSink>(
          counter);
      NaiveFileWALManager walMan = new NaiveFileWALManager(walDir);
      EventSink snk = new NaiveFileWALDeco<EventSink>(new Context(), checker,
          walMan, new TimeTrigger(1000000), new AckListener.Empty(), 100);
      ReportManager.get().add(counter);

      // make each parallel instance send a slightly different number of
      // messages.
      EventSource src = new NoNlASCIISynthSource(events + idx, 100);
      src.open();
      snk.open();

      started.countDown();
      signalledStart = true;

      EventUtil.dumpAll(src, snk);
      src.close();
      snk.close();
    } finally {
      if (!signalledStart) {
        // setup failed; release the coordinating thread anyway.
        started.countDown();
      }
      if (walDir != null) {
        try {
          FileUtil.rmr(walDir);
        } catch (Exception e) {
          // best effort: do not mask the original failure with a cleanup
          // problem.
          LOG.warn("Unable to remove temp dir " + walDir, e);
        }
      }
    }
  }

  /**
   * Returns true when every logical node whose name starts with "report" has
   * a non-zero config version and is IDLE. Nodes with any other name are
   * ignored.
   *
   * NOTE(review): not called from within this file, and the nodes spawned by
   * {@link #doTestContextConcurrentWALMans} are named "test.&lt;i&gt;" rather
   * than "report.&lt;i&gt;" -- confirm whether this helper is still needed.
   */
  boolean isDone(Collection<LogicalNode> lns) {
    for (LogicalNode n : lns) {
      if (!n.getName().startsWith("report")) {
        // skip the default logical node.
        continue;
      }
      if (n.getConfigVersion() == 0
          || NodeState.IDLE != n.getStatus().state) {
        return false;
      }
    }
    return true;
  }

  /**
   * Spawns <code>threads</code> logical nodes on a single flume node backed by
   * an in-process master, each reading from an asciisynth source of
   * <code>events + i</code> events and writing through an ackedWriteAhead
   * sink into a counter named "report.&lt;i&gt;". Waits for the WALs to drain
   * and then checks every counter.
   *
   * The local write dir set up by {@link BenchmarkHarness} is cleaned up even
   * when the test times out or an assertion fails.
   *
   * @param threads
   *          number of logical nodes to spawn
   * @param events
   *          base number of events per logical node
   * @param timeout
   *          maximum time in millis to wait for the WALs to become empty
   */
  public void doTestContextConcurrentWALMans(final int threads,
      final int events, int timeout) throws IOException,
      InterruptedException, FlumeSpecException {
    BenchmarkHarness.setupLocalWriteDir();
    try {
      FlumeMaster master = new FlumeMaster();
      FlumeNode node = new FlumeNode(new DirectMasterRPC(master), false,
          false);

      for (int i = 0; i < threads; i++) {
        String name = "test." + i;
        String report = "report." + i;
        int count = events + i;
        String src = "asciisynth(" + count + ",100)";
        String snk = " { ackedWriteAhead(15000) => {ackChecker => counter(\""
            + report + "\") }}";
        node.getLogicalNodeManager().testingSpawn(name, src, snk);
      }

      // wait for WALs to flush.
      waitForEmptyWALs(master, node, timeout);

      // check to make sure everyone got the right number of events
      for (int i = 0; i < threads; i++) {
        CounterSink cnt = (CounterSink) ReportManager.get().getReportable(
            "report." + i);
        LOG.info("expected " + (events + i) + " but got " + cnt.getCount());
        assertEquals("Counts did not line up for report." + i, events + i,
            cnt.getCount());
      }
    } finally {
      BenchmarkHarness.cleanupLocalWriteDir();
    }
  }

  /**
   * Wait until the flume node's WALs are empty.
   *
   * Polls {@link #areWALsDone} and sleeps 2500 ms between unsuccessful polls.
   * After every poll it logs the master's pending acks and asks the node's
   * ack checker to check acks. Fails the test once more than
   * <code>timeout</code> millis have elapsed.
   */
  private void waitForEmptyWALs(FlumeMaster master, FlumeNode node,
      int timeout) throws InterruptedException {
    boolean done = false;
    long start = System.currentTimeMillis();
    while (!done) {
      if (System.currentTimeMillis() - start > timeout) {
        fail("Test took too long");
      }
      done = areWALsDone(node, node.getLogicalNodeManager().getNodes());
      if (!done) {
        Clock.sleep(2500);
      }

      LOG.info("Pending acks at the " + "master: "
          + master.getAckMan().getPending());
      node.getAckChecker().checkAcks();
    }
  }

  /**
   * Checks if the WALs associated with logical node list are currently empty.
   *
   * A node only counts as done when it has been reconfigured at least once,
   * is IDLE, has a positive config version, and its WAL manager reports
   * empty. The first node that fails any of these checks is logged and makes
   * the method return false.
   */
  boolean areWALsDone(FlumeNode node, Collection<LogicalNode> lns) {
    for (LogicalNode n : lns) {
      if (0 >= n.getReport().getLongMetric(LogicalNode.A_RECONFIGURES)) {
        // reconfigure count still at 0
        LOG.warn("Logical node reconfigure count <= 0");
        return false;
      }

      if (n.getStatus().state != NodeState.IDLE) {
        // config not done.
        LOG.warn("Logical node was not IDLE");
        return false;
      }

      if (n.getConfigVersion() <= 0) {
        // still on original null/null config
        LOG.warn("Odd, version was 0 but should not be");
        return false;
      }

      WALManager wal = node.getWalManager(n.getName());
      if (!wal.isEmpty()) {
        LOG.warn(n.getName() + ": wal not empty");
        LOG.warn("pending acks from node point of view:"
            + node.getAckChecker().getPendingAckTags());
        return false;
      }
    }
    return true;
  }
}