/** * Licensed to Cloudera, Inc. under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. Cloudera, Inc. licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.flume.master; import static org.junit.Assert.assertEquals; import java.io.File; import java.io.IOException; import java.util.Map; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import org.apache.thrift.transport.TTransportException; import org.junit.After; import org.junit.Before; import org.junit.Test; import com.cloudera.flume.conf.FlumeConfiguration; import com.cloudera.flume.conf.FlumeSpecException; import com.cloudera.flume.conf.FlumeConfigData; import com.cloudera.flume.master.StatusManager.NodeState; import com.cloudera.flume.master.availability.ConsistentHashFailoverChainManager; import com.cloudera.flume.master.availability.FailoverChainManager; import com.cloudera.flume.master.failover.FailoverConfigurationManager; import com.cloudera.flume.master.logical.LogicalConfigurationManager; import com.cloudera.util.Clock; import com.cloudera.util.FileUtil; import com.cloudera.util.NetUtils; /** * This tests all kinds of auto update scenarios where a user or new machine * update causes an incremental update to some machine configurations. This * specifically tests the DFO. * */ public class TestMasterAutoUpdatesE2E { public static final Logger LOG = LoggerFactory .getLogger(TestMasterAutoUpdatesE2E.class); // ///////// protected FlumeMaster flumeMaster = null; private File tmpdir = null; protected ConfigManager cfgMan; /** * This creates an environment where we have configurations set and then * serving starts. This simulates a zk configstore load and then the serve * call being run. * * Ideally we'd create a SetupTranslatingZKMasterTestEnv, but there is an * issue when trying to start/shutdown and start a new master in the same * process/jvm. * */ @Before public void setCfgAndStartMaster() throws TTransportException, IOException, FlumeSpecException { // Give ZK a temporary directory, otherwise it's possible we'll reload some // old configs tmpdir = FileUtil.mktempdir(); FlumeConfiguration.createTestableConfiguration(); FlumeConfiguration.get().set(FlumeConfiguration.MASTER_STORE, "memory"); buildMaster(); // Instead of loading from a ZK Store, we just see the config in the "deep" // config manager. Any translations will not occur. ConfigurationManager loaded = cfgMan; loaded.setConfig("node1", "flow", "autoCollectorSource", "null"); loaded.setConfig("node2", "flow", "autoCollectorSource", "null"); loaded.setConfig("node3", "flow", "autoCollectorSource", "null"); loaded.setConfig("node4", "flow", "autoCollectorSource", "null"); loaded.setConfig("agent", "flow", "null", "autoE2EChain"); // this is the outer configman, should have no translation. ConfigurationManager cfgman1 = flumeMaster.getSpecMan(); Map<String, FlumeConfigData> cfgs1 = cfgman1.getTranslatedConfigs(); assertEquals(0, cfgs1.size()); // no translations happened // start the master (which should trigger an update and translation flumeMaster.serve(); } /** * Build but do not start a master. * * This exposes a hook to the deepest cfgMan which would ideally be a saved ZK * backed version being reloaded from a restarted master. */ void buildMaster() throws IOException { cfgMan = new ConfigManager(FlumeMaster.createConfigStore(FlumeConfiguration .get())); FailoverChainManager fcMan = new ConsistentHashFailoverChainManager(3); ConfigurationManager self2 = new ConfigManager(); ConfigurationManager failover = new FailoverConfigurationManager(cfgMan, self2, fcMan); StatusManager statman = new StatusManager(); ConfigurationManager self = new ConfigManager(); ConfigurationManager logical = new LogicalConfigurationManager(failover, self, statman); flumeMaster = new FlumeMaster(new CommandManager(), logical, statman, new MasterAckManager(), FlumeConfiguration.get()); } @After public void stopMaster() throws IOException { if (flumeMaster != null) { flumeMaster.shutdown(); flumeMaster = null; } if (tmpdir != null) { FileUtil.rmr(tmpdir); tmpdir = null; } } // /////// end stuff that should be refactored /** * Ideally, start a master (calling serve), set a configuration, kill the * master, and then reload it. We make sure that there was an attempt to * translate the configuration. This simulates a ZK-backed master going down * and coming back up with the previously specfied configuration. */ @Test public void testReloadRefresh() throws IOException, InterruptedException, FlumeSpecException { ConfigurationManager cfgman2 = flumeMaster.getSpecMan(); Map<String, FlumeConfigData> cfgs2 = cfgman2.getTranslatedConfigs(); assertEquals(5, cfgs2.size()); } /** * The configuration here has no live nodes. It translates the failchains but * fail on logicalSink translations. This is the base case for most of the * subsequent tests. */ @Test public void testMasterNoNode() { Map<String, FlumeConfigData> xcfgs = flumeMaster.getSpecMan() .getTranslatedConfigs(); FlumeConfigData agentFcd = xcfgs.get("agent"); String ans1 = "{ ackedWriteAhead => { stubbornAppend => { insistentOpen => " + "< fail( \"logicalSink( \\\"node4\\\" )\" ) ?" + " < fail( \"logicalSink( \\\"node2\\\" )\" ) ?" + " fail( \"logicalSink( \\\"node1\\\" )\" ) > > } } }"; assertEquals(agentFcd.sinkConfig, ans1); } /** * A user triggered reconfigure of a collector to a non collector should cause * a configuration that depends on the removed configuration to be removed. */ @Test public void testCollectorReconfigAutoUpdate() throws IOException, FlumeSpecException { // a user initiated removal of a node would cause the config to change. flumeMaster.getSpecMan().setConfig("node2", "flow", "null", "null"); // Look, no explicit updates! // check new config Map<String, FlumeConfigData> xcfgs2 = flumeMaster.getSpecMan() .getTranslatedConfigs(); FlumeConfigData agentFcd2 = xcfgs2.get("agent"); String ans2 = "{ ackedWriteAhead => { stubbornAppend => { insistentOpen => " + "< fail( \"logicalSink( \\\"node4\\\" )\" ) ?" + " < fail( \"logicalSink( \\\"node1\\\" )\" ) ?" + " fail( \"logicalSink( \\\"node3\\\" )\" ) > > } } }"; assertEquals(agentFcd2.sinkConfig, ans2); } /** * A user triggered decommission should cause a configuration that depends on * the removed configuration to be removed. */ @Test public void testDecommission() throws IOException { // a user initiated removal of a node would cause the config to change. flumeMaster.getSpecMan().removeLogicalNode("node2"); // Look, no explicit update call! // check new config Map<String, FlumeConfigData> xcfgs2 = flumeMaster.getSpecMan() .getTranslatedConfigs(); FlumeConfigData agentFcd2 = xcfgs2.get("agent"); // This is wrong -- there should be a different logicalSink replacing node2 String ans2 = "{ ackedWriteAhead => { stubbornAppend => { insistentOpen => " + "< fail( \"logicalSink( \\\"node4\\\" )\" ) ?" + " < fail( \"logicalSink( \\\"node1\\\" )\" ) ?" + " fail( \"logicalSink( \\\"node3\\\" )\" ) > > } } }"; assertEquals(agentFcd2.sinkConfig, ans2); } /** * Add a new collectorSource node, and make sure the agent's configuration is * updated. */ @Test public void testMasterNodeNewCollectorAutoUpdate() throws IOException, FlumeSpecException { // a user initiated removal of a node would cause the config to change. flumeMaster.getSpecMan().setConfig("nodeNew", "flow", "autoCollectorSource", "null"); // Look, no explicit update call! // check new config Map<String, FlumeConfigData> xcfgs2 = flumeMaster.getSpecMan() .getTranslatedConfigs(); FlumeConfigData agentFcd2 = xcfgs2.get("agent"); String ans2 = "{ ackedWriteAhead => { stubbornAppend => { insistentOpen =>" + " < fail( \"logicalSink( \\\"nodeNew\\\" )\" ) ?" + " < fail( \"logicalSink( \\\"node4\\\" )\" ) ?" + " fail( \"logicalSink( \\\"node2\\\" )\" ) > > } } }"; assertEquals(agentFcd2.sinkConfig, ans2); } /** * Test that an autoUpdate happens when a physical node information * (heartbeat) shows up and allows for a logicalSink/Source translation * * This condition is assumed in the following test -- * testMasterNodeUnmapAutoUpdate() */ @Test public void testMasterNodeAutoUpdate() throws IOException, FlumeSpecException { // First, heart beats String host = NetUtils.localhost(); long ver = Clock.unixTime(); flumeMaster.getStatMan().updateHeartbeatStatus(host, "physnode", "node1", NodeState.IDLE, ver); flumeMaster.getStatMan().updateHeartbeatStatus(host, "physnode", "node2", NodeState.IDLE, ver); flumeMaster.getStatMan().updateHeartbeatStatus(host, "physnode", "node3", NodeState.IDLE, ver); flumeMaster.getStatMan().updateHeartbeatStatus(host, "physnode", "node4", NodeState.IDLE, ver); flumeMaster.getStatMan().updateHeartbeatStatus(host, "physnode", "agent", NodeState.IDLE, ver); // Next spawn so that all are mapped onto a node and now gets a physical flumeMaster.getSpecMan().addLogicalNode(host, "node1"); flumeMaster.getSpecMan().addLogicalNode(host, "node2"); flumeMaster.getSpecMan().addLogicalNode(host, "node3"); flumeMaster.getSpecMan().addLogicalNode(host, "node4"); flumeMaster.getSpecMan().addLogicalNode(host, "agent"); // Look, no explicit update call! // check new config Map<String, FlumeConfigData> xcfgs2 = flumeMaster.getSpecMan() .getTranslatedConfigs(); FlumeConfigData agentFcd2 = xcfgs2.get("agent"); // This is wrong -- there should be a different logicalSink replacing node2 String ans2 = "{ ackedWriteAhead => { stubbornAppend => { insistentOpen =>" + " < rpcSink( \"" + host + "\", 35856 ) ?" + " < rpcSink( \"" + host + "\", 35854 ) ?" + " rpcSink( \"" + host + "\", 35853 ) > > } } }"; assertEquals(ans2, agentFcd2.sinkConfig); } /** * This heartbeats to provide physical node info and allows the translators to * build fully physical configurations. */ @Test public void testMasterNodeUnmapAutoUpdate() throws IOException { // First, heart beats String host = NetUtils.localhost(); long ver = Clock.unixTime(); flumeMaster.getStatMan().updateHeartbeatStatus(host, "physnode", "node1", NodeState.IDLE, ver); flumeMaster.getStatMan().updateHeartbeatStatus(host, "physnode", "node2", NodeState.IDLE, ver); flumeMaster.getStatMan().updateHeartbeatStatus(host, "physnode", "node3", NodeState.IDLE, ver); flumeMaster.getStatMan().updateHeartbeatStatus(host, "physnode", "node4", NodeState.IDLE, ver); flumeMaster.getStatMan().updateHeartbeatStatus(host, "physnode", "agent", NodeState.IDLE, ver); // First, spawn so that all are mapped onto a node and now gets a physical // node info flumeMaster.getSpecMan().addLogicalNode("host", "node1"); flumeMaster.getSpecMan().addLogicalNode("host", "node2"); flumeMaster.getSpecMan().addLogicalNode("host", "node3"); flumeMaster.getSpecMan().addLogicalNode("host", "node4"); flumeMaster.getSpecMan().addLogicalNode("host", "agent"); // Now do a user initiated unmap should make the config go back to a failing // version with logicalSinks flumeMaster.getSpecMan().unmapAllLogicalNodes(); // Look, no explicit update call! // check new config Map<String, FlumeConfigData> xcfgs2 = flumeMaster.getSpecMan() .getTranslatedConfigs(); FlumeConfigData agentFcd2 = xcfgs2.get("agent"); String ans2 = "{ ackedWriteAhead => { stubbornAppend => { insistentOpen =>" + " < fail( \"logicalSink( \\\"node4\\\" )\" ) ?" + " < fail( \"logicalSink( \\\"node2\\\" )\" ) ?" + " fail( \"logicalSink( \\\"node1\\\" )\" ) > > } } }"; assertEquals(ans2, agentFcd2.sinkConfig); } }