/** * Licensed to Cloudera, Inc. under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. Cloudera, Inc. licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.cloudera.flume.master; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.IOException; import java.io.OutputStreamWriter; import java.io.Writer; import java.util.Set; import org.apache.commons.cli.CommandLine; import org.apache.commons.cli.CommandLineParser; import org.apache.commons.cli.HelpFormatter; import org.apache.commons.cli.Options; import org.apache.commons.cli.ParseException; import org.apache.commons.cli.PosixParser; import org.apache.thrift.transport.TTransportException; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.cloudera.flume.agent.FlumeNode; import com.cloudera.flume.conf.FlumeConfiguration; import com.cloudera.flume.master.flows.FlowConfigManager; import com.cloudera.flume.master.logical.LogicalConfigurationManager; import com.cloudera.flume.reporter.ReportEvent; import com.cloudera.flume.reporter.ReportManager; import com.cloudera.flume.reporter.Reportable; import com.cloudera.flume.reporter.server.AvroReportServer; import com.cloudera.flume.reporter.server.ThriftReportServer; import com.cloudera.flume.util.FlumeVMInfo; import com.cloudera.flume.util.SystemInfo; import com.cloudera.util.CheckJavaVersion; import com.cloudera.util.NetUtils; import com.cloudera.util.StatusHttpServer; /** * This is a first cut at a server for distributing configurations to different * flume client machines. Right now this is a SPOF and not reliable or * persistent, but could eventually hooked into BDB or ZK or something to make * configurations more reliable. */ public class FlumeMaster implements Reportable { protected static final String ZK_CFG_STORE = "zookeeper"; protected static final String MEMORY_CFG_STORE = "memory"; protected final FlumeConfiguration cfg; static final Logger LOG = LoggerFactory.getLogger(FlumeMaster.class); /** report key -- hostname of this master */ static final String REPORTKEY_HOSTNAME = "hostname"; /** report key -- number of nodes reporting to this master */ static final String REPORTKEY_NODES_REPORTING_COUNT = "nodes_reporting_count"; MasterAdminServer configServer; MasterClientServer controlServer; /* * We create instances of both AvroReportServer and ThriftReportServer, and * start the one defined by the flag flume.report.server.rpc.type in the * configuration file. */ ThriftReportServer thriftReportServer = null; AvroReportServer avroReportServer = null; StatusHttpServer http = null; final boolean doHttp; final CommandManager cmdman; final ConfigurationManager specman; final StatusManager statman; final MasterAckManager ackman; final String uniqueMasterName; Thread reaper; // This is a static instance for commands and for the web interface to get to. static FlumeMaster instance; /** * Warning - do not use this constructor if you think it has been called * anywhere else! This is also not thread safe. * * TODO(henry): Proper singleton implementation * * TODO (jon): make doHttp a FlumeConfiguraiton option */ public FlumeMaster() { this(FlumeConfiguration.get(), true); } /** * Constructs a FlumeMaster using the default FlumeConfiguration, with the web * server off */ public FlumeMaster(FlumeConfiguration cfg) { this(cfg, false); } /** * Warning - do not use this constructor if you think it has been called * anywhere else! This is also not thread safe. * * TODO(henry): Proper singleton implementation */ public FlumeMaster(FlumeConfiguration cfg, boolean doHttp) { this.cfg = cfg; instance = this; this.uniqueMasterName = "flume-master-" + cfg.getMasterServerId(); this.doHttp = doHttp; this.cmdman = new CommandManager(); ConfigStore cfgStore = createConfigStore(FlumeConfiguration.get()); this.statman = new StatusManager(); // configuration manager translate user entered configs // TODO (jon) semantics have changed slightly -- different translations have // thier configurations partitioned now, only the user entered root // configurations are saved. ConfigurationManager base = new ConfigManager(cfgStore); ConfigurationManager flowedFailovers = new FlowConfigManager.FailoverFlowConfigManager( base, statman); this.specman = new LogicalConfigurationManager(flowedFailovers, new ConfigManager(), statman); if (FlumeConfiguration.get().getMasterIsDistributed()) { this.ackman = new GossipedMasterAckManager(FlumeConfiguration.get()); } else { this.ackman = new MasterAckManager(); } } /** * Completely generic and pluggable Flume master constructor. Used for test * cases. Webserver is by default off. */ public FlumeMaster(CommandManager cmd, ConfigurationManager cfgMan, StatusManager stat, MasterAckManager ack, FlumeConfiguration cfg) { instance = this; this.doHttp = false; this.cmdman = cmd; this.specman = cfgMan; this.statman = stat; this.ackman = ack; this.cfg = cfg; this.uniqueMasterName = "flume-master-" + cfg.getMasterServerId(); } /** * This hook makes it easy for web apps and jsps to get the current FlumeNode * instance. This is used to test the FlumeNode related jsps. */ synchronized public static FlumeMaster getInstance() { if (instance == null) { instance = new FlumeMaster(); } return instance; } /** * Helper function to parse the configuration to decide which kind of config * store to start */ public static ConfigStore createConfigStore(FlumeConfiguration cfg) { ConfigStore cfgStore; if (cfg.getMasterStore().equals(ZK_CFG_STORE)) { cfgStore = new ZooKeeperConfigStore(); } else if (cfg.getMasterStore().equals(MEMORY_CFG_STORE)) { if (cfg.getMasterIsDistributed()) { throw new IllegalStateException("Can't use non-zookeeper store with " + "distributed Master"); } cfgStore = new MemoryBackedConfigStore(); } else { throw new IllegalArgumentException("Unsupported config store: " + cfg.getMasterStore()); } return cfgStore; } /** * Returns a cmd id number that can be used to check status of the command. */ public long submit(Command cmd) { return cmdman.submit(cmd); } public void serve() throws IOException { if (cfg.getMasterStore().equals(ZK_CFG_STORE)) { try { ZooKeeperService.getAndInit(cfg); } catch (InterruptedException e) { throw new IOException("Unexpected interrupt when starting ZooKeeper", e); } } ReportManager.get().add(new FlumeVMInfo(this.uniqueMasterName + ".")); ReportManager.get().add(new SystemInfo(this.uniqueMasterName + ".")); if (doHttp) { String webPath = FlumeNode.getWebPath(cfg); this.http = new StatusHttpServer("flumeconfig", webPath, "0.0.0.0", cfg .getMasterHttpPort(), false); http.start(); } controlServer = new MasterClientServer(this, FlumeConfiguration.get()); configServer = new MasterAdminServer(this, FlumeConfiguration.get()); /* * We instantiate both kinds of report servers below, but no resources are * allocated till we call serve() on them. */ avroReportServer = new AvroReportServer(FlumeConfiguration.get() .getReportServerPort()); thriftReportServer = new ThriftReportServer(FlumeConfiguration.get() .getReportServerPort()); ReportManager.get().add(this); try { controlServer.serve(); configServer.serve(); /* * Start the Avro/Thrift ReportServer based on the flag set in the * configuration file. */ if (cfg.getReportServerRPC() == cfg.RPC_TYPE_AVRO) { avroReportServer.serve(); } else { thriftReportServer.serve(); } } catch (TTransportException e1) { throw new IOException("Error starting control or config server", e1); } cmdman.start(); ackman.start(); specman.start(); // TODO (jon) clean shutdown reaper = new Thread("Lost node reaper") { @Override public void run() { try { while (true) { Thread.sleep(FlumeConfiguration.get().getConfigHeartbeatPeriod()); statman.checkup(); } } catch (InterruptedException e) { LOG.error("Reaper thread unexpectedly interrupted:" + e.getMessage()); LOG.debug("Lost node reaper unexpectedly interrupted", e); } } }; reaper.start(); } /** * Shutdown all the various servers. */ public void shutdown() { try { if (http != null) { try { http.stop(); } catch (Exception e) { LOG.error("Error stopping FlumeMaster", e); } http = null; } cmdman.stop(); ackman.stop(); if (configServer != null) { configServer.stop(); configServer = null; } if (controlServer != null) { controlServer.stop(); controlServer = null; } /* * Close the reportserver which started. */ if (cfg.getReportServerRPC() == cfg.RPC_TYPE_AVRO) { if (avroReportServer != null) { avroReportServer.stop(); avroReportServer = null; } } else { if (thriftReportServer != null) { thriftReportServer.stop(); thriftReportServer = null; } } specman.stop(); reaper.interrupt(); FlumeConfiguration cfg = FlumeConfiguration.get(); if (cfg.getMasterStore().equals(ZK_CFG_STORE)) { ZooKeeperService.get().shutdown(); } } catch (IOException e) { LOG.error("Exception when shutting down master!", e); } catch (Exception e) { LOG.error("Exception when shutting down master!", e); } } /** * Used by internal web app for generating web page with master status * information. */ public String reportHtml() { try { ByteArrayOutputStream baos = new ByteArrayOutputStream(); OutputStreamWriter w = new OutputStreamWriter(baos); reportHtml(w); w.flush(); return baos.toString(); } catch (IOException e) { LOG.error("html report generation failed", e); } return ""; } /** * Generates html 1.0 data that displays the configuration status of the flume * system. */ public void reportHtml(Writer o) throws IOException { statman.getReport().toHtml(o); specman.getReport().toHtml(o); cmdman.getReport().toHtml(o); } /** * Return a list of the names of any nodes that have been seen. Used by the * web interface to populate choice inputs. */ public Set<String> getKnownNodes() { return statman.getNodeStatuses().keySet(); } public ConfigurationManager getSpecMan() { return specman; } public StatusManager getStatMan() { return statman; } public MasterAckManager getAckMan() { return ackman; } public CommandManager getCmdMan() { return cmdman; } @Override public String getName() { return this.uniqueMasterName; } @Override public ReportEvent getReport() { ReportEvent rpt = new ReportEvent(getName()); rpt.setStringMetric(REPORTKEY_HOSTNAME, NetUtils.localhost()); rpt.setLongMetric(REPORTKEY_NODES_REPORTING_COUNT, this.getKnownNodes() .size()); return rpt; } /** * This is the method that gets run when bin/flume master is executed. */ public static void main(String[] argv) { FlumeNode.logVersion(LOG); FlumeNode.logEnvironment(LOG); // Make sure the Java version is not older than 1.6 if (!CheckJavaVersion.isVersionOk()) { LOG .error("Exiting because of an old Java version or Java version in bad format"); System.exit(-1); } FlumeConfiguration.hardExitLoadConfig(); // if config file is bad hardexit. CommandLine cmd = null; Options options = new Options(); options.addOption("c", true, "Load config from file"); options.addOption("f", false, "Use fresh (empty) flume configs"); options.addOption("i", true, "Server id (an integer from 0 up)"); try { CommandLineParser parser = new PosixParser(); cmd = parser.parse(options, argv); } catch (ParseException e) { HelpFormatter fmt = new HelpFormatter(); fmt.printHelp("FlumeNode", options, true); System.exit(0); } String nodeconfig = FlumeConfiguration.get().getMasterSavefile(); if (cmd != null && cmd.hasOption("c")) { nodeconfig = cmd.getOptionValue("c"); } if (cmd != null && cmd.hasOption("i")) { String sid = cmd.getOptionValue("i"); LOG.info("Setting serverid from command line to be " + sid); try { int serverid = Integer.parseInt(cmd.getOptionValue("i")); FlumeConfiguration.get().setInt(FlumeConfiguration.MASTER_SERVER_ID, serverid); } catch (NumberFormatException e) { LOG.error("Couldn't parse server id as integer: " + sid); System.exit(0); } } // This will instantiate and read FlumeConfiguration - so make sure that // this is *after* we set the MASTER_SERVER_ID above. FlumeMaster config = new FlumeMaster(); LOG.info("Starting flume master on: " + NetUtils.localhost()); LOG.info(" Working Directory is: " + new File(".").getAbsolutePath()); try { boolean autoload = FlumeConfiguration.get().getMasterSavefileAutoload(); try { if (autoload && (cmd == null || (cmd != null && !cmd.hasOption("f")))) { // autoload a config? config.getSpecMan().loadConfigFile(nodeconfig); } } catch (IOException e) { LOG.warn("Could not autoload config from " + nodeconfig + " because " + e.getMessage()); } config.serve(); } catch (IOException e) { LOG.error("IO problem: " + e.getMessage()); LOG.debug("IOException", e); } } }