/* * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. The ASF licenses this file * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.cassandra.service; import java.io.File; import java.io.IOException; import java.lang.management.ManagementFactory; import java.lang.management.MemoryPoolMXBean; import java.net.InetAddress; import java.net.UnknownHostException; import java.rmi.registry.LocateRegistry; import java.rmi.server.RMIServerSocketFactory; import java.util.*; import java.util.concurrent.TimeUnit; import javax.management.MBeanServer; import javax.management.ObjectName; import javax.management.StandardMBean; import javax.management.remote.JMXConnectorServer; import javax.management.remote.JMXServiceURL; import javax.management.remote.rmi.RMIConnectorServer; import com.google.common.collect.Iterables; import com.google.common.util.concurrent.Uninterruptibles; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.addthis.metrics.reporter.config.ReporterConfig; import org.apache.cassandra.concurrent.JMXEnabledThreadPoolExecutor; import org.apache.cassandra.concurrent.ScheduledExecutors; import org.apache.cassandra.concurrent.Stage; import org.apache.cassandra.concurrent.StageManager; import org.apache.cassandra.config.CFMetaData; import org.apache.cassandra.config.DatabaseDescriptor; import org.apache.cassandra.config.Schema; import org.apache.cassandra.db.*; import org.apache.cassandra.db.commitlog.CommitLog; import org.apache.cassandra.db.compaction.CompactionManager; import org.apache.cassandra.exceptions.ConfigurationException; import org.apache.cassandra.io.FSError; import org.apache.cassandra.io.sstable.CorruptSSTableException; import org.apache.cassandra.io.util.FileUtils; import org.apache.cassandra.metrics.StorageMetrics; import org.apache.cassandra.thrift.ThriftServer; import org.apache.cassandra.tracing.Tracing; import org.apache.cassandra.utils.*; /** * The <code>CassandraDaemon</code> is an abstraction for a Cassandra daemon * service, which defines not only a way to activate and deactivate it, but also * hooks into its lifecycle methods (see {@link #setup()}, {@link #start()}, * {@link #stop()} and {@link #setup()}). */ public class CassandraDaemon { public static final String MBEAN_NAME = "org.apache.cassandra.db:type=NativeAccess"; public static JMXConnectorServer jmxServer = null; private static final Logger logger = LoggerFactory.getLogger(CassandraDaemon.class); private static void maybeInitJmx() { String jmxPort = System.getProperty("com.sun.management.jmxremote.port"); if (jmxPort == null) { logger.warn("JMX is not enabled to receive remote connections. Please see cassandra-env.sh for more info."); jmxPort = System.getProperty("cassandra.jmx.local.port"); if (jmxPort == null) { logger.error("cassandra.jmx.local.port missing from cassandra-env.sh, unable to start local JMX service." + jmxPort); } else { System.setProperty("java.rmi.server.hostname", InetAddress.getLoopbackAddress().getHostAddress()); try { RMIServerSocketFactory serverFactory = new RMIServerSocketFactoryImpl(); LocateRegistry.createRegistry(Integer.valueOf(jmxPort), null, serverFactory); StringBuffer url = new StringBuffer(); url.append("service:jmx:"); url.append("rmi://localhost/jndi/"); url.append("rmi://localhost:").append(jmxPort).append("/jmxrmi"); Map env = new HashMap(); env.put(RMIConnectorServer.RMI_SERVER_SOCKET_FACTORY_ATTRIBUTE, serverFactory); jmxServer = new RMIConnectorServer( new JMXServiceURL(url.toString()), env, ManagementFactory.getPlatformMBeanServer() ); jmxServer.start(); } catch (IOException e) { logger.error("Error starting local jmx server: ", e); } } } else { logger.info("JMX is enabled to receive remote connections on port: " + jmxPort); } } private static final CassandraDaemon instance = new CassandraDaemon(); /** * The earliest legit timestamp a casandra instance could have ever launched. * Date roughly taken from http://perspectives.mvdirona.com/2008/07/12/FacebookReleasesCassandraAsOpenSource.aspx * We use this to ensure the system clock is at least somewhat correct at startup. */ private static final long EARLIEST_LAUNCH_DATE = 1215820800000L; public Server thriftServer; public Server nativeServer; /** * This is a hook for concrete daemons to initialize themselves suitably. * * Subclasses should override this to finish the job (listening on ports, etc.) * * @throws IOException */ protected void setup() { try { logger.info("Hostname: {}", InetAddress.getLocalHost().getHostName()); } catch (UnknownHostException e1) { logger.info("Could not resolve local host"); } long now = System.currentTimeMillis(); if (now < EARLIEST_LAUNCH_DATE) { logger.error("current machine time is {}, but that is seemingly incorrect. exiting now.", new Date(now)); System.exit(3); } // log warnings for different kinds of sub-optimal JVMs. tldr use 64-bit Oracle >= 1.6u32 if (!DatabaseDescriptor.hasLargeAddressSpace()) logger.info("32bit JVM detected. It is recommended to run Cassandra on a 64bit JVM for better performance."); String javaVersion = System.getProperty("java.version"); String javaVmName = System.getProperty("java.vm.name"); logger.info("JVM vendor/version: {}/{}", javaVmName, javaVersion); if (javaVmName.contains("OpenJDK")) { // There is essentially no QA done on OpenJDK builds, and // clusters running OpenJDK have seen many heap and load issues. logger.warn("OpenJDK is not recommended. Please upgrade to the newest Oracle Java release"); } else if (!javaVmName.contains("HotSpot")) { logger.warn("Non-Oracle JVM detected. Some features, such as immediate unmap of compacted SSTables, may not work as intended"); } /* else { String[] java_version = javaVersion.split("_"); String java_major = java_version[0]; int java_minor; try { java_minor = (java_version.length > 1) ? Integer.parseInt(java_version[1]) : 0; } catch (NumberFormatException e) { // have only seen this with java7 so far but no doubt there are other ways to break this logger.info("Unable to parse java version {}", Arrays.toString(java_version)); java_minor = 32; } } */ logger.info("Heap size: {}/{}", Runtime.getRuntime().totalMemory(), Runtime.getRuntime().maxMemory()); for(MemoryPoolMXBean pool: ManagementFactory.getMemoryPoolMXBeans()) logger.info("{} {}: {}", pool.getName(), pool.getType(), pool.getPeakUsage()); logger.info("Classpath: {}", System.getProperty("java.class.path")); // Fail-fast if JNA is not available or failing to initialize properly // except with -Dcassandra.boot_without_jna=true. See CASSANDRA-6575. if (!CLibrary.jnaAvailable()) { boolean jnaRequired = !Boolean.getBoolean("cassandra.boot_without_jna"); if (jnaRequired) { logger.error("JNA failing to initialize properly. Use -Dcassandra.boot_without_jna=true to bootstrap even so."); System.exit(3); } } CLibrary.tryMlockall(); maybeInitJmx(); Thread.setDefaultUncaughtExceptionHandler(new Thread.UncaughtExceptionHandler() { public void uncaughtException(Thread t, Throwable e) { StorageMetrics.exceptions.inc(); logger.error("Exception in thread {}", t, e); Tracing.trace("Exception in thread {}", t, e); for (Throwable e2 = e; e2 != null; e2 = e2.getCause()) { JVMStabilityInspector.inspectThrowable(e2); if (e2 instanceof FSError) { if (e2 != e) // make sure FSError gets logged exactly once. logger.error("Exception in thread {}", t, e2); FileUtils.handleFSError((FSError) e2); } if (e2 instanceof CorruptSSTableException) { if (e2 != e) logger.error("Exception in thread " + t, e2); FileUtils.handleCorruptSSTable((CorruptSSTableException) e2); } } } }); // check all directories(data, commitlog, saved cache) for existence and permission Iterable<String> dirs = Iterables.concat(Arrays.asList(DatabaseDescriptor.getAllDataFileLocations()), Arrays.asList(DatabaseDescriptor.getCommitLogLocation(), DatabaseDescriptor.getSavedCachesLocation())); for (String dataDir : dirs) { logger.debug("Checking directory {}", dataDir); File dir = new File(dataDir); // check that directories exist. if (!dir.exists()) { logger.error("Directory {} doesn't exist", dataDir); // if they don't, failing their creation, stop cassandra. if (!dir.mkdirs()) { logger.error("Has no permission to create {} directory", dataDir); System.exit(3); } } // if directories exist verify their permissions if (!Directories.verifyFullPermissions(dir, dataDir)) { // if permissions aren't sufficient, stop cassandra. System.exit(3); } } if (CacheService.instance == null) // should never happen throw new RuntimeException("Failed to initialize Cache Service."); // check the system keyspace to keep user from shooting self in foot by changing partitioner, cluster name, etc. // we do a one-off scrub of the system keyspace first; we can't load the list of the rest of the keyspaces, // until system keyspace is opened. for (CFMetaData cfm : Schema.instance.getKeyspaceMetaData(Keyspace.SYSTEM_KS).values()) ColumnFamilyStore.scrubDataDirectories(cfm); try { SystemKeyspace.checkHealth(); } catch (ConfigurationException e) { logger.error("Fatal exception during initialization", e); System.exit(100); } // load keyspace descriptions. DatabaseDescriptor.loadSchemas(); // clean up compaction leftovers Map<Pair<String, String>, Map<Integer, UUID>> unfinishedCompactions = SystemKeyspace.getUnfinishedCompactions(); for (Pair<String, String> kscf : unfinishedCompactions.keySet()) { CFMetaData cfm = Schema.instance.getCFMetaData(kscf.left, kscf.right); // CFMetaData can be null if CF is already dropped if (cfm != null) ColumnFamilyStore.removeUnfinishedCompactionLeftovers(cfm, unfinishedCompactions.get(kscf)); } SystemKeyspace.discardCompactionsInProgress(); // clean up debris in the rest of the keyspaces for (String keyspaceName : Schema.instance.getKeyspaces()) { // Skip system as we've already cleaned it if (keyspaceName.equals(Keyspace.SYSTEM_KS)) continue; for (CFMetaData cfm : Schema.instance.getKeyspaceMetaData(keyspaceName).values()) ColumnFamilyStore.scrubDataDirectories(cfm); } Keyspace.setInitialized(); // initialize keyspaces for (String keyspaceName : Schema.instance.getKeyspaces()) { if (logger.isDebugEnabled()) logger.debug("opening keyspace {}", keyspaceName); // disable auto compaction until commit log replay ends for (ColumnFamilyStore cfs : Keyspace.open(keyspaceName).getColumnFamilyStores()) { for (ColumnFamilyStore store : cfs.concatWithIndexes()) { store.disableAutoCompaction(); } } } if (CacheService.instance.keyCache.size() > 0) logger.info("completed pre-loading ({} keys) key cache.", CacheService.instance.keyCache.size()); if (CacheService.instance.rowCache.size() > 0) logger.info("completed pre-loading ({} keys) row cache.", CacheService.instance.rowCache.size()); try { GCInspector.register(); } catch (Throwable t) { JVMStabilityInspector.inspectThrowable(t); logger.warn("Unable to start GCInspector (currently only supported on the Sun JVM)"); } // replay the log if necessary try { CommitLog.instance.recover(); } catch (IOException e) { throw new RuntimeException(e); } // enable auto compaction for (Keyspace keyspace : Keyspace.all()) { for (ColumnFamilyStore cfs : keyspace.getColumnFamilyStores()) { for (final ColumnFamilyStore store : cfs.concatWithIndexes()) { if (store.getCompactionStrategy().shouldBeEnabled()) store.enableAutoCompaction(); } } } // start compactions in five minutes (if no flushes have occurred by then to do so) Runnable runnable = new Runnable() { public void run() { for (Keyspace keyspaceName : Keyspace.all()) { for (ColumnFamilyStore cf : keyspaceName.getColumnFamilyStores()) { for (ColumnFamilyStore store : cf.concatWithIndexes()) CompactionManager.instance.submitBackground(store); } } } }; ScheduledExecutors.optionalTasks.schedule(runnable, 5, TimeUnit.MINUTES); SystemKeyspace.finishStartup(); // start server internals StorageService.instance.registerDaemon(this); try { StorageService.instance.initServer(); } catch (ConfigurationException e) { logger.error("Fatal configuration error", e); System.err.println(e.getMessage() + "\nFatal configuration error; unable to start server. See log for stacktrace."); System.exit(1); } Mx4jTool.maybeLoad(); // Metrics String metricsReporterConfigFile = System.getProperty("cassandra.metricsReporterConfigFile"); if (metricsReporterConfigFile != null) { logger.info("Trying to load metrics-reporter-config from file: {}", metricsReporterConfigFile); try { String reportFileLocation = CassandraDaemon.class.getClassLoader().getResource(metricsReporterConfigFile).getFile(); ReporterConfig.loadFromFile(reportFileLocation).enableAll(); } catch (Exception e) { logger.warn("Failed to load metrics-reporter-config, metric sinks will not be activated", e); } } if (!FBUtilities.getBroadcastAddress().equals(InetAddress.getLoopbackAddress())) waitForGossipToSettle(); // schedule periodic dumps of table size estimates into SystemKeyspace.SIZE_ESTIMATES_CF // set cassandra.size_recorder_interval to 0 to disable int sizeRecorderInterval = Integer.getInteger("cassandra.size_recorder_interval", 5 * 60); if (sizeRecorderInterval > 0) ScheduledExecutors.optionalTasks.scheduleWithFixedDelay(SizeEstimatesRecorder.instance, 30, sizeRecorderInterval, TimeUnit.SECONDS); // Thrift InetAddress rpcAddr = DatabaseDescriptor.getRpcAddress(); int rpcPort = DatabaseDescriptor.getRpcPort(); int listenBacklog = DatabaseDescriptor.getRpcListenBacklog(); thriftServer = new ThriftServer(rpcAddr, rpcPort, listenBacklog); // Native transport InetAddress nativeAddr = DatabaseDescriptor.getRpcAddress(); int nativePort = DatabaseDescriptor.getNativeTransportPort(); nativeServer = new org.apache.cassandra.transport.Server(nativeAddr, nativePort); } /** * Initialize the Cassandra Daemon based on the given <a * href="http://commons.apache.org/daemon/jsvc.html">Commons * Daemon</a>-specific arguments. To clarify, this is a hook for JSVC. * * @param arguments * the arguments passed in from JSVC * @throws IOException */ public void init(String[] arguments) throws IOException { setup(); } /** * Start the Cassandra Daemon, assuming that it has already been * initialized via {@link #init(String[])} * * Hook for JSVC */ public void start() { String nativeFlag = System.getProperty("cassandra.start_native_transport"); if ((nativeFlag != null && Boolean.parseBoolean(nativeFlag)) || (nativeFlag == null && DatabaseDescriptor.startNativeTransport())) nativeServer.start(); else logger.info("Not starting native transport as requested. Use JMX (StorageService->startNativeTransport()) or nodetool (enablebinary) to start it"); String rpcFlag = System.getProperty("cassandra.start_rpc"); if ((rpcFlag != null && Boolean.parseBoolean(rpcFlag)) || (rpcFlag == null && DatabaseDescriptor.startRpc())) thriftServer.start(); else logger.info("Not starting RPC server as requested. Use JMX (StorageService->startRPCServer()) or nodetool (enablethrift) to start it"); } /** * Stop the daemon, ideally in an idempotent manner. * * Hook for JSVC / Procrun */ public void stop() { // On linux, this doesn't entirely shut down Cassandra, just the RPC server. // jsvc takes care of taking the rest down logger.info("Cassandra shutting down..."); thriftServer.stop(); nativeServer.stop(); // On windows, we need to stop the entire system as prunsrv doesn't have the jsvc hooks // We rely on the shutdown hook to drain the node if (FBUtilities.isWindows()) System.exit(0); if (jmxServer != null) { try { jmxServer.stop(); } catch (IOException e) { logger.error("Error shutting down local JMX server: ", e); } } } /** * Clean up all resources obtained during the lifetime of the daemon. This * is a hook for JSVC. */ public void destroy() {} /** * A convenience method to initialize and start the daemon in one shot. */ public void activate() { String pidFile = System.getProperty("cassandra-pidfile"); try { try { MBeanServer mbs = ManagementFactory.getPlatformMBeanServer(); mbs.registerMBean(new StandardMBean(new NativeAccess(), NativeAccessMBean.class), new ObjectName(MBEAN_NAME)); } catch (Exception e) { logger.error("error registering MBean {}", MBEAN_NAME, e); //Allow the server to start even if the bean can't be registered } setup(); if (pidFile != null) { new File(pidFile).deleteOnExit(); } if (System.getProperty("cassandra-foreground") == null) { System.out.close(); System.err.close(); } start(); } catch (Throwable e) { logger.error("Exception encountered during startup", e); // try to warn user on stdout too, if we haven't already detached e.printStackTrace(); System.out.println("Exception encountered during startup: " + e.getMessage()); System.exit(3); } } /** * A convenience method to stop and destroy the daemon in one shot. */ public void deactivate() { stop(); destroy(); } private void waitForGossipToSettle() { int forceAfter = Integer.getInteger("cassandra.skip_wait_for_gossip_to_settle", -1); if (forceAfter == 0) { return; } final int GOSSIP_SETTLE_MIN_WAIT_MS = 5000; final int GOSSIP_SETTLE_POLL_INTERVAL_MS = 1000; final int GOSSIP_SETTLE_POLL_SUCCESSES_REQUIRED = 3; logger.info("Waiting for gossip to settle before accepting client requests..."); Uninterruptibles.sleepUninterruptibly(GOSSIP_SETTLE_MIN_WAIT_MS, TimeUnit.MILLISECONDS); int totalPolls = 0; int numOkay = 0; JMXEnabledThreadPoolExecutor gossipStage = (JMXEnabledThreadPoolExecutor)StageManager.getStage(Stage.GOSSIP); while (numOkay < GOSSIP_SETTLE_POLL_SUCCESSES_REQUIRED) { Uninterruptibles.sleepUninterruptibly(GOSSIP_SETTLE_POLL_INTERVAL_MS, TimeUnit.MILLISECONDS); long completed = gossipStage.getCompletedTasks(); long active = gossipStage.getActiveCount(); long pending = gossipStage.getPendingTasks(); totalPolls++; if (active == 0 && pending == 0) { logger.debug("Gossip looks settled. CompletedTasks: {}", completed); numOkay++; } else { logger.info("Gossip not settled after {} polls. Gossip Stage active/pending/completed: {}/{}/{}", totalPolls, active, pending, completed); numOkay = 0; } if (forceAfter > 0 && totalPolls > forceAfter) { logger.warn("Gossip not settled but startup forced by cassandra.skip_wait_for_gossip_to_settle. Gossip Stage total/active/pending/completed: {}/{}/{}/{}", totalPolls, active, pending, completed); break; } } if (totalPolls > GOSSIP_SETTLE_POLL_SUCCESSES_REQUIRED) logger.info("Gossip settled after {} extra polls; proceeding", totalPolls - GOSSIP_SETTLE_POLL_SUCCESSES_REQUIRED); else logger.info("No gossip backlog; proceeding"); } public static void stop(String[] args) { instance.deactivate(); } public static void main(String[] args) { instance.activate(); } static class NativeAccess implements NativeAccessMBean { public boolean isAvailable() { return CLibrary.jnaAvailable(); } public boolean isMemoryLockable() { return CLibrary.jnaMemoryLockable(); } } public interface Server { /** * Start the server. * This method shoud be able to restart a server stopped through stop(). * Should throw a RuntimeException if the server cannot be started */ public void start(); /** * Stop the server. * This method should be able to stop server started through start(). * Should throw a RuntimeException if the server cannot be stopped */ public void stop(); /** * Returns whether the server is currently running. */ public boolean isRunning(); } }