/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * http://www.apache.org/licenses/LICENSE-2.0 * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package tachyon.worker; import java.io.IOException; import java.net.InetSocketAddress; import java.net.URI; import java.net.URISyntaxException; import java.net.UnknownHostException; import org.apache.log4j.Logger; import org.apache.thrift.TException; import org.apache.thrift.server.TServer; import org.apache.thrift.server.TThreadedSelectorServer; import org.apache.thrift.transport.TNonblockingServerSocket; import org.apache.thrift.transport.TTransportException; import tachyon.Constants; import tachyon.Version; import tachyon.conf.WorkerConf; import tachyon.thrift.BlockInfoException; import tachyon.thrift.Command; import tachyon.thrift.WorkerService; import tachyon.util.CommonUtils; import tachyon.util.NetworkUtils; /** * Entry point for a worker daemon. */ public class TachyonWorker implements Runnable { private static final Logger LOG = Logger.getLogger(Constants.LOGGER_TYPE); /** * Create a new TachyonWorker * * @param masterAddress * The TachyonMaster's address * @param workerAddress * This TachyonWorker's address * @param dataPort * This TachyonWorker's data server's port * @param selectorThreads * The number of selector threads of the worker's thrift server * @param acceptQueueSizePerThreads * The accept queue size per thread of the worker's thrift server * @param workerThreads * The number of threads of the worker's thrift server * @param localFolder * This TachyonWorker's local folder's path * @param spaceLimitBytes * The maximum memory space this TachyonWorker can use, in bytes * @return The new TachyonWorker */ public static synchronized TachyonWorker createWorker(InetSocketAddress masterAddress, InetSocketAddress workerAddress, int dataPort, int selectorThreads, int acceptQueueSizePerThreads, int workerThreads, String localFolder, long spaceLimitBytes) { return new TachyonWorker(masterAddress, workerAddress, dataPort, selectorThreads, acceptQueueSizePerThreads, workerThreads, localFolder, spaceLimitBytes); } /** * Create a new TachyonWorker * * @param masterAddress * The TachyonMaster's address. e.g., localhost:19998 * @param workerAddress * This TachyonWorker's address. e.g., localhost:29998 * @param dataPort * This TachyonWorker's data server's port * @param selectorThreads * The number of selector threads of the worker's thrift server * @param acceptQueueSizePerThreads * The accept queue size per thread of the worker's thrift server * @param workerThreads * The number of threads of the worker's thrift server * @param localFolder * This TachyonWorker's local folder's path * @param spaceLimitBytes * The maximum memory space this TachyonWorker can use, in bytes * @return The new TachyonWorker */ public static synchronized TachyonWorker createWorker(String masterAddress, String workerAddress, int dataPort, int selectorThreads, int acceptQueueSizePerThreads, int workerThreads, String localFolder, long spaceLimitBytes) { String[] address = masterAddress.split(":"); InetSocketAddress master = new InetSocketAddress(address[0], Integer.parseInt(address[1])); address = workerAddress.split(":"); InetSocketAddress worker = new InetSocketAddress(address[0], Integer.parseInt(address[1])); return new TachyonWorker(master, worker, dataPort, selectorThreads, acceptQueueSizePerThreads, workerThreads, localFolder, spaceLimitBytes); } private static String getMasterLocation(String[] args) { WorkerConf wConf = WorkerConf.get(); String confFileMasterLoc = wConf.MASTER_HOSTNAME + ":" + wConf.MASTER_PORT; String masterLocation; if (args.length < 2) { masterLocation = confFileMasterLoc; } else { masterLocation = args[1]; if (masterLocation.indexOf(":") == -1) { masterLocation += ":" + wConf.MASTER_PORT; } if (!masterLocation.equals(confFileMasterLoc)) { LOG.warn("Master Address in configuration file(" + confFileMasterLoc + ") is different " + "from the command line one(" + masterLocation + ")."); } } return masterLocation; } public static void main(String[] args) throws UnknownHostException { if (args.length < 1 || args.length > 2) { LOG.info("Usage: java -cp target/tachyon-" + Version.VERSION + "-jar-with-dependencies.jar " + "tachyon.Worker <WorkerHost> [<MasterHost:Port>]"); System.exit(-1); } WorkerConf wConf = WorkerConf.get(); String resolvedWorkerHost; try { resolvedWorkerHost = NetworkUtils.resolveHostName(args[0]); } catch (UnknownHostException e) { resolvedWorkerHost = args[0]; } TachyonWorker worker = TachyonWorker.createWorker(getMasterLocation(args), resolvedWorkerHost + ":" + wConf.PORT, wConf.DATA_PORT, wConf.SELECTOR_THREADS, wConf.QUEUE_SIZE_PER_SELECTOR, wConf.SERVER_THREADS, wConf.DATA_FOLDER, wConf.MEMORY_SIZE); try { worker.start(); } catch (Exception e) { LOG.error("Uncaught exception terminating worker", e); throw new RuntimeException(e); } } private final InetSocketAddress MasterAddress; private final InetSocketAddress WorkerAddress; private TServer mServer; private TNonblockingServerSocket mServerTNonblockingServerSocket; private WorkerStorage mWorkerStorage; private WorkerServiceHandler mWorkerServiceHandler; private DataServer mDataServer; private Thread mDataServerThread; private Thread mHeartbeatThread; private volatile boolean mStop = false; /** * @param masterAddress * The TachyonMaster's address. * @param workerAddress * This TachyonWorker's address. * @param dataPort * This TachyonWorker's data server's port * @param selectorThreads * The number of selector threads of the worker's thrift server * @param acceptQueueSizePerThreads * The accept queue size per thread of the worker's thrift server * @param workerThreads * The number of threads of the worker's thrift server * @param dataFolder * This TachyonWorker's local folder's path * @param memoryCapacityBytes * The maximum memory space this TachyonWorker can use, in bytes */ private TachyonWorker(InetSocketAddress masterAddress, InetSocketAddress workerAddress, int dataPort, int selectorThreads, int acceptQueueSizePerThreads, int workerThreads, String dataFolder, long memoryCapacityBytes) { MasterAddress = masterAddress; WorkerAddress = workerAddress; mWorkerStorage = new WorkerStorage(MasterAddress, WorkerAddress, dataFolder, memoryCapacityBytes); LOG.info("before WorkerServiceHandler"); mWorkerServiceHandler = new WorkerServiceHandler(mWorkerStorage); LOG.info("after WorkerServiceHandler"); try { mDataServer = new DataServer(new URI("rdma://" + workerAddress.getHostName() + ":" + dataPort), mWorkerStorage); } catch (URISyntaxException e) { LOG.info("can not init DataServer"); CommonUtils.runtimeException(e); } mDataServerThread = new Thread(mDataServer); mHeartbeatThread = new Thread(this); try { LOG.info("The worker server tries to start @ " + workerAddress); WorkerService.Processor<WorkerServiceHandler> processor = new WorkerService.Processor<WorkerServiceHandler>(mWorkerServiceHandler); mServerTNonblockingServerSocket = new TNonblockingServerSocket(workerAddress); mServer = new TThreadedSelectorServer(new TThreadedSelectorServer.Args( mServerTNonblockingServerSocket).processor(processor) .selectorThreads(selectorThreads) .acceptQueueSizePerThread(acceptQueueSizePerThreads).workerThreads(workerThreads)); } catch (TTransportException e) { LOG.error(e.getMessage(), e); CommonUtils.runtimeException(e); } } /** * Get the worker server handler class. This is for unit test only. * * @return the WorkerServiceHandler */ WorkerServiceHandler getWorkerServiceHandler() { return mWorkerServiceHandler; } @Override public void run() { long lastHeartbeatMs = System.currentTimeMillis(); Command cmd = null; while (!mStop) { long diff = System.currentTimeMillis() - lastHeartbeatMs; if (diff < WorkerConf.get().TO_MASTER_HEARTBEAT_INTERVAL_MS) { LOG.debug("Heartbeat process takes " + diff + " ms."); CommonUtils.sleepMs(LOG, WorkerConf.get().TO_MASTER_HEARTBEAT_INTERVAL_MS - diff); } else { LOG.error("Heartbeat process takes " + diff + " ms."); } try { cmd = mWorkerStorage.heartbeat(); lastHeartbeatMs = System.currentTimeMillis(); } catch (BlockInfoException e) { LOG.error(e.getMessage(), e); } catch (TException e) { LOG.error(e.getMessage(), e); try { mWorkerStorage.resetMasterClient(); } catch (TException e2) { LOG.error("Received exception while attempting to reset client", e2); } CommonUtils.sleepMs(LOG, Constants.SECOND_MS); cmd = null; if (System.currentTimeMillis() - lastHeartbeatMs >= WorkerConf.get().HEARTBEAT_TIMEOUT_MS) { CommonUtils.runtimeException("Timebeat timeout " + (System.currentTimeMillis() - lastHeartbeatMs) + "ms"); } } if (cmd != null) { switch (cmd.mCommandType) { case Unknown: LOG.error("Unknown command: " + cmd); break; case Nothing: LOG.debug("Nothing command: " + cmd); break; case Register: LOG.info("Register command: " + cmd); mWorkerStorage.register(); break; case Free: mWorkerStorage.freeBlocks(cmd.mData); LOG.info("Free command: " + cmd); break; case Delete: LOG.info("Delete command: " + cmd); break; default: CommonUtils.runtimeException("Un-recognized command from master " + cmd.toString()); } } mWorkerStorage.checkStatus(); } } /** * Start the data server thread and heartbeat thread of this TachyonWorker. */ public void start() { mDataServerThread.start(); mHeartbeatThread.start(); LOG.info("The worker server started @ " + WorkerAddress); mServer.serve(); LOG.info("The worker server ends @ " + WorkerAddress); } /** * Stop this TachyonWorker. Stop all the threads belong to this TachyonWorker. * * @throws IOException * @throws InterruptedException */ public void stop() throws IOException, InterruptedException { mStop = true; mWorkerStorage.stop(); mDataServer.close(); mServer.stop(); mServerTNonblockingServerSocket.close(); while (!mDataServer.isClosed() || mServer.isServing() || mHeartbeatThread.isAlive()) { // TODO The reason to stop and close again is due to some issues in Thrift. mServer.stop(); mServerTNonblockingServerSocket.close(); CommonUtils.sleepMs(null, 100); } mHeartbeatThread.join(); } }