/*
 * Copyright 2014 University of Southern California
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package edu.usc.pgroup.floe.flake.statemanager;

import com.codahale.metrics.Counter;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.SlidingTimeWindowReservoir;
import com.codahale.metrics.Snapshot;
import edu.usc.pgroup.floe.config.ConfigProperties;
import edu.usc.pgroup.floe.config.FloeConfig;
import edu.usc.pgroup.floe.flake.FlakeComponent;
import edu.usc.pgroup.floe.flake.PelletExecutor;
import edu.usc.pgroup.floe.flake.QueueLenMonitor;
import edu.usc.pgroup.floe.flake.messaging.MsgReceiverComponent;
import edu.usc.pgroup.floe.utils.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.zeromq.ZMQ;

import java.util.concurrent.TimeUnit;

/**
 * Flake component that periodically asks the state manager for a
 * checkpoint and pushes it, together with a termination flag and a
 * load-balance request flag, over a ZMQ PUSH socket.
 *
 * @author kumbhare
 */
public class StateCheckpointComponent extends FlakeComponent {

    /**
     * the global logger instance.
     */
    private static final Logger LOGGER =
            LoggerFactory.getLogger(StateCheckpointComponent.class);

    /**
     * Length (in seconds) of the sliding window backing the queue length
     * histogram.
     */
    private static final int QUEUE_HISTOGRAM_WINDOW_SECS = 30;

    /**
     * 95th percentile queue length above which load balancing is
     * requested.
     */
    private static final int QUEUE_LEN_THRESHOLD = 10;

    /**
     * Minimum time (in seconds) between two load balancing decisions.
     */
    private static final int STABLE_PERIOD_SECS = 30;

    /**
     * Checkpointing period, used as the poll timeout of the main loop.
     * Computed as the configured period multiplied by
     * {@code Utils.Constants.MILLI} (presumably seconds to milliseconds;
     * verify against the configuration documentation).
     */
    private final int checkpointPeriod;

    /**
     * State manager instance.
     */
    private final StateManagerComponent stateManager;

    /**
     * Port to bind the socket to send periodic state checkpoints.
     */
    private final int port;

    /**
     * Constructor.
     * @param metricRegistry Metrics registry used to log various metrics.
     * @param flakeId Flake's id to which this component belongs.
     * @param componentName Unique name of the component.
     * @param ctx Shared zmq context.
     * @param stateMgr State manager component.
     * @param stateChkptPort port to use for connections to checkpoint state.
     */
    public StateCheckpointComponent(final MetricRegistry metricRegistry,
                                    final String flakeId,
                                    final String componentName,
                                    final ZMQ.Context ctx,
                                    final StateManagerComponent stateMgr,
                                    final int stateChkptPort) {
        super(metricRegistry, flakeId, componentName, ctx);
        this.stateManager = stateMgr;
        this.port = stateChkptPort;
        this.checkpointPeriod = FloeConfig.getConfig().getInt(
                ConfigProperties.FLAKE_STATE_CHECKPOINT_PERIOD)
                * Utils.Constants.MILLI;
    }

    /**
     * Runs the checkpointing loop until a terminate signal is received or
     * the current thread is interrupted.
     *
     * Each iteration waits on the terminate socket for at most one
     * checkpoint period, logs queue length percentiles, decides whether
     * load balancing should be requested, and then sends a four-frame
     * message: flake id, "done" flag, "request load balance" flag and the
     * checkpoint bytes. The iteration that observes the terminate signal
     * still sends one final checkpoint with the "done" flag set.
     *
     * The socket and the queue length monitor are released even if the
     * loop fails with an exception.
     *
     * @param terminateSignalReceiver terminate signal receiver.
     */
    @Override
    protected final void runComponent(
            final ZMQ.Socket terminateSignalReceiver) {
        notifyStarted(true);

        ZMQ.Poller pollerItems = new ZMQ.Poller(1);
        pollerItems.register(terminateSignalReceiver, ZMQ.Poller.POLLIN);

        // ZMQ socket used to send the state checkpoints to the backups.
        ZMQ.Socket stateSoc = getContext().socket(ZMQ.PUSH);
        QueueLenMonitor monitor = null;

        try {
            String bindAddress = Utils.Constants.FLAKE_STATE_PUB_SOCK + port;
            LOGGER.info("binding STATE CHECKPOINTER to socket at: {}",
                    bindAddress);
            stateSoc.bind(bindAddress);

            // Diagnostic dump only, hence debug level.
            LOGGER.debug("Hists: {}", getMetricRegistry().getHistograms());

            Counter queLen = getMetricRegistry().counter(
                    MetricRegistry.name(MsgReceiverComponent.class,
                            "queue.len"));

            // NOTE(review): MetricRegistry#register is documented to
            // reject an already registered name; confirm this component
            // is never started twice against the same registry.
            Histogram qhist = getMetricRegistry().register(
                    MetricRegistry.name(QueueLenMonitor.class,
                            "q.len.histo"),
                    new Histogram(new SlidingTimeWindowReservoir(
                            QUEUE_HISTOGRAM_WINDOW_SECS, TimeUnit.SECONDS)));

            monitor = new QueueLenMonitor(getMetricRegistry(),
                    queLen, qhist);
            monitor.start();

            Meter msgProcessedMeter = getMetricRegistry().meter(
                    MetricRegistry.name(PelletExecutor.class, "processed"));

            // NOTE(review): notifyStarted(true) is also invoked at the top
            // of this method, before the socket is bound; confirm that
            // both notifications are intended.
            notifyStarted(true);

            boolean done = false;
            long starttime = System.currentTimeMillis();

            while (!done && !Thread.currentThread().isInterrupted()) {
                // Doubles as the sleep between two checkpoints.
                pollerItems.poll(checkpointPeriod);
                if (pollerItems.pollin(0)) {
                    LOGGER.warn("Terminating state checkpointing");
                    terminateSignalReceiver.recv();
                    done = true;
                }

                Snapshot snp = qhist.getSnapshot();
                LOGGER.info(
                        "fid:{}; q 95->{}; 75->{}; 99->{}; msgs procd: {}",
                        getFid(),
                        snp.get95thPercentile(),
                        snp.get75thPercentile(),
                        snp.get99thPercentile(),
                        msgProcessedMeter.getOneMinuteRate());

                boolean reqLB = false;
                double queueLenP95 = snp.get95thPercentile();
                if (stableenough(starttime)) {
                    if (queueLenP95 > QUEUE_LEN_THRESHOLD) {
                        LOGGER.error("Initiating loadbalancing.");
                        reqLB = true;
                    }
                    // Restart the stability window after every decision,
                    // whether or not load balancing was requested.
                    starttime = System.currentTimeMillis();
                }

                LOGGER.info("Checkpointing State");
                byte[] checkpointdata = stateManager.checkpointState();

                stateSoc.sendMore(getFid());
                stateSoc.sendMore(Boolean.toString(done));
                stateSoc.sendMore(Boolean.toString(reqLB));
                stateSoc.send(checkpointdata, 0);
            }
        } finally {
            stateSoc.close();
            if (monitor != null) {
                monitor.interrupt();
            }
        }

        notifyStopped(true);
    }

    /**
     * Checks if enough time (currently 30 seconds) has passed.
     * @param starttime start time (in milliseconds, as returned by
     *                  {@link System#currentTimeMillis()}) since when the
     *                  time must be checked.
     * @return true if at least 30 seconds have passed since starttime.
     */
    private boolean stableenough(final long starttime) {
        long elapsedMillis = System.currentTimeMillis() - starttime;
        return elapsedMillis >= TimeUnit.SECONDS.toMillis(STABLE_PERIOD_SECS);
    }
}