/* * Copyright (c) 2008-2017, Hazelcast, Inc. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.hazelcast.spi.impl.operationservice.impl; import com.hazelcast.core.Member; import com.hazelcast.core.MemberLeftException; import com.hazelcast.instance.MemberImpl; import com.hazelcast.internal.cluster.ClusterService; import com.hazelcast.internal.metrics.MetricsProvider; import com.hazelcast.internal.metrics.MetricsRegistry; import com.hazelcast.internal.metrics.Probe; import com.hazelcast.internal.serialization.InternalSerializationService; import com.hazelcast.internal.util.counters.SwCounter; import com.hazelcast.logging.ILogger; import com.hazelcast.nio.Address; import com.hazelcast.nio.Packet; import com.hazelcast.spi.CallsPerMember; import com.hazelcast.spi.CanCancelOperations; import com.hazelcast.spi.LiveOperationsTracker; import com.hazelcast.spi.OperationControl; import com.hazelcast.spi.impl.NodeEngineImpl; import com.hazelcast.spi.impl.PacketHandler; import com.hazelcast.spi.impl.operationexecutor.OperationHostileThread; import com.hazelcast.spi.impl.servicemanager.ServiceManager; import com.hazelcast.spi.properties.HazelcastProperties; import com.hazelcast.util.Clock; import java.util.Map.Entry; import java.util.Set; import java.util.concurrent.ConcurrentHashMap; import java.util.concurrent.ConcurrentMap; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.ScheduledThreadPoolExecutor; import java.util.concurrent.ThreadFactory; import java.util.concurrent.atomic.AtomicLong; import java.util.logging.Level; import static com.hazelcast.instance.OutOfMemoryErrorDispatcher.inspectOutOfMemoryError; import static com.hazelcast.internal.metrics.ProbeLevel.MANDATORY; import static com.hazelcast.internal.util.counters.SwCounter.newSwCounter; import static com.hazelcast.nio.Packet.FLAG_OP_CONTROL; import static com.hazelcast.nio.Packet.FLAG_URGENT; import static com.hazelcast.spi.properties.GroupProperty.OPERATION_BACKUP_TIMEOUT_MILLIS; import static com.hazelcast.spi.properties.GroupProperty.OPERATION_CALL_TIMEOUT_MILLIS; import static com.hazelcast.util.ThreadUtil.createThreadName; import static java.util.concurrent.TimeUnit.MILLISECONDS; import static java.util.concurrent.TimeUnit.SECONDS; import static java.util.logging.Level.FINE; import static java.util.logging.Level.INFO; /** * The InvocationMonitor monitors all pending invocations and determines if there are any problems like timeouts. It uses the * {@link InvocationRegistry} to access the pending invocations. * * The {@link InvocationMonitor} sends Operation heartbeats to the other member informing them about if the operation is still * alive. Also if no operations are running, it will still send a period packet to each member. This is a different system than * the regular heartbeats, but it has similar characteristics. The reason the packet is always send is for debugging purposes. */ class InvocationMonitor implements PacketHandler, MetricsProvider { private static final long ON_MEMBER_LEFT_DELAY_MILLIS = 1111; private static final int HEARTBEAT_CALL_TIMEOUT_RATIO = 4; private static final long MAX_DELAY_MILLIS = SECONDS.toMillis(10); private final NodeEngineImpl nodeEngine; private final InternalSerializationService serializationService; private final ServiceManager serviceManager; private final InvocationRegistry invocationRegistry; private final ILogger logger; private final ScheduledExecutorService scheduler; private final Address thisAddress; private final ConcurrentMap<Address, AtomicLong> lastHeartbeatPerMember = new ConcurrentHashMap<Address, AtomicLong>(); @Probe(name = "backupTimeouts", level = MANDATORY) private final SwCounter backupTimeoutsCount = newSwCounter(); @Probe(name = "normalTimeouts", level = MANDATORY) private final SwCounter normalTimeoutsCount = newSwCounter(); @Probe private final SwCounter heartbeatPacketsReceived = newSwCounter(); @Probe private final SwCounter heartbeatPacketsSend = newSwCounter(); @Probe private final SwCounter delayedExecutionCount = newSwCounter(); @Probe private final long backupTimeoutMillis; @Probe private final long invocationTimeoutMillis; @Probe private final long heartbeatBroadcastPeriodMillis; @Probe private final long invocationScanPeriodMillis = SECONDS.toMillis(1); //todo: we need to get rid of the nodeEngine dependency InvocationMonitor(NodeEngineImpl nodeEngine, Address thisAddress, HazelcastProperties properties, InvocationRegistry invocationRegistry, ILogger logger, InternalSerializationService serializationService, ServiceManager serviceManager) { this.nodeEngine = nodeEngine; this.thisAddress = thisAddress; this.serializationService = serializationService; this.serviceManager = serviceManager; this.invocationRegistry = invocationRegistry; this.logger = logger; this.backupTimeoutMillis = backupTimeoutMillis(properties); this.invocationTimeoutMillis = invocationTimeoutMillis(properties); this.heartbeatBroadcastPeriodMillis = heartbeatBroadcastPeriodMillis(properties); this.scheduler = newScheduler(nodeEngine.getHazelcastInstance().getName()); } @Override public void provideMetrics(MetricsRegistry registry) { registry.scanAndRegister(this, "operation.invocations"); } private static ScheduledExecutorService newScheduler(final String hzName) { // the scheduler is configured with a single thread; so prevent concurrency problems. return new ScheduledThreadPoolExecutor(1, new ThreadFactory() { @Override public Thread newThread(Runnable r) { return new InvocationMonitorThread(r, hzName); } }); } private long invocationTimeoutMillis(HazelcastProperties properties) { long heartbeatTimeoutMillis = properties.getMillis(OPERATION_CALL_TIMEOUT_MILLIS); if (logger.isFinestEnabled()) { logger.finest("Operation invocation timeout is " + heartbeatTimeoutMillis + " ms"); } return heartbeatTimeoutMillis; } private long backupTimeoutMillis(HazelcastProperties properties) { long backupTimeoutMillis = properties.getMillis(OPERATION_BACKUP_TIMEOUT_MILLIS); if (logger.isFinestEnabled()) { logger.finest("Operation backup timeout is " + backupTimeoutMillis + " ms"); } return backupTimeoutMillis; } private long heartbeatBroadcastPeriodMillis(HazelcastProperties properties) { // The heartbeat period is configured to be 1/4 of the call timeout. So with default settings, every 15 seconds, // every member in the cluster, will notify every other member in the cluster about all calls that are pending. // This is done quite efficiently; imagine at any given moment one node has 1000 concurrent calls pending on another // node; then every 15 seconds an 8 KByte packet is send to the other member. // Another advantage is that multiple heartbeat timeouts are allowed to get lost; without leading to premature // abortion of the invocation. int callTimeoutMs = properties.getInteger(OPERATION_CALL_TIMEOUT_MILLIS); long periodMs = Math.max(SECONDS.toMillis(1), callTimeoutMs / HEARTBEAT_CALL_TIMEOUT_RATIO); if (logger.isFinestEnabled()) { logger.finest("Operation heartbeat period is " + periodMs + " ms"); } return periodMs; } void onMemberLeft(MemberImpl member) { // postpone notifying invocations since real response may arrive in the mean time. scheduler.schedule(new OnMemberLeftTask(member), ON_MEMBER_LEFT_DELAY_MILLIS, MILLISECONDS); } void execute(Runnable runnable) { scheduler.execute(runnable); } void schedule(Runnable command, long delayMillis) { scheduler.schedule(command, delayMillis, MILLISECONDS); } @Override public void handle(Packet packet) { scheduler.execute(new ProcessOperationControlTask(packet)); } public void start() { MonitorInvocationsTask monitorInvocationsTask = new MonitorInvocationsTask(invocationScanPeriodMillis); scheduler.scheduleAtFixedRate( monitorInvocationsTask, 0, monitorInvocationsTask.periodMillis, MILLISECONDS); BroadcastOperationControlTask broadcastOperationControlTask = new BroadcastOperationControlTask(heartbeatBroadcastPeriodMillis); scheduler.scheduleAtFixedRate( broadcastOperationControlTask, 0, broadcastOperationControlTask.periodMillis, MILLISECONDS); } public void shutdown() { scheduler.shutdown(); } public void awaitTermination(long timeoutMillis) throws InterruptedException { scheduler.awaitTermination(timeoutMillis, MILLISECONDS); } long getLastMemberHeartbeatMillis(Address memberAddress) { if (memberAddress == null) { return 0; } AtomicLong heartbeat = lastHeartbeatPerMember.get(memberAddress); return heartbeat == null ? 0 : heartbeat.get(); } private abstract class MonitorTask implements Runnable { @Override public void run() { try { run0(); } catch (Throwable t) { inspectOutOfMemoryError(t); logger.severe(t); } } protected abstract void run0(); } abstract class FixedRateMonitorTask implements Runnable { final long periodMillis; private long expectedNextMillis = System.currentTimeMillis(); FixedRateMonitorTask(long periodMillis) { this.periodMillis = periodMillis; } @Override public void run() { long currentTimeMillis = System.currentTimeMillis(); try { if (expectedNextMillis + MAX_DELAY_MILLIS < currentTimeMillis) { logger.warning(getClass().getSimpleName() + " delayed " + (currentTimeMillis - expectedNextMillis) + " ms"); delayedExecutionCount.inc(); } run0(); } catch (Throwable t) { // we want to catch the exception; if we don't and the executor runs into it, it will abort the task. inspectOutOfMemoryError(t); logger.severe(t); } finally { expectedNextMillis = currentTimeMillis + periodMillis; } } protected abstract void run0(); } /** * The MonitorTask iterates over all pending invocations and sees what needs to be done. Currently its tasks are: * - getting rid of duplicates * - checking for heartbeat timeout * - checking for backup timeout * * In the future additional checks can be added here like checking if a retry is needed etc. */ private final class MonitorInvocationsTask extends FixedRateMonitorTask { private MonitorInvocationsTask(long periodMillis) { super(periodMillis); } @Override public void run0() { if (logger.isFinestEnabled()) { logger.finest("Scanning all invocations"); } if (invocationRegistry.size() == 0) { return; } int backupTimeouts = 0; int normalTimeouts = 0; int invocationCount = 0; for (Entry<Long, Invocation> e : invocationRegistry.entrySet()) { invocationCount++; Invocation inv = e.getValue(); try { if (inv.detectAndHandleTimeout(invocationTimeoutMillis)) { normalTimeouts++; } else if (inv.detectAndHandleBackupTimeout(backupTimeoutMillis)) { backupTimeouts++; } } catch (Throwable t) { inspectOutOfMemoryError(t); logger.severe("Failed to check invocation:" + inv, t); } } backupTimeoutsCount.inc(backupTimeouts); normalTimeoutsCount.inc(normalTimeouts); log(invocationCount, backupTimeouts, normalTimeouts); } private void log(int invocationCount, int backupTimeouts, int invocationTimeouts) { Level logLevel = null; if (backupTimeouts > 0 || invocationTimeouts > 0) { logLevel = INFO; } else if (logger.isFineEnabled()) { logLevel = FINE; } if (logLevel != null) { logger.log(logLevel, "Invocations:" + invocationCount + " timeouts:" + invocationTimeouts + " backup-timeouts:" + backupTimeouts); } } } private final class OnMemberLeftTask extends MonitorTask { private final MemberImpl leftMember; private OnMemberLeftTask(MemberImpl leftMember) { this.leftMember = leftMember; } @Override public void run0() { lastHeartbeatPerMember.remove(leftMember.getAddress()); for (Invocation invocation : invocationRegistry) { if (hasMemberLeft(invocation)) { invocation.notifyError(new MemberLeftException(leftMember)); } } } private boolean hasMemberLeft(Invocation invocation) { MemberImpl targetMember = invocation.targetMember; if (targetMember == null) { Address invTarget = invocation.invTarget; return leftMember.getAddress().equals(invTarget); } else { return leftMember.getUuid().equals(targetMember.getUuid()); } } } private final class ProcessOperationControlTask extends MonitorTask { // either OperationControl or Packet that contains it private final Object payload; private final Address sender; ProcessOperationControlTask(OperationControl payload) { this.payload = payload; this.sender = thisAddress; } ProcessOperationControlTask(Packet payload) { this.payload = payload; this.sender = payload.getConn().getEndPoint(); } @Override public void run0() { heartbeatPacketsReceived.inc(); long timeMillis = Clock.currentTimeMillis(); updateMemberHeartbeat(timeMillis); final OperationControl opControl = serializationService.toObject(payload); for (long callId : opControl.runningOperations()) { updateHeartbeat(callId, timeMillis); } for (CanCancelOperations service : serviceManager.getServices(CanCancelOperations.class)) { final long[] opsToCancel = opControl.operationsToCancel(); for (int i = 0; i < opsToCancel.length; i++) { if (opsToCancel[i] != -1 && service.cancelOperation(sender, opsToCancel[i])) { opsToCancel[i] = -1; } } } } private void updateMemberHeartbeat(long timeMillis) { AtomicLong lastMemberHeartbeat = lastHeartbeatPerMember.get(sender); if (lastMemberHeartbeat == null) { lastMemberHeartbeat = new AtomicLong(); lastHeartbeatPerMember.put(sender, lastMemberHeartbeat); } lastMemberHeartbeat.set(timeMillis); } private void updateHeartbeat(long callId, long timeMillis) { Invocation invocation = invocationRegistry.get(callId); if (invocation == null) { // the invocation doesn't exist anymore, so we are done. return; } invocation.lastHeartbeatMillis = timeMillis; } } private final class BroadcastOperationControlTask extends FixedRateMonitorTask { private final CallsPerMember calls = new CallsPerMember(thisAddress); private BroadcastOperationControlTask(long periodMillis) { super(periodMillis); } @Override public void run0() { CallsPerMember calls = populate(); Set<Address> addresses = calls.addresses(); if (logger.isFinestEnabled()) { logger.finest("Broadcasting operation control packets to: " + addresses.size() + " members"); } for (Address address : addresses) { sendOpControlPacket(address, calls.toOpControl(address)); } } private CallsPerMember populate() { calls.clear(); ClusterService clusterService = nodeEngine.getClusterService(); calls.ensureMember(thisAddress); for (Member member : clusterService.getMembers()) { calls.ensureMember(member.getAddress()); } for (LiveOperationsTracker tracker : serviceManager.getServices(LiveOperationsTracker.class)) { tracker.populate(calls); } for (Invocation invocation : invocationRegistry) { if (invocation.future.isCancelled()) { calls.addOpToCancel(invocation.invTarget, invocation.op.getCallId()); } } return calls; } private void sendOpControlPacket(Address address, OperationControl opControl) { heartbeatPacketsSend.inc(); if (address.equals(thisAddress)) { scheduler.execute(new ProcessOperationControlTask(opControl)); } else { Packet packet = new Packet(serializationService.toBytes(opControl)) .setPacketType(Packet.Type.OPERATION) .raiseFlags(FLAG_OP_CONTROL | FLAG_URGENT); nodeEngine.getNode().getConnectionManager().transmit(packet, address); } } } /** * This class needs to implement the {@link OperationHostileThread} interface to make sure that the OperationExecutor * is not going to schedule any operations on this thread due to retry. */ private static final class InvocationMonitorThread extends Thread implements OperationHostileThread { private InvocationMonitorThread(Runnable task, String hzName) { super(task, createThreadName(hzName, "InvocationMonitorThread")); } } }