/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.controller;
import org.apache.commons.lang3.StringUtils;
import org.apache.nifi.authorization.AbstractPolicyBasedAuthorizer;
import org.apache.nifi.authorization.Authorizer;
import org.apache.nifi.bundle.Bundle;
import org.apache.nifi.cluster.ConnectionException;
import org.apache.nifi.cluster.coordination.ClusterCoordinator;
import org.apache.nifi.cluster.coordination.node.DisconnectionCode;
import org.apache.nifi.cluster.coordination.node.NodeConnectionState;
import org.apache.nifi.cluster.coordination.node.NodeConnectionStatus;
import org.apache.nifi.cluster.exception.NoClusterCoordinatorException;
import org.apache.nifi.cluster.protocol.ConnectionRequest;
import org.apache.nifi.cluster.protocol.ConnectionResponse;
import org.apache.nifi.cluster.protocol.DataFlow;
import org.apache.nifi.cluster.protocol.NodeIdentifier;
import org.apache.nifi.cluster.protocol.ProtocolException;
import org.apache.nifi.cluster.protocol.ProtocolHandler;
import org.apache.nifi.cluster.protocol.StandardDataFlow;
import org.apache.nifi.cluster.protocol.impl.NodeProtocolSenderListener;
import org.apache.nifi.cluster.protocol.message.ConnectionRequestMessage;
import org.apache.nifi.cluster.protocol.message.DisconnectMessage;
import org.apache.nifi.cluster.protocol.message.FlowRequestMessage;
import org.apache.nifi.cluster.protocol.message.FlowResponseMessage;
import org.apache.nifi.cluster.protocol.message.ProtocolMessage;
import org.apache.nifi.cluster.protocol.message.ReconnectionRequestMessage;
import org.apache.nifi.cluster.protocol.message.ReconnectionResponseMessage;
import org.apache.nifi.components.state.Scope;
import org.apache.nifi.components.state.StateManager;
import org.apache.nifi.controller.serialization.FlowSerializationException;
import org.apache.nifi.controller.serialization.FlowSynchronizationException;
import org.apache.nifi.encrypt.StringEncryptor;
import org.apache.nifi.engine.FlowEngine;
import org.apache.nifi.events.BulletinFactory;
import org.apache.nifi.groups.ProcessGroup;
import org.apache.nifi.lifecycle.LifeCycleStartException;
import org.apache.nifi.logging.LogLevel;
import org.apache.nifi.nar.NarClassLoaders;
import org.apache.nifi.persistence.FlowConfigurationDAO;
import org.apache.nifi.persistence.StandardXMLFlowConfigurationDAO;
import org.apache.nifi.persistence.TemplateDeserializer;
import org.apache.nifi.reporting.Bulletin;
import org.apache.nifi.services.FlowService;
import org.apache.nifi.util.FormatUtils;
import org.apache.nifi.util.NiFiProperties;
import org.apache.nifi.util.file.FileUtils;
import org.apache.nifi.web.api.dto.TemplateDTO;
import org.apache.nifi.web.revision.RevisionManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.stream.Collectors;
import java.util.zip.GZIPInputStream;
import java.util.zip.GZIPOutputStream;
public class StandardFlowService implements FlowService, ProtocolHandler {
// Category used when emitting controller-level bulletins/events.
private static final String EVENT_CATEGORY = "Controller";
// Component id under which this node's cluster configuration is stored in local state.
private static final String CLUSTER_NODE_CONFIG = "Cluster Node Configuration";

// state keys
// Local-state key holding this node's persisted UUID, so the node keeps a
// stable identity across restarts.
private static final String NODE_UUID = "Node UUID";

private final FlowController controller;
// Location of the flow configuration file (nifi.flow.configuration.file).
private final Path flowXml;
private final FlowConfigurationDAO dao;
// Seconds to wait for the scheduling executor to terminate on shutdown.
private final int gracefulShutdownSeconds;
// Whether processors should be restarted in their previous state after a flow load.
private final boolean autoResumeState;
private final Authorizer authorizer;

// Lock is used to protect the flow.xml file.
private final ReentrantReadWriteLock rwLock = new ReentrantReadWriteLock();
private final Lock readLock = rwLock.readLock();
private final Lock writeLock = rwLock.writeLock();

private final AtomicBoolean running = new AtomicBoolean(false);
// Executor that periodically runs SaveReportingTask; set in start(), shut down in stop().
private final AtomicReference<ScheduledExecutorService> executor = new AtomicReference<>(null);
// Pending save request (time + archive flag); latest request wins.
private final AtomicReference<SaveHolder> saveHolder = new AtomicReference<>(null);
// Null when not configured for clustering.
private final ClusterCoordinator clusterCoordinator;
private final RevisionManager revisionManager;

/**
 * listener/sender for internal cluster communication
 */
private final NodeProtocolSenderListener senderListener;

/**
 * flag indicating whether we are operating in a clustered environment
 */
private final boolean configuredForClustering;

/**
 * the node identifier
 */
private NodeIdentifier nodeId;

// guardedBy rwLock
private boolean firstControllerInitialization = true;

private final NiFiProperties nifiProperties;

private static final String CONNECTION_EXCEPTION_MSG_PREFIX = "Failed to connect node to cluster because ";
private static final Logger logger = LoggerFactory.getLogger(StandardFlowService.class);
/**
 * Builds a flow service for a standalone (non-clustered) NiFi instance.
 * No protocol sender/listener or cluster coordinator is wired in.
 */
public static StandardFlowService createStandaloneInstance(
        final FlowController controller,
        final NiFiProperties nifiProperties,
        final StringEncryptor encryptor,
        final RevisionManager revisionManager,
        final Authorizer authorizer) throws IOException {

    // configuredForClustering=false, no sender/listener, no coordinator
    return new StandardFlowService(controller, nifiProperties, null, encryptor, false, null, revisionManager, authorizer);
}
/**
 * Builds a flow service for a clustered NiFi instance, wiring in the
 * cluster protocol sender/listener and the cluster coordinator.
 */
public static StandardFlowService createClusteredInstance(
        final FlowController controller,
        final NiFiProperties nifiProperties,
        final NodeProtocolSenderListener senderListener,
        final ClusterCoordinator coordinator,
        final StringEncryptor encryptor,
        final RevisionManager revisionManager,
        final Authorizer authorizer) throws IOException {

    // configuredForClustering=true
    return new StandardFlowService(controller, nifiProperties, senderListener, encryptor, true, coordinator, revisionManager, authorizer);
}
/**
 * Sole constructor; callers go through the static factories above. When
 * {@code configuredForClustering} is true this service registers itself as a
 * protocol handler and builds a proposed {@link NodeIdentifier}, reusing a
 * UUID persisted in local state from a previous run when one exists.
 */
private StandardFlowService(
        final FlowController controller,
        final NiFiProperties nifiProperties,
        final NodeProtocolSenderListener senderListener,
        final StringEncryptor encryptor,
        final boolean configuredForClustering,
        final ClusterCoordinator clusterCoordinator,
        final RevisionManager revisionManager,
        final Authorizer authorizer) throws IOException {

    this.nifiProperties = nifiProperties;
    this.controller = controller;
    flowXml = Paths.get(nifiProperties.getProperty(NiFiProperties.FLOW_CONFIGURATION_FILE));
    gracefulShutdownSeconds = (int) FormatUtils.getTimeDuration(nifiProperties.getProperty(NiFiProperties.FLOW_CONTROLLER_GRACEFUL_SHUTDOWN_PERIOD), TimeUnit.SECONDS);
    autoResumeState = nifiProperties.getAutoResumeState();

    dao = new StandardXMLFlowConfigurationDAO(flowXml, encryptor, nifiProperties);

    this.clusterCoordinator = clusterCoordinator;
    if (clusterCoordinator != null) {
        clusterCoordinator.setFlowService(this);
    }
    this.revisionManager = revisionManager;
    this.authorizer = authorizer;

    if (configuredForClustering) {
        this.configuredForClustering = configuredForClustering;

        // register to receive cluster protocol messages (flow requests, (re|dis)connect requests)
        this.senderListener = senderListener;
        senderListener.addHandler(this);

        final InetSocketAddress nodeApiAddress = nifiProperties.getNodeApiAddress();
        final InetSocketAddress nodeSocketAddress = nifiProperties.getClusterNodeProtocolAddress();

        // Prefer the UUID persisted from a previous run so the node keeps a
        // stable identity across restarts.
        String nodeUuid = null;
        final StateManager stateManager = controller.getStateManagerProvider().getStateManager(CLUSTER_NODE_CONFIG);
        if (stateManager != null) {
            nodeUuid = stateManager.getState(Scope.LOCAL).get(NODE_UUID);
        }

        if (nodeUuid == null) {
            nodeUuid = UUID.randomUUID().toString();
        }

        // use a random UUID as the proposed node identifier
        this.nodeId = new NodeIdentifier(nodeUuid,
                nodeApiAddress.getHostName(), nodeApiAddress.getPort(),
                nodeSocketAddress.getHostName(), nodeSocketAddress.getPort(),
                nifiProperties.getRemoteInputHost(), nifiProperties.getRemoteInputPort(),
                nifiProperties.getRemoteInputHttpPort(), nifiProperties.isSiteToSiteSecure());

    } else {
        this.configuredForClustering = false;
        this.senderListener = null;
    }
}
@Override
/**
 * Synchronously persists the current flow to disk, serialized under the
 * write lock that guards the flow.xml file.
 *
 * @throws IOException if the flow cannot be serialized or written
 */
@Override
public void saveFlowChanges() throws IOException {
    writeLock.lock();
    try {
        dao.save(controller);
    } finally {
        writeLock.unlock();
    }
}
@Override
public void saveFlowChanges(final OutputStream outStream) throws IOException {
writeLock.lock();
try {
dao.save(controller, outStream);
} finally {
writeLock.unlock();
}
}
@Override
public void overwriteFlow(final InputStream is) throws IOException {
writeLock.lock();
try (final OutputStream output = Files.newOutputStream(flowXml, StandardOpenOption.WRITE, StandardOpenOption.CREATE);
final OutputStream gzipOut = new GZIPOutputStream(output);) {
FileUtils.copy(is, gzipOut);
} finally {
writeLock.unlock();
}
}
@Override
public void saveFlowChanges(final TimeUnit delayUnit, final long delay) {
final boolean archiveEnabled = nifiProperties.isFlowConfigurationArchiveEnabled();
saveFlowChanges(delayUnit, delay, archiveEnabled);
}
@Override
public void saveFlowChanges(final TimeUnit delayUnit, final long delay, final boolean archive) {
final Calendar saveTime = Calendar.getInstance();
final long delayInMs = TimeUnit.MILLISECONDS.convert(delay, delayUnit);
int finalDelayMs = 500; //default to 500 ms.
if (delayInMs <= Integer.MAX_VALUE) {
finalDelayMs = (int) delayInMs;
}
saveTime.add(Calendar.MILLISECOND, finalDelayMs);
if (logger.isTraceEnabled()) {
logger.trace(" A request to save the flow has been made with delay {} for time {}", finalDelayMs, saveTime.getTime());
}
saveHolder.set(new SaveHolder(saveTime, archive));
}
@Override
public boolean isRunning() {
return running.get();
}
@Override
public void start() throws LifeCycleStartException {
writeLock.lock();
try {
if (isRunning()) {
return;
}
running.set(true);
final ScheduledExecutorService newExecutor = new FlowEngine(2, "Flow Service Tasks");
newExecutor.scheduleWithFixedDelay(new SaveReportingTask(), 0L, 500L, TimeUnit.MILLISECONDS);
this.executor.set(newExecutor);
if (configuredForClustering) {
senderListener.start();
}
} catch (final IOException ioe) {
try {
stop(/* force */true);
} catch (final Exception e) {
}
throw new LifeCycleStartException("Failed to start Flow Service due to: " + ioe, ioe);
} finally {
writeLock.unlock();
}
}
@Override
public void stop(final boolean force) {
writeLock.lock();
try {
if (!isRunning()) {
return;
}
running.set(false);
if (clusterCoordinator != null) {
final Thread shutdownClusterCoordinator = new Thread(new Runnable() {
@Override
public void run() {
clusterCoordinator.shutdown();
}
});
shutdownClusterCoordinator.setDaemon(true);
shutdownClusterCoordinator.setName("Shutdown Cluster Coordinator");
shutdownClusterCoordinator.start();
}
if (!controller.isTerminated()) {
controller.shutdown(force);
}
if (configuredForClustering && senderListener != null) {
try {
senderListener.stop();
} catch (final IOException ioe) {
logger.warn("Protocol sender/listener did not stop gracefully due to: " + ioe);
}
}
final ScheduledExecutorService executorService = executor.get();
if (executorService != null) {
if (force) {
executorService.shutdownNow();
} else {
executorService.shutdown();
}
boolean graceful;
try {
graceful = executorService.awaitTermination(gracefulShutdownSeconds, TimeUnit.SECONDS);
} catch (final InterruptedException e) {
graceful = false;
}
if (!graceful) {
logger.warn("Scheduling service did not gracefully shutdown within configured " + gracefulShutdownSeconds + " second window");
}
}
} finally {
writeLock.unlock();
}
}
@Override
public boolean canHandle(final ProtocolMessage msg) {
switch (msg.getType()) {
case RECONNECTION_REQUEST:
case DISCONNECTION_REQUEST:
case FLOW_REQUEST:
return true;
default:
return false;
}
}
@Override
public ProtocolMessage handle(final ProtocolMessage request) throws ProtocolException {
final long startNanos = System.nanoTime();
try {
switch (request.getType()) {
case FLOW_REQUEST:
return handleFlowRequest((FlowRequestMessage) request);
case RECONNECTION_REQUEST: {
// Suspend heartbeats until we've reconnected. Otherwise,
// we may send a heartbeat while we are still in the process of
// connecting, which will cause the Cluster Manager to mark us
// as "Connected," which becomes problematic as the FlowController's lock
// may still be held, causing this node to take a long time to respond to requests.
controller.suspendHeartbeats();
final Thread t = new Thread(new Runnable() {
@Override
public void run() {
handleReconnectionRequest((ReconnectionRequestMessage) request);
}
}, "Reconnect to Cluster");
t.setDaemon(true);
t.start();
return new ReconnectionResponseMessage();
}
case DISCONNECTION_REQUEST: {
final Thread t = new Thread(new Runnable() {
@Override
public void run() {
handleDisconnectionRequest((DisconnectMessage) request);
}
}, "Disconnect from Cluster");
t.setDaemon(true);
t.start();
return null;
}
default:
throw new ProtocolException("Handler cannot handle message type: " + request.getType());
}
} finally {
if (logger.isDebugEnabled()) {
final long procNanos = System.nanoTime() - startNanos;
final long procMillis = TimeUnit.MILLISECONDS.convert(procNanos, TimeUnit.NANOSECONDS);
logger.debug("Finished Processing Protocol Message of type {} in {} millis", request.getType(), procMillis);
}
}
}
@Override
public void load(final DataFlow dataFlow) throws IOException, FlowSerializationException, FlowSynchronizationException, UninheritableFlowException, MissingBundleException {
if (configuredForClustering) {
// Create the initial flow from disk if it exists, or from serializing the empty root group in flow controller
final DataFlow initialFlow = (dataFlow == null) ? createDataFlow() : dataFlow;
if (logger.isTraceEnabled()) {
logger.trace("InitialFlow = " + new String(initialFlow.getFlow(), StandardCharsets.UTF_8));
}
// Sync the initial flow into the flow controller so that if the flow came from disk we loaded the
// whole flow into the flow controller and applied any bundle upgrades
writeLock.lock();
try {
loadFromBytes(initialFlow, true);
} finally {
writeLock.unlock();
}
// Get the proposed flow by serializing the flow controller which now has the synced version from above
final DataFlow proposedFlow = createDataFlowFromController();
if (logger.isTraceEnabled()) {
logger.trace("ProposedFlow = " + new String(proposedFlow.getFlow(), StandardCharsets.UTF_8));
}
/*
* Attempt to connect to the cluster. If the manager is able to
* provide a data flow, then the manager will send a connection
* response. If the manager was unable to be located, then
* the response will be null and we should load the local dataflow
* and heartbeat until a manager is located.
*/
final boolean localFlowEmpty = StandardFlowSynchronizer.isEmpty(proposedFlow);
final ConnectionResponse response = connect(true, localFlowEmpty, proposedFlow);
// obtain write lock while we are updating the controller. We need to ensure that we don't
// obtain the lock before calling connect(), though, or we will end up getting a deadlock
// because the node that is receiving the connection request won't be able to get the current
// flow, as that requires a read lock.
writeLock.lock();
try {
if (response == null || response.shouldTryLater()) {
logger.info("Flow controller will load local dataflow and suspend connection handshake until a cluster connection response is received.");
// set node ID on controller before we start heartbeating because heartbeat needs node ID
controller.setNodeId(nodeId);
clusterCoordinator.setLocalNodeIdentifier(nodeId);
// set node as clustered, since it is trying to connect to a cluster
controller.setClustered(true, null);
clusterCoordinator.setConnected(false);
controller.setConnectionStatus(new NodeConnectionStatus(nodeId, DisconnectionCode.NOT_YET_CONNECTED));
/*
* Start heartbeating. Heartbeats will fail because we can't reach
* the manager, but when we locate the manager, the node will
* reconnect and establish a connection to the cluster. The
* heartbeat is the trigger that will cause the manager to
* issue a reconnect request.
*/
controller.startHeartbeating();
// Initialize the controller after the flow is loaded so we don't take any actions on repos until everything is good
initializeController();
// notify controller that flow is initialized
try {
controller.onFlowInitialized(autoResumeState);
} catch (final Exception ex) {
logger.warn("Unable to start all processors due to invalid flow configuration.");
if (logger.isDebugEnabled()) {
logger.warn(StringUtils.EMPTY, ex);
}
}
} else {
try {
loadFromConnectionResponse(response);
} catch (final Exception e) {
logger.error("Failed to load flow from cluster due to: " + e, e);
handleConnectionFailure(e);
throw new IOException(e);
}
}
// save the flow in the controller so we write out the latest flow with any updated bundles to disk
dao.save(controller, true);
} finally {
writeLock.unlock();
}
} else {
writeLock.lock();
try {
// operating in standalone mode, so load proposed flow and initialize the controller
loadFromBytes(dataFlow, true);
initializeController();
dao.save(controller, true);
} finally {
writeLock.unlock();
}
}
}
/**
 * Reports a failed cluster-connection attempt to the coordinator with a
 * disconnection code derived from the failure type, and flips this node
 * back to non-clustered state.
 */
private void handleConnectionFailure(final Exception ex) {
    final DisconnectionCode disconnectionCode;
    if (ex instanceof UninheritableFlowException || ex instanceof FlowSynchronizationException) {
        // both indicate the local flow disagrees with the cluster flow
        disconnectionCode = DisconnectionCode.MISMATCHED_FLOWS;
    } else if (ex instanceof MissingBundleException) {
        disconnectionCode = DisconnectionCode.MISSING_BUNDLE;
    } else {
        disconnectionCode = DisconnectionCode.STARTUP_FAILURE;
    }

    clusterCoordinator.disconnectionRequestedByNode(getNodeId(), disconnectionCode, ex.toString());
    controller.setClustered(false, null);
    clusterCoordinator.setConnected(false);
}
/**
 * Answers a cluster flow request by serializing the current controller
 * state under the read lock.
 *
 * @throws ProtocolException if the flow cannot be serialized
 */
private FlowResponseMessage handleFlowRequest(final FlowRequestMessage request) throws ProtocolException {
    readLock.lock();
    try {
        logger.info("Received flow request message from manager.");

        final FlowResponseMessage flowResponse = new FlowResponseMessage();
        flowResponse.setDataFlow(createDataFlowFromController());
        return flowResponse;
    } catch (final Exception ex) {
        throw new ProtocolException("Failed serializing flow controller state for flow request due to: " + ex, ex);
    } finally {
        readLock.unlock();
    }
}
/**
 * @return the UTF-8 fingerprint of the authorizer when it is an internal
 *         (policy-based) authorizer; null for external authorizers
 */
private byte[] getAuthorizerFingerprint() {
    if (authorizer instanceof AbstractPolicyBasedAuthorizer) {
        final AbstractPolicyBasedAuthorizer policyBasedAuthorizer = (AbstractPolicyBasedAuthorizer) authorizer;
        return policyBasedAuthorizer.getFingerprint().getBytes(StandardCharsets.UTF_8);
    }
    return null;
}
@Override
public StandardDataFlow createDataFlow() throws IOException {
// Load the flow from disk if the file exists.
if (dao.isFlowPresent()) {
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
dao.load(baos);
final byte[] bytes = baos.toByteArray();
final byte[] snippetBytes = controller.getSnippetManager().export();
final byte[] authorizerFingerprint = getAuthorizerFingerprint();
final StandardDataFlow fromDisk = new StandardDataFlow(bytes, snippetBytes, authorizerFingerprint, new HashSet<>());
return fromDisk;
}
// Flow from disk does not exist, so serialize the Flow Controller and use that.
// This is done because on startup, if there is no flow, the Flow Controller
// will automatically create a Root Process Group, and we need to ensure that
// we replicate that Process Group to all nodes in the cluster, so that they all
// end up with the same ID for the root Process Group.
return createDataFlowFromController();
}
@Override
public StandardDataFlow createDataFlowFromController() throws IOException {
final byte[] snippetBytes = controller.getSnippetManager().export();
final byte[] authorizerFingerprint = getAuthorizerFingerprint();
final ByteArrayOutputStream baos = new ByteArrayOutputStream();
dao.save(controller, baos);
final byte[] flowBytes = baos.toByteArray();
baos.reset();
final Set<String> missingComponents = new HashSet<>();
controller.getRootGroup().findAllProcessors().stream().filter(p -> p.isExtensionMissing()).forEach(p -> missingComponents.add(p.getIdentifier()));
controller.getAllControllerServices().stream().filter(cs -> cs.isExtensionMissing()).forEach(cs -> missingComponents.add(cs.getIdentifier()));
controller.getAllReportingTasks().stream().filter(r -> r.isExtensionMissing()).forEach(r -> missingComponents.add(r.getIdentifier()));
return new StandardDataFlow(flowBytes, snippetBytes, authorizerFingerprint, missingComponents);
}
/**
 * @return this node's identifier, read under the read lock because
 *         {@code nodeId} is reassigned when a connection response arrives
 */
private NodeIdentifier getNodeId() {
    readLock.lock();
    try {
        return nodeId;
    } finally {
        readLock.unlock();
    }
}
/**
 * Processes a reconnection request from the cluster coordinator: loads the
 * flow supplied in the request (or re-connects using the local flow when the
 * request carries none), resets node statuses, persists the inherited flow,
 * and resumes heartbeats. On failure the node disconnects itself and reports
 * the failure to the coordinator. Runs on a dedicated thread (see handle()).
 */
private void handleReconnectionRequest(final ReconnectionRequestMessage request) {
    try {
        logger.info("Processing reconnection request from manager.");

        // reconnect
        ConnectionResponse connectionResponse = new ConnectionResponse(getNodeId(), request.getDataFlow(),
                request.getInstanceId(), request.getNodeConnectionStatuses(), request.getComponentRevisions());

        if (connectionResponse.getDataFlow() == null) {
            logger.info("Received a Reconnection Request that contained no DataFlow. Will attempt to connect to cluster using local flow.");
            connectionResponse = connect(false, false, createDataFlowFromController());
        }

        loadFromConnectionResponse(connectionResponse);

        clusterCoordinator.resetNodeStatuses(connectionResponse.getNodeConnectionStatuses().stream()
                .collect(Collectors.toMap(status -> status.getNodeIdentifier(), status -> status)));
        // reconnected, this node needs to explicitly write the inherited flow to disk, and resume heartbeats
        saveFlowChanges();
        controller.resumeHeartbeats();

        logger.info("Node reconnected.");
    } catch (final Exception ex) {
        // disconnect controller
        if (controller.isClustered()) {
            disconnect("Failed to properly handle Reconnection request due to " + ex.toString());
        }

        logger.error("Handling reconnection request failed due to: " + ex, ex);
        handleConnectionFailure(ex);
    }
}
/**
 * Processes a disconnection request from the cluster coordinator by
 * disconnecting this node. Runs on a dedicated thread (see handle()).
 */
private void handleDisconnectionRequest(final DisconnectMessage request) {
    // parameterized logging avoids eager string concatenation (SLF4J idiom)
    logger.info("Received disconnection request message from manager with explanation: {}", request.getExplanation());
    disconnect(request.getExplanation());
}
/**
 * Disconnects this node from the cluster: marks it not connected, drops the
 * primary flag, stops heartbeating, and flips the controller and coordinator
 * to non-clustered state. Serialized under the write lock.
 *
 * @param explanation human-readable reason included in the connection status
 */
private void disconnect(final String explanation) {
    writeLock.lock();
    try {
        // parameterized logging avoids eager string concatenation (SLF4J idiom)
        logger.info("Disconnecting node due to {}", explanation);

        // mark node as not connected
        controller.setConnectionStatus(new NodeConnectionStatus(nodeId, DisconnectionCode.UNKNOWN, explanation));

        // turn off primary flag
        controller.setPrimary(false);

        // stop heartbeating
        controller.stopHeartbeating();

        // set node to not clustered
        controller.setClustered(false, null);
        clusterCoordinator.setConnected(false);

        logger.info("Node disconnected due to {}", explanation);
    } finally {
        writeLock.unlock();
    }
}
// write lock must already be acquired
/**
 * Loads a flow into the controller. A null {@code proposedFlow} means "load
 * the flow currently on disk". After loading, legacy templates found in the
 * templates directory are imported into the root group (skipping templates
 * already present).
 *
 * @param proposedFlow    the flow to load, or null to load from disk
 * @param allowEmptyFlow  if false, an empty resulting root group raises
 *                        FlowSynchronizationException
 */
private void loadFromBytes(final DataFlow proposedFlow, final boolean allowEmptyFlow)
        throws IOException, FlowSerializationException, FlowSynchronizationException, UninheritableFlowException, MissingBundleException {
    logger.trace("Loading flow from bytes");

    // resolve the given flow (null means load flow from disk)
    final DataFlow actualProposedFlow;
    final byte[] flowBytes;
    final byte[] authorizerFingerprint;
    final Set<String> missingComponents;

    if (proposedFlow == null) {
        final ByteArrayOutputStream flowOnDisk = new ByteArrayOutputStream();
        copyCurrentFlow(flowOnDisk);
        flowBytes = flowOnDisk.toByteArray();
        authorizerFingerprint = getAuthorizerFingerprint();
        missingComponents = new HashSet<>();
        logger.debug("Loaded Flow from bytes");
    } else {
        flowBytes = proposedFlow.getFlow();
        authorizerFingerprint = proposedFlow.getAuthorizerFingerprint();
        missingComponents = proposedFlow.getMissingComponents();
        logger.debug("Loaded flow from proposed flow");
    }

    // snippets are intentionally dropped (null) when re-wrapping the flow here
    actualProposedFlow = new StandardDataFlow(flowBytes, null, authorizerFingerprint, missingComponents);

    // load the flow
    logger.debug("Loading proposed flow into FlowController");
    dao.load(controller, actualProposedFlow);

    final ProcessGroup rootGroup = controller.getGroup(controller.getRootGroupId());
    if (rootGroup.isEmpty() && !allowEmptyFlow) {
        throw new FlowSynchronizationException("Failed to load flow because unable to connect to cluster and local flow is empty");
    }

    // import any legacy (pre-1.0) file-based templates not already in the root group
    final List<Template> templates = loadTemplates();
    for (final Template template : templates) {
        final Template existing = rootGroup.getTemplate(template.getIdentifier());
        if (existing == null) {
            logger.info("Imported Template '{}' to Root Group", template.getDetails().getName());
            rootGroup.addTemplate(template);
        } else {
            logger.info("Template '{}' was already present in Root Group so will not import from file", template.getDetails().getName());
        }
    }
}
/**
 * In NiFi 0.x, templates were stored in a templates directory as separate
 * files. They are now stored in the flow itself. If there already are
 * templates in that directory, though, we want to restore them.
 *
 * @return the templates found in the templates directory
 * @throws IOException if unable to read from the file system
 */
public List<Template> loadTemplates() throws IOException {
    final Path templatePath = nifiProperties.getTemplateDirectory();

    final File[] files = templatePath.toFile().listFiles(pathname -> {
        // Locale.ROOT: locale-independent case mapping — a locale-sensitive
        // toLowerCase() mishandles ".XML" under e.g. the Turkish locale
        final String lowerName = pathname.getName().toLowerCase(Locale.ROOT);
        return lowerName.endsWith(".template") || lowerName.endsWith(".xml");
    });

    if (files == null) {
        // directory does not exist or is not readable
        return Collections.emptyList();
    }

    final List<Template> templates = new ArrayList<>();
    for (final File file : files) {
        try (final FileInputStream fis = new FileInputStream(file);
                final BufferedInputStream bis = new BufferedInputStream(fis)) {

            final TemplateDTO templateDto;
            try {
                templateDto = TemplateDeserializer.deserialize(bis);
            } catch (final Exception e) {
                // log the cause so the user can diagnose why the file is unparseable
                logger.error("Unable to interpret " + file + " as a Template. Skipping file.", e);
                continue;
            }

            if (templateDto.getId() == null) {
                // If there is no ID assigned, we need to assign one. We do this by generating
                // an ID from the name. This is because we know that Template Names are unique
                // and are consistent across all nodes in the cluster.
                final String uuid = UUID.nameUUIDFromBytes(templateDto.getName().getBytes(StandardCharsets.UTF_8)).toString();
                templateDto.setId(uuid);
            }

            final Template template = new Template(templateDto);
            templates.add(template);
        }
    }

    return templates;
}
/**
 * Attempts to join the cluster by sending a connection request to the
 * cluster coordinator, retrying up to 10 times (or indefinitely when
 * {@code retryIndefinitely}). On success the assigned node UUID is persisted
 * to local state so the node keeps its identity across restarts.
 *
 * @param retryOnCommsFailure retry after communication failures (not just "try later" responses)
 * @param retryIndefinitely   keep retrying past the 10-attempt cap
 * @param dataFlow            this node's proposed flow
 * @return the coordinator's response; null when blocked or unreachable
 */
private ConnectionResponse connect(final boolean retryOnCommsFailure, final boolean retryIndefinitely, final DataFlow dataFlow) throws ConnectionException {
    readLock.lock();
    try {
        logger.info("Connecting Node: {}", nodeId);

        // create connection request message
        final ConnectionRequest request = new ConnectionRequest(nodeId, dataFlow);
        final ConnectionRequestMessage requestMsg = new ConnectionRequestMessage();
        requestMsg.setConnectionRequest(request);

        // send connection request to cluster manager
        /*
         * Try to get a current copy of the cluster's dataflow from the manager
         * for ten times, sleeping between attempts. Ten times should be
         * enough because the manager will register the node as connecting
         * and therefore, no other changes to the cluster flow can occur.
         *
         * However, the manager needs to obtain a current data flow within
         * maxAttempts * tryLaterSeconds or else the node will fail to startup.
         */
        final int maxAttempts = 10;
        ConnectionResponse response = null;
        for (int i = 0; i < maxAttempts || retryIndefinitely; i++) {
            try {
                response = senderListener.requestConnection(requestMsg).getConnectionResponse();
                if (response.shouldTryLater()) {
                    logger.info("Requested by cluster coordinator to retry connection in " + response.getTryLaterSeconds() + " seconds with explanation: " + response.getRejectionReason());
                    try {
                        // TimeUnit conversion avoids int-overflow of seconds * 1000
                        Thread.sleep(TimeUnit.SECONDS.toMillis(response.getTryLaterSeconds()));
                    } catch (final InterruptedException ie) {
                        // we were interrupted, so finish quickly
                        Thread.currentThread().interrupt();
                        break;
                    }
                } else if (response.getRejectionReason() != null) {
                    logger.warn("Connection request was blocked by cluster coordinator with the explanation: " + response.getRejectionReason());
                    // set response to null and treat a firewall blockage the same as getting no response from manager
                    response = null;
                    break;
                } else {
                    // we received a successful connection response from manager
                    break;
                }
            } catch (final NoClusterCoordinatorException ncce) {
                logger.warn("There is currently no Cluster Coordinator. This often happens upon restart of NiFi when running an embedded ZooKeeper. Will register this node "
                        + "to become the active Cluster Coordinator and will attempt to connect to cluster again");
                controller.registerForClusterCoordinator(true);

                try {
                    Thread.sleep(1000L);
                } catch (final InterruptedException ie) {
                    Thread.currentThread().interrupt();
                    break;
                }
            } catch (final Exception pe) {
                // could not create a socket and communicate with manager
                logger.warn("Failed to connect to cluster due to: " + pe);
                if (logger.isDebugEnabled()) {
                    logger.warn("", pe);
                }

                if (retryOnCommsFailure) {
                    try {
                        // BUG FIX: getTryLaterSeconds() is a value in SECONDS; the previous
                        // code slept that many MILLISECONDS, retrying almost immediately
                        Thread.sleep(response == null ? 5000 : TimeUnit.SECONDS.toMillis(response.getTryLaterSeconds()));
                    } catch (final InterruptedException ie) {
                        Thread.currentThread().interrupt();
                        break;
                    }
                } else {
                    break;
                }
            }
        }

        if (response == null) {
            // if response is null, then either we had IO problems or we were blocked by firewall or we couldn't determine manager's address
            return response;
        } else if (response.shouldTryLater()) {
            // if response indicates we should try later, then coordinator was unable to service our request. Just load local flow and move on.
            // when the cluster coordinator is able to service requests, this node's heartbeat will trigger the cluster coordinator to reach
            // out to this node and re-connect to the cluster.
            logger.info("Received a 'try again' response from Cluster Coordinator when attempting to connect to cluster with explanation '"
                    + response.getRejectionReason() + "'. However, the maximum number of retries have already completed. Will load local flow and connect to the cluster when able.");
            return null;
        } else {
            // cluster manager provided a successful response with a current dataflow
            // persist node uuid and index returned by NCM and return the response to the caller
            try {
                // Ensure that we have registered our 'cluster node configuration' state key
                final Map<String, String> map = Collections.singletonMap(NODE_UUID, response.getNodeIdentifier().getId());
                controller.getStateManagerProvider().getStateManager(CLUSTER_NODE_CONFIG).setState(map, Scope.LOCAL);
            } catch (final IOException ioe) {
                logger.warn("Received successful response from Cluster Manager but failed to persist state about the Node's Unique Identifier and the Node's Index. "
                        + "This node may be assigned a different UUID when the node is restarted.", ioe);
            }

            return response;
        }
    } finally {
        readLock.unlock();
    }
}
/**
 * Applies a successful cluster {@link ConnectionResponse} to this node: synchronizes the
 * local flow with the dataflow carried in the response, records the node identifier the
 * cluster assigned to us, marks this node as connected, and starts heartbeating.
 * <p>
 * The statements below are order-sensitive: the node ID must be set on the controller
 * before heartbeating starts, and the controller is only initialized after the flow has
 * been loaded so no repository actions are taken on a flow that failed to load.
 *
 * @param response the successful connection response received from the cluster coordinator
 * @throws ConnectionException if the cluster flow cannot be loaded or applied locally
 */
private void loadFromConnectionResponse(final ConnectionResponse response) throws ConnectionException {
    writeLock.lock();
    try {
        // Seed the coordinator's view of every node's connection state from the response, if provided
        if (response.getNodeConnectionStatuses() != null) {
            clusterCoordinator.resetNodeStatuses(response.getNodeConnectionStatuses().stream()
                .collect(Collectors.toMap(status -> status.getNodeIdentifier(), status -> status)));
        }

        // get the dataflow from the response
        final DataFlow dataFlow = response.getDataFlow();
        if (logger.isTraceEnabled()) {
            logger.trace("ResponseFlow = " + new String(dataFlow.getFlow(), StandardCharsets.UTF_8));
        }

        // load new controller state
        loadFromBytes(dataFlow, true);

        // set node ID on controller before we start heartbeating because heartbeat needs node ID
        nodeId = response.getNodeIdentifier();
        logger.info("Setting Flow Controller's Node ID: " + nodeId);
        controller.setNodeId(nodeId);
        clusterCoordinator.setLocalNodeIdentifier(nodeId);
        clusterCoordinator.setConnected(true);
        // Replace all component revisions with the cluster's current revisions
        revisionManager.reset(response.getComponentRevisions().stream().map(rev -> rev.toRevision()).collect(Collectors.toList()));

        // mark the node as clustered
        controller.setClustered(true, response.getInstanceId());
        controller.setConnectionStatus(new NodeConnectionStatus(nodeId, NodeConnectionState.CONNECTED));

        // Initialize the controller after the flow is loaded so we don't take any actions on repos until everything is good
        initializeController();

        // start the processors as indicated by the dataflow
        controller.onFlowInitialized(autoResumeState);

        loadSnippets(dataFlow.getSnippets());

        controller.startHeartbeating();
    } catch (final UninheritableFlowException ufe) {
        throw new UninheritableFlowException(CONNECTION_EXCEPTION_MSG_PREFIX + "local flow is different than cluster flow.", ufe);
    } catch (final MissingBundleException mbe) {
        throw new MissingBundleException(CONNECTION_EXCEPTION_MSG_PREFIX + "cluster flow contains bundles that do not exist on the current node", mbe);
    } catch (final FlowSerializationException fse) {
        throw new ConnectionException(CONNECTION_EXCEPTION_MSG_PREFIX + "local or cluster flow is malformed.", fse);
    } catch (final FlowSynchronizationException fse) {
        throw new FlowSynchronizationException(CONNECTION_EXCEPTION_MSG_PREFIX + "local flow controller partially updated. "
            + "Administrator should disconnect node and review flow for corruption.", fse);
    } catch (final Exception ex) {
        throw new ConnectionException("Failed to connect node to cluster due to: " + ex, ex);
    } finally {
        writeLock.unlock();
    }
}
/**
 * Initializes the controller's flow exactly once; calls after the first
 * successful initialization are no-ops.
 *
 * @throws IOException if flow initialization fails
 */
private void initializeController() throws IOException {
    if (!firstControllerInitialization) {
        return;
    }

    logger.debug("First controller initialization, initializing controller...");
    controller.initializeFlow();
    firstControllerInitialization = false;
}
/**
 * Writes the current (gzip-compressed) flow file to the given stream, decompressed.
 * Does nothing when no flow file exists or the flow file is empty.
 *
 * @param os the stream to receive the decompressed flow contents; not closed by this method
 * @throws IOException if the flow file cannot be read or the copy fails
 */
@Override
public void copyCurrentFlow(final OutputStream os) throws IOException {
    readLock.lock();
    try {
        final boolean flowAvailable = Files.exists(flowXml) && Files.size(flowXml) > 0;
        if (flowAvailable) {
            // Closing the GZIPInputStream also closes the underlying file stream.
            try (final InputStream gzipIn = new GZIPInputStream(Files.newInputStream(flowXml, StandardOpenOption.READ))) {
                FileUtils.copy(gzipIn, os);
            }
        }
    } finally {
        readLock.unlock();
    }
}
/**
 * Replaces the controller's current snippets with those parsed from the given
 * serialized form. A zero-length array is a no-op: the existing snippets are
 * left untouched.
 *
 * @param bytes serialized snippets, as produced by the SnippetManager
 * @throws IOException if the bytes cannot be parsed into snippets
 */
public void loadSnippets(final byte[] bytes) throws IOException {
    if (bytes.length == 0) {
        return;
    }

    final SnippetManager manager = controller.getSnippetManager();
    manager.clear();
    SnippetManager.parseBytes(bytes).forEach(manager::addSnippet);
}
private class SaveReportingTask implements Runnable {
@Override
public void run() {
ClassLoader currentCl = null;
final Bundle frameworkBundle = NarClassLoaders.getInstance().getFrameworkBundle();
if (frameworkBundle != null) {
currentCl = Thread.currentThread().getContextClassLoader();
final ClassLoader cl = frameworkBundle.getClassLoader();
Thread.currentThread().setContextClassLoader(cl);
}
try {
//Hang onto the SaveHolder here rather than setting it to null because if the save fails we will try again
final SaveHolder holder = StandardFlowService.this.saveHolder.get();
if (holder == null) {
return;
}
if (logger.isTraceEnabled()) {
logger.trace("Save request time {} // Current time {}", holder.saveTime.getTime(), new Date());
}
final Calendar now = Calendar.getInstance();
if (holder.saveTime.before(now)) {
if (logger.isTraceEnabled()) {
logger.trace("Waiting for write lock and then will save");
}
writeLock.lock();
try {
dao.save(controller, holder.shouldArchive);
// Nulling it out if it is still set to our current SaveHolder. Otherwise leave it alone because it means
// another save is already pending.
final boolean noSavePending = StandardFlowService.this.saveHolder.compareAndSet(holder, null);
logger.info("Saved flow controller {} // Another save pending = {}", controller, !noSavePending);
} finally {
writeLock.unlock();
}
}
} catch (final Throwable t) {
logger.error("Unable to save flow controller configuration due to: " + t, t);
if (logger.isDebugEnabled()) {
logger.error("", t);
}
// record the failed save as a bulletin
final Bulletin saveFailureBulletin = BulletinFactory.createBulletin(EVENT_CATEGORY, LogLevel.ERROR.name(), "Unable to save flow controller configuration.");
controller.getBulletinRepository().addBulletin(saveFailureBulletin);
} finally {
if (currentCl != null) {
Thread.currentThread().setContextClassLoader(currentCl);
}
}
}
}
/**
 * Immutable pairing of a scheduled save time with the archive flag for that save.
 */
private class SaveHolder {
    // Earliest moment at which the pending save should be performed.
    private final Calendar saveTime;
    // Whether the flow should also be archived when it is saved.
    private final boolean shouldArchive;

    private SaveHolder(final Calendar moment, final boolean archive) {
        this.saveTime = moment;
        this.shouldArchive = archive;
    }
}
}