/* * Copyright (c) 2010-2013 Evolveum * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.evolveum.midpoint.task.quartzimpl.cluster; import com.evolveum.midpoint.prism.PrismContext; import com.evolveum.midpoint.prism.PrismObject; import com.evolveum.midpoint.prism.PrismObjectDefinition; import com.evolveum.midpoint.prism.delta.PropertyDelta; import com.evolveum.midpoint.prism.query.ObjectQuery; import com.evolveum.midpoint.repo.api.RepositoryService; import com.evolveum.midpoint.schema.constants.SchemaConstants; import com.evolveum.midpoint.schema.result.OperationResult; import com.evolveum.midpoint.schema.util.ObjectQueryUtil; import com.evolveum.midpoint.task.api.TaskManagerInitializationException; import com.evolveum.midpoint.task.quartzimpl.TaskManagerConfiguration; import com.evolveum.midpoint.task.quartzimpl.TaskManagerQuartzImpl; import com.evolveum.midpoint.util.exception.ObjectAlreadyExistsException; import com.evolveum.midpoint.util.exception.ObjectNotFoundException; import com.evolveum.midpoint.util.exception.SchemaException; import com.evolveum.midpoint.util.exception.SystemException; import com.evolveum.midpoint.util.logging.LoggingUtils; import com.evolveum.midpoint.util.logging.Trace; import com.evolveum.midpoint.util.logging.TraceManager; import com.evolveum.midpoint.xml.ns._public.common.common_3.BuildInformationType; import com.evolveum.midpoint.xml.ns._public.common.common_3.NodeErrorStatusType; import com.evolveum.midpoint.xml.ns._public.common.common_3.NodeType; import com.evolveum.prism.xml.ns._public.types_3.PolyStringType; import org.apache.commons.lang.Validate; import javax.xml.datatype.DatatypeConfigurationException; import javax.xml.datatype.DatatypeFactory; import javax.xml.datatype.XMLGregorianCalendar; import java.net.InetAddress; import java.net.UnknownHostException; import java.util.*; /** * Takes care about node registration in repository. * * @author Pavol Mederly */ public class NodeRegistrar { private static final transient Trace LOGGER = TraceManager.getTrace(NodeRegistrar.class); private TaskManagerQuartzImpl taskManager; private ClusterManager clusterManager; private PrismObject<NodeType> nodePrism; public NodeRegistrar(TaskManagerQuartzImpl taskManager, ClusterManager clusterManager) { Validate.notNull(taskManager); Validate.notNull(clusterManager); this.taskManager = taskManager; this.clusterManager = clusterManager; } /** * Executes node startup registration: if Node object with a give name (node ID) exists, deletes it. * Then creates a new Node with the information relevant to this node. * * @param result Node prism to be used for periodic re-registrations. */ PrismObject<NodeType> createNodeObject(OperationResult result) throws TaskManagerInitializationException { nodePrism = createNodePrism(taskManager.getConfiguration()); NodeType node = nodePrism.asObjectable(); LOGGER.info("Registering this node in the repository as " + node.getNodeIdentifier() + " at " + node.getHostname() + ":" + node.getJmxPort()); List<PrismObject<NodeType>> nodes; try { nodes = findNodesWithGivenName(result, node.getName()); } catch (SchemaException e) { throw new TaskManagerInitializationException("Node registration failed because of schema exception", e); } for (PrismObject<NodeType> n : nodes) { LOGGER.trace("Removing existing NodeType with oid = {}, name = {}", n.getOid(), n.getElementName()); try { getRepositoryService().deleteObject(NodeType.class, n.getOid(), result); } catch (ObjectNotFoundException e) { LoggingUtils.logException(LOGGER, "Cannot remove NodeType with oid = {}, name = {}, because it does not exist.", e, n.getOid(), n.getElementName()); // continue, because the error is not that severe (we hope so) } } try { String oid = getRepositoryService().addObject(nodePrism, null, result); nodePrism.setOid(oid); } catch (ObjectAlreadyExistsException e) { taskManager.setNodeErrorStatus(NodeErrorStatusType.NODE_REGISTRATION_FAILED); throw new TaskManagerInitializationException("Cannot register this node, because it already exists (this should not happen, as nodes with such a name were just removed)", e); } catch (SchemaException e) { taskManager.setNodeErrorStatus(NodeErrorStatusType.NODE_REGISTRATION_FAILED); throw new TaskManagerInitializationException("Cannot register this node because of schema exception", e); } LOGGER.trace("Node was successfully registered in the repository."); return nodePrism; } private PrismObject<NodeType> createNodePrism(TaskManagerConfiguration configuration) { PrismObjectDefinition<NodeType> nodeTypeDef = getPrismContext().getSchemaRegistry().findObjectDefinitionByCompileTimeClass(NodeType.class); PrismObject<NodeType> nodePrism; try { nodePrism = nodeTypeDef.instantiate(); } catch (SchemaException e) { throw new SystemException(e.getMessage(), e); } NodeType node = nodePrism.asObjectable(); node.setNodeIdentifier(configuration.getNodeId()); node.setName(new PolyStringType(configuration.getNodeId())); node.setHostname(getMyAddress()); node.setJmxPort(configuration.getJmxPort()); node.setClustered(configuration.isClustered()); node.setRunning(true); node.setLastCheckInTime(getCurrentTime()); node.setBuild(getBuildInformation()); generateInternalNodeIdentifier(node); return nodePrism; } private BuildInformationType getBuildInformation() { BuildInformationType info = new BuildInformationType(); ResourceBundle bundle = ResourceBundle.getBundle(SchemaConstants.SCHEMA_LOCALIZATION_PROPERTIES_RESOURCE_BASE_PATH, Locale.getDefault()); info.setVersion(bundle.getString("midPointVersion")); info.setRevision(bundle.getString("midPointRevision")); return info; } /** * Generates an identifier that is used to ensure that this Node object is not (by mistake) overwritten * by another node in cluster. ClusterManager thread periodically checks if this identifier has not been changed. * * @param node */ private void generateInternalNodeIdentifier(NodeType node) { String id = node.getNodeIdentifier() + ":" + node.getJmxPort() + ":" + Math.round(Math.random() * 10000000000000.0); LOGGER.trace("internal node identifier generated: " + id); node.setInternalNodeIdentifier(id); } private XMLGregorianCalendar getCurrentTime() { try { // AFAIK the DatatypeFactory is not thread safe, so we have to create an instance every time return DatatypeFactory.newInstance().newXMLGregorianCalendar(new GregorianCalendar()); } catch (DatatypeConfigurationException e) { // this should not happen throw new SystemException("Cannot create DatatypeFactory (to create XMLGregorianCalendar instance).", e); } } private PropertyDelta<XMLGregorianCalendar> createCheckInTimeDelta() { return PropertyDelta.createReplaceDelta(nodePrism.getDefinition(), NodeType.F_LAST_CHECK_IN_TIME, getCurrentTime()); } /** * Removes current node from the repository (currently not used; recordNodeShutdown is used instead). * * @param result */ @Deprecated void removeNodeObject(OperationResult result) { String oid = nodePrism.getOid(); String name = nodePrism.asObjectable().getNodeIdentifier(); LOGGER.trace("Removing this node from the repository (name {}, oid {})", name, oid); try { getRepositoryService().deleteObject(NodeType.class, oid, result); LOGGER.trace("Node successfully unregistered (removed)."); } catch (ObjectNotFoundException e) { LoggingUtils.logException(LOGGER, "Cannot unregister (remove) this node (name {}, oid {}), because it does not exist.", e, name, oid); } } /** * Registers the node going down (sets running attribute to false). * * @param result */ void recordNodeShutdown(OperationResult result) { LOGGER.trace("Registering this node shutdown (name {}, oid {})", nodePrism.asObjectable().getName(), nodePrism.getOid()); List<PropertyDelta<?>> modifications = new ArrayList<PropertyDelta<?>>(); modifications.add(PropertyDelta.createReplaceDelta(nodePrism.getDefinition(), NodeType.F_RUNNING, false)); modifications.add(createCheckInTimeDelta()); try { getRepositoryService().modifyObject(NodeType.class, nodePrism.getOid(), modifications, result); LOGGER.trace("Node shutdown successfully registered."); } catch (ObjectNotFoundException e) { LoggingUtils.logException(LOGGER, "Cannot register shutdown of this node (name {}, oid {}), because it does not exist.", e, nodePrism.asObjectable().getName(), nodePrism.getOid()); // we do not set error flag here, because we hope that on a node startup the registration would (perhaps) succeed } catch (ObjectAlreadyExistsException e) { LoggingUtils.logUnexpectedException(LOGGER, "Cannot register shutdown of this node (name {}, oid {}).", e, nodePrism.asObjectable().getName(), nodePrism.getOid()); } catch (SchemaException e) { LoggingUtils.logUnexpectedException(LOGGER, "Cannot register shutdown of this node (name {}, oid {}) due to schema exception.", e, nodePrism.asObjectable().getName(), nodePrism.getOid()); } } /** * Updates registration of this node (runs periodically within ClusterManager thread). * * @param result */ void updateNodeObject(OperationResult result) { LOGGER.trace("Updating this node registration (name {}, oid {})", nodePrism.asObjectable().getName(), nodePrism.getOid()); List<PropertyDelta<?>> modifications = new ArrayList<PropertyDelta<?>>(); modifications.add(PropertyDelta.createReplaceDelta(nodePrism.getDefinition(), NodeType.F_HOSTNAME, getMyAddress())); modifications.add(createCheckInTimeDelta()); try { getRepositoryService().modifyObject(NodeType.class, nodePrism.getOid(), modifications, result); LOGGER.trace("Node registration successfully updated."); } catch (ObjectNotFoundException e) { LoggingUtils.logException(LOGGER, "Cannot update registration of this node (name {}, oid {}), because it does not exist in repository. It is probably caused by cluster misconfiguration (other node rewriting the Node object?) Stopping the scheduler.", e, nodePrism.asObjectable().getName(), nodePrism.getOid()); if (taskManager.getLocalNodeErrorStatus() == NodeErrorStatusType.OK) { registerNodeError(NodeErrorStatusType.NODE_REGISTRATION_FAILED); } } catch (ObjectAlreadyExistsException e) { LoggingUtils.logUnexpectedException(LOGGER, "Cannot update registration of this node (name {}, oid {}).", e, nodePrism.asObjectable().getName(), nodePrism.getOid()); if (taskManager.getLocalNodeErrorStatus() == NodeErrorStatusType.OK) { registerNodeError(NodeErrorStatusType.NODE_REGISTRATION_FAILED); } } catch (SchemaException e) { LoggingUtils.logUnexpectedException(LOGGER, "Cannot update registration of this node (name {}, oid {}) due to schema exception. Stopping the scheduler.", e, nodePrism.asObjectable().getName(), nodePrism.getOid()); if (taskManager.getLocalNodeErrorStatus() == NodeErrorStatusType.OK) { registerNodeError(NodeErrorStatusType.NODE_REGISTRATION_FAILED); } } } /** * Checks whether this Node object was not overwritten by another node (implying there is duplicate node ID in cluster). * * @param result */ void verifyNodeObject(OperationResult result) { PrismObject<NodeType> nodeInRepo; String oid = nodePrism.getOid(); PolyStringType myName = nodePrism.asObjectable().getName(); LOGGER.trace("Verifying node record with OID = " + oid); // first, let us check the record of this node - whether it exists and whether the internalNodeIdentifier is OK try { nodeInRepo = getRepositoryService().getObject(NodeType.class, oid, null, result); } catch (ObjectNotFoundException e) { if (doesNodeExist(result, myName)) { LoggingUtils.logException(LOGGER, "The record of this node cannot be read (OID {} not found), but " + "another node record with the name '{}' exists. It seems that in this cluster " + "there are two or more nodes with the same name '{}'. Stopping the scheduler " + "to minimize the damage.", e, oid, myName, myName); registerNodeError(NodeErrorStatusType.DUPLICATE_NODE_ID_OR_NAME); return; } else { LoggingUtils.logException(LOGGER, "The record of this node cannot be read (OID {} not found). It " + "seems it was deleted in the meantime. Please check the reason. Stopping the scheduler " + "to minimize the damage.", e, oid, myName, myName); // actually we could re-register the node, but it is safer (and easier for now :) to stop the node instead registerNodeError(NodeErrorStatusType.NODE_REGISTRATION_FAILED); return; } } catch (SchemaException e) { LoggingUtils.logUnexpectedException(LOGGER, "Cannot check the record of this node (OID = {}) because of schema exception. Stopping the scheduler.", e, oid); registerNodeError(NodeErrorStatusType.NODE_REGISTRATION_FAILED); return; } // check the internalNodeIdentifier String existingId = nodePrism.asObjectable().getInternalNodeIdentifier(); String idInRepo = nodeInRepo.asObjectable().getInternalNodeIdentifier(); if (!existingId.equals(idInRepo)) { LOGGER.error("Internal node identifier has been overwritten in the repository. " + "Probably somebody has overwritten it in the meantime, i.e. another node with the name of '" + nodePrism.asObjectable().getName() + "' is running. Stopping the scheduler."); registerNodeError(NodeErrorStatusType.DUPLICATE_NODE_ID_OR_NAME); return; } } /** * There may be either exactly one non-clustered node (and no other nodes), or clustered nodes only. * @param result */ public void checkNonClusteredNodes(OperationResult result) { LOGGER.trace("Checking non-clustered nodes."); List<String> clustered = new ArrayList<String>(); List<String> nonClustered = new ArrayList<String>(); List<PrismObject<NodeType>> allNodes = clusterManager.getAllNodes(result); for (PrismObject<NodeType> nodePrism : allNodes) { NodeType n = nodePrism.asObjectable(); if (isUp(n)) { if (n.isClustered()) { clustered.add(n.getNodeIdentifier()); } else { nonClustered.add(n.getNodeIdentifier()); } } } LOGGER.trace("Clustered nodes: " + clustered); LOGGER.trace("Non-clustered nodes: " + nonClustered); int all = clustered.size() + nonClustered.size(); if (!taskManager.getConfiguration().isClustered() && all > 1) { LOGGER.error("This node is a non-clustered one, mixed with other nodes. In this system, there are " + nonClustered.size() + " non-clustered nodes (" + nonClustered + ") and " + clustered.size() + " clustered ones (" + clustered + "). Stopping this node."); registerNodeError(NodeErrorStatusType.NON_CLUSTERED_NODE_WITH_OTHERS); } } boolean isUp(NodeType n) { return n.isRunning() && n.getLastCheckInTime() != null && (System.currentTimeMillis() - n.getLastCheckInTime().toGregorianCalendar().getTimeInMillis()) <= (taskManager.getConfiguration().getNodeTimeout() * 1000L); } private boolean doesNodeExist(OperationResult result, PolyStringType myName) { try { List<PrismObject<NodeType>> nodes = findNodesWithGivenName(result, myName); return nodes != null && !nodes.isEmpty(); } catch (SchemaException e) { LoggingUtils.logUnexpectedException(LOGGER, "Existence of a Node cannot be checked due to schema exception.", e); return false; } } private List<PrismObject<NodeType>> findNodesWithGivenName(OperationResult result, PolyStringType name) throws SchemaException { ObjectQuery q = ObjectQueryUtil.createOrigNameQuery(name, getPrismContext()); // ObjectQuery q = ObjectQuery.createObjectQuery(EqualsFilter.createEqual(NodeType.F_NAME, NodeType.class, getPrismContext(), null, name)); return getRepositoryService().searchObjects(NodeType.class, q, null, result); } /** * Sets node error status and shuts down the scheduler (used when an error occurs after initialization). * * @param status Error status to be set. */ private void registerNodeError(NodeErrorStatusType status) { taskManager.setNodeErrorStatus(status); if (taskManager.getServiceThreadsActivationState()) { taskManager.getExecutionManager().stopSchedulerAndTasksLocally(0L, new OperationResult("nodeError")); } taskManager.getExecutionManager().shutdownLocalSchedulerChecked(); LOGGER.warn("Scheduler stopped, please check your cluster configuration as soon as possible; kind of error = " + status); } private String getMyAddress() { if (taskManager.getConfiguration().getJmxHostName() != null) { return taskManager.getConfiguration().getJmxHostName(); } else { try { InetAddress address = InetAddress.getLocalHost(); return address.getHostAddress(); } catch (UnknownHostException e) { LoggingUtils.logException(LOGGER, "Cannot get local IP address", e); return "unknown-host"; } } } public PrismObject<NodeType> getNodePrism() { return nodePrism; } public String getNodeId() { return nodePrism.asObjectable().getNodeIdentifier(); } public boolean isCurrentNode(PrismObject<NodeType> node) { return getNodeId().equals(node.asObjectable().getNodeIdentifier()); } boolean isCurrentNode(String nodeIdentifier) { return nodeIdentifier == null || getNodeId().equals(nodeIdentifier); } private RepositoryService getRepositoryService() { return taskManager.getRepositoryService(); } private PrismContext getPrismContext() { return taskManager.getPrismContext(); } public void deleteNode(String nodeOid, OperationResult parentResult) throws SchemaException, ObjectNotFoundException { OperationResult result = parentResult.createSubresult(NodeRegistrar.class.getName() + ".deleteNode"); result.addParam("nodeOid", nodeOid); PrismObject<NodeType> nodePrism = clusterManager.getNode(nodeOid, result); if (isUp(nodePrism.asObjectable())) { result.recordFatalError("Node " + nodeOid + " cannot be deleted, because it is currently up."); } else { try { taskManager.getRepositoryService().deleteObject(NodeType.class, nodePrism.getOid(), result); result.recordSuccess(); } catch (ObjectNotFoundException e) { throw new SystemException("Unexpected ObjectNotFoundException when deleting a node", e); } } } }