/*
 * RHQ Management Platform
 * Copyright (C) 2005-2008 Red Hat, Inc.
 * All rights reserved.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation version 2 of the License.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 */
package org.rhq.enterprise.server.cloud;

import java.util.Collections;
import java.util.List;

import javax.ejb.EJB;
import javax.ejb.Stateless;
import javax.persistence.EntityManager;
import javax.persistence.NoResultException;
import javax.persistence.PersistenceContext;
import javax.persistence.Query;
import javax.persistence.TypedQuery;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.rhq.core.domain.auth.Subject;
import org.rhq.core.domain.authz.Permission;
import org.rhq.core.domain.cloud.FailoverListDetails;
import org.rhq.core.domain.cloud.PartitionEventType;
import org.rhq.core.domain.cloud.Server;
import org.rhq.core.domain.cloud.composite.ServerWithAgentCountComposite;
import org.rhq.core.domain.criteria.ServerCriteria;
import org.rhq.core.domain.resource.Agent;
import org.rhq.core.domain.server.PersistenceUtility;
import org.rhq.core.domain.util.PageControl;
import org.rhq.core.domain.util.PageList;
import org.rhq.enterprise.server.RHQConstants;
import org.rhq.enterprise.server.authz.AuthorizationManagerLocal;
import org.rhq.enterprise.server.authz.RequiredPermission;
import org.rhq.enterprise.server.authz.RequiredPermissions;
import org.rhq.enterprise.server.cloud.instance.ServerManagerLocal;
import org.rhq.enterprise.server.util.CriteriaQueryGenerator;
import org.rhq.enterprise.server.util.CriteriaQueryRunner;
import org.rhq.enterprise.server.util.LookupUtil;

/**
 * This class manages and reports information about the RHQ Server Cloud as a whole.
 * It does not discern which server is which, and can be called from any server in
 * the cloud and will produce identical results.
 *
 * @author Joseph Marques
 */
@Stateless
public class TopologyManagerBean implements TopologyManagerLocal {
    private final Log log = LogFactory.getLog(TopologyManagerBean.class);

    // A time sufficient to determine whether a server is down. Can be based on the initial delay set for the
    // server instance job updating the server mtimes. See StartupServlet.
    // Evaluates to two minutes, expressed in milliseconds.
    private static final long SERVER_DOWN_INTERVAL = 1000L * 2 * 60;

    @PersistenceContext(unitName = RHQConstants.PERSISTENCE_UNIT_NAME)
    private EntityManager entityManager;

    // Reference to this bean's own local interface; used for calls to sibling business methods.
    @EJB
    private TopologyManagerLocal topologyManager;

    @EJB
    private FailoverListManagerLocal failoverListManager;

    @EJB
    private PartitionEventManagerLocal partitionEventManager;

    @EJB
    private AuthorizationManagerLocal authorizationManager;

    @EJB
    //@IgnoreDependency
    private ServerManagerLocal serverManager;

    /**
     * Returns the agents attached to the server with the given name.
     *
     * @param serverName the name of the server to look up
     * @return the server's agents with the collection already loaded, or an empty list when no server
     *         with that name exists
     */
    public List<Agent> getAgentsByServerName(String serverName) {
        Server server = topologyManager.getServerByName(serverName);
        if (server == null) {
            // getServerByName returns null for an unknown name; avoid dereferencing it
            return Collections.<Agent> emptyList();
        }

        List<Agent> agents = server.getAgents();
        // touch the collection now: iterating over it outside of a transactional boundary
        // would otherwise throw LazyInitExceptions
        agents.size();
        return agents;
    }

    /**
     * Looks up a server by its primary key.
     *
     * @param serverId the id of the server
     * @return whatever {@link EntityManager#find(Class, Object)} yields for that id
     */
    public Server getServerById(int serverId) {
        return entityManager.find(Server.class, serverId);
    }

    /**
     * Looks up a server by name using the {@code Server.QUERY_FIND_BY_NAME} named query.
     *
     * @param serverName the name of the server
     * @return the matching server, or {@code null} (after an info-level log entry) when the query
     *         yields no result
     */
    public Server getServerByName(String serverName) {
        Query query = entityManager.createNamedQuery(Server.QUERY_FIND_BY_NAME);
        query.setParameter("name", serverName);

        try {
            return (Server) query.getSingleResult();
        } catch (NoResultException nre) {
            log.info("Server[name=" + serverName + "] not found, returning null...");
            return null;
        }
    }

    /**
     * Runs the {@code Server.QUERY_FIND_ALL_CLOUD_MEMBERS} named query.
     *
     * @return the servers returned by that query
     */
    @SuppressWarnings("unchecked")
    public List<Server> getAllCloudServers() {
        Query query = entityManager.createNamedQuery(Server.QUERY_FIND_ALL_CLOUD_MEMBERS);
        return query.getResultList();
    }

    /**
     * Runs the {@code Server.QUERY_FIND_ALL} named query.
     *
     * @return the servers returned by that query
     */
    @SuppressWarnings("unchecked")
    public List<Server> getAllServers() {
        Query query = entityManager.createNamedQuery(Server.QUERY_FIND_ALL);
        return query.getResultList();
    }

    /**
     * Returns one page of server/agent-count composites, ordered by {@code s.name} unless the page
     * control already specifies an ordering.
     *
     * @param subject the caller
     * @param pc      the page control describing paging and ordering
     * @return the requested page; its total size is taken from {@link #getServerCount()}
     */
    @SuppressWarnings("unchecked")
    @RequiredPermissions({ @RequiredPermission(Permission.MANAGE_SETTINGS),
        @RequiredPermission(Permission.MANAGE_INVENTORY) })
    public PageList<ServerWithAgentCountComposite> getServerComposites(Subject subject, PageControl pc) {
        pc.initDefaultOrderingField("s.name");

        Query query = PersistenceUtility.createQueryWithOrderBy(entityManager, Server.QUERY_FIND_ALL_COMPOSITES, pc);
        List<ServerWithAgentCountComposite> results = query.getResultList();
        int count = getServerCount();

        return new PageList<ServerWithAgentCountComposite>(results, count, pc);
    }

    /**
     * Counts the rows matched by the {@code Server.QUERY_FIND_ALL} named query.
     *
     * @return the count, or 0 when the count query yields no result
     */
    public int getServerCount() {
        Query query = PersistenceUtility.createCountQuery(entityManager, Server.QUERY_FIND_ALL);

        try {
            long serverCount = (Long) query.getSingleResult();
            return (int) serverCount;
        } catch (NoResultException nre) {
            log.debug("Could not get count of cloud instances, returning 0...");
            return 0;
        }
    }

    /**
     * Counts the rows matched by the {@code Server.QUERY_FIND_ALL_NORMAL_CLOUD_MEMBERS} named query.
     *
     * @return the count, or 0 when the count query yields no result
     */
    public int getNormalServerCount() {
        Query query = PersistenceUtility.createCountQuery(entityManager, Server.QUERY_FIND_ALL_NORMAL_CLOUD_MEMBERS);

        try {
            long serverCount = (Long) query.getSingleResult();
            return (int) serverCount;
        } catch (NoResultException nre) {
            log.debug("Could not get count of normal cloud instances, returning 0...");
            return 0;
        }
    }

    /**
     * Deletes each of the given servers by delegating to {@link #deleteServer(Subject, Integer)} through
     * the injected local interface.
     *
     * @param subject   the caller
     * @param serverIds ids of the servers to delete; {@code null} is treated as "nothing to do"
     * @throws TopologyManagerException if any single deletion fails
     */
    @RequiredPermissions({ @RequiredPermission(Permission.MANAGE_SETTINGS),
        @RequiredPermission(Permission.MANAGE_INVENTORY) })
    public void deleteServers(Subject subject, Integer[] serverIds) throws TopologyManagerException {
        if (serverIds == null) {
            return;
        }

        for (Integer nextServerId : serverIds) {
            topologyManager.deleteServer(subject, nextServerId);
        }
    }

    /**
     * Deletes one server together with the failover list entries and agent references that point at it,
     * then requests a cloud repartitioning.
     *
     * @param subject  the caller
     * @param serverId id of the server to delete
     * @throws TopologyManagerException if the server does not exist, if it is currently in NORMAL operation
     *                                  mode, or if any step of the deletion fails
     */
    @RequiredPermissions({ @RequiredPermission(Permission.MANAGE_SETTINGS),
        @RequiredPermission(Permission.MANAGE_INVENTORY) })
    public void deleteServer(Subject subject, Integer serverId) throws TopologyManagerException {
        try {
            Server server = entityManager.find(Server.class, serverId);

            if (server == null) {
                // report a missing server explicitly instead of failing with a NullPointerException below
                throw new TopologyManagerException("Could not delete server[id=" + serverId
                    + "]. Server not found.");
            }

            if (Server.OperationMode.NORMAL == server.getOperationMode()) {
                throw new TopologyManagerException("Could not delete server " + server.getName()
                    + ". Server must be down or in maintenance mode. Current operating mode is: "
                    + server.getOperationMode().name());
            }

            // Delete any server list entries referencing this server
            failoverListManager.deleteServerListDetailsForServer(serverId);

            // Delete any agent references to this server
            Query query = entityManager.createNamedQuery(Agent.QUERY_REMOVE_SERVER_REFERENCE);
            query.setParameter("serverId", serverId);
            query.executeUpdate();

            // Then, delete the server
            query = entityManager.createNamedQuery(Server.QUERY_DELETE_BY_ID);
            query.setParameter("serverId", serverId);
            query.executeUpdate();

            entityManager.flush();
            entityManager.clear();

            log.info("Removed server " + server);

            // Now, request a cloud repartitioning due to the server removal
            partitionEventManager.cloudPartitionEventRequest(LookupUtil.getSubjectManager().getOverlord(),
                PartitionEventType.SERVER_DELETION, server.getName());

        } catch (TopologyManagerException e) {
            // already carries a meaningful message; do not wrap it a second time
            throw e;
        } catch (Exception e) {
            throw new TopologyManagerException("Could not delete server[id=" + serverId + "]: " + e.getMessage(), e);
        }
    }

    /**
     * Adds or clears the {@code MANUAL_MAINTENANCE_MODE} status on each of the given servers.
     * This is best-effort: a failure is logged and ends the loop, it is not propagated to the caller.
     *
     * @param subject           the caller
     * @param serverIds         ids of the servers to update; {@code null} or empty is treated as
     *                          "nothing to do"
     * @param manualMaintenance {@code true} to add the status, {@code false} to clear it
     */
    @RequiredPermissions({ @RequiredPermission(Permission.MANAGE_SETTINGS),
        @RequiredPermission(Permission.MANAGE_INVENTORY) })
    public void updateServerManualMaintenance(Subject subject, Integer[] serverIds, boolean manualMaintenance) {
        // null guard keeps this method consistent with deleteServers and updateServerMode
        if (serverIds == null || serverIds.length == 0) {
            return;
        }

        try {
            for (Integer id : serverIds) {
                Server server = entityManager.find(Server.class, id);

                if (manualMaintenance) {
                    server.addStatus(Server.Status.MANUAL_MAINTENANCE_MODE);
                } else {
                    server.clearStatus(Server.Status.MANUAL_MAINTENANCE_MODE);
                }
            }
        } catch (Exception e) {
            // still best-effort, but make the failure visible and keep the stack trace
            log.warn("Failed to update HA server modes: " + e, e);
        }
    }

    /**
     * Sets the operation mode of each of the given servers. Servers already in the requested mode are
     * skipped. A change to DOWN is audited as a partition event before it is applied.
     * Updating is best-effort: a failure inside the loop is logged and ends the loop, it is not
     * propagated to the caller.
     *
     * @param subject   the caller
     * @param serverIds ids of the servers to update; {@code null} is treated as "nothing to do"
     * @param mode      the operation mode to set
     * @throws IllegalArgumentException if {@code mode} is {@code null} or cannot be set directly
     */
    @RequiredPermissions({ @RequiredPermission(Permission.MANAGE_SETTINGS),
        @RequiredPermission(Permission.MANAGE_INVENTORY) })
    public void updateServerMode(Subject subject, Integer[] serverIds, Server.OperationMode mode) {
        if (serverIds == null) {
            return;
        }

        if (mode == null) {
            throw new IllegalArgumentException("Mode cannot be null.");
        }

        // Reject modes that may not be set directly. The guard used to be negated, which rejected exactly
        // the settable modes and contradicted the error message below.
        // NOTE(review): assumes isReadOnly() means "not configurable" - confirm against Server.OperationMode.
        if (mode.isReadOnly()) {
            throw new IllegalArgumentException("Cannot directly set a mode that is not configurable. Mode "
                + mode.name() + " is not configurable.");
        }

        if (serverIds.length == 0) {
            return;
        }

        try {
            for (Integer id : serverIds) {
                Server server = entityManager.find(Server.class, id);

                if (server.getOperationMode() == mode) {
                    // ignore if there is no change
                    continue;
                }

                // Audit servers being set to DOWN since the state change can't be reported any other way.
                // Servers being set to any other mode will be handled when the cloud job establishes the
                // current operating mode.
                if (Server.OperationMode.DOWN == mode) {
                    String audit = server.getName() + ": " + server.getOperationMode().name() + " --> " + mode;

                    partitionEventManager.auditPartitionEvent(LookupUtil.getSubjectManager().getOverlord(),
                        PartitionEventType.OPERATION_MODE_CHANGE, audit);
                }

                server.setOperationMode(mode);
            }
        } catch (Exception e) {
            // still best-effort, but make the failure visible and keep the stack trace
            log.warn("Failed to update HA server modes: " + e, e);
        }
    }

    /**
     * Merges the given server into the persistence context.
     *
     * @param subject the caller
     * @param server  the server state to merge
     * @return the managed instance returned by {@link EntityManager#merge(Object)}
     */
    @RequiredPermissions({ @RequiredPermission(Permission.MANAGE_SETTINGS),
        @RequiredPermission(Permission.MANAGE_INVENTORY) })
    public Server updateServer(Subject subject, Server server) {
        return entityManager.merge(server);
    }

    /**
     * Returns one page of the failover list details for an agent, ordered by {@code fld.ordinal} unless
     * the page control already specifies an ordering.
     *
     * @param subject the caller
     * @param agentId id of the agent whose failover list details are wanted
     * @param pc      the page control describing paging and ordering
     * @return the requested page together with the total count
     */
    @RequiredPermissions({ @RequiredPermission(Permission.MANAGE_SETTINGS),
        @RequiredPermission(Permission.MANAGE_INVENTORY) })
    public PageList<FailoverListDetails> getFailoverListDetailsByAgentId(Subject subject, int agentId,
        PageControl pc) {
        pc.initDefaultOrderingField("fld.ordinal");

        Query query = PersistenceUtility.createQueryWithOrderBy(entityManager,
            FailoverListDetails.QUERY_GET_VIA_AGENT_ID_WITH_SERVERS, pc);
        Query countQuery = PersistenceUtility.createCountQuery(entityManager,
            FailoverListDetails.QUERY_GET_VIA_AGENT_ID);

        query.setParameter("agentId", agentId);
        countQuery.setParameter("agentId", agentId);

        @SuppressWarnings("unchecked")
        List<FailoverListDetails> list = query.getResultList();
        long count = (Long) countQuery.getSingleResult();

        return new PageList<FailoverListDetails>(list, (int) count, pc);
    }

    /**
     * Executes the {@code Server.QUERY_UPDATE_SET_STALE_DOWN} update, using a stale threshold of
     * "now minus {@code SERVER_DOWN_INTERVAL}", and then processes any requested partition events.
     *
     * @param subject the caller; must be the overlord
     * @throws IllegalArgumentException if {@code subject} is not the overlord
     */
    public void markStaleServersDown(Subject subject) {
        if (!authorizationManager.isOverlord(subject)) {
            throw new IllegalArgumentException("The markStaleServersDown method must be called by the overlord");
        }

        long staleTime = System.currentTimeMillis() - SERVER_DOWN_INTERVAL;

        String serverName = null;
        try {
            serverName = serverManager.getIdentity();
            if (log.isDebugEnabled()) {
                log.debug(serverName + " is marking stale servers DOWN");
            }
        } catch (Exception e) {
            // carry on with a null server name, but keep the cause in the log
            log.error("Could not determine which instance is marking stale servers DOWN", e);
        }

        Query query = entityManager.createNamedQuery(Server.QUERY_UPDATE_SET_STALE_DOWN);
        query.setParameter("downMode", Server.OperationMode.DOWN);
        query.setParameter("normalMode", Server.OperationMode.NORMAL);
        query.setParameter("staleTime", staleTime);
        query.setParameter("thisServerName", serverName); // might be null
        int resultCount = query.executeUpdate();

        if (log.isDebugEnabled()) {
            log.debug(String.valueOf(resultCount) + " stale servers were marked DOWN");
        }

        // Perform requested partition events. Note that we only need to execute one cloud partition
        // regardless of the number of pending requests, as the work would be duplicated.
        partitionEventManager.processRequestedPartitionEvents();
    }

    /**
     * Finds servers matching the given criteria.
     *
     * @param subject  the caller
     * @param criteria the criteria to apply
     * @return the result of running the generated criteria query
     */
    @RequiredPermission(Permission.MANAGE_SETTINGS)
    public PageList<Server> findServersByCriteria(Subject subject, ServerCriteria criteria) {
        CriteriaQueryGenerator generator = new CriteriaQueryGenerator(subject, criteria);
        CriteriaQueryRunner<Server> runner = new CriteriaQueryRunner<Server>(criteria, generator, entityManager);
        return runner.execute();
    }

    /**
     * Looks up the resource id associated with an agent via the
     * {@code Agent.QUERY_FIND_AGENT_RESOURCE_ID_AGENT_ID} named query.
     *
     * @param subject the caller
     * @param agentId id of the agent
     * @return the resource id, or {@code null} when the query yields no result
     */
    @RequiredPermissions({ @RequiredPermission(Permission.MANAGE_SETTINGS),
        @RequiredPermission(Permission.MANAGE_INVENTORY) })
    public Integer getResourceIdOfAgent(Subject subject, int agentId) {
        TypedQuery<Integer> query = entityManager.<Integer> createNamedQuery(
            Agent.QUERY_FIND_AGENT_RESOURCE_ID_AGENT_ID, Integer.class);
        query.setParameter("agentId", agentId);

        try {
            return query.getSingleResult();
        } catch (NoResultException nre) {
            return null;
        }
    }
}