package org.ovirt.engine.core.notifier; import java.io.IOException; import java.net.HttpURLConnection; import java.net.MalformedURLException; import java.net.URL; import java.security.cert.CertificateException; import java.security.cert.X509Certificate; import java.sql.Connection; import java.sql.PreparedStatement; import java.sql.SQLException; import java.sql.Timestamp; import java.util.Date; import java.util.HashSet; import java.util.Set; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; import java.util.concurrent.TimeUnit; import javax.net.ssl.HostnameVerifier; import javax.net.ssl.HttpsURLConnection; import javax.net.ssl.SSLContext; import javax.net.ssl.SSLSocketFactory; import javax.net.ssl.TrustManager; import javax.net.ssl.X509TrustManager; import javax.sql.DataSource; import org.ovirt.engine.core.common.AuditLogSeverity; import org.ovirt.engine.core.common.AuditLogType; import org.ovirt.engine.core.notifier.utils.NotificationProperties; import org.ovirt.engine.core.notifier.utils.ShutdownHook; import org.ovirt.engine.core.utils.EngineLocalConfig; import org.ovirt.engine.core.utils.crypt.EngineEncryptionUtils; import org.ovirt.engine.core.utils.db.StandaloneDataSource; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Class uses to monitor the oVirt Engineanager service by sampling its health servlet. Upon response other than code 200, * will report to <i>audit_log</i> table upon ENGINE error. <br> * If a server state was change from non responsive to responsive, will report the status change. <br> * The monitor service is detached from the notification service, being executed as a separated thread, with different * execution rate. */ public class EngineMonitorService implements Runnable { private static final Logger log = LoggerFactory.getLogger(EngineMonitorService.class); private static final String ENGINE_NOT_RESPONDING_ERROR = "Engine server is not responding."; private static final String ENGINE_RESPONDING_MESSAGE = "Engine server is up and running."; private static final String HEALTH_SERVLET_PATH = "/services/health"; private DataSource ds; private NotificationProperties prop = null; private long serverMonitorTimeout; private URL serverUrl; private boolean isServerUp = true; private boolean repeatNonResponsiveNotification; private int serverMonitorRetries; private boolean isHttpsProtocol; private boolean sslIgnoreCertErrors; private SSLSocketFactory sslFactory = null; private boolean sslIgnoreHostVerification; private static final HostnameVerifier IgnoredHostnameVerifier = (hostname, session) -> true; /** * Creates {@code EngineMonitorService} by configuration element containing required properties. * @param notificationConf * notification configuration contains service properties */ public EngineMonitorService(NotificationProperties prop) throws NotificationServiceException { this.prop = prop; initConnectivity(); initServerConnectivity(); initServerMonitorInterval(); serverMonitorRetries = prop.getInteger(NotificationProperties.ENGINE_MONITOR_RETRIES); repeatNonResponsiveNotification = this.prop.getBoolean(NotificationProperties.REPEAT_NON_RESPONSIVE_NOTIFICATION); if (log.isDebugEnabled()) { log.debug("Checking server status using {}, {}ignoring SSL errors.", isHttpsProtocol ? "HTTPS" : "HTTP", sslIgnoreCertErrors ? "" : "without "); } } /** * Reads period for timeout between retries of querying server status. <br> * If property isn't configured, uses default as set on {@code DEFAULT_SERVER_MONITOR_TIMEOUT}, if property is * misconfigured, throws exception. */ private void initServerMonitorInterval() throws NotificationServiceException { long interval = prop.getLong(NotificationProperties.ENGINE_TIMEOUT_IN_SECONDS); if (interval < 0) { throw new NotificationServiceException(NotificationProperties.ENGINE_TIMEOUT_IN_SECONDS + " value must be a positive integer number"); } serverMonitorTimeout = TimeUnit.SECONDS.convert(interval, TimeUnit.MILLISECONDS); } /** * Initializes server connectivity settings: * <li> Resolves monitored server URL * <li> Sets protocol for connectivity (HTTP/HTTPS) and configures socket factories for SSL */ private void initServerConnectivity() throws NotificationServiceException { isHttpsProtocol = prop.getBoolean(NotificationProperties.IS_HTTPS_PROTOCOL); sslIgnoreCertErrors = prop.getBoolean(NotificationProperties.SSL_IGNORE_CERTIFICATE_ERRORS); sslIgnoreHostVerification = prop.getBoolean(NotificationProperties.SSL_IGNORE_HOST_VERIFICATION); // Setting SSL_IGNORE_HOST_VERIFICATION in configuration file implies that SSL certification errors should be // ignored as well sslIgnoreCertErrors = sslIgnoreHostVerification || sslIgnoreCertErrors; if (isHttpsProtocol) { initHttpsSettings(); } else if (sslIgnoreCertErrors || sslIgnoreHostVerification) { log.warn("Properties {} and {} are ignored, since property {} is not set.", NotificationProperties.SSL_IGNORE_CERTIFICATE_ERRORS, NotificationProperties.SSL_IGNORE_HOST_VERIFICATION, NotificationProperties.IS_HTTPS_PROTOCOL); } initServerUrl(); } /** * Initializes the SSL Socket Factory. Created SSL socket factory is determined by * {@code NotificationProperties.SSL_IGNORE_CERTIFICATE_ERRORS}. If set to true, creates dummy socket factory which * accept any request. If set to false or not set, creates SSL socket factory by trusted keystore defined on * vdc_options. */ private void initHttpsSettings() throws NotificationServiceException { if (sslIgnoreCertErrors) { createDummySSLSocketFactory(); } else { createConcreteSSLSocketFactory(); } } /** * Creates SSL Socket factory which is configured by the associated keystore which is configured the database, * provided by {@code ConfigValues.keystoreUrl} for its location and {@code ConfigValues.keystorePass} for its * password. */ private void createConcreteSSLSocketFactory() throws NotificationServiceException { try { String sslProtocol = prop.getProperty(NotificationProperties.SSL_PROTOCOL); SSLContext ctx = SSLContext.getInstance(sslProtocol); ctx.init(null, EngineEncryptionUtils.getTrustManagers(), null); sslFactory = ctx.getSocketFactory(); } catch (Exception e) { throw new NotificationServiceException("Failed to create SSL factory when running with SSL mode.", e); } } /** * Creates dummy SSL Socket Factory factory which should be used by setting 'true' to * {@code NotificationProperties.SSL_IGNORE_CERTIFICATE_ERRORS}. */ private void createDummySSLSocketFactory() throws NotificationServiceException { try { SSLContext sslContext = SSLContext.getInstance("TLS"); sslContext.init(null, new TrustManager[] { new X509TrustManager() { @Override public void checkClientTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { } @Override public void checkServerTrusted(X509Certificate[] arg0, String arg1) throws CertificateException { } @Override public X509Certificate[] getAcceptedIssuers() { return new X509Certificate[0]; } } }, null); sslFactory = sslContext.getSocketFactory(); } catch (Exception e) { throw new NotificationServiceException("Failed to create SSL factory with dummy truststore.", e); } } private void initServerUrl() throws NotificationServiceException { EngineLocalConfig config = EngineLocalConfig.getInstance(); try { if (isHttpsProtocol) { serverUrl = config.getExternalHttpsUrl(HEALTH_SERVLET_PATH); } else { serverUrl = config.getExternalHttpUrl(HEALTH_SERVLET_PATH); } log.info("Engine health servlet URL is \"{}\".", serverUrl); } catch (MalformedURLException exception) { throw new NotificationServiceException("Can't get engine health servlet URL.", exception); } } @Override public void run() { ShutdownHook shutdownHook = ShutdownHook.getInstance(); ScheduledExecutorService exec = Executors.newSingleThreadScheduledExecutor(); shutdownHook.addScheduledExecutorService(exec); shutdownHook.addServiceHandler( exec.scheduleWithFixedDelay( () -> mainLogic(), 1, prop.getLong(NotificationProperties.ENGINE_INTERVAL_IN_SECONDS), TimeUnit.SECONDS ) ); } /** * The service monitor the status of the JBoss server using its Health servlet */ private void mainLogic() { try { monitorEngineServerStatus(); } catch (Throwable e) { if (!Thread.interrupted()) { log.error("Error while trying to report engine server status", e); } // initialize server status if a dispatch failed to treat as new check for next iteration isServerUp = true; } } /** * Monitors the server status: attempts to query the server status for 3 times.<br> * Between attempts, waits for amount of seconds as defined on {@link #serverMonitorTimeout}.<br> * When 3 attempts exceed, */ private void monitorEngineServerStatus() { boolean isResponsive = false; Set<String> errors = new HashSet<>(); int retries = serverMonitorRetries; while (retries > 0) { retries--; try { isResponsive = checkServerStatus(errors); if (!isResponsive) { if (retries > 0) { Thread.sleep(serverMonitorTimeout); } } else { break; // server is up and health servlet returned HTTP_OK } } catch (InterruptedException e) { // ignore this error } catch (Exception e) { errors.add(e.getMessage()); } } // errors should contain distinct list of errors while trying to obtain server status if (errors.size() > 0) { log.error("Failed to get server status with: {}", errors); errors.clear(); } // analyzes server status and report if needed reportServerStatus(isResponsive); } /** * Analyzes server status and reports upon its status by configuration as needed:<br> * If compares the current server status to the latest one. <br> * if status was changed, adds an events to audit_log to represent the concrete event, else, <br> * if is a repetition of previous status, checks the {@link #repeatNonResponsiveNotification} flag to<br> * determine whether a user configured getting repeatable notifications or not. * @param isResponsive * current server status */ private void reportServerStatus(boolean isResponsive) { boolean statusChanged; boolean lastServerStatus = isServerUp; isServerUp = isResponsive; statusChanged = lastServerStatus ^ isResponsive; // reports for any server status change or in case of configure for repeatable notification if (statusChanged || repeatNonResponsiveNotification) { if (isResponsive) { // if server is up, report only if its status was changed from non responsive. if (statusChanged) { insertEventIntoAuditLogSafe(AuditLogType.VDC_START, AuditLogSeverity.NORMAL, ENGINE_RESPONDING_MESSAGE, "Failed auditing event down (for responsive server)."); } } else { // reports an error for non responsive server EngineLocalConfig config = EngineLocalConfig.getInstance(); if(config.getEngineUpMark().exists()) { // assumed crash, since engine up file is still there insertEventIntoAuditLogSafe(AuditLogType.VDC_STOP, AuditLogSeverity.ERROR, ENGINE_NOT_RESPONDING_ERROR, "Failed auditing event up (for crashed non responsive server)."); } else { insertEventIntoAuditLogSafe(AuditLogType.VDC_STOP, AuditLogSeverity.WARNING, ENGINE_NOT_RESPONDING_ERROR, "Failed auditing event up (for stopped non responsive server)."); } } } } private void insertEventIntoAuditLogSafe(AuditLogType eventType, AuditLogSeverity severity, String message, String logMessage) { try { insertEventIntoAuditLog(eventType.name(), eventType.getValue(), severity.getValue(), message); } catch (Exception e) { log.warn(message); log.error(logMessage, e); } } /** * Examines the status of the backend engine server * * @param serverUrl * the engine server url of Health Servlet * @param errors * collection which aggregates any error * @return true is engine server is responsive (response with code 200 - HTTP_OK), else false */ private boolean checkServerStatus(Set<String> errors) { boolean isResponsive = true; HttpURLConnection engineConn = null; try { engineConn = (HttpURLConnection) serverUrl.openConnection(); if (isHttpsProtocol) { ((HttpsURLConnection) engineConn).setSSLSocketFactory(sslFactory); if (sslIgnoreHostVerification) { ((HttpsURLConnection) engineConn).setHostnameVerifier(IgnoredHostnameVerifier); } } } catch (IOException e) { errors.add(e.getMessage()); isResponsive = false; } if (isResponsive) { try { int responseCode = engineConn.getResponseCode(); if (responseCode != HttpURLConnection.HTTP_OK) { isResponsive = false; log.debug("Server is non responsive with response code: {}", responseCode); } } catch (Exception e) { errors.add(e.getMessage()); isResponsive = false; } finally { if (engineConn != null) { engineConn.disconnect(); engineConn = null; } } } log.debug("checkServerStatus return: {}", isResponsive); return isResponsive; } /** * Adds an event to audit_log table, representing server status * @param eventType * {@code AuditLogType.VDC_START} or {@code AuditLogType.VDC_STOP} events * @param eventId * id associated with {@code eventType} parameter * @param severity * severity associated with eventType, values are taken from {@code AuditLogSeverity} * @param message * a comprehensive message describing the event */ private void insertEventIntoAuditLog(String eventType, int eventId, int severity, String message) throws SQLException { try (Connection connection = ds.getConnection(); PreparedStatement ps = connection.prepareStatement ("insert into audit_log(log_time, log_type_name , log_type, severity, message) values (?,?,?,?,?)")) { ps.setTimestamp(1, new Timestamp(new Date().getTime())); ps.setString(2, eventType); ps.setInt(3, eventId); ps.setInt(4, severity); ps.setString(5, message); ps.executeUpdate(); } } private void initConnectivity() throws NotificationServiceException { try { ds = new StandaloneDataSource(); } catch (SQLException exception) { throw new NotificationServiceException("Failed to obtain database connectivity", exception); } } }