/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.brooklyn.policy.ha;
import static org.testng.Assert.assertEquals;
import static org.testng.Assert.assertTrue;
import static org.testng.Assert.fail;
import java.util.List;
import java.util.concurrent.CopyOnWriteArrayList;
import org.apache.brooklyn.api.entity.EntitySpec;
import org.apache.brooklyn.api.mgmt.ManagementContext;
import org.apache.brooklyn.api.sensor.EnricherSpec;
import org.apache.brooklyn.api.sensor.Sensor;
import org.apache.brooklyn.api.sensor.SensorEvent;
import org.apache.brooklyn.api.sensor.SensorEventListener;
import org.apache.brooklyn.core.entity.Attributes;
import org.apache.brooklyn.core.entity.Entities;
import org.apache.brooklyn.core.entity.factory.ApplicationBuilder;
import org.apache.brooklyn.core.entity.lifecycle.Lifecycle;
import org.apache.brooklyn.core.entity.lifecycle.ServiceStateLogic;
import org.apache.brooklyn.core.entity.lifecycle.ServiceStateLogic.ServiceProblemsLogic;
import org.apache.brooklyn.core.test.entity.LocalManagementContextForTests;
import org.apache.brooklyn.core.test.entity.TestApplication;
import org.apache.brooklyn.core.test.entity.TestEntity;
import org.apache.brooklyn.test.Asserts;
import org.apache.brooklyn.test.EntityTestUtils;
import org.apache.brooklyn.util.collections.MutableMap;
import org.apache.brooklyn.util.time.Duration;
import org.apache.brooklyn.util.time.Time;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.testng.annotations.AfterMethod;
import org.testng.annotations.BeforeMethod;
import org.testng.annotations.Test;
import org.apache.brooklyn.policy.ha.HASensors.FailureDescriptor;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.collect.ImmutableMap;
/**
 * Tests for {@link ServiceFailureDetector}.
 * <p>
 * Each test attaches the detector to a {@link TestEntity}, manipulates the entity's
 * {@code SERVICE_UP} sensor, its problem indicators and/or its expected lifecycle state, and
 * then checks which {@link HASensors#ENTITY_FAILED} / {@link HASensors#ENTITY_RECOVERED} events
 * were received and what value {@code SERVICE_STATE_ACTUAL} takes.
 */
public class ServiceFailureDetectorTest {
    private static final Logger log = LoggerFactory.getLogger(ServiceFailureDetectorTest.class);

    /** Upper bound, in milliseconds, for the "eventually" assertions in this class. */
    private static final int TIMEOUT_MS = 10*1000;

    private ManagementContext managementContext;
    private TestApplication app;
    private TestEntity e1;

    /** Every ENTITY_FAILED / ENTITY_RECOVERED event received from {@link #e1}, in arrival order. */
    private List<SensorEvent<FailureDescriptor>> events;
    private SensorEventListener<FailureDescriptor> eventListener;

    /**
     * Creates a fresh management context, application and child entity, and subscribes a
     * recording listener to the entity's ENTITY_FAILED and ENTITY_RECOVERED sensors.
     */
    @BeforeMethod(alwaysRun=true)
    public void setUp() throws Exception {
        events = new CopyOnWriteArrayList<SensorEvent<FailureDescriptor>>();
        eventListener = new SensorEventListener<FailureDescriptor>() {
            @Override public void onEvent(SensorEvent<FailureDescriptor> event) {
                events.add(event);
            }
        };

        managementContext = new LocalManagementContextForTests();
        app = ApplicationBuilder.newManagedApp(TestApplication.class, managementContext);
        e1 = app.createAndManageChild(EntitySpec.create(TestEntity.class));
        e1.enrichers().add(ServiceStateLogic.newEnricherForServiceStateFromProblemsAndUp());

        app.getManagementContext().getSubscriptionManager().subscribe(e1, HASensors.ENTITY_FAILED, eventListener);
        app.getManagementContext().getSubscriptionManager().subscribe(e1, HASensors.ENTITY_RECOVERED, eventListener);
    }

    /** Destroys everything in the management context created by {@link #setUp()}, if any. */
    @AfterMethod(alwaysRun=true)
    public void tearDown() throws Exception {
        if (managementContext != null) Entities.destroyAll(managementContext);
    }

    /** A healthy, running entity must produce no events and must be reported as RUNNING. */
    @Test(groups="Integration") // Has a 1 second wait
    public void testNotNotifiedOfFailuresForHealthy() throws Exception {
        // Entity is already up and expected-running when the detector is attached
        e1.sensors().set(TestEntity.SERVICE_UP, true);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);

        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class));

        assertNoEventsContinually();
        assertEquals(e1.getAttribute(TestEntity.SERVICE_STATE_ACTUAL), Lifecycle.RUNNING);
    }

    /** Service-up going false on a running entity yields exactly one ENTITY_FAILED and ON_FIRE. */
    @Test
    public void testNotifiedOfFailure() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class));

        e1.sensors().set(TestEntity.SERVICE_UP, true);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);

        assertEquals(events.size(), 0, "events="+events);

        e1.sensors().set(TestEntity.SERVICE_UP, false);

        assertHasEventEventually(HASensors.ENTITY_FAILED, Predicates.<Object>equalTo(e1), null);
        assertEquals(events.size(), 1, "events="+events);
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.ON_FIRE);
    }

    /** A problem indicator on a running entity yields exactly one ENTITY_FAILED and ON_FIRE. */
    @Test
    public void testNotifiedOfFailureOnProblem() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class));

        e1.sensors().set(TestEntity.SERVICE_UP, true);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);

        assertEquals(events.size(), 0, "events="+events);

        ServiceProblemsLogic.updateProblemsIndicator(e1, "test", "foo");

        assertHasEventEventually(HASensors.ENTITY_FAILED, Predicates.<Object>equalTo(e1), null);
        assertEquals(events.size(), 1, "events="+events);
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.ON_FIRE);
    }

    /** Setting the expected state to ON_FIRE yields exactly one ENTITY_FAILED and ON_FIRE. */
    @Test
    public void testNotifiedOfFailureOnStateOnFire() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class));

        e1.sensors().set(TestEntity.SERVICE_UP, true);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.ON_FIRE);

        assertHasEventEventually(HASensors.ENTITY_FAILED, Predicates.<Object>equalTo(e1), null);
        assertEquals(events.size(), 1, "events="+events);
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.ON_FIRE);
    }

    /** Service-up returning to true after a failure yields ENTITY_RECOVERED and RUNNING. */
    @Test
    public void testNotifiedOfRecovery() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class));

        e1.sensors().set(TestEntity.SERVICE_UP, true);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);

        // Make the entity fail
        e1.sensors().set(TestEntity.SERVICE_UP, false);

        assertHasEventEventually(HASensors.ENTITY_FAILED, Predicates.<Object>equalTo(e1), null);
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.ON_FIRE);

        // And make the entity recover
        e1.sensors().set(TestEntity.SERVICE_UP, true);

        assertHasEventEventually(HASensors.ENTITY_RECOVERED, Predicates.<Object>equalTo(e1), null);
        assertEquals(events.size(), 2, "events="+events);
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);
    }

    /** Clearing the problem indicator after a failure yields ENTITY_RECOVERED and RUNNING. */
    @Test
    public void testNotifiedOfRecoveryFromProblems() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class));

        e1.sensors().set(TestEntity.SERVICE_UP, true);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);

        // Make the entity fail
        ServiceProblemsLogic.updateProblemsIndicator(e1, "test", "foo");

        assertHasEventEventually(HASensors.ENTITY_FAILED, Predicates.<Object>equalTo(e1), null);
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.ON_FIRE);

        // And make the entity recover
        ServiceProblemsLogic.clearProblemsIndicator(e1, "test");

        assertHasEventEventually(HASensors.ENTITY_RECOVERED, Predicates.<Object>equalTo(e1), null);
        assertEquals(events.size(), 2, "events="+events);
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);
    }

    /** With default config, an entity that was never up goes ON_FIRE without any event. */
    @Test(groups="Integration") // Has a 1 second wait
    public void testEmitsEntityFailureOnlyIfPreviouslyUp() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class));

        // Entity is down without ever having been up
        e1.sensors().set(TestEntity.SERVICE_UP, false);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);

        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.ON_FIRE);
        assertNoEventsContinually();
    }

    /** With ENTITY_FAILED_ONLY_IF_PREVIOUSLY_UP=false, a never-up entity still emits ENTITY_FAILED. */
    @Test
    public void testDisablingPreviouslyUpRequirementForEntityFailed() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class)
                .configure(ServiceFailureDetector.ENTITY_FAILED_ONLY_IF_PREVIOUSLY_UP, false));

        e1.sensors().set(TestEntity.SERVICE_UP, false);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);

        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.ON_FIRE);
        assertHasEventEventually(HASensors.ENTITY_FAILED, Predicates.<Object>equalTo(e1), null);
    }

    /**
     * With a practically-infinite on-fire stabilization delay, the state is still RUNNING
     * right after service-up goes false.
     */
    @Test
    public void testDisablingOnFire() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class)
                .configure(ServiceFailureDetector.SERVICE_ON_FIRE_STABILIZATION_DELAY, Duration.PRACTICALLY_FOREVER));

        // Set the entity to healthy
        e1.sensors().set(TestEntity.SERVICE_UP, true);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);
        EntityTestUtils.assertAttributeEqualsEventually(e1, Attributes.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);

        // Make the entity fail
        e1.sensors().set(TestEntity.SERVICE_UP, false);

        assertEquals(e1.getAttribute(TestEntity.SERVICE_STATE_ACTUAL), Lifecycle.RUNNING);
    }

    /**
     * With a 1s on-fire stabilization delay, the state is still RUNNING immediately and 100ms
     * after the failure, and becomes ON_FIRE eventually.
     */
    @Test(groups="Integration") // Has a 1 second wait
    public void testOnFireAfterDelay() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class)
                .configure(ServiceFailureDetector.SERVICE_ON_FIRE_STABILIZATION_DELAY, Duration.ONE_SECOND));

        // Set the entity to healthy
        e1.sensors().set(TestEntity.SERVICE_UP, true);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);
        EntityTestUtils.assertAttributeEqualsEventually(e1, Attributes.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);

        // Make the entity fail
        e1.sensors().set(TestEntity.SERVICE_UP, false);

        assertEquals(e1.getAttribute(TestEntity.SERVICE_STATE_ACTUAL), Lifecycle.RUNNING);
        Time.sleep(Duration.millis(100));
        assertEquals(e1.getAttribute(TestEntity.SERVICE_STATE_ACTUAL), Lifecycle.RUNNING);
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.ON_FIRE);
    }

    /**
     * With 1s stabilization delays for both on-fire and recovered: ENTITY_FAILED arrives while
     * the state is still RUNNING, and RUNNING is restored before ENTITY_RECOVERED arrives.
     */
    @Test(groups="Integration") // Has a 1 second wait
    public void testOnFailureDelayFromProblemAndRecover() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class)
                .configure(ServiceFailureDetector.SERVICE_ON_FIRE_STABILIZATION_DELAY, Duration.ONE_SECOND)
                .configure(ServiceFailureDetector.ENTITY_RECOVERED_STABILIZATION_DELAY, Duration.ONE_SECOND));

        // Set the entity to healthy
        e1.sensors().set(TestEntity.SERVICE_UP, true);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);
        EntityTestUtils.assertAttributeEqualsEventually(e1, Attributes.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);

        // Make the entity fail; won't set on-fire for 1s but will publish FAILED immediately.
        ServiceStateLogic.ServiceProblemsLogic.updateProblemsIndicator(e1, "test", "foo");
        EntityTestUtils.assertAttributeEqualsContinually(ImmutableMap.of("timeout", 100), e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);
        assertHasEventEventually(HASensors.ENTITY_FAILED, Predicates.<Object>equalTo(e1), null);
        assertEquals(e1.getAttribute(TestEntity.SERVICE_STATE_ACTUAL), Lifecycle.RUNNING);
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.ON_FIRE);

        // Now recover: will publish RUNNING immediately, but has 1s stabilisation for RECOVERED
        ServiceStateLogic.ServiceProblemsLogic.clearProblemsIndicator(e1, "test");
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);
        assertEquals(events.size(), 1, "events="+events);
        assertHasEventEventually(HASensors.ENTITY_RECOVERED, Predicates.<Object>equalTo(e1), null);
        assertEquals(events.size(), 2, "events="+events);
    }

    /** Without an expected state of RUNNING, service-up going false produces no events. */
    @Test(groups="Integration") // Has a 1 second wait
    public void testAttendsToServiceState() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class));

        e1.sensors().set(TestEntity.SERVICE_UP, true);
        // not counted as failed because not expected to be running
        e1.sensors().set(TestEntity.SERVICE_UP, false);

        assertNoEventsContinually();
    }

    /** While the expected state is STARTING, service-up going false produces no events. */
    @Test(groups="Integration") // Has a 1 second wait
    public void testOnlyReportsFailureIfRunning() throws Exception {
        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class));

        // Entity goes up then down, but is only expected to be STARTING
        ServiceStateLogic.setExpectedState(e1, Lifecycle.STARTING);
        e1.sensors().set(TestEntity.SERVICE_UP, true);
        e1.sensors().set(TestEntity.SERVICE_UP, false);

        assertNoEventsContinually();
    }

    /**
     * An entity already down when the detector is attached emits ENTITY_FAILED, given
     * ENTITY_FAILED_ONLY_IF_PREVIOUSLY_UP=false.
     */
    @Test
    public void testReportsFailureWhenAlreadyDownOnRegisteringPolicy() throws Exception {
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);
        e1.sensors().set(TestEntity.SERVICE_UP, false);

        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class)
                .configure(ServiceFailureDetector.ENTITY_FAILED_ONLY_IF_PREVIOUSLY_UP, false));

        assertHasEventEventually(HASensors.ENTITY_FAILED, Predicates.<Object>equalTo(e1), null);
    }

    /**
     * An entity already expected ON_FIRE when the detector is attached emits ENTITY_FAILED,
     * given ENTITY_FAILED_ONLY_IF_PREVIOUSLY_UP=false.
     */
    @Test
    public void testReportsFailureWhenAlreadyOnFireOnRegisteringPolicy() throws Exception {
        ServiceStateLogic.setExpectedState(e1, Lifecycle.ON_FIRE);

        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class)
                .configure(ServiceFailureDetector.ENTITY_FAILED_ONLY_IF_PREVIOUSLY_UP, false));

        assertHasEventEventually(HASensors.ENTITY_FAILED, Predicates.<Object>equalTo(e1), null);
    }

    /**
     * With ENTITY_FAILED_REPUBLISH_TIME set to 100ms, checks that at least 10 events accumulate
     * while the entity is failed, that the event count stops growing once ENTITY_RECOVERED has
     * been seen, and that ENTITY_RECOVERED is the last event. Irregular spacing between
     * consecutive events is only logged, not asserted.
     */
    @Test(groups="Integration") // Has a 1.5 second wait
    public void testRepublishedFailure() throws Exception {
        Duration republishPeriod = Duration.millis(100);

        e1.enrichers().add(EnricherSpec.create(ServiceFailureDetector.class)
                .configure(ServiceFailureDetector.ENTITY_FAILED_REPUBLISH_TIME, republishPeriod));

        // Set the entity to healthy
        e1.sensors().set(TestEntity.SERVICE_UP, true);
        ServiceStateLogic.setExpectedState(e1, Lifecycle.RUNNING);
        EntityTestUtils.assertAttributeEqualsEventually(e1, Attributes.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);

        // Make the entity fail;
        ServiceStateLogic.ServiceProblemsLogic.updateProblemsIndicator(e1, "test", "foo");
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.ON_FIRE);
        assertHasEventEventually(HASensors.ENTITY_FAILED, Predicates.<Object>equalTo(e1), null);

        //wait for at least 10 republish events (~1 sec)
        assertEventsSizeEventually(10);

        // Now recover
        ServiceStateLogic.ServiceProblemsLogic.clearProblemsIndicator(e1, "test");
        EntityTestUtils.assertAttributeEqualsEventually(e1, TestEntity.SERVICE_STATE_ACTUAL, Lifecycle.RUNNING);
        assertHasEventEventually(HASensors.ENTITY_RECOVERED, Predicates.<Object>equalTo(e1), null);

        //once recovered check no more failed events emitted periodically
        assertEventsSizeContinually(events.size());

        long periodMillis = republishPeriod.toMilliseconds();
        SensorEvent<FailureDescriptor> prevEvent = null;
        for (SensorEvent<FailureDescriptor> event : events) {
            if (prevEvent != null) {
                long repeatOffset = event.getTimestamp() - prevEvent.getTimestamp();
                long deviation = Math.abs(repeatOffset - periodMillis);
                if (deviation > periodMillis/10 &&
                        //warn only if recovered is too far away from the last failure
                        (!event.getSensor().equals(HASensors.ENTITY_RECOVERED) ||
                        repeatOffset > periodMillis)) {
                    log.error("The time between failure republish ({}ms) deviates too much from the expected {}. prevEvent={}, event={}",
                            repeatOffset, republishPeriod, prevEvent, event);
                }
            }
            prevEvent = event;
        }

        //make sure no republish takes place after recovered
        assertEquals(prevEvent.getSensor(), HASensors.ENTITY_RECOVERED);
    }

    /** Asserts that the number of recorded events stays exactly {@code size} for 500ms. */
    private void assertEventsSizeContinually(final int size) {
        Asserts.succeedsContinually(MutableMap.of("timeout", 500), new Runnable() {
            @Override
            public void run() {
                assertEquals(events.size(), size, "events="+events);
            }
        });
    }

    /** Asserts that at least {@code size} events are recorded within {@link #TIMEOUT_MS}. */
    private void assertEventsSizeEventually(final int size) {
        Asserts.succeedsEventually(MutableMap.of("timeout", TIMEOUT_MS), new Runnable() {
            @Override
            public void run() {
                assertTrue(events.size() >= size, "assertEventsSizeEventually expects at least " + size + " events but found " + events.size() + ": " + events);
            }
        });
    }

    /**
     * Fails unless some recorded event is for {@code sensor} and satisfies both predicates.
     *
     * @param sensor the sensor the event must have been published on
     * @param componentPredicate applied to the event value's component; {@code null} matches any
     * @param descriptionPredicate applied to the event value's description; {@code null} matches any
     */
    private void assertHasEvent(Sensor<?> sensor, Predicate<Object> componentPredicate, Predicate<? super CharSequence> descriptionPredicate) {
        for (SensorEvent<FailureDescriptor> event : events) {
            if (event.getSensor().equals(sensor) &&
                    (componentPredicate == null || componentPredicate.apply(event.getValue().getComponent())) &&
                    (descriptionPredicate == null || descriptionPredicate.apply(event.getValue().getDescription()))) {
                return;
            }
        }
        fail("No matching "+sensor+" event found; events="+events);
    }

    /** Retries {@link #assertHasEvent} until it passes or {@link #TIMEOUT_MS} elapses. */
    private void assertHasEventEventually(final Sensor<?> sensor, final Predicate<Object> componentPredicate, final Predicate<? super CharSequence> descriptionPredicate) {
        Asserts.succeedsEventually(MutableMap.of("timeout", TIMEOUT_MS), new Runnable() {
            @Override public void run() {
                assertHasEvent(sensor, componentPredicate, descriptionPredicate);
            }});
    }

    /** Asserts that no events are recorded for the default duration of {@code Asserts.succeedsContinually}. */
    private void assertNoEventsContinually() {
        Asserts.succeedsContinually(new Runnable() {
            @Override public void run() {
                assertTrue(events.isEmpty(), "events="+events);
            }});
    }
}