/** * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.apache.aurora.scheduler.app; import java.io.File; import java.net.InetSocketAddress; import java.util.Arrays; import java.util.Set; import java.util.concurrent.CountDownLatch; import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; import java.util.concurrent.atomic.AtomicReference; import com.google.common.base.Optional; import com.google.common.base.Throwables; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Iterables; import com.google.common.hash.Hashing; import com.google.common.net.InetAddresses; import com.google.common.util.concurrent.Atomics; import com.google.common.util.concurrent.MoreExecutors; import com.google.common.util.concurrent.ThreadFactoryBuilder; import com.google.inject.AbstractModule; import com.google.inject.Guice; import com.google.inject.Injector; import com.google.inject.Key; import com.google.inject.Module; import org.apache.aurora.GuavaUtils; import org.apache.aurora.codec.ThriftBinaryCodec.CodingException; import org.apache.aurora.common.application.Lifecycle; import org.apache.aurora.common.stats.Stats; import org.apache.aurora.common.zookeeper.Credentials; import org.apache.aurora.common.zookeeper.ServerSetImpl; import org.apache.aurora.common.zookeeper.ZooKeeperClient; import org.apache.aurora.common.zookeeper.testing.BaseZooKeeperClientTest; import org.apache.aurora.gen.HostAttributes; import org.apache.aurora.gen.MaintenanceMode; import org.apache.aurora.gen.ScheduleStatus; import org.apache.aurora.gen.ScheduledTask; import org.apache.aurora.gen.ServerInfo; import org.apache.aurora.gen.storage.LogEntry; import org.apache.aurora.gen.storage.Op; import org.apache.aurora.gen.storage.SaveFrameworkId; import org.apache.aurora.gen.storage.SaveTasks; import org.apache.aurora.gen.storage.Snapshot; import org.apache.aurora.gen.storage.Transaction; import org.apache.aurora.gen.storage.storageConstants; import org.apache.aurora.scheduler.AppStartup; import org.apache.aurora.scheduler.TierModule; import org.apache.aurora.scheduler.base.TaskTestUtil; import org.apache.aurora.scheduler.configuration.executor.ExecutorSettings; import org.apache.aurora.scheduler.discovery.ServiceDiscoveryModule; import org.apache.aurora.scheduler.discovery.ZooKeeperConfig; import org.apache.aurora.scheduler.log.Log; import org.apache.aurora.scheduler.log.Log.Entry; import org.apache.aurora.scheduler.log.Log.Position; import org.apache.aurora.scheduler.log.Log.Stream; import org.apache.aurora.scheduler.mesos.DriverFactory; import org.apache.aurora.scheduler.mesos.DriverSettings; import org.apache.aurora.scheduler.mesos.FrameworkInfoFactory; import org.apache.aurora.scheduler.mesos.TestExecutorSettings; import org.apache.aurora.scheduler.storage.backup.BackupModule; import org.apache.aurora.scheduler.storage.entities.IHostAttributes; import org.apache.aurora.scheduler.storage.entities.IScheduledTask; import org.apache.aurora.scheduler.storage.entities.IServerInfo; import org.apache.aurora.scheduler.storage.log.EntrySerializer; import org.apache.aurora.scheduler.storage.log.LogStorageModule; import org.apache.aurora.scheduler.storage.log.SnapshotStoreImpl; import org.apache.aurora.scheduler.storage.log.testing.LogOpMatcher; import org.apache.aurora.scheduler.storage.log.testing.LogOpMatcher.StreamMatcher; import org.apache.mesos.Protos; import org.apache.mesos.Scheduler; import org.apache.mesos.SchedulerDriver; import org.apache.mesos.v1.Protos.FrameworkInfo; import org.apache.mesos.v1.Protos.Resource; import org.easymock.Capture; import org.easymock.EasyMock; import org.easymock.IMocksControl; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.rules.TemporaryFolder; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import static org.apache.aurora.common.testing.easymock.EasyMockTest.createCapture; import static org.apache.aurora.scheduler.resources.ResourceTestUtil.mesosScalar; import static org.apache.aurora.scheduler.resources.ResourceType.CPUS; import static org.apache.aurora.scheduler.resources.ResourceType.RAM_MB; import static org.easymock.EasyMock.capture; import static org.easymock.EasyMock.createControl; import static org.easymock.EasyMock.eq; import static org.easymock.EasyMock.expect; import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; public class SchedulerIT extends BaseZooKeeperClientTest { private static final Logger LOG = LoggerFactory.getLogger(SchedulerIT.class); private static final String CLUSTER_NAME = "integration_test_cluster"; private static final String SERVERSET_PATH = "/fake/service/path"; private static final String STATS_URL_PREFIX = "fake_url"; private static final String FRAMEWORK_ID = "integration_test_framework_id"; private static final Protos.MasterInfo MASTER = Protos.MasterInfo.newBuilder() .setId("master-id") .setIp(InetAddresses.coerceToInteger(InetAddresses.forString("1.2.3.4"))) //NOPMD .setPort(5050).build(); private static final IHostAttributes HOST_ATTRIBUTES = IHostAttributes.build(new HostAttributes() .setHost("host") .setSlaveId("slave-id") .setMode(MaintenanceMode.NONE) .setAttributes(ImmutableSet.of())); private static final FrameworkInfo BASE_INFO = FrameworkInfo.newBuilder() .setUser("framework user") .setName("test framework") .build(); private static final DriverSettings SETTINGS = new DriverSettings( "fakemaster", Optional.absent()); private final ExecutorService executor = Executors.newCachedThreadPool( new ThreadFactoryBuilder().setNameFormat("SchedulerIT-%d").setDaemon(true).build()); private final AtomicReference<Optional<RuntimeException>> mainException = Atomics.newReference(Optional.absent()); private IMocksControl control; private SchedulerDriver driver; private DriverFactory driverFactory; private Log log; private Stream logStream; private StreamMatcher streamMatcher; private EntrySerializer entrySerializer; private ZooKeeperClient zkClient; private File backupDir; @Rule public TemporaryFolder temporaryFolder = new TemporaryFolder(); @Before public void mySetUp() throws Exception { control = createControl(); addTearDown(() -> { if (mainException.get().isPresent()) { RuntimeException e = mainException.get().get(); LOG.error("Scheduler main exited with an exception", e); fail(e.getMessage()); } control.verify(); }); backupDir = temporaryFolder.newFolder(); driver = control.createMock(SchedulerDriver.class); // This is necessary to allow driver to block, otherwise it would stall other mocks. EasyMock.makeThreadSafe(driver, false); driverFactory = control.createMock(DriverFactory.class); log = control.createMock(Log.class); logStream = control.createMock(Stream.class); streamMatcher = LogOpMatcher.matcherFor(logStream); entrySerializer = new EntrySerializer.EntrySerializerImpl( LogStorageModule.MAX_LOG_ENTRY_SIZE.get(), Hashing.md5()); zkClient = createZkClient(); } private void startScheduler() throws Exception { // TODO(wfarner): Try to accomplish all this by subclassing SchedulerMain and actually using // AppLauncher. Module testModule = new AbstractModule() { @Override protected void configure() { bind(DriverFactory.class).toInstance(driverFactory); bind(FrameworkInfoFactory.class).toInstance(() -> BASE_INFO); bind(DriverSettings.class).toInstance(SETTINGS); bind(Log.class).toInstance(log); Set<Resource> overhead = ImmutableSet.of( mesosScalar(CPUS, 0.1), mesosScalar(RAM_MB, 1)); bind(ExecutorSettings.class) .toInstance(TestExecutorSettings.thermosOnlyWithOverhead(overhead)); install(new BackupModule(backupDir, SnapshotStoreImpl.class)); bind(IServerInfo.class).toInstance( IServerInfo.build( new ServerInfo() .setClusterName(CLUSTER_NAME) .setStatsUrlPrefix(STATS_URL_PREFIX))); } }; ZooKeeperConfig zkClientConfig = ZooKeeperConfig.create( true, // useCurator ImmutableList.of(InetSocketAddress.createUnresolved("localhost", getPort()))) .withCredentials(Credentials.digestCredentials("mesos", "mesos")); SchedulerMain main = SchedulerMain.class.newInstance(); Injector injector = Guice.createInjector( ImmutableList.<Module>builder() .add(SchedulerMain.getUniversalModule()) .add(new TierModule(TaskTestUtil.TIER_CONFIG)) .add(new LogStorageModule()) .add(new ServiceDiscoveryModule(zkClientConfig, SERVERSET_PATH)) .add(testModule) .build() ); injector.injectMembers(main); Lifecycle lifecycle = injector.getInstance(Lifecycle.class); executor.submit(() -> { try { main.run(); } catch (RuntimeException e) { mainException.set(Optional.of(e)); executor.shutdownNow(); } }); addTearDown(() -> { lifecycle.shutdown(); MoreExecutors.shutdownAndAwaitTermination(executor, 10, TimeUnit.SECONDS); }); injector.getInstance(Key.get(GuavaUtils.ServiceManagerIface.class, AppStartup.class)) .awaitHealthy(); } private void awaitSchedulerReady() throws Exception { executor.submit(() -> { ServerSetImpl schedulerService = new ServerSetImpl(zkClient, SERVERSET_PATH); final CountDownLatch schedulerReady = new CountDownLatch(1); schedulerService.watch(hostSet -> { if (!hostSet.isEmpty()) { schedulerReady.countDown(); } }); // A timeout is used because certain types of assertion errors (mocks) will not surface // until the main test thread exits this body of code. assertTrue(schedulerReady.await(5L, TimeUnit.MINUTES)); return null; }).get(); } private final AtomicInteger curPosition = new AtomicInteger(); private static class IntPosition implements Position { private final int pos; IntPosition(int pos) { this.pos = pos; } @Override public int compareTo(Position position) { return pos - ((IntPosition) position).pos; } } private IntPosition nextPosition() { return new IntPosition(curPosition.incrementAndGet()); } private Iterable<Entry> toEntries(LogEntry... entries) { return Iterables.transform(Arrays.asList(entries), entry -> () -> { try { return Iterables.getFirst(entrySerializer.serialize(entry), null); } catch (CodingException e) { throw Throwables.propagate(e); } }); } private static IScheduledTask makeTask(String id, ScheduleStatus status) { ScheduledTask builder = TaskTestUtil.addStateTransition( TaskTestUtil.makeTask(id, TaskTestUtil.JOB), status, 100) .newBuilder(); builder.getAssignedTask() .setSlaveId(HOST_ATTRIBUTES.getSlaveId()) .setSlaveHost(HOST_ATTRIBUTES.getHost()); return IScheduledTask.build(builder); } @Test public void testLaunch() throws Exception { Capture<Scheduler> scheduler = createCapture(); expect(driverFactory.create( capture(scheduler), eq(SETTINGS.getCredentials()), eq(BASE_INFO), eq(SETTINGS.getMasterUri()))) .andReturn(driver).anyTimes(); IScheduledTask snapshotTask = makeTask("snapshotTask", ScheduleStatus.ASSIGNED); IScheduledTask transactionTask = makeTask("transactionTask", ScheduleStatus.RUNNING); Iterable<Entry> recoveredEntries = toEntries( LogEntry.snapshot(new Snapshot() .setTasks(ImmutableSet.of(snapshotTask.newBuilder())) .setHostAttributes(ImmutableSet.of(HOST_ATTRIBUTES.newBuilder()))), LogEntry.transaction(new Transaction( ImmutableList.of(Op.saveTasks( new SaveTasks(ImmutableSet.of(transactionTask.newBuilder())))), storageConstants.CURRENT_SCHEMA_VERSION))); expect(log.open()).andReturn(logStream); expect(logStream.readAll()).andReturn(recoveredEntries.iterator()).anyTimes(); streamMatcher.expectTransaction(Op.saveFrameworkId(new SaveFrameworkId(FRAMEWORK_ID))) .andReturn(nextPosition()); CountDownLatch driverStarted = new CountDownLatch(1); expect(driver.start()).andAnswer(() -> { driverStarted.countDown(); return Protos.Status.DRIVER_RUNNING; }); // Try to be a good test suite citizen by releasing the blocked thread when the test case exits. CountDownLatch testCompleted = new CountDownLatch(1); expect(driver.join()).andAnswer(() -> { testCompleted.await(); return Protos.Status.DRIVER_STOPPED; }); addTearDown(testCompleted::countDown); expect(driver.stop(true)).andReturn(Protos.Status.DRIVER_STOPPED).anyTimes(); control.replay(); startScheduler(); driverStarted.await(); scheduler.getValue().registered( driver, Protos.FrameworkID.newBuilder().setValue(FRAMEWORK_ID).build(), MASTER); awaitSchedulerReady(); assertEquals(0L, Stats.<Long>getVariable("task_store_PENDING").read().longValue()); assertEquals(1L, Stats.<Long>getVariable("task_store_ASSIGNED").read().longValue()); assertEquals(1L, Stats.<Long>getVariable("task_store_RUNNING").read().longValue()); // TODO(William Farner): Send a thrift RPC to the scheduler. // TODO(William Farner): Also send an admin thrift RPC to verify capability (e.g. ROOT) mapping. } }