/*-
* -\-\-
* Helios Services
* --
* Copyright (C) 2016 Spotify AB
* --
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* -/-/-
*/
package com.spotify.helios.servicescommon;
import static com.google.common.base.Charsets.UTF_8;
import static com.google.common.collect.Lists.reverse;
import static com.spotify.helios.servicescommon.coordination.ZooKeeperOperations.check;
import static com.spotify.helios.servicescommon.coordination.ZooKeeperOperations.create;
import static com.spotify.helios.servicescommon.coordination.ZooKeeperOperations.delete;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import com.spotify.helios.common.HeliosRuntimeException;
import com.spotify.helios.common.descriptors.JobId;
import com.spotify.helios.master.HostNotFoundException;
import com.spotify.helios.master.HostStillInUseException;
import com.spotify.helios.servicescommon.coordination.Paths;
import com.spotify.helios.servicescommon.coordination.ZooKeeperClient;
import com.spotify.helios.servicescommon.coordination.ZooKeeperOperation;
import java.util.List;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.KeeperException.NoNodeException;
import org.apache.zookeeper.data.Stat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
public class ZooKeeperRegistrarUtil {
private static final Logger log = LoggerFactory.getLogger(ZooKeeperRegistrarUtil.class);
public static boolean isHostRegistered(final ZooKeeperClient client, final String host) {
try {
final Stat stat = client.exists(Paths.configHostId(host));
return stat != null;
} catch (KeeperException e) {
throw new HeliosRuntimeException("getting host " + host + " id failed", e);
}
}
public static void registerHost(final ZooKeeperClient client, final String idPath,
final String hostname, final String hostId)
throws KeeperException {
log.info("registering host: {}", hostname);
// This would've been nice to do in a transaction but PathChildrenCache ensures paths
// so we can't know what paths already exist so assembling a suitable transaction is too
// painful.
client.ensurePath(Paths.configHost(hostname));
client.ensurePath(Paths.configHostJobs(hostname));
client.ensurePath(Paths.configHostPorts(hostname));
client.ensurePath(Paths.statusHost(hostname));
client.ensurePath(Paths.statusHostJobs(hostname));
// Finish registration by creating the id node last
client.createAndSetData(idPath, hostId.getBytes(UTF_8));
}
/**
* Re-register an agent with a different host id. Will remove the existing status of the agent
* but preserve any jobs deployed to the host and their history.
* @param client ZooKeeperClient
* @param host Host
* @param hostId ID of the host
* @throws HostNotFoundException If the hostname we are trying to re-register as doesn't exist.
* @throws KeeperException If an unexpected zookeeper error occurs.
*/
public static void reRegisterHost(final ZooKeeperClient client,
final String host, final String hostId)
throws HostNotFoundException, KeeperException {
// * Delete everything in the /status/hosts/<hostname> subtree
// * Don't delete any history for the job (on the host)
// * DON'T touch anything in the /config/hosts/<hostname> subtree, except updating the host id
log.info("re-registering host: {}, new host id: {}", host, hostId);
try {
final List<ZooKeeperOperation> operations = Lists.newArrayList();
// Check that the host exists in ZK
operations.add(check(Paths.configHost(host)));
// Remove the host status
final List<String> nodes = safeListRecursive(client, Paths.statusHost(host));
for (final String node : reverse(nodes)) {
operations.add(delete(node));
}
// ...and re-create the /status/hosts/<host>/jobs node + parent
operations.add(create(Paths.statusHost(host)));
operations.add(create(Paths.statusHostJobs(host)));
// Update the host ID
// We don't have WRITE permissions to the node, so delete and re-create it.
operations.add(delete(Paths.configHostId(host)));
operations.add(create(Paths.configHostId(host), hostId.getBytes(UTF_8)));
client.transaction(operations);
} catch (NoNodeException e) {
throw new HostNotFoundException(host);
} catch (KeeperException e) {
throw new HeliosRuntimeException(e);
}
}
public static void deregisterHost(final ZooKeeperClient client, final String host)
throws HostNotFoundException, HostStillInUseException {
log.info("deregistering host: {}", host);
// TODO (dano): handle retry failures
try {
final List<ZooKeeperOperation> operations = Lists.newArrayList();
if (client.exists(Paths.configHost(host)) == null) {
throw new HostNotFoundException("host [" + host + "] does not exist");
}
// Remove all jobs deployed to this host
final List<String> jobs = safeGetChildren(client, Paths.configHostJobs(host));
for (final String jobString : jobs) {
final JobId job = JobId.fromString(jobString);
final String hostJobPath = Paths.configHostJob(host, job);
final List<String> nodes = safeListRecursive(client, hostJobPath);
for (final String node : reverse(nodes)) {
operations.add(delete(node));
}
if (client.exists(Paths.configJobHost(job, host)) != null) {
operations.add(delete(Paths.configJobHost(job, host)));
}
// Clean out the history for each job
final List<String> history = safeListRecursive(client, Paths.historyJobHost(job, host));
for (final String s : reverse(history)) {
operations.add(delete(s));
}
}
operations.add(delete(Paths.configHostJobs(host)));
// Remove the host status
final List<String> nodes = safeListRecursive(client, Paths.statusHost(host));
for (final String node : reverse(nodes)) {
operations.add(delete(node));
}
// Remove port allocations
final List<String> ports = safeGetChildren(client, Paths.configHostPorts(host));
for (final String port : ports) {
operations.add(delete(Paths.configHostPort(host, Integer.valueOf(port))));
}
operations.add(delete(Paths.configHostPorts(host)));
// Remove host id
final String idPath = Paths.configHostId(host);
if (client.exists(idPath) != null) {
operations.add(delete(idPath));
}
// Remove host config root
operations.add(delete(Paths.configHost(host)));
client.transaction(operations);
} catch (NoNodeException e) {
throw new HostNotFoundException(host);
} catch (KeeperException e) {
throw new HeliosRuntimeException(e);
}
}
private static List<String> safeGetChildren(final ZooKeeperClient client, final String path) {
try {
return client.getChildren(path);
} catch (KeeperException ignore) {
return ImmutableList.of();
}
}
private static List<String> safeListRecursive(final ZooKeeperClient client, final String path)
throws KeeperException {
try {
return client.listRecursive(path);
} catch (NoNodeException e) {
return ImmutableList.of();
}
}
}