/* * Copyright (c) 2008-2011 EMC Corporation * All Rights Reserved */ package com.emc.storageos.coordinator.common.impl; import java.io.IOException; import java.net.URI; import java.nio.channels.FileChannel; import java.nio.channels.FileLock; import java.nio.file.Path; import java.nio.file.Paths; import java.nio.file.StandardOpenOption; import java.util.List; import java.util.UUID; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.emc.storageos.coordinator.exceptions.CoordinatorException; import com.emc.storageos.services.util.FileUtils; import org.apache.commons.lang3.StringUtils; import org.apache.curator.framework.CuratorFramework; import org.apache.curator.framework.CuratorFrameworkFactory; import org.apache.curator.framework.api.UnhandledErrorListener; import org.apache.curator.framework.state.ConnectionState; import org.apache.curator.framework.state.ConnectionStateListener; import org.apache.curator.retry.RetryUntilElapsed; import org.apache.curator.utils.EnsurePath; import org.apache.zookeeper.data.Stat; /** * Wraps CuratorFramework with spring friendly config setters and default * retry policy */ public class ZkConnection { private static final Logger _logger = LoggerFactory.getLogger(ZkConnection.class); // default sleep time is 5 seconds between connection attempts when we lose connection to cluster private static final int RETRY_INTERVAL_MS = 5 * 1000; // connection times out at 3 minutes private static final int DEFAULT_CONN_TIMEOUT = 180 * 1000; // session times out at 9 minutes, after Curator 2.3, the retry times will depends on // Session timeout value divide connection timeout value. Currently, set session timeout value to // 3 times of connection timeout value, which means will retry 3 times. private static final int DEFAULT_TIMEOUT_MS = 3 * DEFAULT_CONN_TIMEOUT; // lock file name when updating site id file private static final String SITEID_LOCKFILE="site_id_lock"; // zk cluster connection private CuratorFramework _zkConnection; private String _connectString; // zk timeout ms private int _timeoutMs = DEFAULT_TIMEOUT_MS; private String siteIdFile; private String siteId; public String getSiteId() { return siteId; } public void setSiteId(String siteId) { this.siteId = siteId; } public void setSiteIdFile(String siteIdFile) { this.siteIdFile = siteIdFile; } /** * Set coordinator cluster node URI's and build a connector. * <p/> * Node URI should be specified as * <p/> * coordinator://<node ip>:<port> * * @param server server URI list */ public void setServer(List<URI> server) throws IOException { StringBuilder connectString = new StringBuilder(); for (int i = 0; i < server.size(); i++) { URI uri = server.get(i); connectString.append(String.format("%1$s:%2$d,", uri.getHost(), uri.getPort())); } _connectString = connectString.substring(0, connectString.length() - 1); } /** * Set zk session timeout in ms * * @param timeoutMs timeout in ms */ public void setTimeoutMs(int timeoutMs) { _timeoutMs = timeoutMs; } /** * Builds zk connector. Note that this method does not initiate a connection. {@link ZkConnection#connect()} must be called to connect * to cluster. * <p/> * This separation is provided so that callbacks can be setup separately prior to connection to cluster. */ public void build() { try { _zkConnection = CuratorFrameworkFactory.builder().connectString(_connectString) .connectionTimeoutMs(DEFAULT_CONN_TIMEOUT) .canBeReadOnly(true) .sessionTimeoutMs(_timeoutMs).retryPolicy( new RetryUntilElapsed(_timeoutMs, RETRY_INTERVAL_MS)).build(); _zkConnection.getUnhandledErrorListenable().addListener(new UnhandledErrorListener() { @Override public void unhandledError(String message, Throwable e) { _logger.warn("Unknown exception in curator stack", e); } }); _zkConnection.getConnectionStateListenable().addListener(new ConnectionStateListener() { @Override public void stateChanged(CuratorFramework client, ConnectionState newState) { _logger.info("Current connection state {}", newState); } }); if (FileUtils.exists(siteIdFile)) { siteId = new String(FileUtils.readDataFromFile(siteIdFile)); siteId = siteId.trim(); _logger.info("Current site id is {}", siteId); } } catch (Exception e) { throw CoordinatorException.fatals.failedToBuildZKConnector(e); } } /** * Connect to ZK cluster. As long quorum of nodes are available, * client can talk to a cluster. If connection drop, this implementation will * continuously retry sleeping 5 seconds in between. */ public synchronized void connect() { if (!_zkConnection.isStarted()) { _zkConnection.start(); } // check if site id exists if (StringUtils.isEmpty(siteId)) { generateSiteId(); } } /** * Disconnect from ZK cluster */ public synchronized void disconnect() { if (_zkConnection.isStarted()) { _zkConnection.close(); } } /** * Get ZK connection * * @return zk connection */ public CuratorFramework curator() { return _zkConnection; } /** * Generate site unique id for current cluster. UUID is formed as 2 parts * - creation time of znode /sites * - hashcode of list zk server IPs * The uuid is stored at a local file specified by siteIdFile. It was generated * only once during first boot */ private void generateSiteId() { try { // get creation time of znode /sites EnsurePath siteZkPath = new EnsurePath(ZkPath.SITES.toString()); siteZkPath.ensure(curator().getZookeeperClient()); Stat stat = curator().checkExists().forPath(ZkPath.SITES.toString()); long ctime = stat.getCtime(); // calculate hash code for node ip list int len = _connectString.length(); int ipHashHigh = _connectString.substring(0, len / 2).hashCode(); int ipHashLow = _connectString.substring(len / 2).hashCode(); long ipHash = (((long)ipHashHigh) << 32) | (((long)ipHashLow) & 0x00000000FFFFFFFFL); siteId = createTimeUUID(ctime, ipHash); _logger.info("Site UUID is {}", siteId); if (!FileUtils.exists(siteIdFile)) { // grab a lock file before writing site id file String lockFile = FileUtils.generateTmpFileName(SITEID_LOCKFILE); if (!FileUtils.exists(lockFile)) { FileUtils.writePlainFile(lockFile, "".getBytes()); } Path path = Paths.get(lockFile); FileChannel fileChannel = FileChannel.open(path, StandardOpenOption.WRITE); try (FileLock lock = fileChannel.lock()) { FileUtils.writePlainFile(siteIdFile, siteId.getBytes()); _logger.info("Write site id {} to file", siteId); } } } catch (Exception ex) { _logger.error("Cannot generate site uuid", ex); throw CoordinatorException.fatals.failedToBuildZKConnector(ex); } } /** * Create is version 1 UUID(time based) * * @param timestamp timestamp in milliseconds * @param leastSigBits least significant bits for the uuid */ private String createTimeUUID(long timestamp, long leastSigBits) { long mostSigBits; long timeToUse = (timestamp * 10000) + 0x01B21DD213814000L; // time low mostSigBits = timeToUse << 32; // time mid mostSigBits |= (timeToUse & 0xFFFF00000000L) >> 16; // time hi and version mostSigBits |= 0x1000 | ((timeToUse >> 48) & 0x0FFF); // version 1 return new UUID(mostSigBits, leastSigBits).toString(); } }