package org.apache.solr.cloud; /* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ import java.io.File; import java.io.IOException; import java.io.UnsupportedEncodingException; import java.net.InetAddress; import java.net.NetworkInterface; import java.net.URLEncoder; import java.net.UnknownHostException; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.Enumeration; import java.util.HashMap; import java.util.Iterator; import java.util.List; import java.util.Locale; import java.util.Map; import java.util.Properties; import java.util.Set; import java.util.concurrent.Future; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; import org.apache.commons.io.FileUtils; import org.apache.commons.lang.StringUtils; import org.apache.solr.client.solrj.impl.HttpSolrServer; import org.apache.solr.client.solrj.request.CoreAdminRequest.WaitForState; import org.apache.solr.common.SolrException; import org.apache.solr.common.SolrException.ErrorCode; import org.apache.solr.common.cloud.BeforeReconnect; import org.apache.solr.common.cloud.ClusterState; import org.apache.solr.common.cloud.DefaultConnectionStrategy; import org.apache.solr.common.cloud.DocCollection; import org.apache.solr.common.cloud.OnReconnect; import org.apache.solr.common.cloud.Replica; import org.apache.solr.common.cloud.Slice; import org.apache.solr.common.cloud.SolrZkClient; import org.apache.solr.common.cloud.ZkCmdExecutor; import org.apache.solr.common.cloud.ZkCoreNodeProps; import org.apache.solr.common.cloud.ZkNodeProps; import org.apache.solr.common.cloud.ZkStateReader; import org.apache.solr.common.cloud.ZooKeeperException; import org.apache.solr.common.params.SolrParams; import org.apache.solr.common.util.URLUtil; import org.apache.solr.core.CoreContainer; import org.apache.solr.core.CoreDescriptor; import org.apache.solr.core.SolrCore; import org.apache.solr.handler.component.ShardHandler; import org.apache.solr.update.UpdateLog; import org.apache.zookeeper.CreateMode; import org.apache.zookeeper.KeeperException; import org.apache.zookeeper.KeeperException.NoNodeException; import org.apache.zookeeper.KeeperException.SessionExpiredException; import org.apache.zookeeper.data.Stat; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * Handle ZooKeeper interactions. * * notes: loads everything on init, creates what's not there - further updates * are prompted with Watches. * * TODO: exceptions during shutdown on attempts to update cloud state * */ public final class ZkController { private static Logger log = LoggerFactory.getLogger(ZkController.class); static final String NEWL = System.getProperty("line.separator"); private final boolean SKIP_AUTO_RECOVERY = Boolean.getBoolean("solrcloud.skip.autorecovery"); private final DistributedQueue overseerJobQueue; private final DistributedQueue overseerCollectionQueue; private final DistributedMap overseerRunningMap; private final DistributedMap overseerCompletedMap; private final DistributedMap overseerFailureMap; public static final String CONFIGS_ZKNODE = "/configs"; public final static String COLLECTION_PARAM_PREFIX="collection."; public final static String CONFIGNAME_PROP="configName"; static class ContextKey { private String collection; private String coreNodeName; public ContextKey(String collection, String coreNodeName) { this.collection = collection; this.coreNodeName = coreNodeName; } @Override public int hashCode() { final int prime = 31; int result = 1; result = prime * result + ((collection == null) ? 0 : collection.hashCode()); result = prime * result + ((coreNodeName == null) ? 0 : coreNodeName.hashCode()); return result; } @Override public boolean equals(Object obj) { if (this == obj) return true; if (obj == null) return false; if (getClass() != obj.getClass()) return false; ContextKey other = (ContextKey) obj; if (collection == null) { if (other.collection != null) return false; } else if (!collection.equals(other.collection)) return false; if (coreNodeName == null) { if (other.coreNodeName != null) return false; } else if (!coreNodeName.equals(other.coreNodeName)) return false; return true; } } private final Map<ContextKey, ElectionContext> electionContexts = Collections.synchronizedMap(new HashMap<ContextKey, ElectionContext>()); private final SolrZkClient zkClient; private final ZkCmdExecutor cmdExecutor; private final ZkStateReader zkStateReader; private final LeaderElector leaderElector; private final String zkServerAddress; // example: 127.0.0.1:54062/solr private final String localHostPort; // example: 54065 private final String localHostContext; // example: solr private final String hostName; // example: 127.0.0.1 private final String nodeName; // example: 127.0.0.1:54065_solr private final String baseURL; // example: http://127.0.0.1:54065/solr private LeaderElector overseerElector; // for now, this can be null in tests, in which case recovery will be inactive, and other features // may accept defaults or use mocks rather than pulling things from a CoreContainer private CoreContainer cc; protected volatile Overseer overseer; private int leaderVoteWait; private int leaderConflictResolveWait; private boolean genericCoreNodeNames; private int clientTimeout; private volatile boolean isClosed; public ZkController(final CoreContainer cc, String zkServerAddress, int zkClientTimeout, int zkClientConnectTimeout, String localHost, String locaHostPort, String localHostContext, int leaderVoteWait, int leaderConflictResolveWait, boolean genericCoreNodeNames, final CurrentCoreDescriptorProvider registerOnReconnect) throws InterruptedException, TimeoutException, IOException { if (cc == null) throw new IllegalArgumentException("CoreContainer cannot be null."); this.cc = cc; this.genericCoreNodeNames = genericCoreNodeNames; // be forgiving and strip this off leading/trailing slashes // this allows us to support users specifying hostContext="/" in // solr.xml to indicate the root context, instead of hostContext="" // which means the default of "solr" localHostContext = trimLeadingAndTrailingSlashes(localHostContext); this.zkServerAddress = zkServerAddress; this.localHostPort = locaHostPort; this.localHostContext = localHostContext; this.hostName = normalizeHostName(localHost); this.nodeName = generateNodeName(this.hostName, this.localHostPort, this.localHostContext); this.leaderVoteWait = leaderVoteWait; this.leaderConflictResolveWait = leaderConflictResolveWait; this.clientTimeout = zkClientTimeout; zkClient = new SolrZkClient(zkServerAddress, zkClientTimeout, zkClientConnectTimeout, new DefaultConnectionStrategy(), // on reconnect, reload cloud info new OnReconnect() { @Override public void command() { try { // this is troublesome - we dont want to kill anything the old // leader accepted // though I guess sync will likely get those updates back? But // only if // he is involved in the sync, and he certainly may not be // ExecutorUtil.shutdownAndAwaitTermination(cc.getCmdDistribExecutor()); // we need to create all of our lost watches // seems we dont need to do this again... // Overseer.createClientNodes(zkClient, getNodeName()); cc.cancelCoreRecoveries(); registerAllCoresAsDown(registerOnReconnect, false); ElectionContext context = new OverseerElectionContext(zkClient, overseer, getNodeName()); ElectionContext prevContext = overseerElector.getContext(); if (prevContext != null) { prevContext.cancelElection(); } overseerElector.setup(context); overseerElector.joinElection(context, true); zkStateReader.createClusterStateWatchersAndUpdate(); // we have to register as live first to pick up docs in the buffer createEphemeralLiveNode(); List<CoreDescriptor> descriptors = registerOnReconnect .getCurrentDescriptors(); // re register all descriptors if (descriptors != null) { for (CoreDescriptor descriptor : descriptors) { // TODO: we need to think carefully about what happens when it // was // a leader that was expired - as well as what to do about // leaders/overseers // with connection loss try { register(descriptor.getName(), descriptor, true, true); } catch (Exception e) { SolrException.log(log, "Error registering SolrCore", e); } } } } catch (InterruptedException e) { // Restore the interrupted status Thread.currentThread().interrupt(); throw new ZooKeeperException( SolrException.ErrorCode.SERVER_ERROR, "", e); } catch (Exception e) { SolrException.log(log, "", e); throw new ZooKeeperException( SolrException.ErrorCode.SERVER_ERROR, "", e); } } }, new BeforeReconnect() { @Override public void command() { try { ZkController.this.overseer.close(); } catch (Exception e) { log.error("Error trying to stop any Overseer threads", e); } markAllAsNotLeader(registerOnReconnect); } }); this.overseerJobQueue = Overseer.getInQueue(zkClient); this.overseerCollectionQueue = Overseer.getCollectionQueue(zkClient); this.overseerRunningMap = Overseer.getRunningMap(zkClient); this.overseerCompletedMap = Overseer.getCompletedMap(zkClient); this.overseerFailureMap = Overseer.getFailureMap(zkClient); cmdExecutor = new ZkCmdExecutor(zkClientTimeout); leaderElector = new LeaderElector(zkClient); zkStateReader = new ZkStateReader(zkClient); this.baseURL = zkStateReader.getBaseUrlForNodeName(this.nodeName); init(registerOnReconnect); } public int getLeaderVoteWait() { return leaderVoteWait; } public void forceOverSeer(){ try { zkClient.delete("/overseer_elect/leader",-1, true); log.info("Forcing me to be leader {} ", getBaseUrl()); overseerElector.getContext().runLeaderProcess(true, Overseer.STATE_UPDATE_DELAY + 100); } catch (Exception e) { throw new SolrException(ErrorCode.SERVER_ERROR, " Error becoming overseer ",e); } } private void registerAllCoresAsDown( final CurrentCoreDescriptorProvider registerOnReconnect, boolean updateLastPublished) { List<CoreDescriptor> descriptors = registerOnReconnect .getCurrentDescriptors(); if (isClosed) return; if (descriptors != null) { // before registering as live, make sure everyone is in a // down state for (CoreDescriptor descriptor : descriptors) { try { descriptor.getCloudDescriptor().setLeader(false); publish(descriptor, ZkStateReader.DOWN, updateLastPublished); } catch (Exception e) { if (isClosed) { return; } try { Thread.sleep(1000); } catch (InterruptedException e1) { Thread.currentThread().interrupt(); } try { publish(descriptor, ZkStateReader.DOWN); } catch (Exception e2) { SolrException.log(log, "", e2); continue; } } } for (CoreDescriptor descriptor : descriptors) { // if it looks like we are going to be the leader, we don't // want to wait for the following stuff CloudDescriptor cloudDesc = descriptor.getCloudDescriptor(); String collection = cloudDesc.getCollectionName(); String slice = cloudDesc.getShardId(); try { int children = zkStateReader .getZkClient() .getChildren( ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection + "/leader_elect/" + slice + "/election", null, true).size(); if (children == 0) { log.debug("looks like we are going to be the leader for collection {} shard {}", collection, slice); continue; } } catch (NoNodeException e) { log.debug("looks like we are going to be the leader for collection {} shard {}", collection, slice); continue; } catch (InterruptedException e2) { Thread.currentThread().interrupt(); } catch (KeeperException e) { log.warn("", e); Thread.currentThread().interrupt(); } final String coreZkNodeName = descriptor.getCloudDescriptor().getCoreNodeName(); try { log.debug("calling waitForLeaderToSeeDownState for coreZkNodeName={} collection={} shard={}", new Object[] {coreZkNodeName, collection, slice}); waitForLeaderToSeeDownState(descriptor, coreZkNodeName); } catch (Exception e) { SolrException.log(log, "", e); if (isClosed) { return; } try { Thread.sleep(5000); } catch (InterruptedException e1) { Thread.currentThread().interrupt(); } } } } } private void markAllAsNotLeader( final CurrentCoreDescriptorProvider registerOnReconnect) { List<CoreDescriptor> descriptors = registerOnReconnect .getCurrentDescriptors(); if (descriptors != null) { for (CoreDescriptor descriptor : descriptors) { descriptor.getCloudDescriptor().setLeader(false); } } } /** * Closes the underlying ZooKeeper client. */ public void close() { this.isClosed = true; try { for (ElectionContext context : electionContexts.values()) { try { context.close(); } catch (Exception e) { log.error("Error closing overseer", e); } } } finally { try { try { overseer.close(); } catch (Exception e) { log.error("Error closing overseer", e); } } finally { try { try { zkStateReader.close(); } catch (Exception e) { log.error("Error closing zkStateReader", e); } } finally { try { zkClient.close(); } catch (Exception e) { log.error("Error closing zkClient", e); } } } } } /** * Returns true if config file exists */ public boolean configFileExists(String collection, String fileName) throws KeeperException, InterruptedException { Stat stat = zkClient.exists(CONFIGS_ZKNODE + "/" + collection + "/" + fileName, null, true); return stat != null; } /** * @return information about the cluster from ZooKeeper */ public ClusterState getClusterState() { return zkStateReader.getClusterState(); } /** * Returns config file data (in bytes) */ public byte[] getConfigFileData(String zkConfigName, String fileName) throws KeeperException, InterruptedException { String zkPath = CONFIGS_ZKNODE + "/" + zkConfigName + "/" + fileName; byte[] bytes = zkClient.getData(zkPath, null, null, true); if (bytes == null) { log.error("Config file contains no data:" + zkPath); throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "Config file contains no data:" + zkPath); } return bytes; } // normalize host removing any url scheme. // input can be null, host, or url_prefix://host private String normalizeHostName(String host) throws IOException { if (host == null || host.length() == 0) { String hostaddress; try { hostaddress = InetAddress.getLocalHost().getHostAddress(); } catch (UnknownHostException e) { hostaddress = "127.0.0.1"; // cannot resolve system hostname, fall through } // Re-get the IP again for "127.0.0.1", the other case we trust the hosts // file is right. if ("127.0.0.1".equals(hostaddress)) { Enumeration<NetworkInterface> netInterfaces = null; try { netInterfaces = NetworkInterface.getNetworkInterfaces(); while (netInterfaces.hasMoreElements()) { NetworkInterface ni = netInterfaces.nextElement(); Enumeration<InetAddress> ips = ni.getInetAddresses(); while (ips.hasMoreElements()) { InetAddress ip = ips.nextElement(); if (ip.isSiteLocalAddress()) { hostaddress = ip.getHostAddress(); } } } } catch (Exception e) { SolrException.log(log, "Error while looking for a better host name than 127.0.0.1", e); } } host = hostaddress; } else { if(URLUtil.hasScheme(host)) { host = URLUtil.removeScheme(host); } } return host; } public String getHostName() { return hostName; } public String getHostPort() { return localHostPort; } public SolrZkClient getZkClient() { return zkClient; } /** * @return zookeeper server address */ public String getZkServerAddress() { return zkServerAddress; } private void init(CurrentCoreDescriptorProvider registerOnReconnect) { try { boolean createdWatchesAndUpdated = false; if (zkClient.exists(ZkStateReader.LIVE_NODES_ZKNODE, true)) { zkStateReader.createClusterStateWatchersAndUpdate(); createdWatchesAndUpdated = true; publishAndWaitForDownStates(); } // makes nodes zkNode cmdExecutor.ensureExists(ZkStateReader.LIVE_NODES_ZKNODE, zkClient); createEphemeralLiveNode(); cmdExecutor.ensureExists(ZkStateReader.COLLECTIONS_ZKNODE, zkClient); ShardHandler shardHandler; String adminPath; shardHandler = cc.getShardHandlerFactory().getShardHandler(); adminPath = cc.getAdminPath(); overseerElector = new LeaderElector(zkClient); this.overseer = new Overseer(shardHandler, adminPath, zkStateReader); ElectionContext context = new OverseerElectionContext(zkClient, overseer, getNodeName()); overseerElector.setup(context); overseerElector.joinElection(context, false); if (!createdWatchesAndUpdated) { zkStateReader.createClusterStateWatchersAndUpdate(); } } catch (IOException e) { log.error("", e); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Can't create ZooKeeperController", e); } catch (InterruptedException e) { // Restore the interrupted status Thread.currentThread().interrupt(); log.error("", e); throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); } catch (KeeperException e) { log.error("", e); throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); } } public void publishAndWaitForDownStates() throws KeeperException, InterruptedException { ClusterState clusterState = zkStateReader.getClusterState(); Set<String> collections = clusterState.getCollections(); List<String> updatedNodes = new ArrayList<>(); for (String collectionName : collections) { DocCollection collection = clusterState.getCollection(collectionName); Collection<Slice> slices = collection.getSlices(); for (Slice slice : slices) { Collection<Replica> replicas = slice.getReplicas(); for (Replica replica : replicas) { if (getNodeName().equals(replica.getNodeName()) && !(replica.getStr(ZkStateReader.STATE_PROP) .equals(ZkStateReader.DOWN))) { ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, "state", ZkStateReader.STATE_PROP, ZkStateReader.DOWN, ZkStateReader.BASE_URL_PROP, getBaseUrl(), ZkStateReader.CORE_NAME_PROP, replica.getStr(ZkStateReader.CORE_NAME_PROP), ZkStateReader.ROLES_PROP, replica.getStr(ZkStateReader.ROLES_PROP), ZkStateReader.NODE_NAME_PROP, getNodeName(), ZkStateReader.SHARD_ID_PROP, replica.getStr(ZkStateReader.SHARD_ID_PROP), ZkStateReader.COLLECTION_PROP, collectionName, ZkStateReader.CORE_NODE_NAME_PROP, replica.getName()); updatedNodes.add(replica.getStr(ZkStateReader.CORE_NAME_PROP)); overseerJobQueue.offer(ZkStateReader.toJSON(m)); } } } } // now wait till the updates are in our state long now = System.nanoTime(); long timeout = now + TimeUnit.NANOSECONDS.convert(60, TimeUnit.SECONDS); boolean foundStates = false; while (System.nanoTime() < timeout) { clusterState = zkStateReader.getClusterState(); collections = clusterState.getCollections(); for (String collectionName : collections) { DocCollection collection = clusterState.getCollection(collectionName); Collection<Slice> slices = collection.getSlices(); for (Slice slice : slices) { Collection<Replica> replicas = slice.getReplicas(); for (Replica replica : replicas) { if (replica.getStr(ZkStateReader.STATE_PROP).equals( ZkStateReader.DOWN)) { updatedNodes.remove(replica.getStr(ZkStateReader.CORE_NAME_PROP)); } } } } if (updatedNodes.size() == 0) { foundStates = true; Thread.sleep(1000); break; } Thread.sleep(1000); } if (!foundStates) { log.warn("Timed out waiting to see all nodes published as DOWN in our cluster state."); } } /** * Validates if the chroot exists in zk (or if it is successfully created). * Optionally, if create is set to true this method will create the path in * case it doesn't exist * * @return true if the path exists or is created false if the path doesn't * exist and 'create' = false */ public static boolean checkChrootPath(String zkHost, boolean create) throws KeeperException, InterruptedException { if (!containsChroot(zkHost)) { return true; } log.info("zkHost includes chroot"); String chrootPath = zkHost.substring(zkHost.indexOf("/"), zkHost.length()); SolrZkClient tmpClient = new SolrZkClient(zkHost.substring(0, zkHost.indexOf("/")), 60 * 1000); boolean exists = tmpClient.exists(chrootPath, true); if (!exists && create) { tmpClient.makePath(chrootPath, false, true); exists = true; } tmpClient.close(); return exists; } /** * Validates if zkHost contains a chroot. See http://zookeeper.apache.org/doc/r3.2.2/zookeeperProgrammers.html#ch_zkSessions */ private static boolean containsChroot(String zkHost) { return zkHost.contains("/"); } public boolean isConnected() { return zkClient.isConnected(); } private void createEphemeralLiveNode() throws KeeperException, InterruptedException { String nodeName = getNodeName(); String nodePath = ZkStateReader.LIVE_NODES_ZKNODE + "/" + nodeName; log.info("Register node as live in ZooKeeper:" + nodePath); try { boolean nodeDeleted = true; try { // we attempt a delete in the case of a quick server bounce - // if there was not a graceful shutdown, the node may exist // until expiration timeout - so a node won't be created here because // it exists, but eventually the node will be removed. So delete // in case it exists and create a new node. zkClient.delete(nodePath, -1, true); } catch (KeeperException.NoNodeException e) { // fine if there is nothing to delete // TODO: annoying that ZK logs a warning on us nodeDeleted = false; } if (nodeDeleted) { log .info("Found a previous node that still exists while trying to register a new live node " + nodePath + " - removing existing node to create another."); } zkClient.makePath(nodePath, CreateMode.EPHEMERAL, true); } catch (KeeperException e) { // its okay if the node already exists if (e.code() != KeeperException.Code.NODEEXISTS) { throw e; } } } public String getNodeName() { return nodeName; } /** * Returns true if the path exists */ public boolean pathExists(String path) throws KeeperException, InterruptedException { return zkClient.exists(path, true); } /** * Register shard with ZooKeeper. * * @return the shardId for the SolrCore */ public String register(String coreName, final CoreDescriptor desc) throws Exception { return register(coreName, desc, false, false); } /** * Register shard with ZooKeeper. * * @return the shardId for the SolrCore */ public String register(String coreName, final CoreDescriptor desc, boolean recoverReloadedCores, boolean afterExpiration) throws Exception { // pre register has published our down state final String baseUrl = getBaseUrl(); final CloudDescriptor cloudDesc = desc.getCloudDescriptor(); final String collection = cloudDesc.getCollectionName(); final String coreZkNodeName = desc.getCloudDescriptor().getCoreNodeName(); assert coreZkNodeName != null : "we should have a coreNodeName by now"; String shardId = cloudDesc.getShardId(); Map<String,Object> props = new HashMap<>(); // we only put a subset of props into the leader node props.put(ZkStateReader.BASE_URL_PROP, baseUrl); props.put(ZkStateReader.CORE_NAME_PROP, coreName); props.put(ZkStateReader.NODE_NAME_PROP, getNodeName()); if (log.isInfoEnabled()) { log.info("Register replica - core:" + coreName + " address:" + baseUrl + " collection:" + cloudDesc.getCollectionName() + " shard:" + shardId); } ZkNodeProps leaderProps = new ZkNodeProps(props); try { joinElection(desc, afterExpiration); } catch (InterruptedException e) { // Restore the interrupted status Thread.currentThread().interrupt(); throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); } catch (KeeperException e) { throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); } catch (IOException e) { throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); } // in this case, we want to wait for the leader as long as the leader might // wait for a vote, at least - but also long enough that a large cluster has // time to get its act together String leaderUrl = getLeader(cloudDesc, leaderVoteWait + 600000); String ourUrl = ZkCoreNodeProps.getCoreUrl(baseUrl, coreName); log.info("We are " + ourUrl + " and leader is " + leaderUrl); boolean isLeader = leaderUrl.equals(ourUrl); SolrCore core = null; try { core = cc.getCore(desc.getName()); // recover from local transaction log and wait for it to complete before // going active // TODO: should this be moved to another thread? To recoveryStrat? // TODO: should this actually be done earlier, before (or as part of) // leader election perhaps? UpdateLog ulog = core.getUpdateHandler().getUpdateLog(); if (!core.isReloaded() && ulog != null) { // disable recovery in case shard is in construction state (for shard splits) Slice slice = getClusterState().getSlice(collection, shardId); if (!Slice.CONSTRUCTION.equals(slice.getState()) || !isLeader) { Future<UpdateLog.RecoveryInfo> recoveryFuture = core.getUpdateHandler() .getUpdateLog().recoverFromLog(); if (recoveryFuture != null) { recoveryFuture.get(); // NOTE: this could potentially block for // minutes or more! // TODO: public as recovering in the mean time? // TODO: in the future we could do peersync in parallel with recoverFromLog } else { log.info("No LogReplay needed for core=" + core.getName() + " baseURL=" + baseUrl); } } boolean didRecovery = checkRecovery(coreName, desc, recoverReloadedCores, isLeader, cloudDesc, collection, coreZkNodeName, shardId, leaderProps, core, cc); if (!didRecovery) { publish(desc, ZkStateReader.ACTIVE); } } } finally { if (core != null) { core.close(); } } // make sure we have an update cluster state right away zkStateReader.updateClusterState(true); return shardId; } // timeoutms is the timeout for the first call to get the leader - there is then // a longer wait to make sure that leader matches our local state private String getLeader(final CloudDescriptor cloudDesc, int timeoutms) { String collection = cloudDesc.getCollectionName(); String shardId = cloudDesc.getShardId(); // rather than look in the cluster state file, we go straight to the zknodes // here, because on cluster restart there could be stale leader info in the // cluster state node that won't be updated for a moment String leaderUrl; try { leaderUrl = getLeaderProps(collection, cloudDesc.getShardId(), timeoutms) .getCoreUrl(); // now wait until our currently cloud state contains the latest leader String clusterStateLeaderUrl = zkStateReader.getLeaderUrl(collection, shardId, timeoutms * 2); // since we found it in zk, we are willing to // wait a while to find it in state int tries = 0; final long msInSec = 1000L; int maxTries = (int)Math.floor(leaderConflictResolveWait/msInSec); while (!leaderUrl.equals(clusterStateLeaderUrl)) { if (tries > maxTries) { throw new SolrException(ErrorCode.SERVER_ERROR, "There is conflicting information about the leader of shard: " + cloudDesc.getShardId() + " our state says:" + clusterStateLeaderUrl + " but zookeeper says:" + leaderUrl); } Thread.sleep(msInSec); tries++; clusterStateLeaderUrl = zkStateReader.getLeaderUrl(collection, shardId, timeoutms); leaderUrl = getLeaderProps(collection, cloudDesc.getShardId(), timeoutms) .getCoreUrl(); if (tries % 30 == 0) { String warnMsg = String.format(Locale.ENGLISH, "Still seeing conflicting information about the leader " + "of shard %s for collection %s after %d seconds; our state says %s, but ZooKeeper says %s", cloudDesc.getShardId(), collection, tries, clusterStateLeaderUrl, leaderUrl); log.warn(warnMsg); } } } catch (Exception e) { log.error("Error getting leader from zk", e); throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, "Error getting leader from zk for shard " + shardId, e); } return leaderUrl; } /** * Get leader props directly from zk nodes. */ public ZkCoreNodeProps getLeaderProps(final String collection, final String slice, int timeoutms) throws InterruptedException { return getLeaderProps(collection, slice, timeoutms, false); } /** * Get leader props directly from zk nodes. * * @return leader props */ public ZkCoreNodeProps getLeaderProps(final String collection, final String slice, int timeoutms, boolean failImmediatelyOnExpiration) throws InterruptedException { int iterCount = timeoutms / 1000; Exception exp = null; while (iterCount-- > 0) { try { byte[] data = zkClient.getData( ZkStateReader.getShardLeadersPath(collection, slice), null, null, true); ZkCoreNodeProps leaderProps = new ZkCoreNodeProps( ZkNodeProps.load(data)); return leaderProps; } catch (InterruptedException e) { throw e; } catch (SessionExpiredException e) { if (failImmediatelyOnExpiration) { throw new RuntimeException("Session has expired - could not get leader props", exp); } exp = e; Thread.sleep(1000); } catch (Exception e) { exp = e; Thread.sleep(1000); } if (cc.isShutDown()) { throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "CoreContainer is shutdown"); } } throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "Could not get leader props", exp); } private void joinElection(CoreDescriptor cd, boolean afterExpiration) throws InterruptedException, KeeperException, IOException { // look for old context - if we find it, cancel it String collection = cd.getCloudDescriptor().getCollectionName(); final String coreNodeName = cd.getCloudDescriptor().getCoreNodeName(); ContextKey contextKey = new ContextKey(collection, coreNodeName); ElectionContext prevContext = electionContexts.get(contextKey); if (prevContext != null) { prevContext.cancelElection(); } String shardId = cd.getCloudDescriptor().getShardId(); Map<String,Object> props = new HashMap<>(); // we only put a subset of props into the leader node props.put(ZkStateReader.BASE_URL_PROP, getBaseUrl()); props.put(ZkStateReader.CORE_NAME_PROP, cd.getName()); props.put(ZkStateReader.NODE_NAME_PROP, getNodeName()); ZkNodeProps ourProps = new ZkNodeProps(props); ElectionContext context = new ShardLeaderElectionContext(leaderElector, shardId, collection, coreNodeName, ourProps, this, cc); leaderElector.setup(context); electionContexts.put(contextKey, context); leaderElector.joinElection(context, false); } /** * Returns whether or not a recovery was started */ private boolean checkRecovery(String coreName, final CoreDescriptor desc, boolean recoverReloadedCores, final boolean isLeader, final CloudDescriptor cloudDesc, final String collection, final String shardZkNodeName, String shardId, ZkNodeProps leaderProps, SolrCore core, CoreContainer cc) { if (SKIP_AUTO_RECOVERY) { log.warn("Skipping recovery according to sys prop solrcloud.skip.autorecovery"); return false; } boolean doRecovery = true; if (!isLeader) { if (core.isReloaded() && !recoverReloadedCores) { doRecovery = false; } if (doRecovery) { log.info("Core needs to recover:" + core.getName()); core.getUpdateHandler().getSolrCoreState().doRecovery(cc, core.getCoreDescriptor()); return true; } } else { log.info("I am the leader, no recovery necessary"); } return false; } public String getBaseUrl() { return baseURL; } public void publish(final CoreDescriptor cd, final String state) throws KeeperException, InterruptedException { publish(cd, state, true); } public void publish(final CoreDescriptor cd, final String state, boolean updateLastState) throws KeeperException, InterruptedException { publish(cd, state, true, false); } /** * Publish core state to overseer. */ public void publish(final CoreDescriptor cd, final String state, boolean updateLastState, boolean forcePublish) throws KeeperException, InterruptedException { if (!forcePublish) { SolrCore core = cc.getCore(cd.getName()); if (core == null) { return; } try { if (core.isClosed()) { return; } } finally { core.close(); } } String collection = cd.getCloudDescriptor().getCollectionName(); log.info("publishing core={} state={} collection={}", cd.getName(), state, collection); //System.out.println(Thread.currentThread().getStackTrace()[3]); Integer numShards = cd.getCloudDescriptor().getNumShards(); if (numShards == null) { //XXX sys prop hack log.info("numShards not found on descriptor - reading it from system property"); numShards = Integer.getInteger(ZkStateReader.NUM_SHARDS_PROP); } assert collection != null && collection.length() > 0; String coreNodeName = cd.getCloudDescriptor().getCoreNodeName(); //assert cd.getCloudDescriptor().getShardId() != null; ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, "state", ZkStateReader.STATE_PROP, state, ZkStateReader.BASE_URL_PROP, getBaseUrl(), ZkStateReader.CORE_NAME_PROP, cd.getName(), ZkStateReader.ROLES_PROP, cd.getCloudDescriptor().getRoles(), ZkStateReader.NODE_NAME_PROP, getNodeName(), ZkStateReader.SHARD_ID_PROP, cd.getCloudDescriptor().getShardId(), ZkStateReader.COLLECTION_PROP, collection, ZkStateReader.NUM_SHARDS_PROP, numShards != null ? numShards.toString() : null, ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName != null ? coreNodeName : null); if (updateLastState) { cd.getCloudDescriptor().lastPublished = state; } overseerJobQueue.offer(ZkStateReader.toJSON(m)); } private boolean needsToBeAssignedShardId(final CoreDescriptor desc, final ClusterState state, final String coreNodeName) { final CloudDescriptor cloudDesc = desc.getCloudDescriptor(); final String shardId = state.getShardId(getNodeName(), desc.getName()); if (shardId != null) { cloudDesc.setShardId(shardId); return false; } return true; } public void unregister(String coreName, CoreDescriptor cd) throws InterruptedException, KeeperException { final String coreNodeName = cd.getCloudDescriptor().getCoreNodeName(); final String collection = cd.getCloudDescriptor().getCollectionName(); assert collection != null; if (collection == null || collection.trim().length() == 0) { log.error("No collection was specified."); return; } ElectionContext context = electionContexts.remove(new ContextKey(collection, coreNodeName)); if (context != null) { context.cancelElection(); } CloudDescriptor cloudDescriptor = cd.getCloudDescriptor(); ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, Overseer.DELETECORE, ZkStateReader.CORE_NAME_PROP, coreName, ZkStateReader.NODE_NAME_PROP, getNodeName(), ZkStateReader.COLLECTION_PROP, cloudDescriptor.getCollectionName(), ZkStateReader.CORE_NODE_NAME_PROP, coreNodeName); overseerJobQueue.offer(ZkStateReader.toJSON(m)); } public void createCollection(String collection) throws KeeperException, InterruptedException { ZkNodeProps m = new ZkNodeProps(Overseer.QUEUE_OPERATION, "createcollection", ZkStateReader.NODE_NAME_PROP, getNodeName(), ZkStateReader.COLLECTION_PROP, collection); overseerJobQueue.offer(ZkStateReader.toJSON(m)); } public void uploadToZK(File dir, String zkPath) throws IOException, KeeperException, InterruptedException { uploadToZK(zkClient, dir, zkPath); } public void uploadConfigDir(File dir, String configName) throws IOException, KeeperException, InterruptedException { uploadToZK(zkClient, dir, ZkController.CONFIGS_ZKNODE + "/" + configName); } // convenience for testing void printLayoutToStdOut() throws KeeperException, InterruptedException { zkClient.printLayoutToStdOut(); } public void createCollectionZkNode(CloudDescriptor cd) throws KeeperException, InterruptedException { String collection = cd.getCollectionName(); log.info("Check for collection zkNode:" + collection); String collectionPath = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection; try { if(!zkClient.exists(collectionPath, true)) { log.info("Creating collection in ZooKeeper:" + collection); SolrParams params = cd.getParams(); try { Map<String,Object> collectionProps = new HashMap<>(); // TODO: if collection.configName isn't set, and there isn't already a conf in zk, just use that? String defaultConfigName = System.getProperty(COLLECTION_PARAM_PREFIX+CONFIGNAME_PROP, collection); // params passed in - currently only done via core admin (create core commmand). if (params != null) { Iterator<String> iter = params.getParameterNamesIterator(); while (iter.hasNext()) { String paramName = iter.next(); if (paramName.startsWith(COLLECTION_PARAM_PREFIX)) { collectionProps.put(paramName.substring(COLLECTION_PARAM_PREFIX.length()), params.get(paramName)); } } // if the config name wasn't passed in, use the default if (!collectionProps.containsKey(CONFIGNAME_PROP)) { // TODO: getting the configName from the collectionPath should fail since we already know it doesn't exist? getConfName(collection, collectionPath, collectionProps); } } else if(System.getProperty("bootstrap_confdir") != null) { // if we are bootstrapping a collection, default the config for // a new collection to the collection we are bootstrapping log.info("Setting config for collection:" + collection + " to " + defaultConfigName); Properties sysProps = System.getProperties(); for (String sprop : System.getProperties().stringPropertyNames()) { if (sprop.startsWith(COLLECTION_PARAM_PREFIX)) { collectionProps.put(sprop.substring(COLLECTION_PARAM_PREFIX.length()), sysProps.getProperty(sprop)); } } // if the config name wasn't passed in, use the default if (!collectionProps.containsKey(CONFIGNAME_PROP)) collectionProps.put(CONFIGNAME_PROP, defaultConfigName); } else if (Boolean.getBoolean("bootstrap_conf")) { // the conf name should should be the collection name of this core collectionProps.put(CONFIGNAME_PROP, cd.getCollectionName()); } else { getConfName(collection, collectionPath, collectionProps); } collectionProps.remove(ZkStateReader.NUM_SHARDS_PROP); // we don't put numShards in the collections properties ZkNodeProps zkProps = new ZkNodeProps(collectionProps); zkClient.makePath(collectionPath, ZkStateReader.toJSON(zkProps), CreateMode.PERSISTENT, null, true); } catch (KeeperException e) { // its okay if the node already exists if (e.code() != KeeperException.Code.NODEEXISTS) { throw e; } } } else { log.info("Collection zkNode exists"); } } catch (KeeperException e) { // its okay if another beats us creating the node if (e.code() != KeeperException.Code.NODEEXISTS) { throw e; } } } private void getConfName(String collection, String collectionPath, Map<String,Object> collectionProps) throws KeeperException, InterruptedException { // check for configName log.info("Looking for collection configName"); List<String> configNames = null; int retry = 1; int retryLimt = 6; for (; retry < retryLimt; retry++) { if (zkClient.exists(collectionPath, true)) { ZkNodeProps cProps = ZkNodeProps.load(zkClient.getData(collectionPath, null, null, true)); if (cProps.containsKey(CONFIGNAME_PROP)) { break; } } // if there is only one conf, use that try { configNames = zkClient.getChildren(CONFIGS_ZKNODE, null, true); } catch (NoNodeException e) { // just keep trying } if (configNames != null && configNames.size() == 1) { // no config set named, but there is only 1 - use it log.info("Only one config set found in zk - using it:" + configNames.get(0)); collectionProps.put(CONFIGNAME_PROP, configNames.get(0)); break; } if (configNames != null && configNames.contains(collection)) { log.info("Could not find explicit collection configName, but found config name matching collection name - using that set."); collectionProps.put(CONFIGNAME_PROP, collection); break; } log.info("Could not find collection configName - pausing for 3 seconds and trying again - try: " + retry); Thread.sleep(3000); } if (retry == retryLimt) { log.error("Could not find configName for collection " + collection); throw new ZooKeeperException( SolrException.ErrorCode.SERVER_ERROR, "Could not find configName for collection " + collection + " found:" + configNames); } } public ZkStateReader getZkStateReader() { return zkStateReader; } private void doGetShardIdAndNodeNameProcess(CoreDescriptor cd) { final String coreNodeName = cd.getCloudDescriptor().getCoreNodeName(); if (coreNodeName != null) { waitForShardId(cd); } else { // if no explicit coreNodeName, we want to match by base url and core name waitForCoreNodeName(cd); waitForShardId(cd); } } private void waitForCoreNodeName(CoreDescriptor descriptor) { int retryCount = 320; log.info("look for our core node name"); while (retryCount-- > 0) { Map<String,Slice> slicesMap = zkStateReader.getClusterState() .getSlicesMap(descriptor.getCloudDescriptor().getCollectionName()); if (slicesMap != null) { for (Slice slice : slicesMap.values()) { for (Replica replica : slice.getReplicas()) { // TODO: for really large clusters, we could 'index' on this String nodeName = replica.getStr(ZkStateReader.NODE_NAME_PROP); String core = replica.getStr(ZkStateReader.CORE_NAME_PROP); String msgNodeName = getNodeName(); String msgCore = descriptor.getName(); if (msgNodeName.equals(nodeName) && core.equals(msgCore)) { descriptor.getCloudDescriptor() .setCoreNodeName(replica.getName()); return; } } } } try { Thread.sleep(1000); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } } private void waitForShardId(CoreDescriptor cd) { log.info("waiting to find shard id in clusterstate for " + cd.getName()); int retryCount = 320; while (retryCount-- > 0) { final String shardId = zkStateReader.getClusterState().getShardId(getNodeName(), cd.getName()); if (shardId != null) { cd.getCloudDescriptor().setShardId(shardId); return; } try { Thread.sleep(1000); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } throw new SolrException(ErrorCode.SERVER_ERROR, "Could not get shard id for core: " + cd.getName()); } public static void uploadToZK(SolrZkClient zkClient, File dir, String zkPath) throws IOException, KeeperException, InterruptedException { File[] files = dir.listFiles(); if (files == null) { throw new IllegalArgumentException("Illegal directory: " + dir); } for(File file : files) { if (!file.getName().startsWith(".")) { if (!file.isDirectory()) { zkClient.makePath(zkPath + "/" + file.getName(), file, false, true); } else { uploadToZK(zkClient, file, zkPath + "/" + file.getName()); } } } } public static void downloadFromZK(SolrZkClient zkClient, String zkPath, File dir) throws IOException, KeeperException, InterruptedException { List<String> files = zkClient.getChildren(zkPath, null, true); for (String file : files) { List<String> children = zkClient.getChildren(zkPath + "/" + file, null, true); if (children.size() == 0) { byte[] data = zkClient.getData(zkPath + "/" + file, null, null, true); dir.mkdirs(); log.info("Write file " + new File(dir, file)); FileUtils.writeByteArrayToFile(new File(dir, file), data); } else { downloadFromZK(zkClient, zkPath + "/" + file, new File(dir, file)); } } } public String getCoreNodeName(CoreDescriptor descriptor){ String coreNodeName = descriptor.getCloudDescriptor().getCoreNodeName(); if (coreNodeName == null && !genericCoreNodeNames) { // it's the default return getNodeName() + "_" + descriptor.getName(); } return coreNodeName; } public static void uploadConfigDir(SolrZkClient zkClient, File dir, String configName) throws IOException, KeeperException, InterruptedException { uploadToZK(zkClient, dir, ZkController.CONFIGS_ZKNODE + "/" + configName); } public static void downloadConfigDir(SolrZkClient zkClient, String configName, File dir) throws IOException, KeeperException, InterruptedException { downloadFromZK(zkClient, ZkController.CONFIGS_ZKNODE + "/" + configName, dir); } public void preRegister(CoreDescriptor cd ) { String coreNodeName = getCoreNodeName(cd); // before becoming available, make sure we are not live and active // this also gets us our assigned shard id if it was not specified try { checkStateInZk(cd); CloudDescriptor cloudDesc = cd.getCloudDescriptor(); // make sure the node name is set on the descriptor if (cloudDesc.getCoreNodeName() == null) { cloudDesc.setCoreNodeName(coreNodeName); } publish(cd, ZkStateReader.DOWN, false, true); } catch (KeeperException e) { log.error("", e); throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); } catch (InterruptedException e) { Thread.currentThread().interrupt(); log.error("", e); throw new ZooKeeperException(SolrException.ErrorCode.SERVER_ERROR, "", e); } if (cd.getCloudDescriptor().getShardId() == null && needsToBeAssignedShardId(cd, zkStateReader.getClusterState(), coreNodeName)) { doGetShardIdAndNodeNameProcess(cd); } else { // still wait till we see us in local state doGetShardIdAndNodeNameProcess(cd); } } private void checkStateInZk(CoreDescriptor cd) throws InterruptedException { if (!Overseer.isLegacy(zkStateReader.getClusterProps())) { CloudDescriptor cloudDesc = cd.getCloudDescriptor(); String coreNodeName = cloudDesc.getCoreNodeName(); assert coreNodeName != null; if (cloudDesc.getShardId() == null) throw new SolrException(ErrorCode.SERVER_ERROR ,"No shard id for :" + cd); long endTime = System.nanoTime() + TimeUnit.NANOSECONDS.convert(3, TimeUnit.SECONDS); String errMessage= null; for (; System.nanoTime()<endTime; ) { Thread.sleep(100); errMessage = null; Slice slice = zkStateReader.getClusterState().getSlice(cd.getCollectionName(), cloudDesc.getShardId()); if (slice == null) { errMessage = "Invalid slice : " + cloudDesc.getShardId(); continue; } if (slice.getReplica(coreNodeName) != null) return; } if(errMessage == null) errMessage = " no_such_replica in clusterstate ,replicaName : " + coreNodeName; throw new SolrException(ErrorCode.SERVER_ERROR,errMessage + "state : "+ zkStateReader.getClusterState().getCollection(cd.getCollectionName())); } } private ZkCoreNodeProps waitForLeaderToSeeDownState( CoreDescriptor descriptor, final String coreZkNodeName) { CloudDescriptor cloudDesc = descriptor.getCloudDescriptor(); String collection = cloudDesc.getCollectionName(); String shard = cloudDesc.getShardId(); ZkCoreNodeProps leaderProps = null; int retries = 6; for (int i = 0; i < retries; i++) { try { if (isClosed) { throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "We have been closed"); } // go straight to zk, not the cloud state - we must have current info leaderProps = getLeaderProps(collection, shard, 30000); break; } catch (Exception e) { SolrException.log(log, "There was a problem finding the leader in zk", e); try { Thread.sleep(2000); } catch (InterruptedException e1) { Thread.currentThread().interrupt(); } if (i == retries - 1) { throw new SolrException(ErrorCode.SERVER_ERROR, "There was a problem finding the leader in zk"); } } } String leaderBaseUrl = leaderProps.getBaseUrl(); String leaderCoreName = leaderProps.getCoreName(); String ourUrl = ZkCoreNodeProps.getCoreUrl(getBaseUrl(), descriptor.getName()); boolean isLeader = leaderProps.getCoreUrl().equals(ourUrl); if (!isLeader && !SKIP_AUTO_RECOVERY) { HttpSolrServer server = null; server = new HttpSolrServer(leaderBaseUrl); try { server.setConnectionTimeout(15000); server.setSoTimeout(120000); WaitForState prepCmd = new WaitForState(); prepCmd.setCoreName(leaderCoreName); prepCmd.setNodeName(getNodeName()); prepCmd.setCoreNodeName(coreZkNodeName); prepCmd.setState(ZkStateReader.DOWN); // let's retry a couple times - perhaps the leader just went down, // or perhaps he is just not quite ready for us yet retries = 6; for (int i = 0; i < retries; i++) { if (isClosed) { throw new SolrException(ErrorCode.SERVICE_UNAVAILABLE, "We have been closed"); } try { server.request(prepCmd); break; } catch (Exception e) { SolrException.log(log, "There was a problem making a request to the leader", e); try { Thread.sleep(2000); } catch (InterruptedException e1) { Thread.currentThread().interrupt(); } if (i == retries - 1) { throw new SolrException(ErrorCode.SERVER_ERROR, "There was a problem making a request to the leader"); } } } } finally { server.shutdown(); } } return leaderProps; } public static void linkConfSet(SolrZkClient zkClient, String collection, String confSetName) throws KeeperException, InterruptedException { String path = ZkStateReader.COLLECTIONS_ZKNODE + "/" + collection; if (log.isInfoEnabled()) { log.info("Load collection config from:" + path); } byte[] data; try { data = zkClient.getData(path, null, null, true); } catch (NoNodeException e) { // if there is no node, we will try and create it // first try to make in case we are pre configuring ZkNodeProps props = new ZkNodeProps(CONFIGNAME_PROP, confSetName); try { zkClient.makePath(path, ZkStateReader.toJSON(props), CreateMode.PERSISTENT, null, true); } catch (KeeperException e2) { // its okay if the node already exists if (e2.code() != KeeperException.Code.NODEEXISTS) { throw e; } // if we fail creating, setdata // TODO: we should consider using version zkClient.setData(path, ZkStateReader.toJSON(props), true); } return; } // we found existing data, let's update it ZkNodeProps props = null; if(data != null) { props = ZkNodeProps.load(data); Map<String,Object> newProps = new HashMap<>(); newProps.putAll(props.getProperties()); newProps.put(CONFIGNAME_PROP, confSetName); props = new ZkNodeProps(newProps); } else { props = new ZkNodeProps(CONFIGNAME_PROP, confSetName); } // TODO: we should consider using version zkClient.setData(path, ZkStateReader.toJSON(props), true); } /** * If in SolrCloud mode, upload config sets for each SolrCore in solr.xml. */ public static void bootstrapConf(SolrZkClient zkClient, CoreContainer cc, String solrHome) throws IOException, KeeperException, InterruptedException { //List<String> allCoreNames = cfg.getAllCoreNames(); List<CoreDescriptor> cds = cc.getCoresLocator().discover(cc); log.info("bootstrapping config for " + cds.size() + " cores into ZooKeeper using solr.xml from " + solrHome); for (CoreDescriptor cd : cds) { String coreName = cd.getName(); String confName = cd.getCollectionName(); if (StringUtils.isEmpty(confName)) confName = coreName; String instanceDir = cd.getInstanceDir(); File udir = new File(instanceDir, "conf"); log.info("Uploading directory " + udir + " with name " + confName + " for SolrCore " + coreName); ZkController.uploadConfigDir(zkClient, udir, confName); } } public DistributedQueue getOverseerJobQueue() { return overseerJobQueue; } public DistributedQueue getOverseerCollectionQueue() { return overseerCollectionQueue; } public DistributedMap getOverseerRunningMap() { return overseerRunningMap; } public DistributedMap getOverseerCompletedMap() { return overseerCompletedMap; } public DistributedMap getOverseerFailureMap() { return overseerFailureMap; } public int getClientTimeout() { return clientTimeout; } public Overseer getOverseer() { return overseer; } public LeaderElector getOverseerElector() { return overseerElector; } /** * Returns the nodeName that should be used based on the specified properties. * * @param hostName - must not be null or the empty string * @param hostPort - must consist only of digits, must not be null or the empty string * @param hostContext - should not begin or end with a slash (leading/trailin slashes will be ignored), must not be null, may be the empty string to denote the root context * @lucene.experimental * @see ZkStateReader#getBaseUrlForNodeName */ static String generateNodeName(final String hostName, final String hostPort, final String hostContext) { try { return hostName + ':' + hostPort + '_' + URLEncoder.encode(trimLeadingAndTrailingSlashes(hostContext), "UTF-8"); } catch (UnsupportedEncodingException e) { throw new IllegalStateException("JVM Does not seem to support UTF-8", e); } } /** * Utility method for trimming and leading and/or trailing slashes from * it's input. May return the empty string. May return null if and only * if the input is null. */ public static String trimLeadingAndTrailingSlashes(final String in) { if (null == in) return in; String out = in; if (out.startsWith("/")) { out = out.substring(1); } if (out.endsWith("/")) { out = out.substring(0,out.length()-1); } return out; } public void rejoinOverseerElection() { try { overseerElector.retryElection(); } catch (Exception e) { throw new SolrException(ErrorCode.SERVER_ERROR, "Unable to rejoin election", e); } } }