/**
* Copyright (C) 2014-2016 LinkedIn Corp. (pinot-core@linkedin.com)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.linkedin.pinot.tools.admin.command;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.linkedin.pinot.common.config.AbstractTableConfig;
import com.linkedin.pinot.common.utils.CommonConstants;
import com.linkedin.pinot.common.utils.helix.HelixHelper;
import com.linkedin.pinot.common.utils.retry.RetryPolicies;
import com.linkedin.pinot.tools.Command;
import com.linkedin.pinot.tools.PinotZKChanger;
import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import javax.annotation.Nullable;
import org.apache.helix.PropertyPathConfig;
import org.apache.helix.PropertyType;
import org.apache.helix.ZNRecord;
import org.apache.helix.manager.zk.ZKHelixAdmin;
import org.apache.helix.manager.zk.ZNRecordSerializer;
import org.apache.helix.model.IdealState;
import org.apache.helix.store.zk.ZkHelixPropertyStore;
import org.json.JSONException;
import org.kohsuke.args4j.CmdLineException;
import org.kohsuke.args4j.CmdLineParser;
import org.kohsuke.args4j.Option;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Command to migrate a subset of replica group from current servers to the provided destination servers.
* This command is intended to be run multiple times to migrate all the replicas of a table to the destination
* servers (if intended).
*/
public class MoveReplicaGroup extends AbstractBaseAdminCommand implements Command {
  private static final Logger LOGGER = LoggerFactory.getLogger(MoveReplicaGroup.class);

  @Option(name = "-srcHosts", aliases = {"-s", "--src"}, required = true, metaVar = "<filePath or csv hostnames>",
      usage = "File with names of source hosts or csv list of hostnames")
  private String srcHosts;

  @Option(name = "-destHostsFile", aliases = {"-d", "--dest"}, required = false, metaVar = "<File Path>",
      usage = "File with destination servers list")
  private String destHostsFile = "";

  @Option(name = "-tableName", aliases = {"-t", "-table"}, required = true, metaVar = "<string>",
      usage = "Table name. Supports only OFFLINE table (type is optional)")
  private String tableName;

  @Option(name = "-maxSegmentsToMove", aliases = {"-m", "--max"}, required = false, metaVar = "<int>",
      usage = "MaxSegmentsToMove")
  private int maxSegmentsToMove = Integer.MAX_VALUE;

  @Option(name = "-zkHost", aliases = {"--zk", "-z"}, required = true, metaVar = "<string>",
      usage = "Zookeeper host:port")
  private String zkHost;

  @Option(name = "-zkPath", aliases = {"--cluster", "-c"}, required = true, metaVar = "<string>",
      usage = "Zookeeper cluster path (Ex: /pinot)")
  private String zkPath;

  @Option(name = "-exec", required = false, metaVar = "<boolean>",
      usage = "Execute replica group move. dryRun(default) if not specified")
  private boolean exec = false;

  @Option(name = "-help", required = false, aliases = {"-h", "--h", "--help"}, metaVar = "<boolean>",
      usage = "Prints help")
  private boolean help = false;

  // Initialized in execute() after the command-line parameters are validated.
  private ZKHelixAdmin helix;
  private PinotZKChanger zkChanger;

  @Override
  public boolean getHelp() {
    return help;
  }

  @Override
  public String getName() {
    return "MoveReplicaGroup";
  }

  public String description() {
    return "Move complete set of segment replica from source servers to tagged servers in cluster";
  }

  @Override
  public String toString() {
    String retString = "MoveReplicaGroup -srcHosts " + srcHosts + " -tableName " +
        tableName + " -zkHost " + zkHost + " -zkPath " + zkPath +
        (exec ? " -exec" : "");
    return retString;
  }

  @Override
  public void cleanup() {
  }

  /**
   * Computes (and, with -exec, applies) a new ideal state in which one replica of each segment
   * currently hosted on a source server is re-mapped to the least-loaded destination server.
   *
   * Exits the JVM with status 1 on validation failures and with status 0 after a dry run.
   *
   * @return true when the new ideal state was applied and the cluster converged
   * @throws IOException on errors reading host files or the table config
   * @throws JSONException on errors parsing the table config
   * @throws InterruptedException if interrupted while waiting for the ideal state to stabilize
   */
  public boolean execute()
      throws IOException, JSONException, InterruptedException {
    validateParams();
    zkChanger = new PinotZKChanger(zkHost, zkPath);
    this.helix = zkChanger.getHelixAdmin();
    if (!isExistingTable(tableName)) {
      LOGGER.error("Table {} does not exist", tableName);
      // FIX: previously execution fell through and operated on a non-existent table
      System.exit(1);
    }
    // expects returned host names to be instance names (Server_<hostName>_<port>)
    List<String> srcHostsList = readSourceHosts();
    LOGGER.info("Source hosts: {}", srcHostsList);
    List<String> destinationServers = readDestinationServers();
    LOGGER.info("Destination servers: {}", destinationServers);
    verifyServerLists(srcHostsList, destinationServers);

    Map<String, Map<String, String>> idealStateMap = helix.getResourceIdealState(zkPath, tableName)
        .getRecord().getMapFields();
    System.out.println("Existing idealstate:");
    printIdealState(idealStateMap);

    PriorityQueue<SourceSegments> segmentsToMove = getSegmentsToMoveQueue(idealStateMap, srcHostsList);
    PriorityQueue<ServerInstance> destinationServerQueue = getDestinationServerQueue(idealStateMap, destinationServers);
    Map<String, Map<String, String>> proposedIdealState =
        computeNewIdealState(idealStateMap, segmentsToMove, destinationServerQueue, srcHostsList);
    System.out.println("Proposed idealstate:");
    printIdealState(proposedIdealState);
    printDestinationServerCounts(destinationServerQueue);

    if (!exec) {
      LOGGER.info("Run with -exec to apply this IdealState");
      System.exit(0);
    }
    applyIdealState(proposedIdealState);
    zkChanger.waitForStable(tableName);
    return true;
  }

  /**
   * Resolves -srcHosts into a list of instance names. The argument is either a path to a file
   * with one host per line, or a comma-separated list of host names.
   * Exits with status 1 on empty input so wrapper scripts can detect usage errors.
   */
  private List<String> readSourceHosts()
      throws IOException {
    if (this.srcHosts.isEmpty()) {
      LOGGER.error("Source hosts(-s) are required");
      System.exit(1);
    }
    File srcFile = new File(this.srcHosts);
    List<String> srcHostsList = null;
    if (srcFile.exists()) {
      srcHostsList = readHostsFromFile(this.srcHosts);
      if (srcHostsList.isEmpty()) {
        LOGGER.error("Empty list of servers. Nothing to do");
        // this is not process error but most likely usage error
        // exiting with status 1 so that scripts can catch this
        System.exit(1);
      }
    } else {
      List<String> hosts = Arrays.asList(this.srcHosts.split("\\s*,\\s*"));
      srcHostsList = hostNameToInstanceNames(hosts);
    }
    return srcHostsList;
  }

  /** Prints the per-server segment counts of the destination servers (iteration order is arbitrary). */
  private void printDestinationServerCounts(PriorityQueue<ServerInstance> destinationServerQueue) {
    System.out.println("Number of segments per server: ");
    for (ServerInstance instance : destinationServerQueue) {
      System.out.println(instance.server + " : " + instance.segments);
    }
  }

  /** Pretty-prints an ideal-state map (segment -> instance -> state) as JSON to stdout. */
  private void printIdealState(Map<String, Map<String, String>> idealState)
      throws JsonProcessingException {
    String output = new ObjectMapper().writerWithDefaultPrettyPrinter().writeValueAsString(idealState);
    System.out.println(output);
  }

  /**
   * Merges the proposed segment mappings into the table's ideal state in ZK,
   * retrying with exponential backoff on conflicting concurrent updates.
   */
  private void applyIdealState(final Map<String, Map<String, String>> proposedIdealState) {
    HelixHelper.updateIdealState(zkChanger.getHelixManager(), tableName, new Function<IdealState, IdealState>() {
      @Nullable
      @Override
      public IdealState apply(@Nullable IdealState input) {
        Map<String, Map<String, String>> existingMapField = input.getRecord().getMapFields();
        for (Map.Entry<String, Map<String, String>> segmentEntry : proposedIdealState.entrySet()) {
          existingMapField.put(segmentEntry.getKey(), segmentEntry.getValue());
        }
        return input;
      }
    }, RetryPolicies.exponentialBackoffRetryPolicy(5, 500L, 2.0f));
  }

  /**
   * Builds the proposed ideal state: for up to maxSegmentsToMove segments hosted on a source
   * server, one replica is moved from a source server to the least-loaded destination server
   * that does not already host the segment.
   *
   * @throws RuntimeException if no eligible destination or no source replica can be found
   *         for a segment (indicates inconsistent input or ideal state)
   */
  private Map<String, Map<String, String>> computeNewIdealState(Map<String, Map<String, String>> idealStateMap,
      PriorityQueue<SourceSegments> segmentsToMove, PriorityQueue<ServerInstance> destinationServers,
      List<String> srcHostsList) {
    Map<String, Map<String, String>> newIdealState = copyIdealState(idealStateMap);
    for (int remapCount = 0; remapCount < maxSegmentsToMove && !segmentsToMove.isEmpty(); ++remapCount) {
      String segment = segmentsToMove.poll().segment;
      Map<String, String> existingMapping = newIdealState.get(segment);
      String destinationServer = getDestinationServer(destinationServers, existingMapping);
      if (destinationServer == null) {
        throw new RuntimeException("No destination server for segment: " + segment);
      }
      // pick the first source replica of this segment to drop
      String toRemove = null;
      for (Map.Entry<String, String> existingInstanceEntry : existingMapping.entrySet()) {
        if (srcHostsList.contains(existingInstanceEntry.getKey())) {
          toRemove = existingInstanceEntry.getKey();
          break;
        }
      }
      if (toRemove == null) {
        throw new RuntimeException("Could not find a source host to remove for segment: " + segment);
      }
      existingMapping.remove(toRemove);
      existingMapping.put(destinationServer, "ONLINE");
    }
    return newIdealState;
  }

  /**
   * Returns the least-loaded destination server that does not already host the segment, and
   * increments its segment count; returns null if every destination already hosts a replica.
   * The queue is restored (with the updated count) before returning.
   */
  private String getDestinationServer(PriorityQueue<ServerInstance> destinationServers,
      Map<String, String> existingSegmentMapping) {
    Preconditions.checkNotNull(destinationServers);
    Preconditions.checkArgument(!destinationServers.isEmpty());
    List<ServerInstance> removedServers = new ArrayList<>();
    String selectedServer = null;
    while (!destinationServers.isEmpty()) {
      ServerInstance si = destinationServers.poll();
      removedServers.add(si);
      if (!existingSegmentMapping.containsKey(si.server)) {
        selectedServer = si.server;
        ++si.segments;
        break;
      }
    }
    // re-insert polled entries so the queue reflects the updated counts
    for (ServerInstance removedServer : removedServers) {
      destinationServers.add(removedServer);
    }
    return selectedServer;
  }

  /** Deep-copies an ideal-state map so the proposal can be mutated without touching the original. */
  private Map<String, Map<String, String>> copyIdealState(Map<String, Map<String, String>> idealStateMap) {
    // FIX: the old code seeded the copy with shared inner-map references before replacing them;
    // build the deep copy directly instead
    Map<String, Map<String, String>> copy = new HashMap<>(idealStateMap.size());
    for (Map.Entry<String, Map<String, String>> segmentEntry : idealStateMap.entrySet()) {
      copy.put(segmentEntry.getKey(), new HashMap<>(segmentEntry.getValue()));
    }
    return copy;
  }

  /**
   * Sanity-checks the server lists: sources and destinations must not overlap, and all
   * destinations must be enabled. Exits with status 1 on violation.
   */
  private void verifyServerLists(List<String> srcHosts, List<String> taggedServers) {
    for (String srcHost : srcHosts) {
      if (taggedServers.contains(srcHost)) {
        LOGGER.error("Source host: {} is also present in destination list", srcHost);
        LOGGER.error("Refusing to migrate replica group");
        System.exit(1);
      }
    }
    // having disabled source hosts in okay since we are moving segments away from source
    if (hasDisabledInstances("Destination", taggedServers)) {
      LOGGER.error("Destination server list has disabled instances. Retry after correcting input");
      System.exit(1);
    }
  }

  /** Returns true if any of the given instances is disabled in Helix; logs each disabled one. */
  private boolean hasDisabledInstances(String logTag, List<String> instances) {
    boolean hasDisabled = false;
    for (String instance : instances) {
      if (!helix.getInstanceConfig(zkPath, instance).getInstanceEnabled()) {
        LOGGER.error("{} instance: {} is disabled", logTag, instance);
        hasDisabled = true;
      }
    }
    return hasDisabled;
  }

  /**
   * Builds a min-heap of destination servers keyed by the number of this table's segments each
   * already hosts (per the current ideal state), so the least-loaded server is polled first.
   */
  private PriorityQueue<ServerInstance> getDestinationServerQueue(Map<String, Map<String, String>> idealStateMap,
      List<String> destServers) {
    // better to keep map rather than removing elements from heap each time
    Map<String, ServerInstance> serverMap = new HashMap<>(destServers.size());
    for (String ds : destServers) {
      serverMap.put(ds, new ServerInstance(ds, 0));
    }
    // For existing mapping of destination servers in idealstate, update the segment count
    for (Map.Entry<String, Map<String, String>> segmentEntry : idealStateMap.entrySet()) {
      for (Map.Entry<String, String> instanceEntry : segmentEntry.getValue().entrySet()) {
        String server = instanceEntry.getKey();
        ServerInstance instance = serverMap.get(server);
        if (instance != null) {
          ++instance.segments;
        }
      }
    }
    PriorityQueue<ServerInstance> destServerQueue = new PriorityQueue<>(destServers.size(),
        new Comparator<ServerInstance>() {
          @Override
          public int compare(ServerInstance o1, ServerInstance o2) {
            // FIX: Integer.compare satisfies the Comparator contract (old code never returned 0)
            return Integer.compare(o1.segments, o2.segments);
          }
        });
    for (Map.Entry<String, ServerInstance> serverEntry : serverMap.entrySet()) {
      destServerQueue.add(serverEntry.getValue());
    }
    return destServerQueue;
  }

  /** A segment name together with the number of its replicas hosted on source servers. */
  static class SourceSegments {
    SourceSegments(String segment, int replicas) {
      this.segment = segment;
      this.replicaCount = replicas;
    }

    String segment;
    int replicaCount;
  }

  /** A server instance name together with the number of this table's segments it hosts. */
  static class ServerInstance {
    ServerInstance(String server, int segments) {
      this.server = server;
      this.segments = segments;
    }

    String server;
    int segments;
  }

  // this is a priority queue so that we first move those segments which have highest replicas
  // on srcHosts. This can happen if previous run of the program limited the number of segments to move
  private PriorityQueue<SourceSegments> getSegmentsToMoveQueue(Map<String, Map<String, String>> idealStateMap,
      List<String> srcHosts) {
    PriorityQueue<SourceSegments> sourceSegments = new PriorityQueue<>(idealStateMap.keySet().size(),
        new Comparator<SourceSegments>() {
          @Override
          public int compare(SourceSegments s1, SourceSegments s2) {
            // descending by replica count; FIX: Integer.compare satisfies the Comparator
            // contract (old code never returned 0 for equal counts)
            return Integer.compare(s2.replicaCount, s1.replicaCount);
          }
        });
    for (Map.Entry<String, Map<String, String>> segmentEntry : idealStateMap.entrySet()) {
      SourceSegments srcSegment = new SourceSegments(segmentEntry.getKey(), 0);
      for (Map.Entry<String, String> instanceEntry : segmentEntry.getValue().entrySet()) {
        if (srcHosts.contains(instanceEntry.getKey())) {
          ++srcSegment.replicaCount;
        }
      }
      // only segments that actually have a replica on a source host need to move
      if (srcSegment.replicaCount > 0) {
        sourceSegments.add(srcSegment);
      }
    }
    return sourceSegments;
  }

  /**
   * Validates and normalizes command-line parameters: appends _OFFLINE to the table name if
   * needed, rejects REALTIME tables, and splits a combined "host:port/cluster" zkHost into
   * zkHost and zkPath. Exits with status 1 on invalid input.
   */
  private void validateParams() {
    if (tableName == null || tableName.isEmpty()) {
      LOGGER.error("Table name is required and can not be empty");
      System.exit(1);
    }
    if (tableName.endsWith(CommonConstants.Helix.TableType.REALTIME.toString())) {
      LOGGER.error("This operation is not supported for realtime table. table: {}", tableName);
      System.exit(1);
    }
    tableName = tableName.endsWith(CommonConstants.Helix.TableType.OFFLINE.toString()) ?
        tableName : tableName + "_OFFLINE";
    if (zkHost.isEmpty() || zkPath.isEmpty()) {
      LOGGER.error("zkHost or zkPath should not be empty");
      System.exit(1);
    }
    if (zkPath.startsWith("/")) {
      zkPath = zkPath.substring(1);
    }
    String[] hostSplits = zkHost.split("/");
    String[] pathSplits = zkPath.split("/");
    if (hostSplits.length == 1 || (hostSplits.length == 2 && hostSplits[1].isEmpty())) {
      // zkHost carries no chroot: move the first path component onto the host
      zkHost = hostSplits[0] + "/" + pathSplits[0];
      zkPath = Joiner.on("/").join(Arrays.copyOfRange(pathSplits, 1, pathSplits.length));
    }
    LOGGER.info("Using zkHost: {}, zkPath: {}", zkHost, zkPath);
  }

  /** Reads the table's server tenant name from its config in the ZK property store. */
  private String getServerTenantName(String tableName)
      throws IOException, JSONException {
    return getTableConfig(tableName).getTenantConfig().getServer();
  }

  /** Loads the table config ZNRecord from the cluster's property store. */
  private AbstractTableConfig getTableConfig(String tableName)
      throws IOException, JSONException {
    ZNRecordSerializer serializer = new ZNRecordSerializer();
    String path = PropertyPathConfig.getPath(PropertyType.PROPERTYSTORE, zkPath);
    ZkHelixPropertyStore<ZNRecord> propertyStore = new ZkHelixPropertyStore<>(zkHost, serializer, path);
    ZNRecord tcZnRecord = propertyStore.get("/CONFIGS/TABLE/" + tableName, null, 0);
    AbstractTableConfig tableConfig = AbstractTableConfig.fromZnRecord(tcZnRecord);
    LOGGER.debug("Loaded table config");
    return tableConfig;
  }

  /** Returns true if the table exists as a resource in the Helix cluster. */
  private boolean isExistingTable(String tableName) {
    return helix.getResourcesInCluster(zkPath).contains(tableName);
  }

  /**
   * Returns the destination servers: either the contents of -destHostsFile, or (by default)
   * all enabled instances tagged with the table's offline server tenant.
   */
  private List<String> readDestinationServers()
      throws IOException, JSONException {
    if (destHostsFile.isEmpty()) {
      String serverTenant = getServerTenantName(tableName) + "_OFFLINE";
      LOGGER.debug("Using server tenant: {}", serverTenant);
      return HelixHelper.getEnabledInstancesWithTag(helix, zkPath, serverTenant);
    } else {
      return readHostsFromFile(destHostsFile);
    }
  }

  /** Reads one host per line from the given file and converts bare host names to instance names. */
  private List<String> readHostsFromFile(String filename)
      throws IOException {
    List<String> hosts = Files.readAllLines(Paths.get(filename), Charset.defaultCharset());
    return hostNameToInstanceNames(hosts);
  }

  /**
   * Converts bare host names to Helix instance names by prefixing "Server_" and appending the
   * default port 8001; entries that already contain '_' are assumed to be instance names and
   * passed through. Empty lines are skipped.
   */
  private List<String> hostNameToInstanceNames(List<String> hosts) {
    List<String> srcHosts = new ArrayList<>(hosts.size());
    for (String host : hosts) {
      if (host.isEmpty()) {
        continue;
      }
      String server = host.split("_").length == 1 ? "Server_" + host + "_8001" : host;
      srcHosts.add(server);
    }
    return srcHosts;
  }

  public static void main(String[] args)
      throws Exception {
    MoveReplicaGroup mrg = new MoveReplicaGroup();
    CmdLineParser parser = new CmdLineParser(mrg);
    try {
      parser.parseArgument(args);
    } catch (CmdLineException e) {
      // FIX: pass the exception as the throwable argument so the stack trace is logged
      LOGGER.error("Failed to parse arguments", e);
      parser.printUsage(System.err);
      System.exit(1);
    }
    mrg.execute();
  }
}