/* * Licensed to ElasticSearch and Shay Banon under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information * regarding copyright ownership. ElasticSearch licenses this * file to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.cluster.routing.allocation.decider; import com.google.common.collect.Maps; import gnu.trove.map.hash.TObjectIntHashMap; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.MetaData; import org.elasticsearch.cluster.routing.MutableShardRouting; import org.elasticsearch.cluster.routing.RoutingNode; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.allocation.RoutingAllocation; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.ImmutableSettings; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.node.settings.NodeSettingsService; import java.util.HashMap; import java.util.Map; import static org.elasticsearch.common.settings.ImmutableSettings.Builder.EMPTY_SETTINGS; /** */ public class AwarenessAllocationDecider extends ServerAllocationDecider { static { MetaData.addDynamicSettings( "cluster.routing.allocation.awareness.attributes", "cluster.routing.allocation.awareness.force.*" ); } class ApplySettings implements NodeSettingsService.Listener { @Override public void onRefreshSettings(Settings settings) { String[] awarenessAttributes = settings.getAsArray("cluster.routing.allocation.awareness.attributes", null); if (awarenessAttributes != null) { logger.info("updating [cluster.routing.allocation.awareness.attributes] from [{}] to [{}]", AwarenessAllocationDecider.this.awarenessAttributes, awarenessAttributes); AwarenessAllocationDecider.this.awarenessAttributes = awarenessAttributes; } Map<String, String[]> forcedAwarenessAttributes = new HashMap<String, String[]>(AwarenessAllocationDecider.this.forcedAwarenessAttributes); Map<String, Settings> forceGroups = settings.getGroups("cluster.routing.allocation.awareness.force."); if (!forceGroups.isEmpty()) { for (Map.Entry<String, Settings> entry : forceGroups.entrySet()) { String[] aValues = entry.getValue().getAsArray("values"); if (aValues.length > 0) { forcedAwarenessAttributes.put(entry.getKey(), aValues); } } } AwarenessAllocationDecider.this.forcedAwarenessAttributes = forcedAwarenessAttributes; } } private String[] awarenessAttributes; private Map<String, String[]> forcedAwarenessAttributes; public AwarenessAllocationDecider() { this(EMPTY_SETTINGS); } public AwarenessAllocationDecider(Settings settings) { this(settings, new NodeSettingsService(settings)); } @Inject public AwarenessAllocationDecider(Settings settings, NodeSettingsService nodeSettingsService) { super(settings); this.awarenessAttributes = settings.getAsArray("cluster.routing.allocation.awareness.attributes"); forcedAwarenessAttributes = Maps.newHashMap(); Map<String, Settings> forceGroups = settings.getGroups("cluster.routing.allocation.awareness.force."); for (Map.Entry<String, Settings> entry : forceGroups.entrySet()) { String[] aValues = entry.getValue().getAsArray("values"); if (aValues.length > 0) { forcedAwarenessAttributes.put(entry.getKey(), aValues); } } nodeSettingsService.addListener(new ApplySettings()); } public String[] awarenessAttributes() { return this.awarenessAttributes; } @Override public Decision canAllocate(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return underCapacity(shardRouting, node, allocation, true) ? Decision.YES : Decision.NO; } @Override public boolean canRemain(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation) { return underCapacity(shardRouting, node, allocation, false); } private boolean underCapacity(ShardRouting shardRouting, RoutingNode node, RoutingAllocation allocation, boolean moveToNode) { if (awarenessAttributes.length == 0) { return true; } IndexMetaData indexMetaData = allocation.metaData().index(shardRouting.index()); int shardCount = indexMetaData.numberOfReplicas() + 1; // 1 for primary for (String awarenessAttribute : awarenessAttributes) { // the node the shard exists on must be associated with an awareness attribute if (!node.node().attributes().containsKey(awarenessAttribute)) { return false; } // build attr_value -> nodes map TObjectIntHashMap<String> nodesPerAttribute = allocation.routingNodes().nodesPerAttributesCounts(awarenessAttribute); // build the count of shards per attribute value TObjectIntHashMap<String> shardPerAttribute = new TObjectIntHashMap<String>(); for (RoutingNode routingNode : allocation.routingNodes()) { for (int i = 0; i < routingNode.shards().size(); i++) { MutableShardRouting nodeShardRouting = routingNode.shards().get(i); if (nodeShardRouting.shardId().equals(shardRouting.shardId())) { // if the shard is relocating, then make sure we count it as part of the node it is relocating to if (nodeShardRouting.relocating()) { RoutingNode relocationNode = allocation.routingNodes().node(nodeShardRouting.relocatingNodeId()); shardPerAttribute.adjustOrPutValue(relocationNode.node().attributes().get(awarenessAttribute), 1, 1); } else if (nodeShardRouting.started()) { shardPerAttribute.adjustOrPutValue(routingNode.node().attributes().get(awarenessAttribute), 1, 1); } } } } if (moveToNode) { if (shardRouting.assignedToNode()) { String nodeId = shardRouting.relocating() ? shardRouting.relocatingNodeId() : shardRouting.currentNodeId(); if (!node.nodeId().equals(nodeId)) { // we work on different nodes, move counts around shardPerAttribute.adjustOrPutValue(allocation.routingNodes().node(nodeId).node().attributes().get(awarenessAttribute), -1, 0); shardPerAttribute.adjustOrPutValue(node.node().attributes().get(awarenessAttribute), 1, 1); } } else { shardPerAttribute.adjustOrPutValue(node.node().attributes().get(awarenessAttribute), 1, 1); } } int numberOfAttributes = nodesPerAttribute.size(); String[] fullValues = forcedAwarenessAttributes.get(awarenessAttribute); if (fullValues != null) { for (String fullValue : fullValues) { if (!shardPerAttribute.contains(fullValue)) { numberOfAttributes++; } } } // TODO should we remove ones that are not part of full list? int averagePerAttribute = shardCount / numberOfAttributes; int totalLeftover = shardCount % numberOfAttributes; int requiredCountPerAttribute; if (averagePerAttribute == 0) { // if we have more attributes values than shard count, no leftover totalLeftover = 0; requiredCountPerAttribute = 1; } else { requiredCountPerAttribute = averagePerAttribute; } int leftoverPerAttribute = totalLeftover == 0 ? 0 : 1; int currentNodeCount = shardPerAttribute.get(node.node().attributes().get(awarenessAttribute)); // if we are above with leftover, then we know we are not good, even with mod if (currentNodeCount > (requiredCountPerAttribute + leftoverPerAttribute)) { return false; } // all is well, we are below or same as average if (currentNodeCount <= requiredCountPerAttribute) { continue; } } return true; } }