/* * Copyright (c) 2017 Strapdata (http://www.strapdata.com) * Contains some code from Elasticsearch (http://www.elastic.co) * * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package org.elassandra.indices; import java.util.ArrayList; import java.util.List; import java.util.concurrent.ConcurrentMap; import org.elassandra.cluster.routing.AbstractSearchStrategy; import org.elassandra.gateway.CassandraGatewayService; import org.elasticsearch.cluster.ClusterChangedEvent; import org.elasticsearch.cluster.ClusterService; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.ClusterStateListener; import org.elasticsearch.cluster.metadata.IndexMetaData; import org.elasticsearch.cluster.metadata.IndexMetaData.State; import org.elasticsearch.cluster.metadata.MappingMetaData; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.routing.IndexRoutingTable; import org.elasticsearch.cluster.routing.ShardRouting; import org.elasticsearch.cluster.routing.ShardRoutingState; import org.elasticsearch.common.Nullable; import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.component.AbstractLifecycleComponent; import org.elasticsearch.common.compress.CompressedXContent; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.util.concurrent.ConcurrentCollections; import org.elasticsearch.index.IndexService; import org.elasticsearch.index.IndexShardAlreadyExistsException; import org.elasticsearch.index.aliases.IndexAliasesService; import org.elasticsearch.index.engine.Engine; import org.elasticsearch.index.mapper.DocumentMapper; import org.elasticsearch.index.mapper.MapperService; import org.elasticsearch.index.mapper.MapperService.MergeReason; import org.elasticsearch.index.settings.IndexSettingsService; import org.elasticsearch.index.shard.IndexShard; import org.elasticsearch.index.shard.ShardId; import org.elasticsearch.index.shard.ShardNotFoundException; import org.elasticsearch.indices.IndicesService; import org.elasticsearch.threadpool.ThreadPool; import com.carrotsearch.hppc.cursors.ObjectCursor; /** * Pre-applied cluster state listener */ public class CassandraIndicesClusterStateService extends AbstractLifecycleComponent<CassandraIndicesClusterStateService> implements ClusterStateListener { private final IndicesService indicesService; private final ClusterService clusterService; private final ThreadPool threadPool; // a map of mappings type we have seen per index due to cluster state // we need this so we won't remove types automatically created as part of the indexing process private final ConcurrentMap<Tuple<String, String>, Boolean> seenMappings = ConcurrentCollections.newConcurrentMap(); private final Object mutex = new Object(); private final FailedEngineHandler failedEngineHandler = new FailedEngineHandler(); @Inject public CassandraIndicesClusterStateService(Settings settings, IndicesService indicesService, ClusterService clusterService, ThreadPool threadPool) { super(settings); this.indicesService = indicesService; this.clusterService = clusterService; this.threadPool = threadPool; } @Override protected void doStart() { clusterService.addFirst(this); } @Override protected void doStop() { clusterService.remove(this); } @Override protected void doClose() { } @Override public void clusterChanged(final ClusterChangedEvent event) { if (!indicesService.changesAllowed()) { return; } if (!lifecycle.started()) { return; } synchronized (mutex) { // we need to clean the shards and indices we have on this node, since we // are going to recover them again once state persistence is disabled (no master / not recovered) // TODO: this feels a bit hacky here, a block disables state persistence, and then we clean the allocated shards, maybe another flag in blocks? if (event.state().blocks().disableStatePersistence() && !event.state().blocks().hasGlobalBlock(CassandraGatewayService.NO_CASSANDRA_RING_BLOCK.id())) { for (IndexService indexService : indicesService) { String index = indexService.index().getName(); for (Integer shardId : indexService.shardIds()) { logger.debug("[{}][{}] removing shard (disabled block persistence)", index, shardId); try { indexService.removeShard(shardId, "removing shard (disabled block persistence)"); } catch (Throwable e) { logger.warn("[{}] failed to remove shard (disabled block persistence)", e, index); } } removeIndex(index, "cleaning index (disabled block persistence)"); } return; } applyDeletedOrCloseIndices(event); applyNewIndices(event); applyMappings(event); applyAliases(event); applySettings(event); } } private void applyDeletedOrCloseIndices(final ClusterChangedEvent event) { final ClusterState previousState = event.previousState(); final String localNodeId = event.state().nodes().localNodeId(); assert localNodeId != null; DiscoveryNode localNode = event.state().nodes().localNode(); for (ObjectCursor<String> cursor : previousState.metaData().indices().keys()) { String index = cursor.value; if (!event.state().metaData().hasIndex(index)) { // delete index removed from metadata if (logger.isDebugEnabled()) logger.debug("[{}] cleaning index, no longer part of the metadata", index); final IndexService idxService = indicesService.indexService(index); if (idxService != null) { // delete open index deleteIndex(index, "index no longer part of the metadata"); } else { // delete closed index final IndexMetaData prevIndexMetaData = previousState.metaData().index(index); assert prevIndexMetaData != null; if (localNode.isMasterNode() || localNode.isDataNode()) { indicesService.deleteClosedIndex("closed index no longer part of the metadata", prevIndexMetaData, event.state()); } } } } // cleanup removed or closed indices for (IndexService indexService : indicesService) { String index = indexService.index().name(); IndexMetaData indexMetaData = event.state().metaData().index(index); if (indexMetaData != null) { if (!indexMetaData.isSameUUID(indexService.indexUUID())) { logger.warn("[{}] mismatch on index UUIDs between cluster state and local state, cleaning the index so it will be recreated", indexMetaData.getIndex()); deleteIndex(indexMetaData.getIndex(), "mismatch on index UUIDs between cluster state and local state, cleaning the index so it will be recreated"); } else if (indexMetaData.getState() == IndexMetaData.State.CLOSE) { try { logger.debug("[{}] removing shards (index is closed)", index); indexService.close("removing shard (index is closed)", false); } catch (Throwable e) { logger.warn("[{}] failed to remove shard (index is closed)", e, index); } if (indexService.shardIds().isEmpty()) { if (logger.isDebugEnabled()) { logger.debug("[{}] cleaning index (no shards allocated)", index); } // clean the index removeIndex(index, "removing index (no shards allocated)"); } } } else { if (logger.isDebugEnabled()) { logger.debug("[{}] cleaning index, no longer part of the metadata", index); } final IndexService idxService = indicesService.indexService(index); if (idxService != null) { deleteIndex(index, "index no longer part of the metadata"); } else { final IndexMetaData metaData = previousState.metaData().index(index); assert metaData != null; indicesService.deleteClosedIndex("closed index no longer part of the metadata", metaData, event.state()); } } } } private void applyNewIndices(final ClusterChangedEvent event) { for (IndexMetaData indexMetaData : event.state().metaData()) { IndexService indexService = indicesService.indexService(indexMetaData.getIndex()); if (indexService == null && indexMetaData.getState() == State.OPEN) { if (logger.isDebugEnabled()) { logger.debug("[{}] creating index", indexMetaData.getIndex()); } try { indexService = indicesService.createIndex(indexMetaData.getIndex(), indexMetaData.getSettings(), event.state().nodes().localNode().id()); } catch (Throwable e) { if (logger.isWarnEnabled()) { logger.warn("[{}][{}] failed to create index", indexMetaData.getIndex(), indexMetaData.getIndexUUID()); } } } if (indexService != null && indexService.shard(0) == null && indexMetaData.getState() == State.OPEN) { try { if (logger.isDebugEnabled()) { logger.debug("[{}][{}] creating shard INITIALIZING", indexMetaData.getIndex(), 0); } ShardRouting shardRouting = new ShardRouting(indexMetaData.getIndex(), 0, clusterService.localNode().id(), true, ShardRoutingState.INITIALIZING, event.state().metaData().version(), IndexRoutingTable.UNASSIGNED_INFO_INDEX_CREATED, AbstractSearchStrategy.EMPTY_RANGE_TOKEN_LIST); IndexShard indexShard = indexService.createShard(shardRouting); indexShard.shardRouting(shardRouting); indexShard.addFailedEngineListener(failedEngineHandler); this.clusterService.recoverShard(indexMetaData.getIndex()); // post-applied shard recovery by CassandraIndicesClusterStateService } catch (IndexShardAlreadyExistsException e) { // ignore this, the method call can happen several times } catch (Throwable e) { logger.error("Unexpected error", e); failAndRemoveShard(indexService, true, "failed to create shard", e); } } } } private void applySettings(ClusterChangedEvent event) { if (!event.metaDataChanged()) { return; } for (IndexMetaData indexMetaData : event.state().metaData()) { if (!indicesService.hasIndex(indexMetaData.getIndex())) { // we only create / update here continue; } // if the index meta data didn't change, no need check for refreshed settings if (!event.indexMetaDataChanged(indexMetaData)) { continue; } String index = indexMetaData.getIndex(); IndexService indexService = indicesService.indexService(index); if (indexService == null) { // already deleted on us, ignore it continue; } IndexSettingsService indexSettingsService = indexService.injector().getInstance(IndexSettingsService.class); indexSettingsService.refreshSettings(indexMetaData.getSettings()); } } private void applyMappings(ClusterChangedEvent event) { // go over and update mappings for (IndexMetaData indexMetaData : event.state().metaData()) { if (!indicesService.hasIndex(indexMetaData.getIndex())) { // we only create / update here continue; } List<String> typesToRefresh = new ArrayList<>(); String index = indexMetaData.getIndex(); IndexService indexService = indicesService.indexService(index); if (indexService == null) { // got deleted on us, ignore (closing the node) return; } try { MapperService mapperService = indexService.mapperService(); // first, go over and update the _default_ mapping (if exists) if (indexMetaData.getMappings().containsKey(MapperService.DEFAULT_MAPPING)) { boolean requireRefresh = processMapping(index, mapperService, MapperService.DEFAULT_MAPPING, indexMetaData.mapping(MapperService.DEFAULT_MAPPING).source()); if (requireRefresh) { typesToRefresh.add(MapperService.DEFAULT_MAPPING); } } // go over and add the relevant mappings (or update them) for (ObjectCursor<MappingMetaData> cursor : indexMetaData.getMappings().values()) { MappingMetaData mappingMd = cursor.value; String mappingType = mappingMd.type(); CompressedXContent mappingSource = mappingMd.source(); if (mappingType.equals(MapperService.DEFAULT_MAPPING)) { // we processed _default_ first continue; } boolean requireRefresh = processMapping(index, mapperService, mappingType, mappingSource); if (requireRefresh) { typesToRefresh.add(mappingType); } } } catch (Throwable t) { // if we failed the mappings anywhere, we need to fail the shards for this index, note, we safeguard // by creating the processing the mappings on the master, or on the node the mapping was introduced on, // so this failure typically means wrong node level configuration or something similar failAndRemoveShard(indexService, true, "failed to update mappings", t); } } } private boolean processMapping(String index, MapperService mapperService, String mappingType, CompressedXContent mappingSource) throws Throwable { if (!seenMappings.containsKey(new Tuple<>(index, mappingType))) { seenMappings.put(new Tuple<>(index, mappingType), true); } // refresh mapping can happen for 2 reasons. The first is less urgent, and happens when the mapping on this // node is ahead of what there is in the cluster state (yet an update-mapping has been sent to it already, // it just hasn't been processed yet and published). Eventually, the mappings will converge, and the refresh // mapping sent is more of a safe keeping (assuming the update mapping failed to reach the master, ...) // the second case is where the parsing/merging of the mapping from the metadata doesn't result in the same // mapping, in this case, we send to the master to refresh its own version of the mappings (to conform with the // merge version of it, which it does when refreshing the mappings), and warn log it. boolean requiresRefresh = false; try { if (!mapperService.hasMapping(mappingType)) { if (logger.isDebugEnabled() && mappingSource.compressed().length < 512) { logger.debug("[{}] adding mapping [{}], source [{}]", index, mappingType, mappingSource.string()); } else if (logger.isTraceEnabled()) { logger.trace("[{}] adding mapping [{}], source [{}]", index, mappingType, mappingSource.string()); } else { logger.debug("[{}] adding mapping [{}] (source suppressed due to length, use TRACE level if needed)", index, mappingType); } // we don't apply default, since it has been applied when the mappings were parsed initially mapperService.merge(mappingType, mappingSource, MergeReason.MAPPING_UPDATE, true); if (!mapperService.documentMapper(mappingType).mappingSource().equals(mappingSource)) { logger.debug("[{}] parsed mapping [{}], and got different sources\noriginal:\n{}\nparsed:\n{}", index, mappingType, mappingSource, mapperService.documentMapper(mappingType).mappingSource()); requiresRefresh = true; } } else { DocumentMapper existingMapper = mapperService.documentMapper(mappingType); if (!mappingSource.equals(existingMapper.mappingSource())) { // mapping changed, update it if (logger.isDebugEnabled() && mappingSource.compressed().length < 512) { logger.debug("[{}] updating mapping [{}], source [{}]", index, mappingType, mappingSource.string()); } else if (logger.isTraceEnabled()) { logger.trace("[{}] updating mapping [{}], source [{}]", index, mappingType, mappingSource.string()); } else { logger.debug("[{}] updating mapping [{}] (source suppressed due to length, use TRACE level if needed)", index, mappingType); } // we don't apply default, since it has been applied when the mappings were parsed initially mapperService.merge(mappingType, mappingSource, MergeReason.MAPPING_UPDATE, true); if (!mapperService.documentMapper(mappingType).mappingSource().equals(mappingSource)) { requiresRefresh = true; logger.debug("[{}] parsed mapping [{}], and got different sources\noriginal:\n{}\nparsed:\n{}", index, mappingType, mappingSource, mapperService.documentMapper(mappingType).mappingSource()); } } } } catch (Throwable e) { logger.warn("[{}] failed to add mapping [{}], source [{}]", e, index, mappingType, mappingSource); throw e; } return requiresRefresh; } private void applyAliases(ClusterChangedEvent event) { // check if aliases changed OR is opening for (ObjectCursor<IndexMetaData> cursor : event.previousState().metaData().indices().values()) { IndexMetaData prevIndex = cursor.value; IndexMetaData thisIndex = event.state().metaData().indices().get(prevIndex.getIndex()); if (thisIndex != null) { if (!prevIndex.getAliases().equals(thisIndex.getAliases()) || (prevIndex.getState()==IndexMetaData.State.CLOSE && thisIndex.getState()==IndexMetaData.State.OPEN)) { IndexService indexService = indicesService.indexService(thisIndex.getIndex()); IndexAliasesService indexAliasesService = indexService.aliasesService(); indexAliasesService.setAliases(thisIndex.getAliases()); } } } } private void removeIndex(String index, String reason) { try { indicesService.removeIndex(index, reason); } catch (Throwable e) { logger.warn("failed to clean index ({})", e, reason); } clearSeenMappings(index); } private void clearSeenMappings(String index) { // clear seen mappings as well for (Tuple<String, String> tuple : seenMappings.keySet()) { if (tuple.v1().equals(index)) { seenMappings.remove(tuple); } } } private void deleteIndex(String index, String reason) { try { indicesService.deleteIndex(index, reason); } catch (Throwable e) { logger.warn("failed to delete index ({})", e, reason); } // clear seen mappings as well clearSeenMappings(index); } private void failAndRemoveShard(IndexService indexService, boolean sendShardFailure, String message, @Nullable Throwable failure) { if (indexService.hasShard(0)) { try { indexService.removeShard(0, message); } catch (ShardNotFoundException e) { // the node got closed on us, ignore it } catch (Throwable e1) { logger.warn("[{}][{}] failed to remove shard after failure ([{}])", e1, indexService.index(), 0, message); } } } private class FailedEngineHandler implements Engine.FailedEngineListener { @Override public void onFailedEngine(final ShardId shardId, final String reason, final @Nullable Throwable failure) { ShardRouting shardRouting = null; final IndexService indexService = indicesService.indexService(shardId.index().name()); if (indexService != null) { IndexShard indexShard = indexService.shard(shardId.id()); if (indexShard != null) { shardRouting = indexShard.routingEntry(); } } if (shardRouting == null) { logger.warn("[{}][{}] engine failed, but can't find index shard. failure reason: [{}]", failure, shardId.index().name(), shardId.id(), reason); return; } threadPool.generic().execute(new Runnable() { @Override public void run() { synchronized (mutex) { failAndRemoveShard(indexService, true, "engine failure, reason [" + reason + "]", failure); } } }); } } }