/* * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ package org.elasticsearch.index.reindex; import org.apache.http.Header; import org.apache.http.HttpHost; import org.apache.http.auth.AuthScope; import org.apache.http.auth.UsernamePasswordCredentials; import org.apache.http.client.CredentialsProvider; import org.apache.http.impl.client.BasicCredentialsProvider; import org.apache.http.impl.nio.reactor.IOReactorConfig; import org.apache.http.message.BasicHeader; import org.apache.logging.log4j.Logger; import org.apache.lucene.util.automaton.Automata; import org.apache.lucene.util.automaton.Automaton; import org.apache.lucene.util.automaton.CharacterRunAutomaton; import org.apache.lucene.util.automaton.MinimizationOperations; import org.apache.lucene.util.automaton.Operations; import org.elasticsearch.action.ActionListener; import org.elasticsearch.action.ActionRequestValidationException; import org.elasticsearch.action.bulk.BackoffPolicy; import org.elasticsearch.action.bulk.BulkItemResponse.Failure; import org.elasticsearch.index.reindex.ScrollableHitSource.SearchFailure; import org.elasticsearch.action.index.IndexRequest; import org.elasticsearch.action.search.SearchRequest; import org.elasticsearch.action.support.ActionFilters; import org.elasticsearch.action.support.AutoCreateIndex; import org.elasticsearch.action.support.HandledTransportAction; import org.elasticsearch.client.Client; import org.elasticsearch.client.ParentTaskAssigningClient; import org.elasticsearch.client.RestClient; import org.elasticsearch.cluster.ClusterState; import org.elasticsearch.cluster.metadata.IndexNameExpressionResolver; import org.elasticsearch.cluster.service.ClusterService; import org.elasticsearch.common.Strings; import org.elasticsearch.common.inject.Inject; import org.elasticsearch.common.lucene.uid.Versions; import org.elasticsearch.common.regex.Regex; import org.elasticsearch.common.settings.Setting; import org.elasticsearch.common.settings.Setting.Property; import org.elasticsearch.common.settings.Settings; import org.elasticsearch.common.xcontent.NamedXContentRegistry; import org.elasticsearch.common.xcontent.XContentBuilder; import org.elasticsearch.common.xcontent.XContentParser; import org.elasticsearch.common.xcontent.XContentType; import org.elasticsearch.index.VersionType; import org.elasticsearch.index.mapper.VersionFieldMapper; import org.elasticsearch.index.reindex.remote.RemoteScrollableHitSource; import org.elasticsearch.script.Script; import org.elasticsearch.script.ScriptService; import org.elasticsearch.tasks.Task; import org.elasticsearch.threadpool.ThreadPool; import org.elasticsearch.transport.TransportService; import java.io.IOException; import java.io.UncheckedIOException; import java.util.ArrayList; import java.util.List; import java.util.Map; import java.util.Objects; import java.util.concurrent.atomic.AtomicInteger; import java.util.function.BiFunction; import java.util.function.Function; import static java.util.Collections.emptyList; import static java.util.Collections.synchronizedList; import static java.util.Objects.requireNonNull; import static org.elasticsearch.index.VersionType.INTERNAL; public class TransportReindexAction extends HandledTransportAction<ReindexRequest, BulkByScrollResponse> { public static final Setting<List<String>> REMOTE_CLUSTER_WHITELIST = Setting.listSetting("reindex.remote.whitelist", emptyList(), Function.identity(), Property.NodeScope); private final ClusterService clusterService; private final ScriptService scriptService; private final AutoCreateIndex autoCreateIndex; private final Client client; private final CharacterRunAutomaton remoteWhitelist; @Inject public TransportReindexAction(Settings settings, ThreadPool threadPool, ActionFilters actionFilters, IndexNameExpressionResolver indexNameExpressionResolver, ClusterService clusterService, ScriptService scriptService, AutoCreateIndex autoCreateIndex, Client client, TransportService transportService) { super(settings, ReindexAction.NAME, threadPool, transportService, actionFilters, indexNameExpressionResolver, ReindexRequest::new); this.clusterService = clusterService; this.scriptService = scriptService; this.autoCreateIndex = autoCreateIndex; this.client = client; remoteWhitelist = buildRemoteWhitelist(REMOTE_CLUSTER_WHITELIST.get(settings)); } @Override protected void doExecute(Task task, ReindexRequest request, ActionListener<BulkByScrollResponse> listener) { if (request.getSlices() > 1) { BulkByScrollParallelizationHelper.startSlices(client, taskManager, ReindexAction.INSTANCE, clusterService.localNode().getId(), (ParentBulkByScrollTask) task, request, listener); } else { checkRemoteWhitelist(remoteWhitelist, request.getRemoteInfo()); ClusterState state = clusterService.state(); validateAgainstAliases(request.getSearchRequest(), request.getDestination(), request.getRemoteInfo(), indexNameExpressionResolver, autoCreateIndex, state); ParentTaskAssigningClient client = new ParentTaskAssigningClient(this.client, clusterService.localNode(), task); new AsyncIndexBySearchAction((WorkingBulkByScrollTask) task, logger, client, threadPool, request, scriptService, state, listener).start(); } } @Override protected void doExecute(ReindexRequest request, ActionListener<BulkByScrollResponse> listener) { throw new UnsupportedOperationException("task required"); } static void checkRemoteWhitelist(CharacterRunAutomaton whitelist, RemoteInfo remoteInfo) { if (remoteInfo == null) { return; } String check = remoteInfo.getHost() + ':' + remoteInfo.getPort(); if (whitelist.run(check)) { return; } throw new IllegalArgumentException('[' + check + "] not whitelisted in " + REMOTE_CLUSTER_WHITELIST.getKey()); } /** * Build the {@link CharacterRunAutomaton} that represents the reindex-from-remote whitelist and make sure that it doesn't whitelist * the world. */ static CharacterRunAutomaton buildRemoteWhitelist(List<String> whitelist) { if (whitelist.isEmpty()) { return new CharacterRunAutomaton(Automata.makeEmpty()); } Automaton automaton = Regex.simpleMatchToAutomaton(whitelist.toArray(Strings.EMPTY_ARRAY)); automaton = MinimizationOperations.minimize(automaton, Operations.DEFAULT_MAX_DETERMINIZED_STATES); if (Operations.isTotal(automaton)) { throw new IllegalArgumentException("Refusing to start because whitelist " + whitelist + " accepts all addresses. " + "This would allow users to reindex-from-remote any URL they like effectively having Elasticsearch make HTTP GETs " + "for them."); } return new CharacterRunAutomaton(automaton); } /** * Throws an ActionRequestValidationException if the request tries to index * back into the same index or into an index that points to two indexes. * This cannot be done during request validation because the cluster state * isn't available then. Package private for testing. */ static void validateAgainstAliases(SearchRequest source, IndexRequest destination, RemoteInfo remoteInfo, IndexNameExpressionResolver indexNameExpressionResolver, AutoCreateIndex autoCreateIndex, ClusterState clusterState) { if (remoteInfo != null) { return; } String target = destination.index(); if (false == autoCreateIndex.shouldAutoCreate(target, clusterState)) { /* * If we're going to autocreate the index we don't need to resolve * it. This is the same sort of dance that TransportIndexRequest * uses to decide to autocreate the index. */ target = indexNameExpressionResolver.concreteIndexNames(clusterState, destination)[0]; } for (String sourceIndex : indexNameExpressionResolver.concreteIndexNames(clusterState, source)) { if (sourceIndex.equals(target)) { ActionRequestValidationException e = new ActionRequestValidationException(); e.addValidationError("reindex cannot write into an index its reading from [" + target + ']'); throw e; } } } /** * Build the {@link RestClient} used for reindexing from remote clusters. * @param remoteInfo connection information for the remote cluster * @param taskId the id of the current task. This is added to the thread name for easier tracking * @param threadCollector a list in which we collect all the threads created by the client */ static RestClient buildRestClient(RemoteInfo remoteInfo, long taskId, List<Thread> threadCollector) { Header[] clientHeaders = new Header[remoteInfo.getHeaders().size()]; int i = 0; for (Map.Entry<String, String> header : remoteInfo.getHeaders().entrySet()) { clientHeaders[i] = new BasicHeader(header.getKey(), header.getValue()); } return RestClient.builder(new HttpHost(remoteInfo.getHost(), remoteInfo.getPort(), remoteInfo.getScheme())) .setDefaultHeaders(clientHeaders) .setRequestConfigCallback(c -> { c.setConnectTimeout(Math.toIntExact(remoteInfo.getConnectTimeout().millis())); c.setSocketTimeout(Math.toIntExact(remoteInfo.getSocketTimeout().millis())); return c; }) .setHttpClientConfigCallback(c -> { // Enable basic auth if it is configured if (remoteInfo.getUsername() != null) { UsernamePasswordCredentials creds = new UsernamePasswordCredentials(remoteInfo.getUsername(), remoteInfo.getPassword()); CredentialsProvider credentialsProvider = new BasicCredentialsProvider(); credentialsProvider.setCredentials(AuthScope.ANY, creds); c.setDefaultCredentialsProvider(credentialsProvider); } // Stick the task id in the thread name so we can track down tasks from stack traces AtomicInteger threads = new AtomicInteger(); c.setThreadFactory(r -> { String name = "es-client-" + taskId + "-" + threads.getAndIncrement(); Thread t = new Thread(r, name); threadCollector.add(t); return t; }); // Limit ourselves to one reactor thread because for now the search process is single threaded. c.setDefaultIOReactorConfig(IOReactorConfig.custom().setIoThreadCount(1).build()); return c; }).build(); } /** * Simple implementation of reindex using scrolling and bulk. There are tons * of optimizations that can be done on certain types of reindex requests * but this makes no attempt to do any of them so it can be as simple * possible. */ static class AsyncIndexBySearchAction extends AbstractAsyncBulkByScrollAction<ReindexRequest> { /** * List of threads created by this process. Usually actions don't create threads in Elasticsearch. Instead they use the builtin * {@link ThreadPool}s. But reindex-from-remote uses Elasticsearch's {@link RestClient} which doesn't use the * {@linkplain ThreadPool}s because it uses httpasyncclient. It'd be a ton of trouble to work around creating those threads. So * instead we let it create threads but we watch them carefully and assert that they are dead when the process is over. */ private List<Thread> createdThreads = emptyList(); AsyncIndexBySearchAction(WorkingBulkByScrollTask task, Logger logger, ParentTaskAssigningClient client, ThreadPool threadPool, ReindexRequest request, ScriptService scriptService, ClusterState clusterState, ActionListener<BulkByScrollResponse> listener) { this(task, logger, client, threadPool, request, scriptService, clusterState, listener, client.settings()); } AsyncIndexBySearchAction(WorkingBulkByScrollTask task, Logger logger, ParentTaskAssigningClient client, ThreadPool threadPool, ReindexRequest request, ScriptService scriptService, ClusterState clusterState, ActionListener<BulkByScrollResponse> listener, Settings settings) { super(task, logger, client, threadPool, request, scriptService, clusterState, listener, settings); } @Override protected boolean needsSourceDocumentVersions() { /* * We only need the source version if we're going to use it when write and we only do that when the destination request uses * external versioning. */ return mainRequest.getDestination().versionType() != VersionType.INTERNAL; } @Override protected ScrollableHitSource buildScrollableResultSource(BackoffPolicy backoffPolicy) { if (mainRequest.getRemoteInfo() != null) { RemoteInfo remoteInfo = mainRequest.getRemoteInfo(); createdThreads = synchronizedList(new ArrayList<>()); RestClient restClient = buildRestClient(remoteInfo, task.getId(), createdThreads); return new RemoteScrollableHitSource(logger, backoffPolicy, threadPool, task::countSearchRetry, this::finishHim, restClient, remoteInfo.getQuery(), mainRequest.getSearchRequest()); } return super.buildScrollableResultSource(backoffPolicy); } @Override protected void finishHim(Exception failure, List<Failure> indexingFailures, List<SearchFailure> searchFailures, boolean timedOut) { super.finishHim(failure, indexingFailures, searchFailures, timedOut); // A little extra paranoia so we log something if we leave any threads running for (Thread thread : createdThreads) { if (thread.isAlive()) { assert false: "Failed to properly stop client thread [" + thread.getName() + "]"; logger.error("Failed to properly stop client thread [{}]", thread.getName()); } } } @Override public BiFunction<RequestWrapper<?>, ScrollableHitSource.Hit, RequestWrapper<?>> buildScriptApplier() { Script script = mainRequest.getScript(); if (script != null) { return new ReindexScriptApplier(task, scriptService, script, script.getParams()); } return super.buildScriptApplier(); } @Override protected RequestWrapper<IndexRequest> buildRequest(ScrollableHitSource.Hit doc) { IndexRequest index = new IndexRequest(); // Copy the index from the request so we always write where it asked to write index.index(mainRequest.getDestination().index()); // If the request override's type then the user wants all documents in that type. Otherwise keep the doc's type. if (mainRequest.getDestination().type() == null) { index.type(doc.getType()); } else { index.type(mainRequest.getDestination().type()); } /* * Internal versioning can just use what we copied from the destination request. Otherwise we assume we're using external * versioning and use the doc's version. */ index.versionType(mainRequest.getDestination().versionType()); if (index.versionType() == INTERNAL) { assert doc.getVersion() == -1 : "fetched version when we didn't have to"; index.version(mainRequest.getDestination().version()); } else { index.version(doc.getVersion()); } // id and source always come from the found doc. Scripts can change them but they operate on the index request. index.id(doc.getId()); // the source xcontent type and destination could be different final XContentType sourceXContentType = doc.getXContentType(); final XContentType mainRequestXContentType = mainRequest.getDestination().getContentType(); if (mainRequestXContentType != null && doc.getXContentType() != mainRequestXContentType) { // we need to convert try (XContentParser parser = sourceXContentType.xContent().createParser(NamedXContentRegistry.EMPTY, doc.getSource()); XContentBuilder builder = XContentBuilder.builder(mainRequestXContentType.xContent())) { parser.nextToken(); builder.copyCurrentStructure(parser); index.source(builder.bytes(), builder.contentType()); } catch (IOException e) { throw new UncheckedIOException("failed to convert hit from " + sourceXContentType + " to " + mainRequestXContentType, e); } } else { index.source(doc.getSource(), doc.getXContentType()); } /* * The rest of the index request just has to be copied from the template. It may be changed later from scripts or the superclass * here on out operates on the index request rather than the template. */ index.routing(mainRequest.getDestination().routing()); index.parent(mainRequest.getDestination().parent()); index.setPipeline(mainRequest.getDestination().getPipeline()); // OpType is synthesized from version so it is handled when we copy version above. return wrap(index); } /** * Override the simple copy behavior to allow more fine grained control. */ @Override protected void copyRouting(RequestWrapper<?> request, String routing) { String routingSpec = mainRequest.getDestination().routing(); if (routingSpec == null) { super.copyRouting(request, routing); return; } if (routingSpec.startsWith("=")) { super.copyRouting(request, mainRequest.getDestination().routing().substring(1)); return; } switch (routingSpec) { case "keep": super.copyRouting(request, routing); break; case "discard": super.copyRouting(request, null); break; default: throw new IllegalArgumentException("Unsupported routing command"); } } class ReindexScriptApplier extends ScriptApplier { ReindexScriptApplier(WorkingBulkByScrollTask task, ScriptService scriptService, Script script, Map<String, Object> params) { super(task, scriptService, script, params); } /* * Methods below here handle script updating the index request. They try * to be pretty liberal with regards to types because script are often * dynamically typed. */ @Override protected void scriptChangedIndex(RequestWrapper<?> request, Object to) { requireNonNull(to, "Can't reindex without a destination index!"); request.setIndex(to.toString()); } @Override protected void scriptChangedType(RequestWrapper<?> request, Object to) { requireNonNull(to, "Can't reindex without a destination type!"); request.setType(to.toString()); } @Override protected void scriptChangedId(RequestWrapper<?> request, Object to) { request.setId(Objects.toString(to, null)); } @Override protected void scriptChangedVersion(RequestWrapper<?> request, Object to) { if (to == null) { request.setVersion(Versions.MATCH_ANY); request.setVersionType(INTERNAL); } else { request.setVersion(asLong(to, VersionFieldMapper.NAME)); } } @Override protected void scriptChangedParent(RequestWrapper<?> request, Object to) { // Have to override routing with parent just in case its changed String routing = Objects.toString(to, null); request.setParent(routing); request.setRouting(routing); } @Override protected void scriptChangedRouting(RequestWrapper<?> request, Object to) { request.setRouting(Objects.toString(to, null)); } private long asLong(Object from, String name) { /* * Stuffing a number into the map will have converted it to * some Number. * */ Number fromNumber; try { fromNumber = (Number) from; } catch (ClassCastException e) { throw new IllegalArgumentException(name + " may only be set to an int or a long but was [" + from + "]", e); } long l = fromNumber.longValue(); // Check that we didn't round when we fetched the value. if (fromNumber.doubleValue() != l) { throw new IllegalArgumentException(name + " may only be set to an int or a long but was [" + from + "]"); } return l; } } } }