/* * Licensed to Crate under one or more contributor license agreements. * See the NOTICE file distributed with this work for additional * information regarding copyright ownership. Crate licenses this file * to you under the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. You may * obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or * implied. See the License for the specific language governing * permissions and limitations under the License. * * However, if you have executed another commercial license agreement * with Crate these terms will supersede the license and you may use the * software solely pursuant to the terms of the relevant commercial * agreement. */ package io.crate.planner.statement; import com.carrotsearch.hppc.cursors.ObjectCursor; import com.google.common.collect.ImmutableList; import com.google.common.collect.Lists; import io.crate.analyze.CopyFromAnalyzedStatement; import io.crate.analyze.CopyToAnalyzedStatement; import io.crate.analyze.symbol.Symbol; import io.crate.metadata.ColumnIdent; import io.crate.metadata.GeneratedReference; import io.crate.metadata.PartitionName; import io.crate.metadata.Reference; import io.crate.metadata.doc.DocSysColumns; import io.crate.metadata.doc.DocTableInfo; import io.crate.operation.projectors.TopN; import io.crate.planner.Merge; import io.crate.planner.Plan; import io.crate.planner.Planner; import io.crate.planner.consumer.ConsumerContext; import io.crate.planner.consumer.FetchMode; import io.crate.planner.node.dql.Collect; import io.crate.planner.node.dql.FileUriCollectPhase; import io.crate.planner.projection.MergeCountProjection; import io.crate.planner.projection.Projection; import io.crate.planner.projection.SourceIndexWriterProjection; import io.crate.planner.projection.WriterProjection; import io.crate.planner.projection.builder.ProjectionBuilder; import org.apache.lucene.util.BytesRef; import org.elasticsearch.cluster.node.DiscoveryNode; import org.elasticsearch.cluster.node.DiscoveryNodes; import org.elasticsearch.cluster.service.ClusterService; import java.util.ArrayList; import java.util.Collection; import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Set; import java.util.function.Predicate; public class CopyStatementPlanner { private final ClusterService clusterService; public CopyStatementPlanner(ClusterService clusterService) { this.clusterService = clusterService; } public Plan planCopyFrom(CopyFromAnalyzedStatement analysis, Planner.Context context) { /* * copy from has two "modes": * * 1: non-partitioned tables or partitioned tables with partition ident --> import into single es index * -> collect raw source and import as is * * 2: partitioned table without partition ident * -> collect document and partition by values * -> exclude partitioned by columns from document * -> insert into es index (partition determined by partition by value) */ DocTableInfo table = analysis.table(); int clusteredByPrimaryKeyIdx = table.primaryKey().indexOf(analysis.table().clusteredBy()); List<String> partitionedByNames; String partitionIdent = null; List<BytesRef> partitionValues; if (analysis.partitionIdent() == null) { if (table.isPartitioned()) { partitionedByNames = Lists.newArrayList( Lists.transform(table.partitionedBy(), ColumnIdent::fqn)); } else { partitionedByNames = Collections.emptyList(); } partitionValues = ImmutableList.of(); } else { assert table.isPartitioned() : "table must be partitioned if partitionIdent is set"; // partitionIdent is present -> possible to index raw source into concrete es index partitionValues = PartitionName.decodeIdent(analysis.partitionIdent()); partitionIdent = analysis.partitionIdent(); partitionedByNames = Collections.emptyList(); } SourceIndexWriterProjection sourceIndexWriterProjection = new SourceIndexWriterProjection( table.ident(), partitionIdent, table.getReference(DocSysColumns.RAW), table.primaryKey(), table.partitionedBy(), partitionValues, table.clusteredBy(), clusteredByPrimaryKeyIdx, analysis.settings(), null, partitionedByNames.size() > 0 ? partitionedByNames.toArray(new String[partitionedByNames.size()]) : null, table.isPartitioned() // autoCreateIndices ); List<Projection> projections = Collections.singletonList(sourceIndexWriterProjection); List<ColumnIdent> primaryKeys = new ArrayList<>(table.primaryKey().size()); List<Symbol> toCollect = new ArrayList<>(); // add primaryKey columns for (ColumnIdent primaryKey : table.primaryKey()) { Reference reference = table.getReference(primaryKey); if (reference instanceof GeneratedReference && table.partitionedByColumns().contains(reference)) { // will track this reference in the partitioned by list (so we can extract its references and // the function expression continue; } toCollect.add(reference); primaryKeys.add(primaryKey); } partitionedByNames.removeAll(Lists.transform(primaryKeys, ColumnIdent::fqn)); // add partitioned columns (if not part of primaryKey) Set<Reference> referencedReferences = new HashSet<>(); for (String partitionedColumn : partitionedByNames) { Reference reference = table.getReference(ColumnIdent.fromPath(partitionedColumn)); Symbol symbol; if (reference instanceof GeneratedReference) { symbol = ((GeneratedReference) reference).generatedExpression(); referencedReferences.addAll(((GeneratedReference) reference).referencedReferences()); } else { symbol = reference; } toCollect.add(symbol); } // add clusteredBy column (if not part of primaryKey) if (clusteredByPrimaryKeyIdx == -1 && table.clusteredBy() != null && !DocSysColumns.ID.equals(table.clusteredBy())) { toCollect.add(table.getReference(table.clusteredBy())); } // add _raw or _doc if (table.isPartitioned() && analysis.partitionIdent() == null) { toCollect.add(table.getReference(DocSysColumns.DOC)); } else { toCollect.add(table.getReference(DocSysColumns.RAW)); } // add columns referenced by generated columns which are used as partitioned by column for (Reference reference : referencedReferences) { if (!toCollect.contains(reference)) { toCollect.add(reference); } } DiscoveryNodes allNodes = clusterService.state().nodes(); FileUriCollectPhase collectPhase = new FileUriCollectPhase( context.jobId(), context.nextExecutionPhaseId(), "copyFrom", getExecutionNodes(allNodes, analysis.settings().getAsInt("num_readers", allNodes.getSize()), analysis.nodePredicate()), analysis.uri(), toCollect, projections, analysis.settings().get("compression", null), analysis.settings().getAsBoolean("shared", null) ); Collect collect = new Collect(collectPhase, TopN.NO_LIMIT, 0, 1, 1, null); return Merge.ensureOnHandler(collect, context, Collections.singletonList(MergeCountProjection.INSTANCE)); } public Plan planCopyTo(CopyToAnalyzedStatement statement, Planner.Context context) { WriterProjection.OutputFormat outputFormat = statement.outputFormat(); if (outputFormat == null) { outputFormat = statement.columnsDefined() ? WriterProjection.OutputFormat.JSON_ARRAY : WriterProjection.OutputFormat.JSON_OBJECT; } WriterProjection projection = ProjectionBuilder.writerProjection( statement.subQueryRelation().querySpec().outputs(), statement.uri(), statement.compressionType(), statement.overwrites(), statement.outputNames(), outputFormat); ConsumerContext consumerContext = new ConsumerContext(context); consumerContext.setFetchMode(FetchMode.NEVER); Plan plan = context.planSubRelation(statement.subQueryRelation(), consumerContext); if (plan == null) { return null; } plan.addProjection(projection, null, null, null); return Merge.ensureOnHandler(plan, context, Collections.singletonList(MergeCountProjection.INSTANCE)); } private static Collection<String> getExecutionNodes(DiscoveryNodes allNodes, int maxNodes, final Predicate<DiscoveryNode> nodeFilters) { int counter = maxNodes; final List<String> nodes = new ArrayList<>(allNodes.getSize()); for (ObjectCursor<DiscoveryNode> cursor : allNodes.getDataNodes().values()) { if (nodeFilters.test(cursor.value) && counter-- > 0) { nodes.add(cursor.value.getId()); } } return nodes; } }