CopyStatementPlanner.java example

Explorer
crate-master
/*
 * Licensed to Crate under one or more contributor license agreements.
 * See the NOTICE file distributed with this work for additional
 * information regarding copyright ownership.  Crate licenses this file
 * to you under the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.  You may
 * obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.  See the License for the specific language governing
 * permissions and limitations under the License.
 *
 * However, if you have executed another commercial license agreement
 * with Crate these terms will supersede the license and you may use the
 * software solely pursuant to the terms of the relevant commercial
 * agreement.
 */

package io.crate.planner.statement;

import com.carrotsearch.hppc.cursors.ObjectCursor;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import io.crate.analyze.CopyFromAnalyzedStatement;
import io.crate.analyze.CopyToAnalyzedStatement;
import io.crate.analyze.symbol.Symbol;
import io.crate.metadata.ColumnIdent;
import io.crate.metadata.GeneratedReference;
import io.crate.metadata.PartitionName;
import io.crate.metadata.Reference;
import io.crate.metadata.doc.DocSysColumns;
import io.crate.metadata.doc.DocTableInfo;
import io.crate.operation.projectors.TopN;
import io.crate.planner.Merge;
import io.crate.planner.Plan;
import io.crate.planner.Planner;
import io.crate.planner.consumer.ConsumerContext;
import io.crate.planner.consumer.FetchMode;
import io.crate.planner.node.dql.Collect;
import io.crate.planner.node.dql.FileUriCollectPhase;
import io.crate.planner.projection.MergeCountProjection;
import io.crate.planner.projection.Projection;
import io.crate.planner.projection.SourceIndexWriterProjection;
import io.crate.planner.projection.WriterProjection;
import io.crate.planner.projection.builder.ProjectionBuilder;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.cluster.node.DiscoveryNode;
import org.elasticsearch.cluster.node.DiscoveryNodes;
import org.elasticsearch.cluster.service.ClusterService;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.function.Predicate;

public class CopyStatementPlanner {

    private final ClusterService clusterService;

    public CopyStatementPlanner(ClusterService clusterService) {
        this.clusterService = clusterService;
    }

    public Plan planCopyFrom(CopyFromAnalyzedStatement analysis, Planner.Context context) {
        /*
         * copy from has two "modes":
         *
         * 1: non-partitioned tables or partitioned tables with partition ident --> import into single es index
         *    -> collect raw source and import as is
         *
         * 2: partitioned table without partition ident
         *    -> collect document and partition by values
         *    -> exclude partitioned by columns from document
         *    -> insert into es index (partition determined by partition by value)
         */

        DocTableInfo table = analysis.table();
        int clusteredByPrimaryKeyIdx = table.primaryKey().indexOf(analysis.table().clusteredBy());
        List<String> partitionedByNames;
        String partitionIdent = null;

        List<BytesRef> partitionValues;
        if (analysis.partitionIdent() == null) {

            if (table.isPartitioned()) {
                partitionedByNames = Lists.newArrayList(
                    Lists.transform(table.partitionedBy(), ColumnIdent::fqn));
            } else {
                partitionedByNames = Collections.emptyList();
            }
            partitionValues = ImmutableList.of();
        } else {
            assert table.isPartitioned() : "table must be partitioned if partitionIdent is set";
            // partitionIdent is present -> possible to index raw source into concrete es index

            partitionValues = PartitionName.decodeIdent(analysis.partitionIdent());
            partitionIdent = analysis.partitionIdent();
            partitionedByNames = Collections.emptyList();
        }

        SourceIndexWriterProjection sourceIndexWriterProjection = new SourceIndexWriterProjection(
            table.ident(),
            partitionIdent,
            table.getReference(DocSysColumns.RAW),
            table.primaryKey(),
            table.partitionedBy(),
            partitionValues,
            table.clusteredBy(),
            clusteredByPrimaryKeyIdx,
            analysis.settings(),
            null,
            partitionedByNames.size() >
            0 ? partitionedByNames.toArray(new String[partitionedByNames.size()]) : null,
            table.isPartitioned() // autoCreateIndices
        );
        List<Projection> projections = Collections.singletonList(sourceIndexWriterProjection);

        List<ColumnIdent> primaryKeys = new ArrayList<>(table.primaryKey().size());
        List<Symbol> toCollect = new ArrayList<>();
        // add primaryKey columns
        for (ColumnIdent primaryKey : table.primaryKey()) {
            Reference reference = table.getReference(primaryKey);
            if (reference instanceof GeneratedReference && table.partitionedByColumns().contains(reference)) {
                // will track this reference in the partitioned by list (so we can extract its references and
                // the function expression
                continue;
            }
            toCollect.add(reference);
            primaryKeys.add(primaryKey);
        }
        partitionedByNames.removeAll(Lists.transform(primaryKeys, ColumnIdent::fqn));

        // add partitioned columns (if not part of primaryKey)
        Set<Reference> referencedReferences = new HashSet<>();
        for (String partitionedColumn : partitionedByNames) {
            Reference reference = table.getReference(ColumnIdent.fromPath(partitionedColumn));
            Symbol symbol;
            if (reference instanceof GeneratedReference) {
                symbol = ((GeneratedReference) reference).generatedExpression();
                referencedReferences.addAll(((GeneratedReference) reference).referencedReferences());
            } else {
                symbol = reference;
            }
            toCollect.add(symbol);
        }
        // add clusteredBy column (if not part of primaryKey)
        if (clusteredByPrimaryKeyIdx == -1 && table.clusteredBy() != null &&
            !DocSysColumns.ID.equals(table.clusteredBy())) {
            toCollect.add(table.getReference(table.clusteredBy()));
        }
        // add _raw or _doc
        if (table.isPartitioned() && analysis.partitionIdent() == null) {
            toCollect.add(table.getReference(DocSysColumns.DOC));
        } else {
            toCollect.add(table.getReference(DocSysColumns.RAW));
        }

        // add columns referenced by generated columns which are used as partitioned by column
        for (Reference reference : referencedReferences) {
            if (!toCollect.contains(reference)) {
                toCollect.add(reference);
            }
        }

        DiscoveryNodes allNodes = clusterService.state().nodes();
        FileUriCollectPhase collectPhase = new FileUriCollectPhase(
            context.jobId(),
            context.nextExecutionPhaseId(),
            "copyFrom",
            getExecutionNodes(allNodes, analysis.settings().getAsInt("num_readers", allNodes.getSize()), analysis.nodePredicate()),
            analysis.uri(),
            toCollect,
            projections,
            analysis.settings().get("compression", null),
            analysis.settings().getAsBoolean("shared", null)
        );

        Collect collect = new Collect(collectPhase, TopN.NO_LIMIT, 0, 1, 1, null);
        return Merge.ensureOnHandler(collect, context, Collections.singletonList(MergeCountProjection.INSTANCE));
    }

    public Plan planCopyTo(CopyToAnalyzedStatement statement, Planner.Context context) {
        WriterProjection.OutputFormat outputFormat = statement.outputFormat();
        if (outputFormat == null) {
            outputFormat = statement.columnsDefined() ?
                WriterProjection.OutputFormat.JSON_ARRAY : WriterProjection.OutputFormat.JSON_OBJECT;
        }

        WriterProjection projection = ProjectionBuilder.writerProjection(
            statement.subQueryRelation().querySpec().outputs(),
            statement.uri(),
            statement.compressionType(),
            statement.overwrites(),
            statement.outputNames(),
            outputFormat);

        ConsumerContext consumerContext = new ConsumerContext(context);
        consumerContext.setFetchMode(FetchMode.NEVER);
        Plan plan = context.planSubRelation(statement.subQueryRelation(), consumerContext);
        if (plan == null) {
            return null;
        }
        plan.addProjection(projection, null, null, null);
        return Merge.ensureOnHandler(plan, context, Collections.singletonList(MergeCountProjection.INSTANCE));
    }

    private static Collection<String> getExecutionNodes(DiscoveryNodes allNodes,
                                                        int maxNodes,
                                                        final Predicate<DiscoveryNode> nodeFilters) {
        int counter = maxNodes;
        final List<String> nodes = new ArrayList<>(allNodes.getSize());
        for (ObjectCursor<DiscoveryNode> cursor : allNodes.getDataNodes().values()) {
            if (nodeFilters.test(cursor.value) && counter-- > 0) {
                nodes.add(cursor.value.getId());
            }
        }
        return nodes;
    }
}