/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.plan.planner;
import java.util.List;
import org.diqube.diql.request.FunctionRequest;
import org.diqube.executionenv.ExecutionEnvironment;
import org.diqube.remote.cluster.thrift.RExecutionPlanStep;
import org.diqube.remote.cluster.thrift.RExecutionPlanStepType;
/**
 * Manages the columns that have to be present in the {@link ExecutionEnvironment} of one specific type of node while a
 * query is being executed.
 *
 * @param <T>
 *          Type of the steps this manager wires and finally returns from {@link #build()}.
 *
 * @author Bastian Gloeckle
 */
public interface ColumnManager<T> {

  /**
   * Creates the steps that build a column by executing the function described by the given {@link FunctionRequest}.
   *
   * @param functionRequest
   *          The request describing the function whose execution produces the column.
   */
  void produceColumn(FunctionRequest functionRequest);

  /**
   * Makes sure that the data of the given column is available on this type of node.
   *
   * <p>
   * Typically this is only of interest for the query master: After calling this method, all cluster nodes select the
   * actual values of the column (for the active row IDs) and forward them to the query master. This is required e.g.
   * when the query master has to evaluate an ORDER or a HAVING clause and therefore needs the column data for all
   * selected rowIDs, not only for those of a TableShard the query master might hold itself.
   *
   * <p>
   * Besides having the cluster nodes resolve the actual values and forward them to the query master, calling this
   * method also makes the query master build an actual temporary column from these values (i.e. the column is made
   * available in the {@link ExecutionEnvironment}).
   *
   * @param colName
   *          Name of the column whose data needs to be available.
   */
  void ensureColumnAvailable(String colName);

  /**
   * Wires the targetStep to the step that builds the given column, provided that the column name is available and the
   * column is up for being created on this type of cluster node. The targetStep is informed as soon as the column has
   * actually been built and does not execute before that happened.
   *
   * @param colName
   *          Name of the column whose building step should be the source of the wiring.
   * @param targetStep
   *          The step that has to wait for the column to be built.
   */
  void wireOutputOfColumnIfAvailable(String colName, T targetStep);

  /**
   * Wires all column-creating steps (those created by calling {@link #produceColumn(FunctionRequest)}) to the given
   * group step. This method should be called as soon as a source step for groupings is available.
   *
   * <p>
   * Please note that the kind of group step may differ between implementing classes: A cluster node (=remote)
   * implementation might need a {@link RExecutionPlanStep} of type {@link RExecutionPlanStepType#GROUP} (= the step
   * that creates the actual groupings), whereas a query master implementation might need a step that provides already
   * intermediate-aggregated results of any group aggregation functions.
   *
   * @param groupStep
   *          The step acting as source of groupings; see the implementing class for what kind of step is expected.
   */
  void wireGroupInput(T groupStep);

  /**
   * Prepares the call to {@link #build()} and executes any side effects on other data structures that need to be
   * executed. The implementing classes describe those side effects and when this method actually has to be called.
   */
  void prepareBuild();

  /**
   * Executes the final wiring of the steps that were created using this {@link ColumnManager}, then builds and returns
   * them.
   *
   * <p>
   * Please note that implementing classes might need to be provided with additional steps, see their JavaDoc.
   *
   * @return The steps that were built.
   */
  List<T> build();

  /**
   * Checks whether {@link #produceColumn(FunctionRequest)} was called for the given colName, which means that the
   * corresponding column will be created.
   *
   * @param colName
   *          Name of the column to check.
   * @return <code>true</code> if {@link #produceColumn(FunctionRequest)} was called for the given colName.
   */
  boolean isColumnProduced(String colName);
}