/**
* diqube: Distributed Query Base.
*
* Copyright (C) 2015 Bastian Gloeckle
*
* This file is part of diqube.
*
* diqube is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.diqube.executionenv;
import java.util.List;
import java.util.Map;
import org.diqube.data.column.ColumnShard;
import org.diqube.data.column.ColumnType;
import org.diqube.data.column.ConstantColumnShard;
import org.diqube.data.column.StandardColumnShard;
import org.diqube.data.table.TableShard;
import org.diqube.data.types.dbl.DoubleColumnShard;
import org.diqube.data.types.lng.LongColumnShard;
import org.diqube.data.types.str.StringColumnShard;
import org.diqube.executionenv.cache.ColumnShardCache;
import org.diqube.executionenv.querystats.QueryableColumnShard;
import org.diqube.executionenv.resolver.QueryableDoubleColumnShardResolver;
import org.diqube.executionenv.resolver.QueryableLongColumnShardResolver;
import org.diqube.executionenv.resolver.QueryableStringColumnShardResolver;
/**
* The environment of an execution, which holds for example temporary data produced by some {@link ExecutablePlanStep}s
* so other steps can fetch it from here.
*
* <p>
* This is used both on the Query Master node and on each Cluster node when executing a specific query. Note that on the
* query master, there is no backing {@link TableShard} for this execution, as the query master itself does not need to
* have any actual information about the Table/TableShard the query executes on.
*
* <p>
* During execution of the {@link ExecutablePlanStep}s, there is usually one "default execution environment" (or
* "defaultEnv") which is passed on to most steps in their constructor. This defaultEnv will contain those columns and
* values that are final, meaning which will not be changed any more. On the other hand, on the query master there are
* multiple {@link VersionedExecutionEnvironment} in place during execution, which enables the query master to not only
* execute steps as soon as the input (or intermediary) columns are fully built, but also to execute the steps on
* intermediary versions of the columns. As a simple example, this happens when one remote already responded with the
* values of a specific column, but a second remote did not yet. The query master might then decide to build an
* intermediary column out of the results of the first remote and start executing its steps based on that intermediary
* column, in order to produce user-facing results as soon as possible. Note that on remotes, no
* {@link VersionedExecutionEnvironment} will be used.
*
* <p>
* An {@link ExecutionEnvironment} might optionally be based on a {@link ColumnShardCache}, which will be the case on
* query remotes. Note that such an {@link ExecutionEnvironment} will not only load existing column shards from a
* backing {@link TableShard}, but also from the cache. As such cached {@link ColumnShard}s may be evicted from the
* cache at any time, though, the {@link ExecutionEnvironment} will add such a cached column to the "temporary columns"
* of the {@link ExecutionEnvironment} itself as soon as the column is fetched from the cache. With that procedure, the
* {@link ExecutionEnvironment} can guarantee that a specific column that was once "visible" to the
* {@link #getColumnShard(String)} methods (and similar) will be available throughout the execution of a whole query
* (=until the {@link ExecutionEnvironment} is invalidated). At the same time, the cache will be based on the temporary
* columns that are available in the {@link ExecutionEnvironment} of a query after its execution is complete - so cached
* columns will be presented again to the cache if they have been loaded into a {@link ExecutionEnvironment}. This
* allows the cache to count the usages of specific {@link ColumnShard}s, which in turn allows tuning the cache.
*
* @author Bastian Gloeckle
*/
public interface ExecutionEnvironment
    extends QueryableDoubleColumnShardResolver, QueryableLongColumnShardResolver, QueryableStringColumnShardResolver {

  /**
   * Returns the data type of a column that is available in this {@link ExecutionEnvironment}.
   *
   * @param colName
   *          Name of the column.
   * @return the {@link ColumnType} of a column that can be fetched with {@link #getColumnShard(String)},
   *         {@link #getLongColumnShard(String)}, {@link #getStringColumnShard(String)},
   *         {@link #getDoubleColumnShard(String)}, {@link #getPureConstantColumnShard(String)} or
   *         {@link #getPureStandardColumnShard(String)}.
   */
  ColumnType getColumnType(String colName);

  /**
   * Returns a {@link QueryableColumnShard} for a specific column (no matter what data type the corresponding column
   * has).
   *
   * <p>
   * That column shard can either be a temporary one or a "real" one from a {@link TableShard}.
   *
   * <p>
   * Note that this method might actually return a different instance each time called, but when a column for a name
   * was returned once, there will be data available until this {@link ExecutionEnvironment} is at its end of life.
   *
   * @param name
   *          Name of the column.
   * @return A {@link QueryableColumnShard} for the column with the given name or <code>null</code> if it does not
   *         exist.
   */
  QueryableColumnShard getColumnShard(String name);

  /**
   * Get the "real" (non-facaded) {@link StandardColumnShard} of a specific column.
   *
   * <p>
   * That column shard can either be a temporary one or a "real" one from a {@link TableShard}.
   *
   * <p>
   * Note that this method might actually return a different instance each time called, but when a column for a name
   * was returned once, there will be data available until this {@link ExecutionEnvironment} is at its end of life.
   *
   * @param name
   *          Name of the column.
   * @return A {@link StandardColumnShard} for the column or <code>null</code> if the column does not exist or if it
   *         is not a {@link StandardColumnShard}.
   */
  StandardColumnShard getPureStandardColumnShard(String name);

  /**
   * Get the "real" (non-facaded) {@link ConstantColumnShard} of a specific column.
   *
   * <p>
   * That column shard can either be a temporary one or a "real" one from a {@link TableShard}.
   *
   * <p>
   * Note that this method might actually return a different instance each time called, but when a column for a name
   * was returned once, there will be data available until this {@link ExecutionEnvironment} is at its end of life.
   *
   * @param name
   *          Name of the column.
   * @return A {@link ConstantColumnShard} for the column or <code>null</code> if the column does not exist or if it
   *         is not a {@link ConstantColumnShard}.
   */
  ConstantColumnShard getPureConstantColumnShard(String name);

  /**
   * Tells whether a column is a temporary one.
   *
   * @param colName
   *          Name of the column.
   * @return <code>true</code> if the given column is a temporary one, <code>false</code> if it is a real column
   *         present in a {@link TableShard}.
   */
  boolean isTemporaryColumn(String colName);

  /**
   * Returns a map from colName to a list of {@link QueryableColumnShard}s for all temporary columns.
   *
   * <p>
   * On the query master we may have several versions of a column (see {@link VersionedExecutionEnvironment}), this
   * method returns all versions of all columns, the last entry in the list being the newest version. Note that the
   * actual column shard objects might be different instances with each call.
   *
   * <p>
   * This method will <b>not</b> return cached columns which have not been requested at least once using
   * {@link #getColumnShard(String)} etc. It will <b>not</b> load any other cached column shards into this
   * ExecutionEnvironment in order to make sure that the column stays available.
   *
   * <p>
   * This method <b>will</b> return all column shards that were loaded from a cache because of a call to
   * {@link #getColumnShard(String)} etc.
   *
   * @return Map from column name to all versions of that temporary column, newest version last.
   */
  Map<String, List<QueryableColumnShard>> getAllTemporaryColumnShards();

  /**
   * Returns a map from colName to the {@link QueryableColumnShard} of each non-temporary column.
   *
   * <p>
   * In contrast to {@link #getAllTemporaryColumnShards()}, the returned map holds a single column shard per column
   * name and not a list of versions. Note that the actual column shard objects might be different instances with
   * each call.
   *
   * @return Map from column name to the {@link QueryableColumnShard} of that non-temporary column.
   */
  Map<String, QueryableColumnShard> getAllNonTemporaryColumnShards();

  /**
   * Store a new temporary {@link LongColumnShard} in this {@link ExecutionEnvironment}.
   *
   * @param column
   *          The column shard to store.
   */
  void storeTemporaryLongColumnShard(LongColumnShard column);

  /**
   * Store a new temporary {@link StringColumnShard} in this {@link ExecutionEnvironment}.
   *
   * @param column
   *          The column shard to store.
   */
  void storeTemporaryStringColumnShard(StringColumnShard column);

  /**
   * Store a new temporary {@link DoubleColumnShard} in this {@link ExecutionEnvironment}.
   *
   * @param column
   *          The column shard to store.
   */
  void storeTemporaryDoubleColumnShard(DoubleColumnShard column);

  /**
   * @return The overall lowest rowID of all columns of this {@link ExecutionEnvironment}.
   */
  long getFirstRowIdInShard();

  /**
   * @return The last rowID in the shard, or -1 if unknown, which typically happens on the query master, as it does
   *         not have a backing {@link TableShard}.
   */
  long getLastRowIdInShard();

  /**
   * @return The number of rows in the shard, or -1 if unknown, which typically happens on the query master, as it
   *         does not have a backing {@link TableShard}.
   */
  long getNumberOfRowsInShard();
}