/* This file is part of VoltDB.
* Copyright (C) 2008-2017 VoltDB Inc.
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with VoltDB. If not, see <http://www.gnu.org/licenses/>.
*/
package org.voltdb.plannodes;
import java.util.Collection;
import java.util.List;
import java.util.Set;
import org.json_voltpatches.JSONException;
import org.json_voltpatches.JSONObject;
import org.json_voltpatches.JSONStringer;
import org.voltdb.catalog.Database;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.expressions.AbstractSubqueryExpression;
import org.voltdb.expressions.ExpressionUtil;
import org.voltdb.expressions.TupleValueExpression;
import org.voltdb.types.ExpressionType;
import org.voltdb.types.JoinType;
import org.voltdb.types.PlanNodeType;
import org.voltdb.types.SortDirectionType;
public abstract class AbstractJoinPlanNode extends AbstractPlanNode implements IndexSortablePlanNode {
/**
* Names of the JSON fields associated with a join plan node.
* {@link #toJSONString(JSONStringer)} and
* {@link #loadFromJSONObject(JSONObject, Database)} use the constant names
* (via {@code name()}) as keys. SORT_DIRECTION is declared here but is not
* referenced by the serialization code in this class.
*/
public enum Members {
SORT_DIRECTION,
JOIN_TYPE,
PRE_JOIN_PREDICATE,
JOIN_PREDICATE,
WHERE_PREDICATE,
OUTPUT_SCHEMA_PRE_AGG;
}
// Kind of join performed by this node; INNER until changed.
protected JoinType m_joinType = JoinType.INNER;
// sortDirection is only used in handleOrderBy(),
// and the sortDirection used in EE is from inlined IndexScan node for NLIJ
protected SortDirectionType m_sortDirection = SortDirectionType.INVALID;
// The three optional predicates of the join. A null value means the
// predicate is absent; every user in this class null-checks them.
protected AbstractExpression m_preJoinPredicate = null;
protected AbstractExpression m_joinPredicate = null;
protected AbstractExpression m_wherePredicate = null;
// Schema of the joined rows before any inline aggregate is applied.
// It is the same object as m_outputSchema when there is no inline aggregate.
protected NodeSchema m_outputSchemaPreInlineAgg = null;
// Per-node ORDER BY index-use bookkeeping, exposed through indexUse().
private final IndexUseForOrderBy m_indexUse = new IndexUseForOrderBy();
/**
* Creates a join node with the field defaults declared on this class
* (INNER join type, INVALID sort direction, no predicates).
* Only subclasses can call this constructor.
*/
protected AbstractJoinPlanNode() {
super();
}
/**
 * Validates this node via the superclass and then validates each of the
 * pre-join, join and where predicates that is present.
 *
 * @throws Exception if the superclass or any predicate fails validation
 */
@Override
public void validate() throws Exception {
    super.validate();
    // Same order as the fields are declared: pre-join, join, where.
    final AbstractExpression[] predicates = {
        m_preJoinPredicate, m_joinPredicate, m_wherePredicate
    };
    for (AbstractExpression predicate : predicates) {
        if (predicate == null) {
            continue;
        }
        predicate.validate();
    }
}
/**
* Returns the kind of join this node performs.
*
* @return the current join type (INNER unless it has been changed)
*/
public JoinType getJoinType() {
return m_joinType;
}
/**
* Sets the kind of join this node performs. The value is stored as given,
* without any validation.
*
* @param join_type the join_type to set
*/
public void setJoinType(JoinType join_type) {
m_joinType = join_type;
}
/**
* Returns the pre-join predicate held by this node. The stored object
* itself is returned, not a copy.
*
* @return the pre join predicate, or null if none is set
*/
public AbstractExpression getPreJoinPredicate() {
return m_preJoinPredicate;
}
/**
* Returns the join predicate held by this node. The stored object
* itself is returned, not a copy.
*
* @return the join predicate, or null if none is set
*/
public AbstractExpression getJoinPredicate() {
return m_joinPredicate;
}
/**
* Returns the where predicate held by this node. The stored object
* itself is returned, not a copy.
*
* @return the where predicate, or null if none is set
*/
public AbstractExpression getWherePredicate() {
return m_wherePredicate;
}
/**
 * Replaces the where predicate. A non-null argument is cloned, so later
 * changes to the caller's expression do not affect this node.
 *
 * @param predicate the where predicate to set, or null to clear it
 */
public void setWherePredicate(AbstractExpression predicate)
{
    m_wherePredicate = (predicate == null) ? null : predicate.clone();
}
/**
 * Replaces the pre-join predicate. A non-null argument is cloned, so later
 * changes to the caller's expression do not affect this node.
 *
 * @param predicate the pre-join predicate to set, or null to clear it
 */
public void setPreJoinPredicate(AbstractExpression predicate)
{
    m_preJoinPredicate = (predicate == null) ? null : predicate.clone();
}
/**
 * Replaces the join predicate. A non-null argument is cloned, so later
 * changes to the caller's expression do not affect this node.
 *
 * @param predicate the join predicate to set, or null to clear it
 */
public void setJoinPredicate(AbstractExpression predicate)
{
    m_joinPredicate = (predicate == null) ? null : predicate.clone();
}
/**
 * Builds the output schema of a generic two-child join: the outer child's
 * schema followed by the inner child's schema, with every column replaced
 * by a TVE. Index joins have to override this method.
 *
 * @param db the catalog database handed down to children and subqueries
 */
@Override
public void generateOutputSchema(Database db)
{
    // FUTURE: At some point it would be awesome to further
    // cull the columns out of the join to remove columns that were only
    // used by scans/joins.  I think we can coerce HSQL into provide this
    // info relatively easily. --izzy

    // This generic implementation only handles the two-child topology.
    assert(m_children.size() == 2);

    // Children first: their schemas are the inputs to ours.
    for (AbstractPlanNode childNode : m_children) {
        childNode.generateOutputSchema(db);
    }

    // Then the schemas of any subqueries referenced by this node.
    for (AbstractExpression subquery : findAllSubquerySubexpressions()) {
        ((AbstractSubqueryExpression) subquery).generateOutputSchema(db);
    }

    // Outer columns on the left, inner columns on the right.
    final NodeSchema outerSchema = m_children.get(0).getOutputSchema();
    final NodeSchema innerSchema = m_children.get(1).getOutputSchema();
    m_outputSchemaPreInlineAgg = outerSchema.join(innerSchema).copyAndReplaceWithTVE();
    m_hasSignificantOutputSchema = true;

    generateRealOutputSchema(db);
}
/**
 * Sets the node's final output schema. Without an inline aggregate it is
 * the pre-aggregation join schema itself; with one, it is a TVE copy of
 * the aggregate's freshly generated output schema.
 *
 * @param db the catalog database passed to the inline aggregate
 */
protected void generateRealOutputSchema(Database db) {
    final AggregatePlanNode inlineAgg = AggregatePlanNode.getInlineAggregationNode(this);
    if (inlineAgg == null) {
        m_outputSchema = m_outputSchemaPreInlineAgg;
        return;
    }
    // Generating the aggregate's schema also covers its subqueries.
    inlineAgg.generateOutputSchema(db);
    m_outputSchema = inlineAgg.getOutputSchema().copyAndReplaceWithTVE();
}
/**
 * Given any non-inlined type of join, resolves the column order and TVE
 * indexes for the output SchemaColumns.
 *
 * Steps, in order: resolve the children, resolve the three predicates
 * against the child schemas, resolve subquery expressions, assign each
 * pre-aggregation output column its index in the concatenated
 * [outer][inner] row, sort each side by that index, and finally resolve
 * the real (possibly aggregated) output schema.
 *
 * @throws RuntimeException if an output column cannot be found in the
 *         child schema it is expected to come from
 */
@Override
public void resolveColumnIndexes()
{
    // First, assert that our topology is sane and then
    // recursively resolve all child/inline column indexes
    IndexScanPlanNode index_scan =
        (IndexScanPlanNode) getInlinePlanNode(PlanNodeType.INDEXSCAN);
    assert(m_children.size() == 2 && index_scan == null);
    for (AbstractPlanNode child : m_children) {
        child.resolveColumnIndexes();
    }

    final NodeSchema outer_schema = m_children.get(0).getOutputSchema();
    final NodeSchema inner_schema = m_children.get(1).getOutputSchema();
    final int outerSize = outer_schema.size();

    // resolve predicates
    resolvePredicate(m_preJoinPredicate, outer_schema, inner_schema);
    resolvePredicate(m_joinPredicate, outer_schema, inner_schema);
    resolvePredicate(m_wherePredicate, outer_schema, inner_schema);

    // Resolve subquery expression indexes
    resolveSubqueryColumnIndexes();

    // Resolve TVE indexes for each schema column.
    for (int i = 0; i < m_outputSchemaPreInlineAgg.size(); ++i) {
        SchemaColumn col = m_outputSchemaPreInlineAgg.getColumns().get(i);

        // These will all be TVEs.
        assert(col.getExpression() instanceof TupleValueExpression);
        TupleValueExpression tve = (TupleValueExpression)col.getExpression();

        // The first outerSize columns come from the outer child,
        // the remaining ones from the inner child.
        final boolean fromOuter = i < outerSize;
        int index = tve.setColumnIndexUsingSchema(fromOuter ? outer_schema : inner_schema);

        // Check for the "not found" marker BEFORE applying the inner-side
        // offset. Otherwise a failed inner lookup (-1) would be turned
        // into the seemingly valid index (outerSize - 1) and go unnoticed.
        if (index == -1) {
            throw new RuntimeException("Unable to find index for column: " +
                                       col.toString());
        }
        if (!fromOuter) {
            // Inner columns follow the outer columns in the joined row.
            index += outerSize;
        }

        tve.setColumnIndex(index);
        tve.setDifferentiator(index);
    }

    // We want the output columns to be ordered like [outer table columns][inner table columns],
    // and further ordered by TVE index within the left- and righthand sides.
    // generateOutputSchema already places outer columns on the left and inner on the right,
    // so we just need to order the left- and righthand sides by TVE index separately.
    m_outputSchemaPreInlineAgg.sortByTveIndex(0, outerSize);
    m_outputSchemaPreInlineAgg.sortByTveIndex(outerSize, m_outputSchemaPreInlineAgg.size());
    m_hasSignificantOutputSchema = true;

    resolveRealOutputSchema();
}
/**
 * Finalizes the node's output schema after column indexes are resolved.
 * Without an inline aggregate the pre-aggregation schema is used as is;
 * with one, the aggregate is resolved against the pre-aggregation schema
 * and a clone of its output schema becomes this node's output schema.
 */
protected void resolveRealOutputSchema() {
    final AggregatePlanNode inlineAgg = AggregatePlanNode.getInlineAggregationNode(this);
    if (inlineAgg == null) {
        m_outputSchema = m_outputSchemaPreInlineAgg;
        return;
    }
    inlineAgg.resolveColumnIndexesUsingSchema(m_outputSchemaPreInlineAgg);
    m_outputSchema = inlineAgg.getOutputSchema().clone();
}
/**
* Returns the sort direction recorded for this join. It starts out as
* INVALID and is updated by {@link #resolveSortDirection()}.
*
* @return the current sort direction
*/
public SortDirectionType getSortDirection() {
return m_sortDirection;
}
/**
 * Reports whether this join's output is already ordered as requested.
 * An inline hash aggregate always yields false. Otherwise the question is
 * delegated to the outer (first) child, but only when that child is an
 * index scan or another join; any other outer child yields false.
 *
 * @param sortExpressions the requested sort expressions
 * @param sortDirections  the requested direction for each expression
 * @return true if the outer child reports the requested ordering
 */
@Override
public boolean isOutputOrdered (List<AbstractExpression> sortExpressions, List<SortDirectionType> sortDirections) {
    final AbstractPlanNode outerChild = m_children.get(0);
    final AbstractPlanNode inlineAgg = AggregatePlanNode.getInlineAggregationNode(this);
    final boolean hashAggregated =
        inlineAgg != null && inlineAgg.getPlanNodeType() == PlanNodeType.HASHAGGREGATE;
    if (hashAggregated) {
        return false;
    }
    // Not yet handling ORDER BY expressions based on more than just the left-most table
    final boolean outerCanProvideOrder =
        outerChild.getPlanNodeType() == PlanNodeType.INDEXSCAN
        || outerChild instanceof AbstractJoinPlanNode;
    return outerCanProvideOrder
        && outerChild.isOutputOrdered(sortExpressions, sortDirections);
}
/**
 * Derives this join's sort direction from its outer (first) child.
 * FULL joins always get INVALID. For other join types the direction is
 * taken from the outer child's index use when that child is an
 * IndexSortablePlanNode; otherwise the current value is left untouched.
 */
// TODO: need to extend the sort direction for join from one table to the other table if possible
// right now, only consider the sort direction on the outer table
public void resolveSortDirection() {
    final AbstractPlanNode outerChild = m_children.get(0);
    if (m_joinType == JoinType.FULL) {
        // Disable the usual optimizations for ordering join output by
        // outer table only. In case of FULL join, the unmatched inner table tuples
        // get appended to the end of the join's output table thus invalidating
        // the outer table join order.
        m_sortDirection = SortDirectionType.INVALID;
    }
    else if (outerChild instanceof IndexSortablePlanNode) {
        final IndexSortablePlanNode sortableOuter = (IndexSortablePlanNode) outerChild;
        m_sortDirection = sortableOuter.indexUse().getSortOrderFromIndexScan();
    }
}
/**
 * Writes this node's JSON form: the superclass fields, the join type, the
 * three predicates, and, only when it is a different object from the
 * final output schema, the pre-aggregation schema as an array of columns.
 *
 * @param stringer the JSON writer to append to
 * @throws JSONException if the writer rejects any key or value
 */
@Override
public void toJSONString(JSONStringer stringer) throws JSONException {
    super.toJSONString(stringer);
    stringer.keySymbolValuePair(Members.JOIN_TYPE.name(), m_joinType.toString());
    stringer.key(Members.PRE_JOIN_PREDICATE.name()).value(m_preJoinPredicate);
    stringer.key(Members.JOIN_PREDICATE.name()).value(m_joinPredicate);
    stringer.key(Members.WHERE_PREDICATE.name()).value(m_wherePredicate);

    // Identity comparison on purpose: the two fields refer to the same
    // object when there is no inline aggregate, and then nothing more is written.
    if (m_outputSchemaPreInlineAgg == m_outputSchema) {
        return;
    }

    stringer.key(Members.OUTPUT_SCHEMA_PRE_AGG.name());
    stringer.array();
    for (SchemaColumn preAggColumn : m_outputSchemaPreInlineAgg.getColumns()) {
        preAggColumn.toJSONString(stringer, true);
    }
    stringer.endArray();
}
/**
 * Restores this node from its JSON form: the common fields, the join
 * type, the three predicates and the pre-aggregation schema. When the
 * JSON carries no pre-aggregation schema, the already loaded output
 * schema is used for it.
 *
 * @param jobj the JSON object describing this node
 * @param db   the catalog database used while loading
 * @throws JSONException if a required field is missing or malformed
 */
@Override
public void loadFromJSONObject(JSONObject jobj, Database db)
        throws JSONException {
    helpLoadFromJSONObject(jobj, db);

    final String joinTypeName = jobj.getString(Members.JOIN_TYPE.name());
    m_joinType = JoinType.get(joinTypeName);

    m_preJoinPredicate = AbstractExpression.fromJSONChild(jobj, Members.PRE_JOIN_PREDICATE.name());
    m_joinPredicate = AbstractExpression.fromJSONChild(jobj, Members.JOIN_PREDICATE.name());
    m_wherePredicate = AbstractExpression.fromJSONChild(jobj, Members.WHERE_PREDICATE.name());

    final String preAggKey = Members.OUTPUT_SCHEMA_PRE_AGG.name();
    if (jobj.isNull(preAggKey)) {
        // No separate pre-aggregation schema was serialized.
        m_outputSchemaPreInlineAgg = m_outputSchema;
    }
    else {
        m_hasSignificantOutputSchema = true;
        m_outputSchemaPreInlineAgg = loadSchemaFromJSONObject(jobj, preAggKey);
    }
}
/**
 * Resolves every TVE found in a predicate against the two child schemas.
 * Each TVE is looked up in the outer schema first and, failing that, in
 * the inner schema; its table index is then set to 0 (outer) or 1 (inner).
 *
 * @param expression   the predicate whose TVEs are to be resolved
 * @param outer_schema output schema of the outer child
 * @param inner_schema output schema of the inner child
 * @throws RuntimeException if a TVE is found in neither schema
 */
protected static void resolvePredicate(AbstractExpression expression,
        NodeSchema outer_schema, NodeSchema inner_schema) {
    for (TupleValueExpression tve : ExpressionUtil.getTupleValueExpressions(expression)) {
        int tableIdx = 0; // 0 for outer table
        if (tve.setColumnIndexUsingSchema(outer_schema) == -1) {
            // Not an outer column; it has to be an inner one.
            if (tve.setColumnIndexUsingSchema(inner_schema) == -1) {
                throw new RuntimeException(
                        "Unable to resolve column index for join TVE: " +
                        tve.toString());
            }
            tableIdx = 1; // 1 for inner table
        }
        tve.setTableIndex(tableIdx);
    }
}
/**
 * Resolves the TVEs of each expression in the list, in list order, by
 * delegating to the single-expression overload.
 *
 * @param expressions  the predicates whose TVEs are to be resolved
 * @param outer_schema output schema of the outer child
 * @param inner_schema output schema of the inner child
 */
protected static void resolvePredicate(List<AbstractExpression> expressions,
        NodeSchema outer_schema, NodeSchema inner_schema) {
    for (AbstractExpression predicate : expressions) {
        resolvePredicate(predicate, outer_schema, inner_schema);
    }
}
/**
 * Builds the "filter by" fragment of this node's explain output from the
 * predicates that are present, in the order pre-join, join, where.
 * The first predicate is introduced on a new line at the given indent;
 * later ones are joined with " AND ".
 *
 * @param indent the indentation to put after the leading newline
 * @return the filter description, or an empty string if no predicate is set
 */
protected String explainFilters(String indent) {
    final StringBuilder explained = new StringBuilder();
    String separator = "\n" + indent + " filter by ";
    final AbstractExpression[] predicates = { m_preJoinPredicate, m_joinPredicate, m_wherePredicate };
    for (AbstractExpression predicate : predicates) {
        if (predicate == null) {
            continue;
        }
        // No default table name prefix for columns.
        explained.append(separator).append(predicate.explain("!?"));
        separator = " AND ";
    }
    return explained.toString();
}
/**
 * Collects every subexpression of the given class reachable from this
 * node: whatever the superclass finds, plus matches inside the pre-join,
 * join and where predicates that are present.
 *
 * @param aeClass   the expression class to look for
 * @param collected the set that matching expressions are added to
 */
@Override
public void findAllExpressionsOfClass(Class< ? extends AbstractExpression> aeClass, Set<AbstractExpression> collected) {
    super.findAllExpressionsOfClass(aeClass, collected);
    final AbstractExpression[] predicates = {
        m_preJoinPredicate, m_joinPredicate, m_wherePredicate
    };
    for (AbstractExpression predicate : predicates) {
        if (predicate == null) {
            continue;
        }
        collected.addAll(predicate.findAllSubexpressionsOfClass(aeClass));
    }
}
/**
 * Discounts a join child's estimated processed tuple count based on the
 * number of filters in that child's predicate.
 *
 * The predicate is taken from the child itself for scans, from the where
 * predicate for nested-loop joins, and from the inlined index scan for
 * nested-loop index joins. Any other child, or a child without a
 * predicate, is not discounted.
 *
 * @param childNode the child whose estimate is to be discounted
 * @return discounted estimates
 */
protected long discountEstimatedProcessedTupleCount(AbstractPlanNode childNode) {
    // Each filter lowers the estimate with a rapidly diminishing effect:
    // the k-th COMPARE_EQUAL filter takes off 0.09^k and the k-th filter of
    // any other expression type takes off 0.045^k. One filter therefore
    // discounts 0.09 (equality) or 0.045 (other), and by the geometric
    // series the totals stay below 0.09/0.91 (~0.0989) for equality filters
    // and 0.045/0.955 (~0.0471) for the others.
    // NOTE(review): the original comment said these discounts are kept
    // below the partial index discount (0.1) so that the index wins. The
    // partial index discount is defined outside this file; confirm.
    final AbstractExpression filter;
    if (childNode instanceof AbstractScanPlanNode) {
        filter = ((AbstractScanPlanNode) childNode).getPredicate();
    }
    else if (childNode instanceof NestLoopPlanNode) {
        filter = ((NestLoopPlanNode) childNode).getWherePredicate();
    }
    else if (childNode instanceof NestLoopIndexPlanNode) {
        AbstractPlanNode inlineIndexScan =
            ((NestLoopIndexPlanNode) childNode).getInlinePlanNode(PlanNodeType.INDEXSCAN);
        assert(inlineIndexScan != null);
        filter = ((AbstractScanPlanNode) inlineIndexScan).getPredicate();
    }
    else {
        // Unknown child kind: treated like a child without a predicate.
        filter = null;
    }

    if (filter == null) {
        return childNode.getEstimatedProcessedTupleCount();
    }

    final double MAX_EQ_POST_FILTER_DISCOUNT = 0.09;
    final double MAX_OTHER_POST_FILTER_DISCOUNT = 0.045;

    // Separate running counts for equality filters and all other filters.
    int equalitySeen = 0;
    int othersSeen = 0;
    double remainingFraction = 1.0;
    for (AbstractExpression conjunct : ExpressionUtil.uncombinePredicate(filter)) {
        final boolean isEquality =
            ExpressionType.COMPARE_EQUAL == conjunct.getExpressionType();
        final double base = isEquality
            ? MAX_EQ_POST_FILTER_DISCOUNT
            : MAX_OTHER_POST_FILTER_DISCOUNT;
        final int ordinal = isEquality ? ++equalitySeen : ++othersSeen;
        remainingFraction -= Math.pow(base, ordinal);
    }
    return (long) (childNode.getEstimatedProcessedTupleCount() * remainingFraction);
}
/**
 * When a project node is added to the top of the plan, we need to adjust
 * the differentiator field of TVEs to reflect differences in the scan
 * schema vs the storage schema of a table, so that fields with duplicate
 * names produced by expanding "SELECT *" can resolve correctly.
 *
 * We recurse until we find either a join node or a scan node.
 *
 * Resolution of columns produced by "SELECT *" is not a problem for
 * joins because there is always a sequential scan at the top of plans
 * that have this problem, so just use the tve's column index as its
 * differentiator here.
 *
 * @param tve the expression whose differentiator is set to its column index
 */
@Override
public void adjustDifferentiatorField(TupleValueExpression tve) {
    final int columnIndex = tve.getColumnIndex();
    tve.setDifferentiator(columnIndex);
}
/**
* Returns the IndexUseForOrderBy object owned by this node. The same
* instance is returned on every call.
*/
@Override
public IndexUseForOrderBy indexUse() {
return m_indexUse;
}
/**
* Returns this join node itself as an AbstractPlanNode.
*/
@Override
public AbstractPlanNode planNode() {
return this;
}
}