AbstractJoinPlanNode.java example

Explorer
voltdb-master
/* This file is part of VoltDB.
 * Copyright (C) 2008-2017 VoltDB Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with VoltDB.  If not, see <http://www.gnu.org/licenses/>.
 */

package org.voltdb.plannodes;

import java.util.Collection;
import java.util.List;
import java.util.Set;

import org.json_voltpatches.JSONException;
import org.json_voltpatches.JSONObject;
import org.json_voltpatches.JSONStringer;
import org.voltdb.catalog.Database;
import org.voltdb.expressions.AbstractExpression;
import org.voltdb.expressions.AbstractSubqueryExpression;
import org.voltdb.expressions.ExpressionUtil;
import org.voltdb.expressions.TupleValueExpression;
import org.voltdb.types.ExpressionType;
import org.voltdb.types.JoinType;
import org.voltdb.types.PlanNodeType;
import org.voltdb.types.SortDirectionType;

/**
 * Common base for join plan nodes. Holds the join type, the three join
 * predicates (pre-join, join and where), the sort direction inherited from the
 * outer child, and the output schema as it looks before any inlined
 * aggregation is applied.
 */
public abstract class AbstractJoinPlanNode extends AbstractPlanNode implements IndexSortablePlanNode {

    /** Keys used by this class's JSON serialization and deserialization. */
    public enum Members {
        SORT_DIRECTION,
        JOIN_TYPE,
        PRE_JOIN_PREDICATE,
        JOIN_PREDICATE,
        WHERE_PREDICATE,
        OUTPUT_SCHEMA_PRE_AGG;
    }

    // Base of the per-filter discount used by discountEstimatedProcessedTupleCount():
    // the k-th filter of a kind subtracts (base ^ k) from the discount factor.
    private static final double MAX_EQ_POST_FILTER_DISCOUNT = 0.09;
    private static final double MAX_OTHER_POST_FILTER_DISCOUNT = 0.045;

    protected JoinType m_joinType = JoinType.INNER;
    // sortDirection is only used in handleOrderBy(),
    // and the sortDirection used in EE is from inlined IndexScan node for NLIJ
    protected SortDirectionType m_sortDirection = SortDirectionType.INVALID;
    protected AbstractExpression m_preJoinPredicate = null;
    protected AbstractExpression m_joinPredicate = null;
    protected AbstractExpression m_wherePredicate = null;

    // Output schema of the join itself, before an inlined aggregate (if any)
    // replaces it as the node's real output schema.
    protected NodeSchema m_outputSchemaPreInlineAgg = null;
    private final IndexUseForOrderBy m_indexUse = new IndexUseForOrderBy();

    protected AbstractJoinPlanNode() {
        super();
    }

    /**
     * Validates this node through the superclass and then validates each
     * predicate that is set.
     */
    @Override
    public void validate() throws Exception {
        super.validate();

        if (m_preJoinPredicate != null) {
            m_preJoinPredicate.validate();
        }
        if (m_joinPredicate != null) {
            m_joinPredicate.validate();
        }
        if (m_wherePredicate != null) {
            m_wherePredicate.validate();
        }
    }

    /**
     * @return the join_type
     */
    public JoinType getJoinType() {
        return m_joinType;
    }

    /**
     * @param join_type the join_type to set
     */
    public void setJoinType(JoinType join_type) {
        m_joinType = join_type;
    }

    /**
     * @return the pre-join predicate, or null if none is set
     */
    public AbstractExpression getPreJoinPredicate() {
        return m_preJoinPredicate;
    }

    /**
     * @return the join predicate, or null if none is set
     */
    public AbstractExpression getJoinPredicate() {
        return m_joinPredicate;
    }

    /**
     * @return the where predicate, or null if none is set
     */
    public AbstractExpression getWherePredicate() {
        return m_wherePredicate;
    }

    /**
     * Stores a clone of the given expression as the where predicate.
     *
     * @param predicate the where predicate to set; null clears it
     */
    public void setWherePredicate(AbstractExpression predicate)
    {
        m_wherePredicate = (predicate == null) ? null : predicate.clone();
    }

    /**
     * Stores a clone of the given expression as the pre-join predicate.
     *
     * @param predicate the pre-join predicate to set; null clears it
     */
    public void setPreJoinPredicate(AbstractExpression predicate)
    {
        m_preJoinPredicate = (predicate == null) ? null : predicate.clone();
    }

    /**
     * Stores a clone of the given expression as the join predicate.
     *
     * @param predicate the join predicate to set; null clears it
     */
    public void setJoinPredicate(AbstractExpression predicate)
    {
        m_joinPredicate = (predicate == null) ? null : predicate.clone();
    }

    /**
     * Generates the output schemas of both children and of any subquery
     * expressions, then builds this node's pre-aggregation schema by joining
     * the outer child's schema with the inner child's schema.
     *
     * @param db the catalog database passed through to children and subqueries
     */
    @Override
    public void generateOutputSchema(Database db)
    {
        // FUTURE: At some point it would be awesome to further
        // cull the columns out of the join to remove columns that were only
        // used by scans/joins.  I think we can coerce HSQL into provide this
        // info relatively easily. --izzy

        // Index join will have to override this method.
        // Assert and provide functionality for generic join
        assert(m_children.size() == 2);
        for (AbstractPlanNode child : m_children) {
            child.generateOutputSchema(db);
        }

        // Generate the output schema for subqueries
        Collection<AbstractExpression> subqueryExpressions = findAllSubquerySubexpressions();
        for (AbstractExpression expr : subqueryExpressions) {
            ((AbstractSubqueryExpression) expr).generateOutputSchema(db);
        }

        // Join the schema together to form the output schema
        NodeSchema outerSchema = m_children.get(0).getOutputSchema();
        NodeSchema innerSchema = m_children.get(1).getOutputSchema();
        m_outputSchemaPreInlineAgg = outerSchema.join(innerSchema).copyAndReplaceWithTVE();
        m_hasSignificantOutputSchema = true;

        generateRealOutputSchema(db);
    }

    /**
     * Sets the node's real output schema: the inlined aggregate's schema when
     * an inline aggregation node exists, otherwise the pre-aggregation schema.
     *
     * @param db the catalog database passed to the inlined aggregate
     */
    protected void generateRealOutputSchema(Database db) {
        AggregatePlanNode aggNode = AggregatePlanNode.getInlineAggregationNode(this);
        if (aggNode != null) {
            // generate its subquery output schema
            aggNode.generateOutputSchema(db);

            m_outputSchema = aggNode.getOutputSchema().copyAndReplaceWithTVE();
        } else {
            m_outputSchema = m_outputSchemaPreInlineAgg;
        }
    }

    // Given any non-inlined type of join, this method will resolve the column
    // order and TVE indexes for the output SchemaColumns.
    @Override
    public void resolveColumnIndexes()
    {
        // First, assert that our topology is sane and then
        // recursively resolve all child/inline column indexes
        IndexScanPlanNode index_scan =
            (IndexScanPlanNode) getInlinePlanNode(PlanNodeType.INDEXSCAN);
        assert(m_children.size() == 2 && index_scan == null);
        for (AbstractPlanNode child : m_children) {
            child.resolveColumnIndexes();
        }

        final NodeSchema outer_schema = m_children.get(0).getOutputSchema();
        final NodeSchema inner_schema = m_children.get(1).getOutputSchema();
        final int outerSize = outer_schema.size();

        // resolve predicates
        resolvePredicate(m_preJoinPredicate, outer_schema, inner_schema);
        resolvePredicate(m_joinPredicate, outer_schema, inner_schema);
        resolvePredicate(m_wherePredicate, outer_schema, inner_schema);

        // Resolve subquery expression indexes
        resolveSubqueryColumnIndexes();

        // Resolve TVE indexes for each schema column.
        for (int i = 0; i < m_outputSchemaPreInlineAgg.size(); ++i) {
            SchemaColumn col = m_outputSchemaPreInlineAgg.getColumns().get(i);

            // These will all be TVEs.
            assert(col.getExpression() instanceof TupleValueExpression);
            TupleValueExpression tve = (TupleValueExpression)col.getExpression();
            int index;
            if (i < outerSize) {
                index = tve.setColumnIndexUsingSchema(outer_schema);
            }
            else {
                index = tve.setColumnIndexUsingSchema(inner_schema);
                // Only offset a successful lookup. Adding outerSize to the -1
                // "not found" marker would hide the failure from the check
                // below and bind the TVE to an unrelated column.
                if (index != -1) {
                    index += outerSize;
                }
            }

            if (index == -1) {
                throw new RuntimeException("Unable to find index for column: " +
                                               col.toString());
            }

            tve.setColumnIndex(index);
            tve.setDifferentiator(index);
        }

        // We want the output columns to be ordered like [outer table columns][inner table columns],
        // and further ordered by TVE index within the left- and righthand sides.
        // generateOutputSchema already places outer columns on the left and inner on the right,
        // so we just need to order the left- and righthand sides by TVE index separately.
        m_outputSchemaPreInlineAgg.sortByTveIndex(0, outerSize);
        m_outputSchemaPreInlineAgg.sortByTveIndex(outerSize, m_outputSchemaPreInlineAgg.size());
        m_hasSignificantOutputSchema = true;

        resolveRealOutputSchema();
    }

    /**
     * Resolves the inlined aggregate (if any) against the pre-aggregation
     * schema and sets the node's real output schema accordingly.
     */
    protected void resolveRealOutputSchema() {
        AggregatePlanNode aggNode = AggregatePlanNode.getInlineAggregationNode(this);
        if (aggNode != null) {
            aggNode.resolveColumnIndexesUsingSchema(m_outputSchemaPreInlineAgg);
            m_outputSchema = aggNode.getOutputSchema().clone();
        } else {
            m_outputSchema = m_outputSchemaPreInlineAgg;
        }
    }

    /**
     * @return the sort direction last set by {@link #resolveSortDirection()},
     *         or INVALID if it has not been resolved
     */
    public SortDirectionType getSortDirection() {
        return m_sortDirection;
    }

    /**
     * Reports whether the join output is ordered by the given expressions.
     * An inlined hash aggregate always yields false; otherwise the answer is
     * delegated to the outer child when that child is an index scan or
     * another join, and is false for any other kind of outer child.
     */
    @Override
    public boolean isOutputOrdered (List<AbstractExpression> sortExpressions, List<SortDirectionType> sortDirections) {
        AbstractPlanNode outerTable = m_children.get(0);
        AbstractPlanNode aggrNode = AggregatePlanNode.getInlineAggregationNode(this);
        if (aggrNode != null && aggrNode.getPlanNodeType() == PlanNodeType.HASHAGGREGATE) {
            return false;
        }
        // Not yet handling ORDER BY expressions based on more than just the left-most table
        if (outerTable.getPlanNodeType() == PlanNodeType.INDEXSCAN || outerTable instanceof AbstractJoinPlanNode) {
            return outerTable.isOutputOrdered(sortExpressions, sortDirections);
        }
        return false;
    }

    // TODO: need to extend the sort direction for join from one table to the other table if possible
    // right now, only consider the sort direction on the outer table
    public void resolveSortDirection() {
        AbstractPlanNode outerTable = m_children.get(0);
        if (m_joinType == JoinType.FULL) {
            // Disable the usual optimizations for ordering join output by
            // outer table only. In case of FULL join, the unmatched inner table tuples
            // get appended to the end of the join's output table thus invalidating
            // the outer table join order.
            m_sortDirection = SortDirectionType.INVALID;
            return;
        }
        // When the outer child is not index-sortable the current direction is left unchanged.
        if (outerTable instanceof IndexSortablePlanNode) {
            m_sortDirection = ((IndexSortablePlanNode)outerTable).indexUse().getSortOrderFromIndexScan();
        }
    }

    /**
     * Writes the join type and the three predicates after the superclass
     * fields. The pre-aggregation schema is written only when it is a
     * different object from the node's output schema.
     */
    @Override
    public void toJSONString(JSONStringer stringer) throws JSONException {
        super.toJSONString(stringer);
        stringer.keySymbolValuePair(Members.JOIN_TYPE.name(), m_joinType.toString());
        stringer.key(Members.PRE_JOIN_PREDICATE.name()).value(m_preJoinPredicate);
        stringer.key(Members.JOIN_PREDICATE.name()).value(m_joinPredicate);
        stringer.key(Members.WHERE_PREDICATE.name()).value(m_wherePredicate);

        // Identity comparison: when there is no inlined aggregate both fields
        // refer to the same schema object and the extra key is omitted.
        if (m_outputSchemaPreInlineAgg != m_outputSchema) {
            stringer.key(Members.OUTPUT_SCHEMA_PRE_AGG.name());
            stringer.array();
            for (SchemaColumn column : m_outputSchemaPreInlineAgg.getColumns()) {
                column.toJSONString(stringer, true);
            }
            stringer.endArray();
        }
    }

    /**
     * Loads the join type, the three predicates and the pre-aggregation
     * schema from JSON. When the pre-aggregation key is absent or null, the
     * pre-aggregation schema aliases the node's output schema.
     */
    @Override
    public void loadFromJSONObject(JSONObject jobj, Database db)
            throws JSONException {
        helpLoadFromJSONObject(jobj, db);
        m_joinType = JoinType.get( jobj.getString( Members.JOIN_TYPE.name() ) );
        m_preJoinPredicate = AbstractExpression.fromJSONChild(jobj, Members.PRE_JOIN_PREDICATE.name());
        m_joinPredicate = AbstractExpression.fromJSONChild(jobj, Members.JOIN_PREDICATE.name());
        m_wherePredicate = AbstractExpression.fromJSONChild(jobj, Members.WHERE_PREDICATE.name());

        if ( !jobj.isNull( Members.OUTPUT_SCHEMA_PRE_AGG.name() ) ) {
            m_hasSignificantOutputSchema = true;
            m_outputSchemaPreInlineAgg = loadSchemaFromJSONObject(jobj,
                    Members.OUTPUT_SCHEMA_PRE_AGG.name());
        }
        else {
            m_outputSchemaPreInlineAgg = m_outputSchema;
        }
    }


    /**
     * Resolves every tuple value expression found in a predicate against the
     * outer schema first and the inner schema second, and records on the TVE
     * which side it resolved to (table index 0 for outer, 1 for inner).
     *
     * @param expression the predicate to resolve; callers in this class pass
     *                   predicates that may be null, so the TVE lookup is
     *                   presumably null-tolerant (verify in ExpressionUtil)
     * @param outer_schema output schema of the outer child
     * @param inner_schema output schema of the inner child
     * @throws RuntimeException if a TVE is found in neither schema
     */
    protected static void resolvePredicate(AbstractExpression expression,
            NodeSchema outer_schema, NodeSchema inner_schema) {
        List<TupleValueExpression> predicate_tves =
                ExpressionUtil.getTupleValueExpressions(expression);
        for (TupleValueExpression tve : predicate_tves) {
            int index = tve.setColumnIndexUsingSchema(outer_schema);
            int tableIdx = 0;   // 0 for outer table
            if (index == -1) {
                index = tve.setColumnIndexUsingSchema(inner_schema);
                if (index == -1) {
                    throw new RuntimeException(
                            "Unable to resolve column index for join TVE: " +
                            tve.toString());
                }
                tableIdx = 1;   // 1 for inner table
            }
            tve.setTableIndex(tableIdx);
        }
    }

    /**
     * Resolves each expression of the list with
     * {@link #resolvePredicate(AbstractExpression, NodeSchema, NodeSchema)}.
     *
     * @param expressions the predicates to resolve; must not be null
     * @param outer_schema output schema of the outer child
     * @param inner_schema output schema of the inner child
     */
    protected static void resolvePredicate(List<AbstractExpression> expressions,
            NodeSchema outer_schema, NodeSchema inner_schema) {
        for (AbstractExpression expr : expressions) {
            resolvePredicate(expr, outer_schema, inner_schema);
        }
    }

    /**
     * Builds the "filter by" fragment of the plan explanation from the
     * non-null predicates, joined with " AND ".
     *
     * @param indent indentation placed after the leading newline
     * @return the fragment, or an empty string when no predicate is set
     */
    protected String explainFilters(String indent) {
        StringBuilder result = new StringBuilder();
        String prefix = "\n" + indent + " filter by ";
        AbstractExpression[] predicates = { m_preJoinPredicate, m_joinPredicate, m_wherePredicate };
        for (AbstractExpression pred : predicates) {
            if (pred != null) {
                // No default table name prefix for columns.
                result.append(prefix).append(pred.explain("!?"));
                prefix = " AND ";
            }
        }
        return result.toString();
    }

    /**
     * Adds to {@code collected} the expressions of the given class found by
     * the superclass and in each predicate that is set.
     */
    @Override
    public void findAllExpressionsOfClass(Class< ? extends AbstractExpression> aeClass, Set<AbstractExpression> collected) {
        super.findAllExpressionsOfClass(aeClass, collected);
        if (m_preJoinPredicate != null) {
            collected.addAll(m_preJoinPredicate.findAllSubexpressionsOfClass(aeClass));
        }
        if (m_joinPredicate != null) {
            collected.addAll(m_joinPredicate.findAllSubexpressionsOfClass(aeClass));
        }
        if (m_wherePredicate != null) {
            collected.addAll(m_wherePredicate.findAllSubexpressionsOfClass(aeClass));
        }
    }

    /**
     * Discount join node child estimates based on the number of its filters
     *
     * @param childNode the child whose estimated processed tuple count is discounted
     * @return discounted estimates; the undiscounted estimate when the child
     *         is of an unhandled node type or has no predicate
     */
    protected long discountEstimatedProcessedTupleCount(AbstractPlanNode childNode) {
        // Discount estimated processed tuple count for the outer child based on the number of
        // filter expressions this child has with a rapidly diminishing effect:
        // the k-th filter of a kind subtracts (base ^ k), with base 0.09 for
        // COMPARE_EQUAL and 0.045 for all other expression types.
        // So the discount ranges from 0.09 (or 0.045) for one post filter to a max
        // approaching 0.0989 (=9/91) for many equality filters, or 0.0471 (=9/191)
        // for many filters of other types.
        // The discount value is less than the partial index discount (0.1) to make sure
        // the index wins
        // NOTE(review): equality and non-equality discounts are summed, so a mix of
        // both can exceed 0.1 (up to ~0.146) -- confirm this is intended.
        AbstractExpression predicate = null;
        if (childNode instanceof AbstractScanPlanNode) {
            predicate = ((AbstractScanPlanNode) childNode).getPredicate();
        } else if (childNode instanceof NestLoopPlanNode) {
            predicate = ((NestLoopPlanNode) childNode).getWherePredicate();
        } else if (childNode instanceof NestLoopIndexPlanNode) {
            AbstractPlanNode inlineIndexScan = ((NestLoopIndexPlanNode) childNode).getInlinePlanNode(PlanNodeType.INDEXSCAN);
            assert(inlineIndexScan != null);
            predicate = ((AbstractScanPlanNode) inlineIndexScan).getPredicate();
        } else {
            return childNode.getEstimatedProcessedTupleCount();
        }

        if (predicate == null) {
            return childNode.getEstimatedProcessedTupleCount();
        }

        List<AbstractExpression> predicateExprs = ExpressionUtil.uncombinePredicate(predicate);
        // Counters to count the number of equality and all other expressions
        int eqCount = 0;
        int otherCount = 0;
        double discountCountFactor = 1.0;
        // Discount tuple count.
        for (AbstractExpression predicateExpr: predicateExprs) {
            if (ExpressionType.COMPARE_EQUAL == predicateExpr.getExpressionType()) {
                discountCountFactor -= Math.pow(MAX_EQ_POST_FILTER_DISCOUNT, ++eqCount);
            } else {
                discountCountFactor -= Math.pow(MAX_OTHER_POST_FILTER_DISCOUNT, ++otherCount);
            }
        }
        return  (long) (childNode.getEstimatedProcessedTupleCount() * discountCountFactor);
    }

    /**
     * When a project node is added to the top of the plan, we need to adjust
     * the differentiator field of TVEs to reflect differences in the scan
     * schema vs the storage schema of a table, so that fields with duplicate
     * names produced by expanding "SELECT *" can resolve correctly.
     *
     * We recurse until we find either a join node or a scan node.
     *
     * Resolution of columns produced by "SELECT *" is not a problem for
     * joins because there is always a sequential scan at the top of plans
     * that have this problem, so just use the tve's column index as its
     * differentiator here.
     *
     * @param tve the expression whose differentiator is set to its column index
     */
    @Override
    public void adjustDifferentiatorField(TupleValueExpression tve) {
        tve.setDifferentiator(tve.getColumnIndex());
    }

    /** @return the index-use record owned by this node */
    @Override
    public IndexUseForOrderBy indexUse() {
        return m_indexUse;
    }

    /** @return this node */
    @Override
    public AbstractPlanNode planNode() {
        return this;
    }

}