/* This file is part of VoltDB. * Copyright (C) 2008-2017 VoltDB Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as * published by the Free Software Foundation, either version 3 of the * License, or (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with VoltDB. If not, see <http://www.gnu.org/licenses/>. */ package org.voltdb.plannodes; import java.util.ArrayList; import java.util.Collection; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import org.json_voltpatches.JSONException; import org.json_voltpatches.JSONObject; import org.json_voltpatches.JSONStringer; import org.voltdb.catalog.CatalogMap; import org.voltdb.catalog.Column; import org.voltdb.catalog.Database; import org.voltdb.expressions.AbstractExpression; import org.voltdb.expressions.AbstractSubqueryExpression; import org.voltdb.expressions.ConstantValueExpression; import org.voltdb.expressions.ExpressionUtil; import org.voltdb.expressions.TupleValueExpression; import org.voltdb.planner.parseinfo.StmtSubqueryScan; import org.voltdb.planner.parseinfo.StmtTableScan; import org.voltdb.planner.parseinfo.StmtTargetTableScan; import org.voltdb.types.PlanNodeType; import org.voltdb.utils.CatalogUtil; public abstract class AbstractScanPlanNode extends AbstractPlanNode { public enum Members { PREDICATE, TARGET_TABLE_NAME, TARGET_TABLE_ALIAS, SUBQUERY_INDICATOR, PREDICATE_FALSE; } // Store the columns from the table as an internal NodeSchema // for consistency of interface protected NodeSchema m_tableSchema = null; private NodeSchema m_preAggOutputSchema; // Store the columns we use from this table as an internal schema protected NodeSchema m_tableScanSchema = new NodeSchema(); protected Map<Integer, Integer> m_differentiatorMap = new HashMap<>(); protected AbstractExpression m_predicate; // The target table is the table that the plannode wants to perform some operation on. protected String m_targetTableName = ""; protected String m_targetTableAlias = null; // Flag marking the sub-query plan protected boolean m_isSubQuery = false; protected StmtTableScan m_tableScan = null; protected AbstractScanPlanNode() { super(); } protected AbstractScanPlanNode(String tableName, String tableAlias) { super(); m_targetTableName = tableName; m_targetTableAlias = tableAlias; } @Override public void getTablesAndIndexes(Map<String, StmtTargetTableScan> tablesRead, Collection<String> indexes) { if (m_tableScan != null) { if (m_tableScan instanceof StmtTargetTableScan) { tablesRead.put(m_targetTableName, (StmtTargetTableScan)m_tableScan); getTablesAndIndexesFromSubqueries(tablesRead, indexes); } else { assert(m_tableScan instanceof StmtSubqueryScan); getChild(0).getTablesAndIndexes(tablesRead, indexes); } } } @Override public void validate() throws Exception { super.validate(); // // TargetTableId // if (m_targetTableName == null) { throw new Exception("ERROR: TargetTableName is null for PlanNode '" + toString() + "'"); } if (m_targetTableAlias == null) { throw new Exception("ERROR: TargetTableAlias is null for PlanNode '" + toString() + "'"); } // // Filter Expression // It is allowed to be null, but we need to check that it's valid // if (m_predicate != null) { m_predicate.validate(); } // All the schema columns better reference this table for (SchemaColumn col : m_tableScanSchema.getColumns()) { if (!m_targetTableName.equals(col.getTableName())) { throw new Exception("ERROR: The scan column: " + col.getColumnName() + " in table: " + m_targetTableName + " refers to " + " table: " + col.getTableName()); } } } /** * @return the target_table_name */ public String getTargetTableName() { assert(m_targetTableName != null); return m_targetTableName; } /** * @param name */ public void setTargetTableName(String name) { assert(m_isSubQuery || name != null); m_targetTableName = name; } /** * @return the target_table_alias */ public String getTargetTableAlias() { assert(m_targetTableAlias != null); return m_targetTableAlias; } /** * @param alias */ public void setTargetTableAlias(String alias) { assert(alias != null); m_targetTableAlias = alias; } public void setTableScan(StmtTableScan tableScan) { m_tableScan = tableScan; setSubQuery(tableScan instanceof StmtSubqueryScan); setTargetTableAlias(tableScan.getTableAlias()); setTargetTableName(tableScan.getTableName()); List<SchemaColumn> scanColumns = tableScan.getScanColumns(); if (scanColumns != null && ! scanColumns.isEmpty()) { setScanColumns(scanColumns); } } public StmtTableScan getTableScan() { return m_tableScan; } /** * @return the predicate */ public AbstractExpression getPredicate() { return m_predicate; } /** * @param exps the predicates to clone and combine into one predicate */ @SafeVarargs public final void setPredicate(Collection<AbstractExpression>... colExps) { assert(colExps != null); // PlanNodes all need private deep copies of expressions // so that the resolveColumnIndexes results // don't get bashed by other nodes or subsequent planner runs m_predicate = ExpressionUtil.cloneAndCombinePredicates(colExps); } protected void setScanColumns(List<SchemaColumn> scanColumns) { assert(scanColumns != null); int i = 0; for (SchemaColumn col : scanColumns) { TupleValueExpression tve = (TupleValueExpression)col.getExpression(); int difftor = tve.getDifferentiator(); m_differentiatorMap.put(difftor, i); SchemaColumn clonedCol = col.clone(); clonedCol.setDifferentiator(i); m_tableScanSchema.addColumn(clonedCol); ++i; } } /** * When a project node is added to the top of the plan, we need to adjust * the differentiator field of TVEs to reflect differences in the scan * schema vs the storage schema of a table, so that fields with duplicate names * produced by expanding "SELECT *" can resolve correctly. * * We recurse until we find either a join node or a scan node. * * For scan nodes, we need to reflect the difference between the * storage order of columns produced by a subquery, and the columns * that are actually projected (via an inlined project) from the scan, * since unused columns are typically omitted from the output schema * of the scan. * * @param tve */ @Override public void adjustDifferentiatorField(TupleValueExpression tve) { int storageIndex = tve.getColumnIndex(); Integer scanIndex = m_differentiatorMap.get(storageIndex); assert(scanIndex != null); tve.setDifferentiator(storageIndex); } NodeSchema getTableSchema() { return m_tableSchema; } /** * Set the sub-query flag * @param isSubQuery */ public void setSubQuery(boolean isSubQuery) { m_isSubQuery = isSubQuery; } /** * Accessor to return the sub-query flag * @return m_isSubQuery */ @Override public boolean isSubQuery() { return m_isSubQuery; } @Override public void generateOutputSchema(Database db) { // fill in the table schema if we haven't already if (m_tableSchema == null) { initTableSchema(db); } initPreAggOutputSchema(); // Generate the output schema for subqueries Collection<AbstractExpression> exprs = findAllSubquerySubexpressions(); for (AbstractExpression expr: exprs) { ((AbstractSubqueryExpression) expr).generateOutputSchema(db); } AggregatePlanNode aggNode = AggregatePlanNode.getInlineAggregationNode(this); if (aggNode != null) { // generate its subquery output schema aggNode.generateOutputSchema(db); m_outputSchema = aggNode.getOutputSchema().copyAndReplaceWithTVE(); m_hasSignificantOutputSchema = true; } } // Until the scan has an implicit projection rather than an explicitly // inlined one, the output schema generation is going to be a bit odd. // It will depend on two bits of state: whether any scan columns were // specified for this table and whether or not there is an inlined // projection. // // If there is an inlined projection, then we'll just steal that // output schema as our own. // If there is no inlined projection, then, if there are no scan columns // specified, use the entire table's schema as the output schema. // Otherwise add an inline projection that projects the scan columns // and then take that output schema as our own. // These have the effect of repeatably generating the correct output // schema if called again and again, but also allowing the planner // to overwrite the inline projection and still have the right thing // happen. // // Note that when an index scan is inlined into a join node (as with // nested loop index joins), then there will be a project node inlined into // the index scan node that determines which columns from the inner table // are used as an output of the join, but that predicates evaluated against // this table should use the complete schema of the table being scanned. // See also the comments in NestLoopIndexPlanNode.resolveColumnIndexes. // Related tickets: ENG-9389, ENG-9533. private void initPreAggOutputSchema() { ProjectionPlanNode proj = (ProjectionPlanNode)getInlinePlanNode(PlanNodeType.PROJECTION); if (proj != null) { // Does this operation needs to change complex expressions // into tuple value expressions with an column alias? // Is this always true for clone? Or do we need a new method? m_outputSchema = proj.getOutputSchema().copyAndReplaceWithTVE(); // It's just a cheap knock-off of the projection's m_hasSignificantOutputSchema = false; } else if (m_tableScanSchema.size() != 0) { // Order the scan columns according to the table schema // before we stick them in the projection output int difftor = 0; for (SchemaColumn col : m_tableScanSchema.getColumns()) { col.setDifferentiator(difftor); ++difftor; AbstractExpression colExpr = col.getExpression(); assert(colExpr instanceof TupleValueExpression); TupleValueExpression tve = (TupleValueExpression) colExpr; tve.setColumnIndexUsingSchema(m_tableSchema); } // and update their indexes against the table schema m_tableScanSchema.sortByTveIndex(); // Create inline projection to map table outputs to scan outputs ProjectionPlanNode projectionNode = new ProjectionPlanNode(m_tableScanSchema); addInlinePlanNode(projectionNode); // a bit redundant but logically consistent m_outputSchema = projectionNode.getOutputSchema().copyAndReplaceWithTVE(); m_hasSignificantOutputSchema = false; // It's just a cheap knock-off of the projection's } else { // We come here if m_tableScanSchema is empty. // // m_tableScanSchema might be empty for cases like // select now from table; // where there are no columns in the table that are accessed. // // Just fill m_outputSchema with the table's columns. m_outputSchema = m_tableSchema.clone(); m_hasSignificantOutputSchema = true; } m_preAggOutputSchema = m_outputSchema; } private void initTableSchema(Database db) { if (isSubQuery()) { assert(m_children.size() == 1); AbstractPlanNode childNode = m_children.get(0); childNode.generateOutputSchema(db); m_tableSchema = childNode.getOutputSchema(); // step to transfer derived table schema to upper level m_tableSchema = m_tableSchema.replaceTableClone(getTargetTableAlias()); } else { m_tableSchema = new NodeSchema(); CatalogMap<Column> cols = db.getTables().getExact(m_targetTableName).getColumns(); // you don't strictly need to sort this, // but it makes diff-ing easier List<Column> sortedCols = CatalogUtil.getSortedCatalogItems(cols, "index"); for (Column col : sortedCols) { // must produce a tuple value expression for this column. TupleValueExpression tve = new TupleValueExpression( m_targetTableName, m_targetTableAlias, col, col.getIndex()); m_tableSchema.addColumn(m_targetTableName, m_targetTableAlias, col.getTypeName(), col.getTypeName(), tve, col.getIndex()); } } } @Override public void resolveColumnIndexes() { // The following applies to both seq and index scan. Index scan has // some additional expressions that need to be handled as well // predicate expression List<TupleValueExpression> predicate_tves = ExpressionUtil.getTupleValueExpressions(m_predicate); for (TupleValueExpression tve : predicate_tves) { tve.setColumnIndexUsingSchema(m_tableSchema); } // inline projection ProjectionPlanNode proj = (ProjectionPlanNode)getInlinePlanNode(PlanNodeType.PROJECTION); if (proj != null) { proj.resolveColumnIndexesUsingSchema(m_tableSchema); m_outputSchema = proj.getOutputSchema().clone(); } else { m_outputSchema = m_preAggOutputSchema; // With no inline projection to define the output columns, // iterate through the output schema TVEs // and sort them by table schema index order. for (SchemaColumn col : m_outputSchema.getColumns()) { AbstractExpression colExpr = col.getExpression(); // At this point, they'd better all be TVEs. assert(colExpr instanceof TupleValueExpression); TupleValueExpression tve = (TupleValueExpression) colExpr; tve.setColumnIndexUsingSchema(m_tableSchema); } m_outputSchema.sortByTveIndex(); } // The outputschema of an inline limit node is completely irrelevant to the EE except that // serialization will complain if it contains expressions of unresolved columns. // Logically, the limited scan output has the same schema as the pre-limit scan. // It's at least as easy to just re-use the known-good output schema of the scan // than it would be to carefully resolve the limit node's current output schema. // And this simply works regardless of whether the limit was originally applied or inlined // before or after the (possibly inline) projection. // There's no need to be concerned about re-adjusting the irrelevant outputschema // based on the different schema of the original raw scan and the projection. LimitPlanNode limit = (LimitPlanNode)getInlinePlanNode(PlanNodeType.LIMIT); if (limit != null) { limit.m_outputSchema = m_outputSchema.clone(); limit.m_hasSignificantOutputSchema = false; // It's just another cheap knock-off } // Resolve subquery expression indexes resolveSubqueryColumnIndexes(); AggregatePlanNode aggNode = AggregatePlanNode.getInlineAggregationNode(this); if (aggNode != null) { aggNode.resolveColumnIndexesUsingSchema(m_outputSchema); m_outputSchema = aggNode.getOutputSchema().copyAndReplaceWithTVE(); // Aggregate plan node change its output schema, and // EE does not have special code to get output schema from inlined aggregate node. m_hasSignificantOutputSchema = true; } } @Override public void toJSONString(JSONStringer stringer) throws JSONException { super.toJSONString(stringer); if (m_predicate != null) { if (ConstantValueExpression.isBooleanFalse(m_predicate)) { stringer.keySymbolValuePair(Members.PREDICATE_FALSE.name(), "TRUE"); } stringer.key(Members.PREDICATE.name()); stringer.value(m_predicate); } stringer.keySymbolValuePair(Members.TARGET_TABLE_NAME.name(), m_targetTableName); stringer.keySymbolValuePair(Members.TARGET_TABLE_ALIAS.name(), m_targetTableAlias); if (m_isSubQuery) { stringer.keySymbolValuePair(Members.SUBQUERY_INDICATOR.name(), "TRUE"); } } @Override public void loadFromJSONObject( JSONObject jobj, Database db ) throws JSONException { helpLoadFromJSONObject(jobj, db); m_predicate = AbstractExpression.fromJSONChild(jobj, Members.PREDICATE.name(), m_tableScan); m_targetTableName = jobj.getString( Members.TARGET_TABLE_NAME.name() ); m_targetTableAlias = jobj.getString( Members.TARGET_TABLE_ALIAS.name() ); if (jobj.has("SUBQUERY_INDICATOR")) { m_isSubQuery = "TRUE".equals(jobj.getString( Members.SUBQUERY_INDICATOR.name() )); } } @Override public void getScanNodeList_recurse(ArrayList<AbstractScanPlanNode> collected, HashSet<AbstractPlanNode> visited) { if (visited.contains(this)) { assert(false): "do not expect loops in plangraph."; return; } visited.add(this); collected.add(this); } protected String explainPredicate(String prefix) { if (m_predicate != null) { return prefix + m_predicate.explain(getTableNameForExplain()); } return ""; } protected String getTableNameForExplain() { return (m_targetTableAlias != null) ? m_targetTableAlias : m_targetTableName; } @Override public void findAllExpressionsOfClass(Class< ? extends AbstractExpression> aeClass, Set<AbstractExpression> collected) { super.findAllExpressionsOfClass(aeClass, collected); if (m_predicate != null) { collected.addAll(m_predicate.findAllSubexpressionsOfClass(aeClass)); } } protected void copyDifferentiatorMap( Map<Integer, Integer> diffMap) { m_differentiatorMap = new HashMap<>(diffMap); } }