/* * Copyright (c) 2011-2015 EPFL DATA Laboratory * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE) * * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ch.epfl.data.squall.api.sql.optimizers.name; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import net.sf.jsqlparser.expression.BinaryExpression; import net.sf.jsqlparser.expression.Expression; import net.sf.jsqlparser.expression.Parenthesis; import net.sf.jsqlparser.expression.operators.conditional.AndExpression; import net.sf.jsqlparser.expression.operators.conditional.OrExpression; import net.sf.jsqlparser.statement.select.SelectItem; import ch.epfl.data.squall.api.sql.optimizers.CompGen; import ch.epfl.data.squall.api.sql.schema.Schema; import ch.epfl.data.squall.api.sql.util.HierarchyExtractor; import ch.epfl.data.squall.api.sql.util.ParserUtil; import ch.epfl.data.squall.api.sql.util.TupleSchema; import ch.epfl.data.squall.api.sql.visitors.jsql.AndVisitor; import ch.epfl.data.squall.api.sql.visitors.jsql.SQLVisitor; import ch.epfl.data.squall.api.sql.visitors.squall.NameJoinHashVisitor; import ch.epfl.data.squall.api.sql.visitors.squall.NameSelectItemsVisitor; import ch.epfl.data.squall.api.sql.visitors.squall.NameWhereVisitor; import ch.epfl.data.squall.components.Component; import ch.epfl.data.squall.components.DataSourceComponent; import ch.epfl.data.squall.components.EquiJoinComponent; import ch.epfl.data.squall.components.OperatorComponent; import ch.epfl.data.squall.expressions.ValueExpression; import ch.epfl.data.squall.operators.AggregateOperator; import ch.epfl.data.squall.operators.ProjectOperator; import ch.epfl.data.squall.operators.SelectOperator; import ch.epfl.data.squall.query_plans.QueryBuilder; import ch.epfl.data.squall.utilities.DeepCopy; import ch.epfl.data.squall.utilities.SystemParameters; /* * It is necessary that this class operates with Tables, * since we don't want multiple CG sharing the same copy of DataSourceComponent. */ public class NameCompGen implements CompGen { private final SQLVisitor _pq; private final Map _map; private final Schema _schema; private final String _dataPath; private final String _extension; private final String _queryName; private QueryBuilder _queryBuilder = new QueryBuilder(); // compName, CostParams for all the components from _queryPlan private Map<String, CostParams> _compCost = new HashMap<String, CostParams>(); private CostEstimator _costEst; private CostParallelismAssigner _parAssigner; // used for SelectOperator (only from WHERE clause) private final Map<String, Expression> _compNamesAndExprs = new HashMap<String, Expression>(); private final Map<Set<String>, Expression> _compNamesOrExprs = new HashMap<Set<String>, Expression>(); // used for ProjectOperator (both from SELECT and WHERE clauses) private final ProjGlobalCollect _globalCollect; // we don't use it, because we have always to do deepCopy because of // translateExpr // public NameCompGen(Schema schema, // SQLVisitor pq, // Map map, // CostParallelismAssigner parAssigner, // Map<String, Expression> compNamesAndExprs, // Map<Set<String>, Expression> compNamesOrExprs, // ProjGlobalCollect globalCollect){ // _pq = pq; // _map = map; // _schema = schema; // // _dataPath = SystemParameters.getString(map, "DIP_DATA_PATH"); // _extension = SystemParameters.getString(map, "DIP_EXTENSION"); // // if(parAssigner != null){ // _parAssigner = parAssigner; // _costEst = new CostEstimator(schema, pq, _compCost, parAssigner); // } // // _compNamesAndExprs = compNamesAndExprs; // _compNamesOrExprs = compNamesOrExprs; // // _globalCollect = globalCollect; // } // CPA initialized in NameCompGenFactory public NameCompGen(Schema schema, Map map, CostParallelismAssigner parAssigner) { _schema = schema; _map = map; _pq = ParserUtil.parseQuery(map); _dataPath = SystemParameters.getString(map, "DIP_DATA_PATH"); _extension = SystemParameters.getString(map, "DIP_EXTENSION"); _queryName = SystemParameters.getString(map, "DIP_QUERY_NAME"); if (parAssigner != null) { _parAssigner = parAssigner; _costEst = new CostEstimator(_queryName, schema, _pq, _compCost, parAssigner); } // initializes _compNamesAndExprs and _compNamesOrExprs initWhereClause(_pq.getWhereExpr()); _globalCollect = new ProjGlobalCollect(_pq.getSelectItems(), _pq.getWhereExpr()); _globalCollect.process(); } private void addHash(Component component, List<ValueExpression> hashExpressions) { // if joinCondition is a R.A + 5 = S.A, and inputTupleSchema is // "R.A + 5", HashExpression is a ColumnReference(0) if (ParserUtil.isAllColumnRefs(hashExpressions)) { // all the join conditions are represented through columns, no // ValueExpression // guaranteed that both joined components will have joined columns // visited in the same order // i.e R.A=S.A and R.B=S.B, the columns are (R.A, R.B), (S.A, S.B), // respectively final List<Integer> hashIndexes = ParserUtil .extractColumnIndexes(hashExpressions); // hash indexes in join condition component.setOutputPartKey(hashIndexes); } else // hash expressions in join condition component.setHashExpressions(hashExpressions); } /************************************************************************************* * HASH *************************************************************************************/ // set hash for this component, knowing its position in the query plan. // Conditions are related only to parents of join, // but we have to filter who belongs to my branch in NameJoinHashVisitor. // We don't want to hash on something which will be used to join with same // later component in the hierarchy. private void addJoinHash(Component component, List<Expression> joinCondition) { final TupleSchema tupleSchema = _compCost.get(component.getName()) .getSchema(); final NameJoinHashVisitor joinOn = new NameJoinHashVisitor(tupleSchema, component); for (final Expression exp : joinCondition) exp.accept(joinOn); final List<ValueExpression> hashExpressions = joinOn.getExpressions(); addHash(component, hashExpressions); } /************************************************************************************* * Project operator *************************************************************************************/ private void addProjectOperator(Component component) { final String compName = component.getName(); final TupleSchema inputTupleSchema = _compCost.get(compName) .getSchema(); final ProjSchemaCreator psc = new ProjSchemaCreator(_globalCollect, inputTupleSchema, component, _pq, _schema); psc.create(); final TupleSchema outputTupleSchema = psc.getOutputSchema(); if (!ParserUtil.isSameSchema(inputTupleSchema, outputTupleSchema)) { // no need to add projectOperator unless it changes something attachProjectOperator(component, psc.getProjectOperator()); processProjectCost(component, outputTupleSchema); } } /************************************************************************************* * WHERE clause - SelectOperator *************************************************************************************/ private void addSelectOperator(Component component) { final Expression whereCompExpr = createWhereForComponent(component); processWhereForComponent(component, whereCompExpr); if (_costEst != null) _costEst.processWhereCost(component, whereCompExpr); } private Expression appendAnd(Expression fullExpr, Expression atomicExpr) { if (atomicExpr != null) if (fullExpr != null) // appending to previous expressions fullExpr = new AndExpression(fullExpr, atomicExpr); else // this is the first expression for this component fullExpr = atomicExpr; return fullExpr; } private Expression appendOr(Expression fullExpr, Expression atomicExpr) { if (atomicExpr != null) if (fullExpr != null) // appending to previous expressions fullExpr = new OrExpression(fullExpr, atomicExpr); else // this is the first expression for this component fullExpr = atomicExpr; return fullExpr; } private void attachProjectOperator(Component component, ProjectOperator project) { component.add(project); } private void attachSelectClauseOnLastJoin(Component lastComponent, NameSelectItemsVisitor selectVisitor) { final List<AggregateOperator> aggOps = selectVisitor.getAggOps(); ProjectOperator project = null; if (!(selectVisitor.getGroupByVEs() == null || selectVisitor .getGroupByVEs().isEmpty())) project = new ProjectOperator(selectVisitor.getGroupByVEs()); if (aggOps.isEmpty()) { if (project != null) lastComponent.add(project); } else if (aggOps.size() == 1) { // all the others are group by final AggregateOperator firstAgg = aggOps.get(0); if (project != null) firstAgg.setGroupByProjection(project); /* * Avg result cannot be aggregated over multiple nodes. Solution is * one of the following: a) the output of average is keeped in a * form (Sum, Count) and then a user is responsible to aggregate it * over nodes b) if NameTranslator.isSuperset for last join keys and * GroupBy is not fullfilled create new level node with aggregation * as the only operation To be akin to Sum and Count aggregates, we * opted for a) */ if (firstAgg.getDistinct() == null) lastComponent.add(firstAgg); else // in general groupByVEs is not a ColumnReference (it can be an // addition, for example). // ProjectOperator is not obliged to create schema which fully // fits in what FinalAggregation wants addHash(lastComponent, selectVisitor.getGroupByVEs()); } else throw new RuntimeException( "For now only one aggregate function supported!"); } private void attachWhereClause(Component affectedComponent, SelectOperator select) { affectedComponent.add(select); } private DataSourceComponent createAddDataSource(String tableCompName) { final String tableSchemaName = _pq.getTan() .getSchemaName(tableCompName); final String sourceFile = tableSchemaName.toLowerCase(); final DataSourceComponent relation = new DataSourceComponent( tableCompName, _dataPath + sourceFile + _extension); _queryBuilder.add(relation); return relation; } private EquiJoinComponent createAndAddEquiJoin(Component left, Component right) { final EquiJoinComponent joinComponent = new EquiJoinComponent(left, right); _queryBuilder.add(joinComponent); return joinComponent; } private OperatorComponent createAndAddOperatorComp(Component lastComponent) { final OperatorComponent opComp = new OperatorComponent(lastComponent, ParserUtil.generateUniqueName("OPERATOR")); _queryBuilder.add(opComp); return opComp; } /* * Setting schema for DataSourceComponent */ private void createCompCost(DataSourceComponent source) { final String compName = source.getName(); final String schemaName = _pq.getTan().getSchemaName(compName); final CostParams costParams = new CostParams(); // schema is consisted of TableAlias.columnName costParams.setSchema(ParserUtil.createAliasedSchema( _schema.getTableSchema(schemaName), compName)); _compCost.put(compName, costParams); } /* * This can estimate selectivity/cardinality of a join between between any * two components but with a restriction - rightParent has only one * component mentioned in joinCondition. If connection between any * components is allowed, we have to find a way combining multiple distinct * selectivities (for example having a component R-S and T-V, how to combine * R.A=T.A and S.B=V.B?) This method is based on usual way to join tables - * on their appropriate keys. It works for cyclic queries as well (TPCH5 is * an example). */ private void createCompCost(EquiJoinComponent joinComponent) { // create schema and selectivity wrt leftParent final String compName = joinComponent.getName(); final CostParams costParams = new CostParams(); // *********set schema final TupleSchema schema = ParserUtil.joinSchema( joinComponent.getParents(), _compCost); costParams.setSchema(schema); _compCost.put(compName, costParams); } private void createCompCost(OperatorComponent opComp) { final String compName = opComp.getName(); final CostParams costParams = new CostParams(); // *********set schema final TupleSchema schema = _compCost.get( opComp.getParents()[0].getName()).getSchema(); costParams.setSchema(schema); _compCost.put(compName, costParams); } /* * Merging atomicExpr and orExpressions corresponding to this component */ private Expression createWhereForComponent(Component component) { Expression expr = _compNamesAndExprs.get(component.getName()); for (final Map.Entry<Set<String>, Expression> orEntry : _compNamesOrExprs .entrySet()) { final Set<String> orCompNames = orEntry.getKey(); // TODO-PRIO: the full solution would be that OrExpressions are // split into subexpressions // which might be executed on their LCM // Not implemented because it's quite rare - only TPCH7 // Even in TPCH7 there is no need for multiple LCM. // TODO-PRIO: selectivityEstimation for pushing OR need to be // improved final Expression orExpr = orEntry.getValue(); if (HierarchyExtractor.isLCM(component, orCompNames)) expr = appendAnd(expr, orExpr); else if (component instanceof DataSourceComponent) { final DataSourceComponent source = (DataSourceComponent) component; final Expression addedExpr = getMineSubset(source, orExpr); expr = appendAnd(expr, addedExpr); } } return expr; } /* * Used in CostOptimizer when different plans are possible from the same * subplan main reason is translateExpr method - synonyms in NameTranslator */ public NameCompGen deepCopy() { // map, schema, dataPath, dataExt are shared because they are constants // all the time // parAssigner is computed once in NameCompGenFactory and then can be // shared // pq, globalProject, compNamesAndExprs, compNamesOrExprs are created // from scratch in the constructor // ideally, this should be deep-copied, because it can be changed due to // NameTranslator.synonims // not possible because JSQL is not serializable // but this is only matter of performance final NameCompGen copy = new NameCompGen(_schema, _map, _parAssigner); // the rest needs to be explicitly deep-copied copy._compCost = (Map<String, CostParams>) DeepCopy.copy(_compCost); // _compCost from Estimator and from NCG has to be the same reference copy._costEst = new CostEstimator(_queryName, copy._schema, copy._pq, copy._compCost, copy._parAssigner); copy._queryBuilder = (QueryBuilder) DeepCopy.copy(_queryBuilder); return copy; } /* * adding a DataSourceComponent to the list of components */ @Override public DataSourceComponent generateDataSource(String tableCompName) { final DataSourceComponent source = createAddDataSource(tableCompName); createCompCost(source); if (_costEst != null) _costEst.setInputParams(source); // operators addSelectOperator(source); addProjectOperator(source); // For single-dataSource plans (such as TPCH6) NameSelectItemsVisitor nsiv = null; if (ParserUtil.isFinalComponent(source, _pq)) { // final component in terms of joins nsiv = getFinalSelectVisitor(source); attachSelectClauseOnLastJoin(source, nsiv); } if (_costEst != null) _costEst.setOutputParamsAndPar(source); // we have to create newComponent after processing statistics of the // joinComponent if (ParserUtil.isFinalComponent(source, _pq)) generateOperatorComp(source, nsiv); return source; } /* * Join between two components List<Expression> is a set of join conditions * between two components. */ @Override public EquiJoinComponent generateEquiJoin(Component left, Component right) { final EquiJoinComponent joinComponent = createAndAddEquiJoin(left, right); // compute join condition final List<Expression> joinCondition = ParserUtil.getJoinCondition(_pq, left, right); if (joinCondition == null) throw new RuntimeException( "There is no join conditition between components " + left.getName() + " and " + right.getName()); // set hashes for two parents, has to be before createCompCost addJoinHash(left, joinCondition); addJoinHash(right, joinCondition); createCompCost(joinComponent); if (_costEst != null) _costEst.setInputParams(joinComponent, joinCondition); // operators addSelectOperator(joinComponent); // TODO when single last component: decomment when NSIV.visit(Column) is // fixed // - issue in TPCH9 // if(!ParserUtil.isFinalJoin(joinComponent, _pq)){ addProjectOperator(joinComponent); // assume no operators between projection and final aggregation // final aggregation is able to do projection in GroupByProjection // } NameSelectItemsVisitor nsiv = null; if (ParserUtil.isFinalComponent(joinComponent, _pq)) { // final component in terms of joins nsiv = getFinalSelectVisitor(joinComponent); attachSelectClauseOnLastJoin(joinComponent, nsiv); } if (_costEst != null) _costEst.setOutputParamsAndPar(joinComponent); // we have to create newComponent after processing statistics of the // joinComponent if (ParserUtil.isFinalComponent(joinComponent, _pq)) generateOperatorComp(joinComponent, nsiv); return joinComponent; } private OperatorComponent generateOperatorComp(Component lastComponent, NameSelectItemsVisitor selectVisitor) { final List<AggregateOperator> aggOps = selectVisitor.getAggOps(); if (aggOps.size() != 1) return null; OperatorComponent opComp = null; // projectOperator is already set to firstAgg in attachLastJoin method // if we decide to do construct new NSIV, then projectOperator has to be // set as well final AggregateOperator firstAgg = aggOps.get(0); // Setting new level of components is only necessary for distinct in // aggregates if (firstAgg.getDistinct() != null) { opComp = createAndAddOperatorComp(lastComponent); createCompCost(opComp); if (_costEst != null) _costEst.setInputParams(opComp); // we can use the same firstAgg, because we no tupleSchema change // occurred after LAST_COMPONENT:FinalAgg and NEW_COMPONENT:FinalAgg // Namely, NEW_COMPONENT has only FinalAgg operator opComp.add(firstAgg); if (_costEst != null) _costEst.setOutputParamsAndPar(opComp); } return opComp; } public Map<String, CostParams> getCompCost() { return _compCost; } public CostParams getCostParameters(String componentName) { return _compCost.get(componentName); } /************************************************************************************* * SELECT clause - Final aggregation *************************************************************************************/ private NameSelectItemsVisitor getFinalSelectVisitor(Component lastComponent) { final TupleSchema tupleSchema = _compCost.get(lastComponent.getName()) .getSchema(); final NameSelectItemsVisitor selectVisitor = new NameSelectItemsVisitor( tupleSchema, _map, lastComponent); for (final SelectItem elem : _pq.getSelectItems()) elem.accept(selectVisitor); return selectVisitor; } /* * get a list of WhereExpressions (connected by OR) belonging to source For * example (N1.NATION = FRANCE AND N2.NATION = GERMANY) OR (N1.NATION = * GERMANY AND N2.NATION = FRANCE) returns N1.NATION = FRANCE OR N1.NATION = * GERMANY */ public Expression getMineSubset(DataSourceComponent source, Expression expr) { final List<String> compNames = ParserUtil .getCompNamesFromColumns(ParserUtil.getJSQLColumns(expr)); boolean mine = true; for (final String compName : compNames) if (!compName.equals(source.getName())) { mine = false; break; } if (mine) return expr; Expression result = null; if (expr instanceof OrExpression || expr instanceof AndExpression) { final BinaryExpression be = (BinaryExpression) expr; result = appendOr(result, getMineSubset(source, be.getLeftExpression())); result = appendOr(result, getMineSubset(source, be.getRightExpression())); } else if (expr instanceof Parenthesis) { final Parenthesis prnth = (Parenthesis) expr; result = getMineSubset(source, prnth.getExpression()); } // whatever is not fully recognized (all the compNames = source), and is // not And or Or, returns null return result; } @Override public QueryBuilder getQueryBuilder() { return _queryBuilder; } @Override public List<Component> getSubPlans() { throw new RuntimeException("Should not be invoked for lefty plans!"); } private void initWhereClause(Expression whereExpr) { if (whereExpr == null) return; final AndVisitor andVisitor = new AndVisitor(); whereExpr.accept(andVisitor); final List<Expression> atomicAndExprs = andVisitor.getAtomicExprs(); final List<OrExpression> orExprs = andVisitor.getOrExprs(); /* * we have to group atomicExpr (conjunctive terms) by ComponentName * there might be multiple columns from a single DataSourceComponent, * and we want to group them conditions such as R.A + R.B = 10 are * possible not possible to have ColumnReference from multiple tables, * because than it would be join condition */ ParserUtil.addAndExprsToComps(_compNamesAndExprs, atomicAndExprs); ParserUtil.addOrExprsToComps(_compNamesOrExprs, orExprs); } private void processProjectCost(Component component, TupleSchema outputTupleSchema) { // only schema is changed final String compName = component.getName(); _compCost.get(compName).setSchema(outputTupleSchema); } /* * whereCompExpression is the part of WHERE clause which refers to * affectedComponent This is the only method in this class where * IndexWhereVisitor is actually instantiated and invoked SelectOperator is * able to deal with ValueExpressions (and not only with ColumnReferences), * but here we recognize JSQL expressions here which can be built of * inputTupleSchema (constants included) */ private void processWhereForComponent(Component affectedComponent, Expression whereCompExpr) { if (whereCompExpr != null) { // first get the current schema of the component final TupleSchema tupleSchema = _compCost.get( affectedComponent.getName()).getSchema(); final NameWhereVisitor whereVisitor = new NameWhereVisitor( tupleSchema, affectedComponent); whereCompExpr.accept(whereVisitor); attachWhereClause(affectedComponent, whereVisitor.getSelectOperator()); } } }