/* * Copyright (c) 2011-2015 EPFL DATA Laboratory * Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE) * * All rights reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package ch.epfl.data.squall.api.sql.optimizers.index; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Set; import net.sf.jsqlparser.expression.Expression; import net.sf.jsqlparser.expression.operators.conditional.OrExpression; import net.sf.jsqlparser.schema.Table; import net.sf.jsqlparser.statement.select.SelectItem; import org.apache.log4j.Logger; import ch.epfl.data.squall.api.sql.optimizers.Optimizer; import ch.epfl.data.squall.api.sql.schema.Schema; import ch.epfl.data.squall.api.sql.util.HierarchyExtractor; import ch.epfl.data.squall.api.sql.util.JoinTablesExprs; import ch.epfl.data.squall.api.sql.util.ParserUtil; import ch.epfl.data.squall.api.sql.visitors.jsql.AndVisitor; import ch.epfl.data.squall.api.sql.visitors.jsql.SQLVisitor; import ch.epfl.data.squall.api.sql.visitors.squall.IndexSelectItemsVisitor; import ch.epfl.data.squall.api.sql.visitors.squall.IndexWhereVisitor; import ch.epfl.data.squall.components.Component; import ch.epfl.data.squall.components.DataSourceComponent; import ch.epfl.data.squall.components.OperatorComponent; import ch.epfl.data.squall.expressions.ValueExpression; import ch.epfl.data.squall.operators.AggregateOperator; import ch.epfl.data.squall.operators.ProjectOperator; import ch.epfl.data.squall.operators.SelectOperator; import ch.epfl.data.squall.query_plans.QueryBuilder; import ch.epfl.data.squall.utilities.DeepCopy; /* * It generates a single query plan, adds a final aggregation, * adds selections (WHERE clause) and do early projections (all unused columns are projected away) * * Does not take relation cardinalities into account. * Assume no projections before the aggregation, so that EarlyProjection may impose some projections. * Aggregation only on the last level. */ public class IndexRuleOptimizer implements Optimizer { private static Logger LOG = Logger.getLogger(IndexRuleOptimizer.class); private final Schema _schema; private final SQLVisitor _pq; private IndexCompGen _cg; private final IndexTranslator _it; private final Map _map; // map is updates in place public IndexRuleOptimizer(Map map) { _map = map; _pq = ParserUtil.parseQuery(map); _schema = new Schema(map); _it = new IndexTranslator(_schema, _pq.getTan()); } private void attachSelectClause(Component lastComponent, List<AggregateOperator> aggOps, List<ValueExpression> groupByVEs) { if (aggOps.isEmpty()) { final ProjectOperator project = new ProjectOperator(groupByVEs); lastComponent.add(project); } else if (aggOps.size() == 1) { // all the others are group by final AggregateOperator firstAgg = aggOps.get(0); if (ParserUtil.isAllColumnRefs(groupByVEs)) { // plain fields in select final List<Integer> groupByColumns = ParserUtil .extractColumnIndexes(groupByVEs); firstAgg.setGroupByColumns(groupByColumns); // Setting new level of components is necessary for correctness // only for distinct in aggregates // but it's certainly pleasant to have the final result grouped // on nodes by group by columns. final boolean newLevel = !(_it.isHashedBy(lastComponent, groupByColumns)); if (newLevel) { lastComponent.setOutputPartKey(groupByColumns); OperatorComponent oc = new OperatorComponent(lastComponent, ParserUtil.generateUniqueName("OPERATOR")) .add(firstAgg); _cg.getQueryBuilder().add(oc); } else lastComponent.add(firstAgg); } else { // Sometimes groupByVEs contains other functions, so we have to // use projections instead of simple groupBy // always new level // WARNING: groupByVEs cannot be used on two places: that's why // we do deep copy final ProjectOperator groupByProj = new ProjectOperator( (List<ValueExpression>) DeepCopy.copy(groupByVEs)); if (!(groupByProj.getExpressions() == null || groupByProj .getExpressions().isEmpty())) firstAgg.setGroupByProjection(groupByProj); // current component lastComponent .setHashExpressions((List<ValueExpression>) DeepCopy .copy(groupByVEs)); OperatorComponent oc = new OperatorComponent(lastComponent, ParserUtil.generateUniqueName("OPERATOR")) .add(firstAgg); _cg.getQueryBuilder().add(oc); } } else throw new RuntimeException( "For now only one aggregate function supported!"); } private void attachWhereClause(Component affectedComponent, SelectOperator select) { affectedComponent.add(select); } private void earlyProjection(QueryBuilder queryPlan) { final EarlyProjection early = new EarlyProjection(_schema, _pq.getTan()); early.operate(queryPlan); } @Override public QueryBuilder generate() { _cg = generateTableJoins(); LOG.info("Before WHERE, SELECT and EarlyProjection: "); LOG.info(ParserUtil.toString(_cg.getQueryBuilder())); // selectItems might add OperatorComponent, this is why it goes first final int queryType = processSelectClause(_pq.getSelectItems()); processWhereClause(_pq.getWhereExpr()); if (queryType == IndexSelectItemsVisitor.NON_AGG) LOG.info("Early projection will not be performed since the query is NON_AGG type (contains projections)!"); else earlyProjection(_cg.getQueryBuilder()); ParserUtil.orderOperators(_cg.getQueryBuilder()); final RuleParallelismAssigner parAssign = new RuleParallelismAssigner( _cg.getQueryBuilder(), _pq.getTan(), _schema, _map); parAssign.assignPar(); return _cg.getQueryBuilder(); } private IndexCompGen generateTableJoins() { final List<Table> tableList = _pq.getTableList(); final TableSelector ts = new TableSelector(tableList, _schema, _pq.getTan()); final JoinTablesExprs jte = _pq.getJte(); final IndexCompGen cg = new IndexCompGen(_schema, _pq, _map); // first phase // make high level pairs final List<String> skippedBestTableNames = new ArrayList<String>(); final int numTables = tableList.size(); if (numTables == 1) { cg.generateDataSource(ParserUtil.getComponentName(tableList.get(0))); return cg; } else { final int highLevelPairs = getNumHighLevelPairs(numTables); for (int i = 0; i < highLevelPairs; i++) { final String bestTableName = ts.removeBestTableName(); // enumerates all the tables it has joinCondition to join with final List<String> joinedWith = jte .getJoinedWith(bestTableName); // dependent on previously used tables, so might return null final String bestPairedTable = ts .removeBestPairedTableName(joinedWith); if (bestPairedTable != null) { // we found a pair final DataSourceComponent bestSource = cg .generateDataSource(bestTableName); final DataSourceComponent bestPairedSource = cg .generateDataSource(bestPairedTable); cg.generateEquiJoin(bestSource, bestPairedSource); } else // we have to keep this table for latter processing skippedBestTableNames.add(bestTableName); } } // second phase // join (2-way join components) with unused tables, until there is no // more tables List<Component> subPlans = cg.getSubPlans(); /* * Why outer loop is unpairedTables, and inner is subPlans: 1) We first * take care of small tables 2) In general, there is smaller number of * unpaired tables than tables 3) Number of ancestors always grow, while * number of joinedTables is a constant Bad side is updating of * subPlanAncestors, but than has to be done anyway LinkedHashMap * guarantees in order iterator */ List<String> unpairedTableNames = ts.removeAll(); unpairedTableNames.addAll(skippedBestTableNames); while (!unpairedTableNames.isEmpty()) { final List<String> stillUnprocessed = new ArrayList<String>(); // we will try to join all the tables, but some of them cannot be // joined before some other tables // that's why we have while outer loop for (final String unpaired : unpairedTableNames) { boolean processed = false; for (final Component currentComp : subPlans) if (_pq.getJte().joinExistsBetween(unpaired, ParserUtil.getSourceNameList(currentComp))) { final DataSourceComponent unpairedSource = cg .generateDataSource(unpaired); cg.generateEquiJoin(currentComp, unpairedSource); processed = true; break; } if (!processed) stillUnprocessed.add(unpaired); } unpairedTableNames = stillUnprocessed; } // third phase: joining Components until there is a single component subPlans = cg.getSubPlans(); while (subPlans.size() > 1) { // this is joining of components having approximately the same // number of ancestors - the same level final Component firstComp = subPlans.get(0); final List<String> firstAncestors = ParserUtil .getSourceNameList(firstComp); for (int i = 1; i < subPlans.size(); i++) { final Component otherComp = subPlans.get(i); final List<String> otherAncestors = ParserUtil .getSourceNameList(otherComp); if (_pq.getJte().joinExistsBetween(firstAncestors, otherAncestors)) { cg.generateEquiJoin(firstComp, otherComp); break; } } // until this point, we change subPlans by locally remove operations // when going to the next level, whesh look over subPlans is taken subPlans = cg.getSubPlans(); } return cg; } private int getNumHighLevelPairs(int numTables) { int highLevelPairs = 0; if (numTables == 2) highLevelPairs = 1; else if (numTables > 2) highLevelPairs = (numTables % 2 == 0 ? numTables / 2 - 1 : numTables / 2); return highLevelPairs; } /* * this method returns a list of <ComponentName, whereCompExpression> * * @whereCompExpression part of JSQL expression which relates to the * corresponding Component */ private Map<String, Expression> getWhereForComponents(Expression whereExpr) { final AndVisitor andVisitor = new AndVisitor(); whereExpr.accept(andVisitor); final List<Expression> atomicExprs = andVisitor.getAtomicExprs(); final List<OrExpression> orExprs = andVisitor.getOrExprs(); /* * we have to group atomicExpr (conjuctive terms) by ComponentName there * might be mutliple columns from a single DataSourceComponent, and we * want to group them conditions such as R.A + R.B = 10 are possible not * possible to have ColumnReference from multiple tables, because than * it would be join condition */ final Map<String, Expression> collocatedExprs = new HashMap<String, Expression>(); ParserUtil.addAndExprsToComps(collocatedExprs, atomicExprs); final Map<Set<String>, Expression> collocatedOrs = new HashMap<Set<String>, Expression>(); ParserUtil.addOrExprsToComps(collocatedOrs, orExprs); for (final Map.Entry<Set<String>, Expression> orEntry : collocatedOrs .entrySet()) { final List<String> compNames = new ArrayList<String>( orEntry.getKey()); final List<Component> compList = ParserUtil.getComponents( compNames, _cg); final Component affectedComponent = HierarchyExtractor .getLCM(compList); final Expression orExpr = orEntry.getValue(); ParserUtil.addAndExprToComp(collocatedExprs, orExpr, affectedComponent.getName()); } return collocatedExprs; } /************************************************************************************* * SELECT clause - Final Aggregation *************************************************************************************/ private int processSelectClause(List<SelectItem> selectItems) { final IndexSelectItemsVisitor selectVisitor = new IndexSelectItemsVisitor( _cg.getQueryBuilder(), _schema, _pq.getTan(), _map); for (final SelectItem elem : selectItems) elem.accept(selectVisitor); final List<AggregateOperator> aggOps = selectVisitor.getAggOps(); final List<ValueExpression> groupByVEs = selectVisitor.getGroupByVEs(); final Component affectedComponent = _cg.getQueryBuilder() .getLastComponent(); attachSelectClause(affectedComponent, aggOps, groupByVEs); return (aggOps.isEmpty() ? IndexSelectItemsVisitor.NON_AGG : IndexSelectItemsVisitor.AGG); } /************************************************************************************* * WHERE clause - SelectOperator *************************************************************************************/ private void processWhereClause(Expression whereExpr) { if (whereExpr == null) return; // assinging JSQL expressions to Components final Map<String, Expression> whereCompExprPairs = getWhereForComponents(whereExpr); // Each component process its own part of JSQL whereExpression for (final Map.Entry<String, Expression> whereCompExprPair : whereCompExprPairs .entrySet()) { final Component affectedComponent = _cg.getQueryBuilder() .getComponent(whereCompExprPair.getKey()); final Expression whereCompExpr = whereCompExprPair.getValue(); processWhereForComponent(affectedComponent, whereCompExpr); } } /* * whereCompExpression is the part of WHERE clause which refers to * affectedComponent This is the only method in this class where * IndexWhereVisitor is actually instantiated and invoked */ private void processWhereForComponent(Component affectedComponent, Expression whereCompExpression) { final IndexWhereVisitor whereVisitor = new IndexWhereVisitor( affectedComponent, _schema, _pq.getTan()); whereCompExpression.accept(whereVisitor); attachWhereClause(affectedComponent, whereVisitor.getSelectOperator()); } }