/*
* Copyright (c) 2011-2015 EPFL DATA Laboratory
* Copyright (c) 2014-2015 The Squall Collaboration (see NOTICE)
*
* All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package ch.epfl.data.squall.api.sql.optimizers.index;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.operators.conditional.OrExpression;
import net.sf.jsqlparser.schema.Table;
import net.sf.jsqlparser.statement.select.SelectItem;
import org.apache.log4j.Logger;
import ch.epfl.data.squall.api.sql.optimizers.Optimizer;
import ch.epfl.data.squall.api.sql.schema.Schema;
import ch.epfl.data.squall.api.sql.util.HierarchyExtractor;
import ch.epfl.data.squall.api.sql.util.JoinTablesExprs;
import ch.epfl.data.squall.api.sql.util.ParserUtil;
import ch.epfl.data.squall.api.sql.visitors.jsql.AndVisitor;
import ch.epfl.data.squall.api.sql.visitors.jsql.SQLVisitor;
import ch.epfl.data.squall.api.sql.visitors.squall.IndexSelectItemsVisitor;
import ch.epfl.data.squall.api.sql.visitors.squall.IndexWhereVisitor;
import ch.epfl.data.squall.components.Component;
import ch.epfl.data.squall.components.DataSourceComponent;
import ch.epfl.data.squall.components.OperatorComponent;
import ch.epfl.data.squall.expressions.ValueExpression;
import ch.epfl.data.squall.operators.AggregateOperator;
import ch.epfl.data.squall.operators.ProjectOperator;
import ch.epfl.data.squall.operators.SelectOperator;
import ch.epfl.data.squall.query_plans.QueryBuilder;
import ch.epfl.data.squall.utilities.DeepCopy;
/*
* It generates a single query plan, adds a final aggregation,
* adds selections (WHERE clause) and do early projections (all unused columns are projected away)
*
* Does not take relation cardinalities into account.
* Assume no projections before the aggregation, so that EarlyProjection may impose some projections.
* Aggregation only on the last level.
*/
public class IndexRuleOptimizer implements Optimizer {
private static Logger LOG = Logger.getLogger(IndexRuleOptimizer.class);
private final Schema _schema;
private final SQLVisitor _pq;
private IndexCompGen _cg;
private final IndexTranslator _it;
private final Map _map; // map is updates in place
public IndexRuleOptimizer(Map map) {
_map = map;
_pq = ParserUtil.parseQuery(map);
_schema = new Schema(map);
_it = new IndexTranslator(_schema, _pq.getTan());
}
private void attachSelectClause(Component lastComponent,
List<AggregateOperator> aggOps, List<ValueExpression> groupByVEs) {
if (aggOps.isEmpty()) {
final ProjectOperator project = new ProjectOperator(groupByVEs);
lastComponent.add(project);
} else if (aggOps.size() == 1) {
// all the others are group by
final AggregateOperator firstAgg = aggOps.get(0);
if (ParserUtil.isAllColumnRefs(groupByVEs)) {
// plain fields in select
final List<Integer> groupByColumns = ParserUtil
.extractColumnIndexes(groupByVEs);
firstAgg.setGroupByColumns(groupByColumns);
// Setting new level of components is necessary for correctness
// only for distinct in aggregates
// but it's certainly pleasant to have the final result grouped
// on nodes by group by columns.
final boolean newLevel = !(_it.isHashedBy(lastComponent,
groupByColumns));
if (newLevel) {
lastComponent.setOutputPartKey(groupByColumns);
OperatorComponent oc = new OperatorComponent(lastComponent,
ParserUtil.generateUniqueName("OPERATOR"))
.add(firstAgg);
_cg.getQueryBuilder().add(oc);
} else
lastComponent.add(firstAgg);
} else {
// Sometimes groupByVEs contains other functions, so we have to
// use projections instead of simple groupBy
// always new level
// WARNING: groupByVEs cannot be used on two places: that's why
// we do deep copy
final ProjectOperator groupByProj = new ProjectOperator(
(List<ValueExpression>) DeepCopy.copy(groupByVEs));
if (!(groupByProj.getExpressions() == null || groupByProj
.getExpressions().isEmpty()))
firstAgg.setGroupByProjection(groupByProj);
// current component
lastComponent
.setHashExpressions((List<ValueExpression>) DeepCopy
.copy(groupByVEs));
OperatorComponent oc = new OperatorComponent(lastComponent,
ParserUtil.generateUniqueName("OPERATOR"))
.add(firstAgg);
_cg.getQueryBuilder().add(oc);
}
} else
throw new RuntimeException(
"For now only one aggregate function supported!");
}
private void attachWhereClause(Component affectedComponent,
SelectOperator select) {
affectedComponent.add(select);
}
private void earlyProjection(QueryBuilder queryPlan) {
final EarlyProjection early = new EarlyProjection(_schema, _pq.getTan());
early.operate(queryPlan);
}
@Override
public QueryBuilder generate() {
_cg = generateTableJoins();
LOG.info("Before WHERE, SELECT and EarlyProjection: ");
LOG.info(ParserUtil.toString(_cg.getQueryBuilder()));
// selectItems might add OperatorComponent, this is why it goes first
final int queryType = processSelectClause(_pq.getSelectItems());
processWhereClause(_pq.getWhereExpr());
if (queryType == IndexSelectItemsVisitor.NON_AGG)
LOG.info("Early projection will not be performed since the query is NON_AGG type (contains projections)!");
else
earlyProjection(_cg.getQueryBuilder());
ParserUtil.orderOperators(_cg.getQueryBuilder());
final RuleParallelismAssigner parAssign = new RuleParallelismAssigner(
_cg.getQueryBuilder(), _pq.getTan(), _schema, _map);
parAssign.assignPar();
return _cg.getQueryBuilder();
}
private IndexCompGen generateTableJoins() {
final List<Table> tableList = _pq.getTableList();
final TableSelector ts = new TableSelector(tableList, _schema,
_pq.getTan());
final JoinTablesExprs jte = _pq.getJte();
final IndexCompGen cg = new IndexCompGen(_schema, _pq, _map);
// first phase
// make high level pairs
final List<String> skippedBestTableNames = new ArrayList<String>();
final int numTables = tableList.size();
if (numTables == 1) {
cg.generateDataSource(ParserUtil.getComponentName(tableList.get(0)));
return cg;
} else {
final int highLevelPairs = getNumHighLevelPairs(numTables);
for (int i = 0; i < highLevelPairs; i++) {
final String bestTableName = ts.removeBestTableName();
// enumerates all the tables it has joinCondition to join with
final List<String> joinedWith = jte
.getJoinedWith(bestTableName);
// dependent on previously used tables, so might return null
final String bestPairedTable = ts
.removeBestPairedTableName(joinedWith);
if (bestPairedTable != null) {
// we found a pair
final DataSourceComponent bestSource = cg
.generateDataSource(bestTableName);
final DataSourceComponent bestPairedSource = cg
.generateDataSource(bestPairedTable);
cg.generateEquiJoin(bestSource, bestPairedSource);
} else
// we have to keep this table for latter processing
skippedBestTableNames.add(bestTableName);
}
}
// second phase
// join (2-way join components) with unused tables, until there is no
// more tables
List<Component> subPlans = cg.getSubPlans();
/*
* Why outer loop is unpairedTables, and inner is subPlans: 1) We first
* take care of small tables 2) In general, there is smaller number of
* unpaired tables than tables 3) Number of ancestors always grow, while
* number of joinedTables is a constant Bad side is updating of
* subPlanAncestors, but than has to be done anyway LinkedHashMap
* guarantees in order iterator
*/
List<String> unpairedTableNames = ts.removeAll();
unpairedTableNames.addAll(skippedBestTableNames);
while (!unpairedTableNames.isEmpty()) {
final List<String> stillUnprocessed = new ArrayList<String>();
// we will try to join all the tables, but some of them cannot be
// joined before some other tables
// that's why we have while outer loop
for (final String unpaired : unpairedTableNames) {
boolean processed = false;
for (final Component currentComp : subPlans)
if (_pq.getJte().joinExistsBetween(unpaired,
ParserUtil.getSourceNameList(currentComp))) {
final DataSourceComponent unpairedSource = cg
.generateDataSource(unpaired);
cg.generateEquiJoin(currentComp, unpairedSource);
processed = true;
break;
}
if (!processed)
stillUnprocessed.add(unpaired);
}
unpairedTableNames = stillUnprocessed;
}
// third phase: joining Components until there is a single component
subPlans = cg.getSubPlans();
while (subPlans.size() > 1) {
// this is joining of components having approximately the same
// number of ancestors - the same level
final Component firstComp = subPlans.get(0);
final List<String> firstAncestors = ParserUtil
.getSourceNameList(firstComp);
for (int i = 1; i < subPlans.size(); i++) {
final Component otherComp = subPlans.get(i);
final List<String> otherAncestors = ParserUtil
.getSourceNameList(otherComp);
if (_pq.getJte().joinExistsBetween(firstAncestors,
otherAncestors)) {
cg.generateEquiJoin(firstComp, otherComp);
break;
}
}
// until this point, we change subPlans by locally remove operations
// when going to the next level, whesh look over subPlans is taken
subPlans = cg.getSubPlans();
}
return cg;
}
private int getNumHighLevelPairs(int numTables) {
int highLevelPairs = 0;
if (numTables == 2)
highLevelPairs = 1;
else if (numTables > 2)
highLevelPairs = (numTables % 2 == 0 ? numTables / 2 - 1
: numTables / 2);
return highLevelPairs;
}
/*
* this method returns a list of <ComponentName, whereCompExpression>
*
* @whereCompExpression part of JSQL expression which relates to the
* corresponding Component
*/
private Map<String, Expression> getWhereForComponents(Expression whereExpr) {
final AndVisitor andVisitor = new AndVisitor();
whereExpr.accept(andVisitor);
final List<Expression> atomicExprs = andVisitor.getAtomicExprs();
final List<OrExpression> orExprs = andVisitor.getOrExprs();
/*
* we have to group atomicExpr (conjuctive terms) by ComponentName there
* might be mutliple columns from a single DataSourceComponent, and we
* want to group them conditions such as R.A + R.B = 10 are possible not
* possible to have ColumnReference from multiple tables, because than
* it would be join condition
*/
final Map<String, Expression> collocatedExprs = new HashMap<String, Expression>();
ParserUtil.addAndExprsToComps(collocatedExprs, atomicExprs);
final Map<Set<String>, Expression> collocatedOrs = new HashMap<Set<String>, Expression>();
ParserUtil.addOrExprsToComps(collocatedOrs, orExprs);
for (final Map.Entry<Set<String>, Expression> orEntry : collocatedOrs
.entrySet()) {
final List<String> compNames = new ArrayList<String>(
orEntry.getKey());
final List<Component> compList = ParserUtil.getComponents(
compNames, _cg);
final Component affectedComponent = HierarchyExtractor
.getLCM(compList);
final Expression orExpr = orEntry.getValue();
ParserUtil.addAndExprToComp(collocatedExprs, orExpr,
affectedComponent.getName());
}
return collocatedExprs;
}
/*************************************************************************************
* SELECT clause - Final Aggregation
*************************************************************************************/
private int processSelectClause(List<SelectItem> selectItems) {
final IndexSelectItemsVisitor selectVisitor = new IndexSelectItemsVisitor(
_cg.getQueryBuilder(), _schema, _pq.getTan(), _map);
for (final SelectItem elem : selectItems)
elem.accept(selectVisitor);
final List<AggregateOperator> aggOps = selectVisitor.getAggOps();
final List<ValueExpression> groupByVEs = selectVisitor.getGroupByVEs();
final Component affectedComponent = _cg.getQueryBuilder()
.getLastComponent();
attachSelectClause(affectedComponent, aggOps, groupByVEs);
return (aggOps.isEmpty() ? IndexSelectItemsVisitor.NON_AGG
: IndexSelectItemsVisitor.AGG);
}
/*************************************************************************************
* WHERE clause - SelectOperator
*************************************************************************************/
private void processWhereClause(Expression whereExpr) {
if (whereExpr == null)
return;
// assinging JSQL expressions to Components
final Map<String, Expression> whereCompExprPairs = getWhereForComponents(whereExpr);
// Each component process its own part of JSQL whereExpression
for (final Map.Entry<String, Expression> whereCompExprPair : whereCompExprPairs
.entrySet()) {
final Component affectedComponent = _cg.getQueryBuilder()
.getComponent(whereCompExprPair.getKey());
final Expression whereCompExpr = whereCompExprPair.getValue();
processWhereForComponent(affectedComponent, whereCompExpr);
}
}
/*
* whereCompExpression is the part of WHERE clause which refers to
* affectedComponent This is the only method in this class where
* IndexWhereVisitor is actually instantiated and invoked
*/
private void processWhereForComponent(Component affectedComponent,
Expression whereCompExpression) {
final IndexWhereVisitor whereVisitor = new IndexWhereVisitor(
affectedComponent, _schema, _pq.getTan());
whereCompExpression.accept(whereVisitor);
attachWhereClause(affectedComponent, whereVisitor.getSelectOperator());
}
}